summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--CONTRIBUTING.md13
-rw-r--r--WORKSPACE17
-rw-r--r--debian/BUILD3
-rw-r--r--images/syzkaller/Dockerfile2
-rw-r--r--nogo.yaml32
-rw-r--r--pkg/abi/linux/errno/errno.go25
-rw-r--r--pkg/abi/linux/msgqueue.go2
-rw-r--r--pkg/amutex/BUILD2
-rw-r--r--pkg/amutex/amutex.go6
-rw-r--r--pkg/atomicbitops/aligned_32bit_unsafe.go12
-rw-r--r--pkg/buffer/view.go14
-rw-r--r--pkg/errors/linuxerr/BUILD6
-rw-r--r--pkg/errors/linuxerr/internal.go120
-rw-r--r--pkg/errors/linuxerr/linuxerr.go8
-rw-r--r--pkg/errors/linuxerr/linuxerr_test.go90
-rw-r--r--pkg/eventchannel/BUILD1
-rw-r--r--pkg/eventchannel/event_any.go5
-rw-r--r--pkg/eventchannel/processor.go130
-rw-r--r--pkg/merkletree/merkletree.go91
-rw-r--r--pkg/p9/file.go16
-rw-r--r--pkg/p9/handlers.go2
-rw-r--r--pkg/safecopy/BUILD3
-rw-r--r--pkg/safecopy/safecopy.go9
-rw-r--r--pkg/safemem/io.go52
-rw-r--r--pkg/sentry/control/BUILD12
-rw-r--r--pkg/sentry/control/control.proto40
-rw-r--r--pkg/sentry/control/events.go65
-rw-r--r--pkg/sentry/control/usage.go183
-rw-r--r--pkg/sentry/devices/memdev/BUILD2
-rw-r--r--pkg/sentry/devices/memdev/full.go6
-rw-r--r--pkg/sentry/devices/quotedev/BUILD2
-rw-r--r--pkg/sentry/devices/quotedev/quotedev.go4
-rw-r--r--pkg/sentry/devices/ttydev/BUILD2
-rw-r--r--pkg/sentry/devices/ttydev/ttydev.go4
-rw-r--r--pkg/sentry/fs/BUILD1
-rw-r--r--pkg/sentry/fs/copy_up.go21
-rw-r--r--pkg/sentry/fs/dev/BUILD1
-rw-r--r--pkg/sentry/fs/dev/full.go4
-rw-r--r--pkg/sentry/fs/dirent.go5
-rw-r--r--pkg/sentry/fs/fdpipe/BUILD2
-rw-r--r--pkg/sentry/fs/fdpipe/pipe.go7
-rw-r--r--pkg/sentry/fs/fdpipe/pipe_opener.go20
-rw-r--r--pkg/sentry/fs/fdpipe/pipe_opener_test.go15
-rw-r--r--pkg/sentry/fs/fdpipe/pipe_test.go11
-rw-r--r--pkg/sentry/fs/file.go50
-rw-r--r--pkg/sentry/fs/file_operations.go2
-rw-r--r--pkg/sentry/fs/file_overlay.go3
-rw-r--r--pkg/sentry/fs/fsutil/BUILD1
-rw-r--r--pkg/sentry/fs/fsutil/file.go9
-rw-r--r--pkg/sentry/fs/fsutil/inode.go11
-rw-r--r--pkg/sentry/fs/gofer/BUILD4
-rw-r--r--pkg/sentry/fs/gofer/file.go6
-rw-r--r--pkg/sentry/fs/gofer/gofer_test.go8
-rw-r--r--pkg/sentry/fs/gofer/inode.go8
-rw-r--r--pkg/sentry/fs/gofer/path.go7
-rw-r--r--pkg/sentry/fs/host/BUILD1
-rw-r--r--pkg/sentry/fs/host/file.go5
-rw-r--r--pkg/sentry/fs/host/inode.go5
-rw-r--r--pkg/sentry/fs/host/tty.go7
-rw-r--r--pkg/sentry/fs/host/util.go2
-rw-r--r--pkg/sentry/fs/inode.go3
-rw-r--r--pkg/sentry/fs/inode_operations.go2
-rw-r--r--pkg/sentry/fs/inode_overlay.go5
-rw-r--r--pkg/sentry/fs/inotify.go7
-rw-r--r--pkg/sentry/fs/proc/BUILD1
-rw-r--r--pkg/sentry/fs/proc/fds.go6
-rw-r--r--pkg/sentry/fs/proc/proc.go5
-rw-r--r--pkg/sentry/fs/proc/task.go11
-rw-r--r--pkg/sentry/fs/ramfs/BUILD1
-rw-r--r--pkg/sentry/fs/ramfs/dir.go5
-rw-r--r--pkg/sentry/fs/splice.go17
-rw-r--r--pkg/sentry/fs/timerfd/BUILD1
-rw-r--r--pkg/sentry/fs/timerfd/timerfd.go3
-rw-r--r--pkg/sentry/fs/tty/BUILD1
-rw-r--r--pkg/sentry/fs/tty/dir.go11
-rw-r--r--pkg/sentry/fs/tty/line_discipline.go10
-rw-r--r--pkg/sentry/fs/tty/queue.go6
-rw-r--r--pkg/sentry/fs/user/BUILD1
-rw-r--r--pkg/sentry/fs/user/path.go7
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/BUILD2
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/base.go9
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/cgroupfs.go181
-rw-r--r--pkg/sentry/fsimpl/devpts/BUILD1
-rw-r--r--pkg/sentry/fsimpl/devpts/devpts.go7
-rw-r--r--pkg/sentry/fsimpl/devpts/line_discipline.go10
-rw-r--r--pkg/sentry/fsimpl/devpts/queue.go6
-rw-r--r--pkg/sentry/fsimpl/eventfd/BUILD2
-rw-r--r--pkg/sentry/fsimpl/eventfd/eventfd.go10
-rw-r--r--pkg/sentry/fsimpl/ext/BUILD0
-rw-r--r--pkg/sentry/fsimpl/fuse/BUILD2
-rw-r--r--pkg/sentry/fsimpl/fuse/dev.go15
-rw-r--r--pkg/sentry/fsimpl/fuse/dev_test.go4
-rw-r--r--pkg/sentry/fsimpl/fuse/directory.go12
-rw-r--r--pkg/sentry/fsimpl/fuse/fusefs.go3
-rw-r--r--pkg/sentry/fsimpl/fuse/read_write.go3
-rw-r--r--pkg/sentry/fsimpl/fuse/regular_file.go7
-rw-r--r--pkg/sentry/fsimpl/gofer/BUILD1
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go31
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go7
-rw-r--r--pkg/sentry/fsimpl/gofer/handle.go41
-rw-r--r--pkg/sentry/fsimpl/gofer/host_named_pipe.go3
-rw-r--r--pkg/sentry/fsimpl/gofer/p9file.go4
-rw-r--r--pkg/sentry/fsimpl/gofer/save_restore.go4
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go136
-rw-r--r--pkg/sentry/fsimpl/host/BUILD1
-rw-r--r--pkg/sentry/fsimpl/host/host.go5
-rw-r--r--pkg/sentry/fsimpl/host/tty.go7
-rw-r--r--pkg/sentry/fsimpl/host/util.go2
-rw-r--r--pkg/sentry/fsimpl/kernfs/BUILD3
-rw-r--r--pkg/sentry/fsimpl/kernfs/fd_impl_util.go3
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go19
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go38
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go67
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs_test.go61
-rw-r--r--pkg/sentry/fsimpl/overlay/BUILD1
-rw-r--r--pkg/sentry/fsimpl/overlay/copy_up.go3
-rw-r--r--pkg/sentry/fsimpl/overlay/filesystem.go25
-rw-r--r--pkg/sentry/fsimpl/proc/BUILD1
-rw-r--r--pkg/sentry/fsimpl/proc/subtasks.go17
-rw-r--r--pkg/sentry/fsimpl/proc/task_fds.go16
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go3
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go6
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_files.go5
-rw-r--r--pkg/sentry/fsimpl/signalfd/BUILD2
-rw-r--r--pkg/sentry/fsimpl/signalfd/signalfd.go4
-rw-r--r--pkg/sentry/fsimpl/sys/BUILD1
-rw-r--r--pkg/sentry/fsimpl/timerfd/BUILD1
-rw-r--r--pkg/sentry/fsimpl/timerfd/timerfd.go3
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go31
-rw-r--r--pkg/sentry/fsimpl/tmpfs/pipe_test.go3
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file_test.go6
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go3
-rw-r--r--pkg/sentry/fsimpl/verity/BUILD15
-rw-r--r--pkg/sentry/fsimpl/verity/filesystem.go69
-rw-r--r--pkg/sentry/fsimpl/verity/verity.go284
-rw-r--r--pkg/sentry/kernel/BUILD1
-rw-r--r--pkg/sentry/kernel/auth/BUILD1
-rw-r--r--pkg/sentry/kernel/cgroup.go1
-rw-r--r--pkg/sentry/kernel/eventfd/BUILD2
-rw-r--r--pkg/sentry/kernel/eventfd/eventfd.go10
-rw-r--r--pkg/sentry/kernel/futex/BUILD1
-rw-r--r--pkg/sentry/kernel/futex/futex.go5
-rw-r--r--pkg/sentry/kernel/ipc/object.go35
-rw-r--r--pkg/sentry/kernel/msgqueue/msgqueue.go109
-rw-r--r--pkg/sentry/kernel/pipe/BUILD2
-rw-r--r--pkg/sentry/kernel/pipe/node.go5
-rw-r--r--pkg/sentry/kernel/pipe/node_test.go3
-rw-r--r--pkg/sentry/kernel/pipe/pipe.go9
-rw-r--r--pkg/sentry/kernel/pipe/pipe_test.go12
-rw-r--r--pkg/sentry/kernel/pipe/vfs.go5
-rw-r--r--pkg/sentry/kernel/ptrace.go17
-rw-r--r--pkg/sentry/kernel/ptrace_amd64.go4
-rw-r--r--pkg/sentry/kernel/ptrace_arm64.go4
-rw-r--r--pkg/sentry/kernel/seccomp.go4
-rw-r--r--pkg/sentry/kernel/semaphore/BUILD3
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore.go23
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore_test.go10
-rw-r--r--pkg/sentry/kernel/shm/BUILD1
-rw-r--r--pkg/sentry/kernel/shm/shm.go26
-rw-r--r--pkg/sentry/kernel/signalfd/BUILD1
-rw-r--r--pkg/sentry/kernel/signalfd/signalfd.go3
-rw-r--r--pkg/sentry/kernel/task_block.go11
-rw-r--r--pkg/sentry/kernel/task_exec.go4
-rw-r--r--pkg/sentry/kernel/task_exit.go4
-rw-r--r--pkg/sentry/kernel/task_run.go6
-rw-r--r--pkg/sentry/kernel/task_signals.go11
-rw-r--r--pkg/sentry/kernel/task_start.go5
-rw-r--r--pkg/sentry/kernel/task_syscall.go7
-rw-r--r--pkg/sentry/kernel/task_usermem.go7
-rw-r--r--pkg/sentry/kernel/thread_group.go10
-rw-r--r--pkg/sentry/loader/BUILD1
-rw-r--r--pkg/sentry/loader/elf.go57
-rw-r--r--pkg/sentry/loader/interpreter.go8
-rw-r--r--pkg/sentry/loader/loader.go9
-rw-r--r--pkg/sentry/loader/vdso.go31
-rw-r--r--pkg/sentry/memmap/BUILD1
-rw-r--r--pkg/sentry/mm/BUILD1
-rw-r--r--pkg/sentry/mm/aio_context.go18
-rw-r--r--pkg/sentry/mm/syscalls.go47
-rw-r--r--pkg/sentry/mm/vma.go9
-rw-r--r--pkg/sentry/pgalloc/BUILD1
-rw-r--r--pkg/sentry/pgalloc/pgalloc.go3
-rw-r--r--pkg/sentry/platform/kvm/bluepill_fault.go6
-rw-r--r--pkg/sentry/socket/hostinet/BUILD1
-rw-r--r--pkg/sentry/socket/hostinet/socket.go11
-rw-r--r--pkg/sentry/socket/netlink/BUILD1
-rw-r--r--pkg/sentry/socket/netlink/socket.go5
-rw-r--r--pkg/sentry/socket/netstack/BUILD1
-rw-r--r--pkg/sentry/socket/netstack/netstack.go9
-rw-r--r--pkg/sentry/socket/netstack/netstack_vfs2.go7
-rw-r--r--pkg/sentry/socket/unix/BUILD1
-rw-r--r--pkg/sentry/socket/unix/transport/connectioned.go5
-rw-r--r--pkg/sentry/socket/unix/transport/connectionless.go5
-rw-r--r--pkg/sentry/socket/unix/unix.go11
-rw-r--r--pkg/sentry/syscalls/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/error.go10
-rw-r--r--pkg/sentry/syscalls/linux/linux64.go183
-rw-r--r--pkg/sentry/syscalls/linux/sigset.go3
-rw-r--r--pkg/sentry/syscalls/linux/sys_aio.go8
-rw-r--r--pkg/sentry/syscalls/linux/sys_epoll.go4
-rw-r--r--pkg/sentry/syscalls/linux/sys_file.go38
-rw-r--r--pkg/sentry/syscalls/linux/sys_futex.go16
-rw-r--r--pkg/sentry/syscalls/linux/sys_getdents.go3
-rw-r--r--pkg/sentry/syscalls/linux/sys_lseek.go3
-rw-r--r--pkg/sentry/syscalls/linux/sys_mmap.go12
-rw-r--r--pkg/sentry/syscalls/linux/sys_msgqueue.go53
-rw-r--r--pkg/sentry/syscalls/linux/sys_poll.go14
-rw-r--r--pkg/sentry/syscalls/linux/sys_read.go25
-rw-r--r--pkg/sentry/syscalls/linux/sys_rlimit.go3
-rw-r--r--pkg/sentry/syscalls/linux/sys_rseq.go3
-rw-r--r--pkg/sentry/syscalls/linux/sys_sem.go19
-rw-r--r--pkg/sentry/syscalls/linux/sys_signal.go8
-rw-r--r--pkg/sentry/syscalls/linux/sys_socket.go45
-rw-r--r--pkg/sentry/syscalls/linux/sys_splice.go11
-rw-r--r--pkg/sentry/syscalls/linux/sys_sync.go10
-rw-r--r--pkg/sentry/syscalls/linux/sys_syslog.go3
-rw-r--r--pkg/sentry/syscalls/linux/sys_thread.go5
-rw-r--r--pkg/sentry/syscalls/linux/sys_time.go7
-rw-r--r--pkg/sentry/syscalls/linux/sys_timer.go6
-rw-r--r--pkg/sentry/syscalls/linux/sys_tls_amd64.go3
-rw-r--r--pkg/sentry/syscalls/linux/sys_tls_arm64.go4
-rw-r--r--pkg/sentry/syscalls/linux/sys_write.go25
-rw-r--r--pkg/sentry/syscalls/linux/timespec.go9
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/aio.go3
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/execve.go3
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/fd.go3
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/filesystem.go5
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/path.go3
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/poll.go17
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/read_write.go45
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/setstat.go5
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/socket.go48
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/splice.go25
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/stat.go5
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/sync.go6
-rw-r--r--pkg/sentry/syscalls/syscalls.go5
-rw-r--r--pkg/sentry/time/BUILD1
-rw-r--r--pkg/sentry/vfs/BUILD2
-rw-r--r--pkg/sentry/vfs/README.md4
-rw-r--r--pkg/sentry/vfs/epoll.go5
-rw-r--r--pkg/sentry/vfs/file_description_impl_util.go15
-rw-r--r--pkg/sentry/vfs/file_description_impl_util_test.go5
-rw-r--r--pkg/sentry/vfs/inotify.go3
-rw-r--r--pkg/sentry/vfs/lock.go10
-rw-r--r--pkg/sentry/vfs/mount.go3
-rw-r--r--pkg/sentry/vfs/pathname.go4
-rw-r--r--pkg/sentry/vfs/permissions.go5
-rw-r--r--pkg/sentry/vfs/resolving_path.go3
-rw-r--r--pkg/sentry/vfs/vfs.go19
-rw-r--r--pkg/syserr/BUILD1
-rw-r--r--pkg/syserr/syserr.go17
-rw-r--r--pkg/syserror/BUILD10
-rw-r--r--pkg/syserror/syserror.go180
-rw-r--r--pkg/tcpip/checker/checker.go2
-rw-r--r--pkg/tcpip/header/eth.go6
-rw-r--r--pkg/tcpip/header/eth_test.go4
-rw-r--r--pkg/tcpip/link/ethernet/ethernet.go23
-rw-r--r--pkg/tcpip/link/tun/BUILD1
-rw-r--r--pkg/tcpip/link/tun/device.go5
-rw-r--r--pkg/tcpip/socketops.go11
-rw-r--r--pkg/tcpip/stack/packet_buffer.go14
-rw-r--r--pkg/tcpip/stack/packet_buffer_test.go26
-rw-r--r--pkg/tcpip/stack/stack.go16
-rw-r--r--pkg/tcpip/stack/tcp.go6
-rw-r--r--pkg/tcpip/stack/transport_demuxer.go2
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go30
-rw-r--r--pkg/tcpip/transport/tcp/BUILD1
-rw-r--r--pkg/tcpip/transport/tcp/accept.go44
-rw-r--r--pkg/tcpip/transport/tcp/connect.go71
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go92
-rw-r--r--pkg/tcpip/transport/tcp/endpoint_state.go1
-rw-r--r--pkg/tcpip/transport/tcp/forwarder.go2
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go27
-rw-r--r--pkg/tcpip/transport/tcp/snd.go9
-rw-r--r--pkg/tcpip/transport/tcp/tcp_rack_test.go9
-rw-r--r--pkg/tcpip/transport/tcp/tcp_sack_test.go10
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go370
-rw-r--r--pkg/tcpip/transport/tcp/tcp_timestamp_test.go8
-rw-r--r--pkg/tcpip/transport/tcp/testing/context/context.go81
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go2
-rw-r--r--pkg/usermem/usermem.go2
-rw-r--r--runsc/boot/BUILD2
-rw-r--r--runsc/boot/controller.go45
-rw-r--r--runsc/boot/loader.go3
-rw-r--r--runsc/boot/network.go3
-rw-r--r--runsc/boot/strace.go9
-rw-r--r--runsc/cmd/BUILD1
-rw-r--r--runsc/cmd/events.go13
-rw-r--r--runsc/cmd/usage.go93
-rw-r--r--runsc/cmd/verity_prepare.go7
-rw-r--r--runsc/config/BUILD6
-rw-r--r--runsc/config/config.go99
-rw-r--r--runsc/config/config_test.go23
-rw-r--r--runsc/config/flags.go2
-rw-r--r--runsc/container/container.go24
-rw-r--r--runsc/container/container_test.go174
-rw-r--r--runsc/fsgofer/fsgofer.go16
-rw-r--r--runsc/sandbox/BUILD2
-rw-r--r--runsc/sandbox/sandbox.go86
-rw-r--r--test/benchmarks/base/startup_test.go12
-rw-r--r--test/benchmarks/tools/fio_test.go2
-rw-r--r--test/perf/BUILD7
-rw-r--r--test/perf/linux/BUILD20
-rw-r--r--test/perf/linux/randread_benchmark.cc2
-rw-r--r--test/perf/linux/verity_stat_benchmark.cc84
-rw-r--r--test/syscalls/linux/BUILD4
-rw-r--r--test/syscalls/linux/cgroup.cc17
-rw-r--r--test/syscalls/linux/link.cc3
-rw-r--r--test/syscalls/linux/mmap.cc2
-rw-r--r--test/syscalls/linux/msgqueue.cc291
-rw-r--r--test/syscalls/linux/packet_socket.cc13
-rw-r--r--test/syscalls/linux/packet_socket_raw.cc42
-rw-r--r--test/syscalls/linux/pipe.cc24
-rw-r--r--test/syscalls/linux/proc_net.cc78
-rw-r--r--test/syscalls/linux/raw_socket.cc122
-rw-r--r--test/syscalls/linux/raw_socket_hdrincl.cc4
-rw-r--r--test/syscalls/linux/raw_socket_icmp.cc26
-rw-r--r--test/syscalls/linux/tcp_socket.cc60
-rw-r--r--test/util/BUILD1
-rw-r--r--test/util/capability_util.h20
-rw-r--r--test/util/cgroup_util.cc18
-rw-r--r--test/util/cgroup_util.h12
-rw-r--r--test/util/fs_util.cc3
-rw-r--r--test/util/fuchsia_capability_util.cc54
-rw-r--r--test/util/fuchsia_capability_util.h41
-rw-r--r--test/util/linux_capability_util.cc8
-rw-r--r--tools/bazel.mk16
-rw-r--r--tools/go_generics/rules_tests/template_test.go6
-rw-r--r--tools/go_stateify/main.go7
-rw-r--r--tools/nogo/analyzers.go4
-rw-r--r--tools/nogo/defs.bzl5
-rw-r--r--tools/show_paths.bzl25
-rw-r--r--tools/verity/measure_tool.go30
336 files changed, 4619 insertions, 2042 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c53df7d25..e680428c3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -32,8 +32,9 @@ will need to be added to the appropriate `BUILD` files, and the `:gopath` target
will need to be re-run to generate appropriate symlinks in the `GOPATH`
directory tree.
-Dependencies can be added by using `go mod get`. In order to keep the
-`WORKSPACE` file in sync, run `tools/go_mod.sh` in place of `go mod`.
+Dependencies can be added by using `go get`. In order to keep the `WORKSPACE`
+file in sync, run `bazel run //:gazelle -- update-repos -from_file=go.mod` in
+place of `go mod`.
### Coding Guidelines
@@ -65,10 +66,14 @@ Rules:
* Itself.
* Go standard library.
- * Except (transitively) package "net" (this will result in a non-cgo
- binary). Use `//pkg/unet` instead.
+ * Except (transitively) package "net", which would result in a cgo
+ binary. Use `//pkg/unet` instead.
* `@org_golang_x_sys//unix:go_default_library` (Go import
`golang.org/x/sys/unix`).
+ * `@org_golang_x_time//rate:go_default_library` (Go import
+ `golang.org/x/time/rate`).
+ * `@com_github_google_btree//:go_default_library"` (Go import
+ `github.com/google/btree`).
* Generated Go protobuf packages.
* `@org_golang_google_protobuf//proto:go_default_library` (Go import
`google.golang.org/protobuf`).
diff --git a/WORKSPACE b/WORKSPACE
index a27f9afeb..b00c09682 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -670,13 +670,6 @@ go_repository(
)
go_repository(
- name = "com_github_jonboulle_clockwork",
- importpath = "github.com/jonboulle/clockwork",
- sum = "h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo=",
- version = "v0.1.0",
-)
-
-go_repository(
name = "com_github_jtolds_gls",
importpath = "github.com/jtolds/gls",
sum = "h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=",
@@ -1245,8 +1238,8 @@ rbe_autoconfig(name = "rbe_default")
http_archive(
name = "rules_pkg",
- sha256 = "6b5969a7acd7b60c02f816773b06fcf32fbe8ba0c7919ccdc2df4f8fb923804a",
- url = "https://github.com/bazelbuild/rules_pkg/releases/download/0.3.0/rules_pkg-0.3.0.tar.gz",
+ sha256 = "353b20e8b093d42dd16889c7f918750fb8701c485ac6cceb69a5236500507c27",
+ url = "https://github.com/bazelbuild/rules_pkg/releases/download/0.5.0/rules_pkg-0.5.0.tar.gz",
)
load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")
@@ -1640,9 +1633,9 @@ go_repository(
go_repository(
name = "co_honnef_go_tools",
importpath = "honnef.co/go/tools",
- sum = "h1:EVDuO03OCZwpV2t/tLLxPmPiomagMoBOgfPt0FM+4IY=",
- version = "v0.1.1",
-)
+ sum = "h1:Tyybiul3hjaq0dkv+kcf5/MPTfo+ZBiEWrkhgxMPH54=",
+ version = "v0.3.0-0.dev.0.20210801021341-453cb28c0b15",
+ )
go_repository(
name = "com_github_burntsushi_toml",
diff --git a/debian/BUILD b/debian/BUILD
index 32cc209bf..a62164cb0 100644
--- a/debian/BUILD
+++ b/debian/BUILD
@@ -28,12 +28,10 @@ pkg_deb(
amd64 = "amd64",
arm64 = "arm64",
),
- changes = "runsc.changes",
conffiles = [
"/etc/containerd/runsc.toml",
],
data = ":debian-data",
- deb = "runsc.deb",
# Note that the description_file will be flatten (all newlines removed),
# and therefore it is kept to a simple one-line description. The expected
# format for debian packages is "short summary\nLonger explanation of
@@ -42,6 +40,7 @@ pkg_deb(
homepage = "https://gvisor.dev/",
maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>",
package = "runsc",
+ package_file_name = "runsc.deb",
postinst = "postinst.sh",
version_file = version,
visibility = [
diff --git a/images/syzkaller/Dockerfile b/images/syzkaller/Dockerfile
index 9a85ae345..9f739d972 100644
--- a/images/syzkaller/Dockerfile
+++ b/images/syzkaller/Dockerfile
@@ -2,7 +2,7 @@ FROM gcr.io/syzkaller/env
# This image is mostly for investigating syzkaller crashes, so let's install
# developer tools.
-RUN apt update && apt install -y git vim strace gdb procps
+RUN apt update --allow-releaseinfo-change && DEBIAN_FRONTEND=noninteractive apt install -y git vim strace gdb procps
WORKDIR /syzkaller/gopath/src/github.com/google/syzkaller
diff --git a/nogo.yaml b/nogo.yaml
index e7d1741eb..64332cb41 100644
--- a/nogo.yaml
+++ b/nogo.yaml
@@ -48,6 +48,8 @@ global:
- "duplicate import"
# These will never be annotated.
- "unexpected call to atomic function"
+ # Generated proto code creates declarations like 'var start int = iNdEx'
+ - "should omit type .* from declaration; it will be inferred from the right-hand side"
internal:
suppress:
# We use ALL_CAPS for system definitions,
@@ -161,29 +163,35 @@ analyzers:
# targets in the standard library, so we still need to run
# checklinkname on stdlib generally.
- "linkname to unknown symbol"
- SA4016:
+ SA1019: # Use of deprecated identifier.
+ # disable for now due to misattribution from golang.org/issue/44195.
+ generated:
+ exclude: [".*"]
internal:
- exclude:
- - pkg/gohacks/gohacks_unsafe.go # x ^ 0 always equals x.
- SA2001:
+ exclude: [".*"]
+ SA2001: # Empty critical section.
internal:
exclude:
- pkg/sentry/fs/fs.go # Intentional.
- pkg/sentry/fs/gofer/inode.go # Intentional.
- pkg/refs/refcounter_test.go # Intentional.
- ST1019:
+ SA4016: # Useless bitwise operations.
+ internal:
+ exclude:
+ - pkg/gohacks/gohacks_unsafe.go # x ^ 0 always equals x.
+ SA5011: # Possible nil pointer dereference.
+ internal:
+ exclude:
+ # https://github.com/dominikh/go-tools/issues/924
+ - pkg/sentry/fs/fdpipe/pipe_opener_test.go
+ - pkg/tcpip/tests/integration/link_resolution_test.go
+ ST1019: # Multiple imports of the same package.
generated:
exclude:
# package ".../kubeapi/core/v1/v1" is being imported more than once
- generated.gen.pb.go
- ST1021:
+ ST1021: # Doc should start with type name.
internal:
suppress:
- "comment on exported type Translation" # Intentional.
- "comment on exported type PinnedRange" # Intentional.
- SA5011:
- internal:
- exclude:
- # https://github.com/dominikh/go-tools/issues/924
- - pkg/sentry/fs/fdpipe/pipe_opener_test.go
- - pkg/tcpip/tests/integration/link_resolution_test.go
diff --git a/pkg/abi/linux/errno/errno.go b/pkg/abi/linux/errno/errno.go
index 5a09c6605..38ebbb1d7 100644
--- a/pkg/abi/linux/errno/errno.go
+++ b/pkg/abi/linux/errno/errno.go
@@ -157,9 +157,32 @@ const (
EHWPOISON
)
-// errnos derived from other errnos
+// errnos derived from other errnos.
const (
EWOULDBLOCK = EAGAIN
EDEADLOCK = EDEADLK
ENONET = ENOENT
)
+
+// errnos for internal errors.
+const (
+ // ERESTARTSYS is returned by an interrupted syscall to indicate that it
+ // should be converted to EINTR if interrupted by a signal delivered to a
+ // user handler without SA_RESTART set, and restarted otherwise.
+ ERESTARTSYS = 512
+
+ // ERESTARTNOINTR is returned by an interrupted syscall to indicate that it
+ // should always be restarted.
+ ERESTARTNOINTR = 513
+
+ // ERESTARTNOHAND is returned by an interrupted syscall to indicate that it
+ // should be converted to EINTR if interrupted by a signal delivered to a
+ // user handler, and restarted otherwise.
+ ERESTARTNOHAND = 514
+
+ // ERESTART_RESTARTBLOCK is returned by an interrupted syscall to indicate
+ // that it should be restarted using a custom function. The interrupted
+ // syscall must register a custom restart function by calling
+ // Task.SetRestartSyscallFn.
+ ERESTART_RESTARTBLOCK = 516
+)
diff --git a/pkg/abi/linux/msgqueue.go b/pkg/abi/linux/msgqueue.go
index e1e8d0357..0612a8214 100644
--- a/pkg/abi/linux/msgqueue.go
+++ b/pkg/abi/linux/msgqueue.go
@@ -47,7 +47,7 @@ const (
MSGSSZ = 16
// MSGSEG is simplified due to the inexistance of a ternary operator.
- MSGSEG = (MSGPOOL * 1024) / MSGSSZ
+ MSGSEG = 0xffff
)
// MsqidDS is equivelant to struct msqid64_ds. Source:
diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD
index bd3a5cce9..6d8b5f818 100644
--- a/pkg/amutex/BUILD
+++ b/pkg/amutex/BUILD
@@ -8,7 +8,7 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/context",
- "//pkg/syserror",
+ "//pkg/errors/linuxerr",
],
)
diff --git a/pkg/amutex/amutex.go b/pkg/amutex/amutex.go
index d7acc1d9f..985199cfa 100644
--- a/pkg/amutex/amutex.go
+++ b/pkg/amutex/amutex.go
@@ -20,7 +20,7 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
)
// Sleeper must be implemented by users of the abortable mutex to allow for
@@ -33,7 +33,7 @@ type NoopSleeper = context.Context
// Block blocks until either receiving from ch succeeds (in which case it
// returns nil) or sleeper is interrupted (in which case it returns
-// syserror.ErrInterrupted).
+// linuxerr.ErrInterrupted).
func Block(sleeper Sleeper, ch <-chan struct{}) error {
cancel := sleeper.SleepStart()
select {
@@ -42,7 +42,7 @@ func Block(sleeper Sleeper, ch <-chan struct{}) error {
return nil
case <-cancel:
sleeper.SleepFinish(false)
- return syserror.ErrInterrupted
+ return linuxerr.ErrInterrupted
}
}
diff --git a/pkg/atomicbitops/aligned_32bit_unsafe.go b/pkg/atomicbitops/aligned_32bit_unsafe.go
index a17d317cc..0e4765c48 100644
--- a/pkg/atomicbitops/aligned_32bit_unsafe.go
+++ b/pkg/atomicbitops/aligned_32bit_unsafe.go
@@ -39,9 +39,9 @@ type AlignedAtomicInt64 struct {
}
func (aa *AlignedAtomicInt64) ptr() *int64 {
- // On 32-bit systems, aa.value is is guaranteed to be 32-bit aligned.
- // It means that in the 12-byte aa.value, there are guaranteed to be 8
- // contiguous bytes with 64-bit alignment.
+ // On 32-bit systems, aa.value is guaranteed to be 32-bit aligned. It means
+ // that in the 12-byte aa.value, there are guaranteed to be 8 contiguous bytes
+ // with 64-bit alignment.
return (*int64)(unsafe.Pointer((uintptr(unsafe.Pointer(&aa.value)) + 4) &^ 7))
}
@@ -77,9 +77,9 @@ type AlignedAtomicUint64 struct {
}
func (aa *AlignedAtomicUint64) ptr() *uint64 {
- // On 32-bit systems, aa.value is is guaranteed to be 32-bit aligned.
- // It means that in the 12-byte aa.value, there are guaranteed to be 8
- // contiguous bytes with 64-bit alignment.
+ // On 32-bit systems, aa.value is guaranteed to be 32-bit aligned. It means
+ // that in the 12-byte aa.value, there are guaranteed to be 8 contiguous bytes
+ // with 64-bit alignment.
return (*uint64)(unsafe.Pointer((uintptr(unsafe.Pointer(&aa.value)) + 4) &^ 7))
}
diff --git a/pkg/buffer/view.go b/pkg/buffer/view.go
index 7bcfcd543..a4610f977 100644
--- a/pkg/buffer/view.go
+++ b/pkg/buffer/view.go
@@ -378,6 +378,20 @@ func (v *View) Copy() (other View) {
return
}
+// Clone makes a more shallow copy compared to Copy. The underlying payload
+// slice (buffer.data) is shared but the buffers themselves are copied.
+func (v *View) Clone() *View {
+ other := &View{
+ size: v.size,
+ }
+ for buf := v.data.Front(); buf != nil; buf = buf.Next() {
+ newBuf := other.pool.getNoInit()
+ *newBuf = *buf
+ other.data.PushBack(newBuf)
+ }
+ return other
+}
+
// Apply applies the given function across all valid data.
func (v *View) Apply(fn func([]byte)) {
for buf := v.data.Front(); buf != nil; buf = buf.Next() {
diff --git a/pkg/errors/linuxerr/BUILD b/pkg/errors/linuxerr/BUILD
index 201727780..e73b0e28a 100644
--- a/pkg/errors/linuxerr/BUILD
+++ b/pkg/errors/linuxerr/BUILD
@@ -4,7 +4,10 @@ package(licenses = ["notice"])
go_library(
name = "linuxerr",
- srcs = ["linuxerr.go"],
+ srcs = [
+ "internal.go",
+ "linuxerr.go",
+ ],
visibility = ["//visibility:public"],
deps = [
"//pkg/abi/linux/errno",
@@ -20,7 +23,6 @@ go_test(
":linuxerr",
"//pkg/abi/linux/errno",
"//pkg/errors",
- "//pkg/syserror",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/errors/linuxerr/internal.go b/pkg/errors/linuxerr/internal.go
new file mode 100644
index 000000000..127bba0df
--- /dev/null
+++ b/pkg/errors/linuxerr/internal.go
@@ -0,0 +1,120 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"),;
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linuxerr
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux/errno"
+ "gvisor.dev/gvisor/pkg/errors"
+)
+
+var (
+ // ErrWouldBlock is an internal error used to indicate that an operation
+ // cannot be satisfied immediately, and should be retried at a later
+ // time, possibly when the caller has received a notification that the
+ // operation may be able to complete. It is used by implementations of
+ // the kio.File interface.
+ ErrWouldBlock = errors.New(errno.EWOULDBLOCK, "request would block")
+
+ // ErrInterrupted is returned if a request is interrupted before it can
+ // complete.
+ ErrInterrupted = errors.New(errno.EINTR, "request was interrupted")
+
+ // ErrExceedsFileSizeLimit is returned if a request would exceed the
+ // file's size limit.
+ ErrExceedsFileSizeLimit = errors.New(errno.E2BIG, "exceeds file size limit")
+)
+
+var errorMap = map[error]*errors.Error{
+ ErrWouldBlock: EWOULDBLOCK,
+ ErrInterrupted: EINTR,
+ ErrExceedsFileSizeLimit: EFBIG,
+}
+
+// errorUnwrappers is an array of unwrap functions to extract typed errors.
+var errorUnwrappers = []func(error) (*errors.Error, bool){}
+
+// AddErrorUnwrapper registers an unwrap method that can extract a concrete error
+// from a typed, but not initialized, error.
+func AddErrorUnwrapper(unwrap func(e error) (*errors.Error, bool)) {
+ errorUnwrappers = append(errorUnwrappers, unwrap)
+}
+
+// TranslateError translates errors to errnos, it will return false if
+// the error was not registered.
+func TranslateError(from error) (*errors.Error, bool) {
+ if err, ok := errorMap[from]; ok {
+ return err, true
+ }
+ // Try to unwrap the error if we couldn't match an error
+ // exactly. This might mean that a package has its own
+ // error type.
+ for _, unwrap := range errorUnwrappers {
+ if err, ok := unwrap(from); ok {
+ return err, true
+ }
+ }
+ return nil, false
+}
+
+// These errors are significant because ptrace syscall exit tracing can
+// observe them.
+//
+// For all of the following errors, if the syscall is not interrupted by a
+// signal delivered to a user handler, the syscall is restarted.
+var (
+ // ERESTARTSYS is returned by an interrupted syscall to indicate that it
+ // should be converted to EINTR if interrupted by a signal delivered to a
+ // user handler without SA_RESTART set, and restarted otherwise.
+ ERESTARTSYS = errors.New(errno.ERESTARTSYS, "to be restarted if SA_RESTART is set")
+
+ // ERESTARTNOINTR is returned by an interrupted syscall to indicate that it
+ // should always be restarted.
+ ERESTARTNOINTR = errors.New(errno.ERESTARTNOINTR, "to be restarted")
+
+ // ERESTARTNOHAND is returned by an interrupted syscall to indicate that it
+ // should be converted to EINTR if interrupted by a signal delivered to a
+ // user handler, and restarted otherwise.
+ ERESTARTNOHAND = errors.New(errno.ERESTARTNOHAND, "to be restarted if no handler")
+
+ // ERESTART_RESTARTBLOCK is returned by an interrupted syscall to indicate
+ // that it should be restarted using a custom function. The interrupted
+ // syscall must register a custom restart function by calling
+ // Task.SetRestartSyscallFn.
+ ERESTART_RESTARTBLOCK = errors.New(errno.ERESTART_RESTARTBLOCK, "interrupted by signal")
+)
+
+var restartMap = map[int]*errors.Error{
+ -int(errno.ERESTARTSYS): ERESTARTSYS,
+ -int(errno.ERESTARTNOINTR): ERESTARTNOINTR,
+ -int(errno.ERESTARTNOHAND): ERESTARTNOHAND,
+ -int(errno.ERESTART_RESTARTBLOCK): ERESTART_RESTARTBLOCK,
+}
+
+// IsRestartError checks if a given error is a restart error.
+func IsRestartError(err error) bool {
+ switch err {
+ case ERESTARTSYS, ERESTARTNOINTR, ERESTARTNOHAND, ERESTART_RESTARTBLOCK:
+ return true
+ default:
+ return false
+ }
+}
+
+// SyscallRestartErrorFromReturn returns the SyscallRestartErrno represented by
+// rv, the value in a syscall return register.
+func SyscallRestartErrorFromReturn(rv uintptr) (*errors.Error, bool) {
+ err, ok := restartMap[int(rv)]
+ return err, ok
+}
diff --git a/pkg/errors/linuxerr/linuxerr.go b/pkg/errors/linuxerr/linuxerr.go
index f9f8412e0..5905ef593 100644
--- a/pkg/errors/linuxerr/linuxerr.go
+++ b/pkg/errors/linuxerr/linuxerr.go
@@ -27,6 +27,12 @@ import (
const maxErrno uint32 = errno.EHWPOISON + 1
+// The following errors are semantically identical to Errno of type unix.Errno
+// or sycall.Errno. However, since the type are distinct ( these are
+// *errors.Error), they are not directly comperable. However, the Errno method
+// returns an Errno number such that the error can be compared to unix/syscall.Errno
+// (e.g. unix.Errno(EPERM.Errno()) == unix.EPERM is true). Converting unix/syscall.Errno
+// to the errors should be done via the lookup methods provided.
var (
NOERROR = errors.New(errno.NOERRNO, "not an error")
EPERM = errors.New(errno.EPERM, "operation not permitted")
@@ -177,7 +183,7 @@ var (
var errNotValidError = errors.New(errno.Errno(maxErrno), "not a valid error")
// The following errorSlice holds errors by errno for fast translation between
-// errnos (especially uint32(sycall.Errno)) and *Error.
+// errnos (especially uint32(sycall.Errno)) and *errors.Error.
var errorSlice = []*errors.Error{
// Errno values from include/uapi/asm-generic/errno-base.h.
errno.NOERRNO: NOERROR,
diff --git a/pkg/errors/linuxerr/linuxerr_test.go b/pkg/errors/linuxerr/linuxerr_test.go
index f09d61b02..df7cd1c5a 100644
--- a/pkg/errors/linuxerr/linuxerr_test.go
+++ b/pkg/errors/linuxerr/linuxerr_test.go
@@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package syserror_test
+package linuxerr_test
import (
"errors"
+ "fmt"
"io"
"io/fs"
"syscall"
@@ -25,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux/errno"
gErrors "gvisor.dev/gvisor/pkg/errors"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
- "gvisor.dev/gvisor/pkg/syserror"
)
var globalError error
@@ -42,12 +42,6 @@ func BenchmarkAssignLinuxerr(b *testing.B) {
}
}
-func BenchmarkAssignSyserror(b *testing.B) {
- for i := b.N; i > 0; i-- {
- globalError = linuxerr.ENOMSG
- }
-}
-
func BenchmarkCompareUnix(b *testing.B) {
globalError = unix.EAGAIN
j := 0
@@ -68,16 +62,6 @@ func BenchmarkCompareLinuxerr(b *testing.B) {
}
}
-func BenchmarkCompareSyserror(b *testing.B) {
- globalError = linuxerr.EAGAIN
- j := 0
- for i := b.N; i > 0; i-- {
- if globalError == linuxerr.EACCES {
- j++
- }
- }
-}
-
func BenchmarkSwitchUnix(b *testing.B) {
globalError = unix.EPERM
j := 0
@@ -108,21 +92,6 @@ func BenchmarkSwitchLinuxerr(b *testing.B) {
}
}
-func BenchmarkSwitchSyserror(b *testing.B) {
- globalError = linuxerr.EPERM
- j := 0
- for i := b.N; i > 0; i-- {
- switch globalError {
- case linuxerr.EACCES:
- j++
- case syserror.EINTR:
- j += 2
- case linuxerr.EAGAIN:
- j += 3
- }
- }
-}
-
func BenchmarkReturnUnix(b *testing.B) {
var localError error
f := func() error {
@@ -170,47 +139,40 @@ func BenchmarkConvertUnixLinuxerrZero(b *testing.B) {
}
type translationTestTable struct {
- fn string
errIn error
- syscallErrorIn unix.Errno
expectedBool bool
- expectedTranslation unix.Errno
+ expectedTranslation *gErrors.Error
}
func TestErrorTranslation(t *testing.T) {
- myError := errors.New("My test error")
- myError2 := errors.New("Another test error")
testTable := []translationTestTable{
- {"TranslateError", myError, 0, false, 0},
- {"TranslateError", myError2, 0, false, 0},
- {"AddErrorTranslation", myError, unix.EAGAIN, true, 0},
- {"AddErrorTranslation", myError, unix.EAGAIN, false, 0},
- {"AddErrorTranslation", myError, unix.EPERM, false, 0},
- {"TranslateError", myError, 0, true, unix.EAGAIN},
- {"TranslateError", myError2, 0, false, 0},
- {"AddErrorTranslation", myError2, unix.EPERM, true, 0},
- {"AddErrorTranslation", myError2, unix.EPERM, false, 0},
- {"AddErrorTranslation", myError2, unix.EAGAIN, false, 0},
- {"TranslateError", myError, 0, true, unix.EAGAIN},
- {"TranslateError", myError2, 0, true, unix.EPERM},
+ {
+ errIn: linuxerr.ENOENT,
+ },
+ {
+ errIn: unix.ENOENT,
+ },
+ {
+ errIn: linuxerr.ErrInterrupted,
+ expectedBool: true,
+ expectedTranslation: linuxerr.EINTR,
+ },
+ {
+ errIn: linuxerr.ERESTART_RESTARTBLOCK,
+ },
+ {
+ errIn: errors.New("some new error"),
+ },
}
for _, tt := range testTable {
- switch tt.fn {
- case "TranslateError":
- err, ok := syserror.TranslateError(tt.errIn)
- if ok != tt.expectedBool {
- t.Fatalf("%v(%v) => %v expected %v", tt.fn, tt.errIn, ok, tt.expectedBool)
+ t.Run(fmt.Sprintf("err: %v %T", tt.errIn, tt.errIn), func(t *testing.T) {
+ err, ok := linuxerr.TranslateError(tt.errIn)
+ if (!tt.expectedBool && err != nil) || (tt.expectedBool != ok) {
+ t.Fatalf("%v => %v %v expected %v err: nil", tt.errIn, err, ok, tt.expectedBool)
} else if err != tt.expectedTranslation {
- t.Fatalf("%v(%v) (error) => %v expected %v", tt.fn, tt.errIn, err, tt.expectedTranslation)
- }
- case "AddErrorTranslation":
- ok := syserror.AddErrorTranslation(tt.errIn, tt.syscallErrorIn)
- if ok != tt.expectedBool {
- t.Fatalf("%v(%v) => %v expected %v", tt.fn, tt.errIn, ok, tt.expectedBool)
+ t.Fatalf("%v => %v expected %v", tt.errIn, err, tt.expectedTranslation)
}
- default:
- t.Fatalf("Unknown function %v", tt.fn)
- }
+ })
}
}
diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD
index ad15d3672..56399232a 100644
--- a/pkg/eventchannel/BUILD
+++ b/pkg/eventchannel/BUILD
@@ -7,6 +7,7 @@ go_library(
srcs = [
"event.go",
"event_any.go",
+ "processor.go",
"rate.go",
],
visibility = ["//:sandbox"],
diff --git a/pkg/eventchannel/event_any.go b/pkg/eventchannel/event_any.go
index 13f300061..b708937a4 100644
--- a/pkg/eventchannel/event_any.go
+++ b/pkg/eventchannel/event_any.go
@@ -26,3 +26,8 @@ import (
func newAny(m proto.Message) (*anypb.Any, error) {
return anypb.New(m)
}
+
+func emptyAny() *anypb.Any {
+ var any anypb.Any
+ return &any
+}
diff --git a/pkg/eventchannel/processor.go b/pkg/eventchannel/processor.go
new file mode 100644
index 000000000..e765c10d1
--- /dev/null
+++ b/pkg/eventchannel/processor.go
@@ -0,0 +1,130 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package eventchannel
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+ "os"
+ "time"
+
+ "google.golang.org/protobuf/proto"
+ pb "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto"
+)
+
+// eventProcessor carries display state across multiple events.
+type eventProcessor struct {
+ filtering bool
+ // filtered is the number of events omitted since printing the last matching
+ // event. Only meaningful when filtering == true.
+ filtered uint64
+ // allowlist is the set of event names to display. If empty, all events are
+ // displayed.
+ allowlist map[string]bool
+}
+
+// newEventProcessor creates a new EventProcessor with filters.
+func newEventProcessor(filters []string) *eventProcessor {
+ e := &eventProcessor{
+ filtering: len(filters) > 0,
+ allowlist: make(map[string]bool),
+ }
+ for _, f := range filters {
+ e.allowlist[f] = true
+ }
+ return e
+}
+
+// processOne reads, parses and displays a single event from the event channel.
+//
+// The event channel is a stream of (msglen, payload) packets; this function
+// processes a single such packet. The msglen is a uvarint-encoded length for
+// the associated payload. The payload is a binary-encoded 'Any' protobuf, which
+// in turn encodes an arbitrary event protobuf.
+func (e *eventProcessor) processOne(src io.Reader, out *os.File) error {
+ // Read and parse the msglen.
+ lenbuf := make([]byte, binary.MaxVarintLen64)
+ if _, err := io.ReadFull(src, lenbuf); err != nil {
+ return err
+ }
+ msglen, consumed := binary.Uvarint(lenbuf)
+ if consumed <= 0 {
+ return fmt.Errorf("couldn't parse the message length")
+ }
+
+ // Read the payload.
+ buf := make([]byte, msglen)
+ // Copy any unused bytes from the len buffer into the payload buffer. These
+ // bytes are actually part of the payload.
+ extraBytes := copy(buf, lenbuf[consumed:])
+ if _, err := io.ReadFull(src, buf[extraBytes:]); err != nil {
+ return err
+ }
+
+ // Unmarshal the payload into an "Any" protobuf, which encodes the actual
+ // event.
+ encodedEv := emptyAny()
+ if err := proto.Unmarshal(buf, encodedEv); err != nil {
+ return fmt.Errorf("failed to unmarshal 'any' protobuf message: %v", err)
+ }
+
+ var ev pb.DebugEvent
+ if err := (encodedEv).UnmarshalTo(&ev); err != nil {
+ return fmt.Errorf("failed to decode 'any' protobuf message: %v", err)
+ }
+
+ if e.filtering && e.allowlist[ev.Name] {
+ e.filtered++
+ return nil
+ }
+
+ if e.filtering && e.filtered > 0 {
+ if e.filtered == 1 {
+ fmt.Fprintf(out, "... filtered %d event ...\n\n", e.filtered)
+ } else {
+ fmt.Fprintf(out, "... filtered %d events ...\n\n", e.filtered)
+ }
+ e.filtered = 0
+ }
+
+ // Extract the inner event and display it. Example:
+ //
+ // 2017-10-04 14:35:05.316180374 -0700 PDT m=+1.132485846
+ // cloud_gvisor.MemoryUsage {
+ // total: 23822336
+ // }
+ fmt.Fprintf(out, "%v\n%v {\n", time.Now(), ev.Name)
+ fmt.Fprintf(out, "%v", ev.Text)
+ fmt.Fprintf(out, "}\n\n")
+
+ return nil
+}
+
+// ProcessAll reads, parses and displays all events from src. The events are
+// displayed to out.
+func ProcessAll(src io.Reader, filters []string, out *os.File) error {
+ ep := newEventProcessor(filters)
+ for {
+ switch err := ep.processOne(src, out); err {
+ case nil:
+ continue
+ case io.EOF:
+ return nil
+ default:
+ return err
+ }
+ }
+}
diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go
index 0b961d3d9..6358ad8e9 100644
--- a/pkg/merkletree/merkletree.go
+++ b/pkg/merkletree/merkletree.go
@@ -384,6 +384,14 @@ func verifyMetadata(params *VerifyParams, layout *Layout) error {
return descriptor.verify(params.Expected, params.HashAlgorithms)
}
+// cachedHashes stores verified hashes from a previous hash step.
+type cachedHashes struct {
+ // offset is the offset of cached hash in each level.
+ offset []int64
+ // hash is the verified cache for each level from previous hash steps.
+ hash [][]byte
+}
+
// Verify verifies the content read from data with offset. The content is
// verified against tree. If content spans across multiple blocks, each block is
// verified. Verification fails if the hash of the data does not match the tree
@@ -409,29 +417,32 @@ func Verify(params *VerifyParams) (int64, error) {
firstDataBlock := params.ReadOffset / layout.blockSize
lastDataBlock := (params.ReadOffset + params.ReadSize - 1) / layout.blockSize
- buf := make([]byte, layout.blockSize)
- var readErr error
- total := int64(0)
+ size := (lastDataBlock - firstDataBlock + 1) * layout.blockSize
+ retBuf := make([]byte, size)
+ n, err := params.File.ReadAt(retBuf, firstDataBlock*layout.blockSize)
+ if err != nil && err != io.EOF {
+ return 0, err
+ }
+ total := int64(n)
+ bytesRead := int64(0)
+
+ // Only cache hash results if reading more than a block.
+ var ch *cachedHashes
+ if lastDataBlock > firstDataBlock {
+ ch = &cachedHashes{
+ offset: make([]int64, layout.numLevels()),
+ hash: make([][]byte, layout.numLevels()),
+ }
+ }
for i := firstDataBlock; i <= lastDataBlock; i++ {
+ // Reach the end of file during verification.
+ if total <= 0 {
+ return bytesRead, io.EOF
+ }
// Read a block that includes all or part of target range in
// input data.
- bytesRead, err := params.File.ReadAt(buf, i*layout.blockSize)
- readErr = err
- // If at the end of input data and all previous blocks are
- // verified, return the verified input data and EOF.
- if readErr == io.EOF && bytesRead == 0 {
- break
- }
- if readErr != nil && readErr != io.EOF {
- return 0, fmt.Errorf("read from data failed: %w", err)
- }
- // If this is the end of file, zero the remaining bytes in buf,
- // otherwise they are still from the previous block.
- if bytesRead < len(buf) {
- for j := bytesRead; j < len(buf); j++ {
- buf[j] = 0
- }
- }
+ buf := retBuf[(i-firstDataBlock)*layout.blockSize : (i-firstDataBlock+1)*layout.blockSize]
+
descriptor := VerityDescriptor{
Name: params.Name,
FileSize: params.Size,
@@ -441,8 +452,8 @@ func Verify(params *VerifyParams) (int64, error) {
SymlinkTarget: params.SymlinkTarget,
Children: params.Children,
}
- if err := verifyBlock(params.Tree, &descriptor, &layout, buf, i, params.HashAlgorithms, params.Expected); err != nil {
- return 0, err
+ if err := verifyBlock(params.Tree, &descriptor, &layout, buf, i, params.HashAlgorithms, params.Expected, ch); err != nil {
+ return bytesRead, err
}
// startOff is the beginning of the read range within the
@@ -459,22 +470,24 @@ func Verify(params *VerifyParams) (int64, error) {
if i == lastDataBlock {
endOff = (params.ReadOffset+params.ReadSize-1)%layout.blockSize + 1
}
+
// If the provided size exceeds the end of input data, we should
// only copy the parts in buf that's part of input data.
- if startOff > int64(bytesRead) {
- startOff = int64(bytesRead)
+ if startOff > total {
+ startOff = total
}
- if endOff > int64(bytesRead) {
- endOff = int64(bytesRead)
+ if endOff > total {
+ endOff = total
}
+
n, err := params.Out.Write(buf[startOff:endOff])
if err != nil {
- return total, err
+ return bytesRead, err
}
- total += int64(n)
-
+ bytesRead += int64(n)
+ total -= endOff
}
- return total, readErr
+ return bytesRead, nil
}
// verifyBlock verifies a block against tree. index is the number of block in
@@ -482,7 +495,7 @@ func Verify(params *VerifyParams) (int64, error) {
// fails if the calculated hash from block is different from any level of
// hashes stored in tree. And the final root hash is compared with
// expected.
-func verifyBlock(tree io.ReaderAt, descriptor *VerityDescriptor, layout *Layout, dataBlock []byte, blockIndex int64, hashAlgorithms int, expected []byte) error {
+func verifyBlock(tree io.ReaderAt, descriptor *VerityDescriptor, layout *Layout, dataBlock []byte, blockIndex int64, hashAlgorithms int, expected []byte, ch *cachedHashes) error {
if len(dataBlock) != int(layout.blockSize) {
return fmt.Errorf("incorrect block size")
}
@@ -491,6 +504,12 @@ func verifyBlock(tree io.ReaderAt, descriptor *VerityDescriptor, layout *Layout,
treeBlock := make([]byte, layout.blockSize)
var digest []byte
for level := 0; level < layout.numLevels(); level++ {
+ // No need to verify remaining levels if the current block has
+ // been verified in a previous call and cached.
+ if ch != nil && ch.offset[level] == layout.digestOffset(level, blockIndex) && ch.hash[level] != nil {
+ break
+ }
+
// Calculate hash.
if level == 0 {
h, err := hashData(dataBlock, hashAlgorithms)
@@ -521,11 +540,19 @@ func verifyBlock(tree io.ReaderAt, descriptor *VerityDescriptor, layout *Layout,
if !bytes.Equal(digest, expectedDigest) {
return fmt.Errorf("verification failed")
}
+ if ch != nil {
+ ch.offset[level] = layout.digestOffset(level, blockIndex)
+ ch.hash[level] = expectedDigest
+ }
blockIndex = blockIndex / layout.hashesPerBlock()
}
// Verification for the tree succeeded. Now hash the descriptor with
// the root hash and compare it with expected.
- descriptor.RootHash = digest
+ if ch != nil {
+ descriptor.RootHash = ch.hash[layout.rootLevel()]
+ } else {
+ descriptor.RootHash = digest
+ }
return descriptor.verify(expected, hashAlgorithms)
}
diff --git a/pkg/p9/file.go b/pkg/p9/file.go
index 97e0231d6..8d6af2d6b 100644
--- a/pkg/p9/file.go
+++ b/pkg/p9/file.go
@@ -325,6 +325,12 @@ func (*DisallowServerCalls) Renamed(File, string) {
func DefaultMultiGetAttr(start File, names []string) ([]FullStat, error) {
stats := make([]FullStat, 0, len(names))
parent := start
+ closeParent := func() {
+ if parent != start {
+ _ = parent.Close()
+ }
+ }
+ defer closeParent()
mask := AttrMaskAll()
for i, name := range names {
if len(name) == 0 && i == 0 {
@@ -340,15 +346,14 @@ func DefaultMultiGetAttr(start File, names []string) ([]FullStat, error) {
continue
}
qids, child, valid, attr, err := parent.WalkGetAttr([]string{name})
- if parent != start {
- _ = parent.Close()
- }
if err != nil {
if errors.Is(err, unix.ENOENT) {
return stats, nil
}
return nil, err
}
+ closeParent()
+ parent = child
stats = append(stats, FullStat{
QID: qids[0],
Valid: valid,
@@ -357,13 +362,8 @@ func DefaultMultiGetAttr(start File, names []string) ([]FullStat, error) {
if attr.Mode.FileType() != ModeDirectory {
// Doesn't need to continue if entry is not a dir. Including symlinks
// that cannot be followed.
- _ = child.Close()
break
}
- parent = child
- }
- if parent != start {
- _ = parent.Close()
}
return stats, nil
}
diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go
index 161b451cc..a8f8a9d03 100644
--- a/pkg/p9/handlers.go
+++ b/pkg/p9/handlers.go
@@ -45,6 +45,8 @@ func ExtractErrno(err error) unix.Errno {
// Attempt to unwrap.
switch e := err.(type) {
+ case *errors.Error:
+ return unix.Errno(e.Errno())
case unix.Errno:
return e
case *os.PathError:
diff --git a/pkg/safecopy/BUILD b/pkg/safecopy/BUILD
index db5787302..0a045fc8e 100644
--- a/pkg/safecopy/BUILD
+++ b/pkg/safecopy/BUILD
@@ -19,7 +19,8 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
- "//pkg/syserror",
+ "//pkg/errors",
+ "//pkg/errors/linuxerr",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/safecopy/safecopy.go b/pkg/safecopy/safecopy.go
index df63dd5f1..5e6f903ff 100644
--- a/pkg/safecopy/safecopy.go
+++ b/pkg/safecopy/safecopy.go
@@ -21,7 +21,8 @@ import (
"runtime"
"golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/errors"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
)
// SegvError is returned when a safecopy function receives SIGSEGV.
@@ -137,12 +138,12 @@ func init() {
if err := ReplaceSignalHandler(unix.SIGBUS, addrOfSignalHandler(), &savedSigBusHandler); err != nil {
panic(fmt.Sprintf("Unable to set handler for SIGBUS: %v", err))
}
- syserror.AddErrorUnwrapper(func(e error) (unix.Errno, bool) {
+ linuxerr.AddErrorUnwrapper(func(e error) (*errors.Error, bool) {
switch e.(type) {
case SegvError, BusError, AlignmentError:
- return unix.EFAULT, true
+ return linuxerr.EFAULT, true
default:
- return 0, false
+ return nil, false
}
})
}
diff --git a/pkg/safemem/io.go b/pkg/safemem/io.go
index f039a5c34..9551ca853 100644
--- a/pkg/safemem/io.go
+++ b/pkg/safemem/io.go
@@ -207,58 +207,6 @@ func (r FromIOReader) readToBlock(dst Block, buf []byte) (int, []byte, error) {
return wbn, buf, rerr
}
-// FromIOReaderAt implements Reader for an io.ReaderAt. Does not repeatedly
-// invoke io.ReaderAt.ReadAt because ReadAt is more strict than Read. A partial
-// read indicates an error. This is not thread-safe.
-type FromIOReaderAt struct {
- ReaderAt io.ReaderAt
- Offset int64
-}
-
-// ReadToBlocks implements Reader.ReadToBlocks.
-func (r FromIOReaderAt) ReadToBlocks(dsts BlockSeq) (uint64, error) {
- var buf []byte
- var done uint64
- for !dsts.IsEmpty() {
- dst := dsts.Head()
- var n int
- var err error
- n, buf, err = r.readToBlock(dst, buf)
- done += uint64(n)
- if n != dst.Len() {
- return done, err
- }
- dsts = dsts.Tail()
- if err != nil {
- if dsts.IsEmpty() && err == io.EOF {
- return done, nil
- }
- return done, err
- }
- }
- return done, nil
-}
-
-func (r FromIOReaderAt) readToBlock(dst Block, buf []byte) (int, []byte, error) {
- // io.Reader isn't safecopy-aware, so we have to buffer Blocks that require
- // safecopy.
- if !dst.NeedSafecopy() {
- n, err := r.ReaderAt.ReadAt(dst.ToSlice(), r.Offset)
- r.Offset += int64(n)
- return n, buf, err
- }
- if len(buf) < dst.Len() {
- buf = make([]byte, dst.Len())
- }
- rn, rerr := r.ReaderAt.ReadAt(buf[:dst.Len()], r.Offset)
- r.Offset += int64(rn)
- wbn, wberr := Copy(dst, BlockFromSafeSlice(buf[:rn]))
- if wberr != nil {
- return wbn, buf, wberr
- }
- return wbn, buf, rerr
-}
-
// FromIOWriter implements Writer for an io.Writer by repeatedly invoking
// io.Writer.Write until it returns an error or partial write.
//
diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD
index 7ee237c9f..cfb33a398 100644
--- a/pkg/sentry/control/BUILD
+++ b/pkg/sentry/control/BUILD
@@ -1,17 +1,25 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test", "proto_library")
package(licenses = ["notice"])
+proto_library(
+ name = "control",
+ srcs = ["control.proto"],
+ visibility = ["//visibility:public"],
+)
+
go_library(
name = "control",
srcs = [
"control.go",
+ "events.go",
"fs.go",
"lifecycle.go",
"logging.go",
"pprof.go",
"proc.go",
"state.go",
+ "usage.go",
],
visibility = [
"//:sandbox",
@@ -19,6 +27,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/eventchannel",
"//pkg/fd",
"//pkg/log",
"//pkg/sentry/fdimport",
@@ -39,6 +48,7 @@ go_library(
"//pkg/tcpip/link/sniffer",
"//pkg/urpc",
"//pkg/usermem",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/control/control.proto b/pkg/sentry/control/control.proto
new file mode 100644
index 000000000..72dda3fbc
--- /dev/null
+++ b/pkg/sentry/control/control.proto
@@ -0,0 +1,40 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package gvisor;
+
+// ControlConfig configures the permission of controls.
+message ControlConfig {
+ // Names for individual control URPC service objects.
+ // Any new service object that should be given conditional access should be
+ // named here and conditionally added based on presence in allowed_controls.
+ enum Endpoint {
+ UNKNOWN = 0;
+ EVENTS = 1;
+ FS = 2;
+ LIFECYCLE = 3;
+ LOGGING = 4;
+ PROFILE = 5;
+ USAGE = 6;
+ PROC = 7;
+ STATE = 8;
+ DEBUG = 9;
+ }
+
+ // allowed_controls represents which endpoints may be registered to the
+ // server.
+ repeated Endpoint allowed_controls = 1;
+}
diff --git a/pkg/sentry/control/events.go b/pkg/sentry/control/events.go
new file mode 100644
index 000000000..92e437ae7
--- /dev/null
+++ b/pkg/sentry/control/events.go
@@ -0,0 +1,65 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+ "errors"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/urpc"
+)
+
+// EventsOpts are the arguments for eventchannel-related commands.
+type EventsOpts struct {
+ urpc.FilePayload
+}
+
+// Events is the control server state for eventchannel-related commands.
+type Events struct {
+ emitter eventchannel.Emitter
+}
+
+// AttachDebugEmitter receives a connected unix domain socket FD from the client
+// and establishes it as a new emitter for the sentry eventchannel. Any existing
+// emitters are replaced on a subsequent attach.
+func (e *Events) AttachDebugEmitter(o *EventsOpts, _ *struct{}) error {
+ if len(o.FilePayload.Files) < 1 {
+ return errors.New("no output writer provided")
+ }
+
+ sock, err := o.ReleaseFD(0)
+ if err != nil {
+ return err
+ }
+ sockFD := sock.Release()
+
+ // SocketEmitter takes ownership of sockFD.
+ emitter, err := eventchannel.SocketEmitter(sockFD)
+ if err != nil {
+ return fmt.Errorf("failed to create SocketEmitter for FD %d: %v", sockFD, err)
+ }
+
+ // If there is already a debug emitter, close the old one.
+ if e.emitter != nil {
+ e.emitter.Close()
+ }
+
+ e.emitter = eventchannel.DebugEmitterFrom(emitter)
+
+ // Register the new stream destination.
+ eventchannel.AddEmitter(e.emitter)
+ return nil
+}
diff --git a/pkg/sentry/control/usage.go b/pkg/sentry/control/usage.go
new file mode 100644
index 000000000..cc78d3f45
--- /dev/null
+++ b/pkg/sentry/control/usage.go
@@ -0,0 +1,183 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+ "fmt"
+ "os"
+ "runtime"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/urpc"
+)
+
+// Usage includes usage-related RPC stubs.
+type Usage struct {
+ Kernel *kernel.Kernel
+}
+
+// MemoryUsageOpts contains usage options.
+type MemoryUsageOpts struct {
+ // Full indicates that a full accounting should be done. If Full is not
+ // specified, then a partial accounting will be done, and Unknown will
+ // contain a majority of memory. See Collect for more information.
+ Full bool `json:"Full"`
+}
+
+// MemoryUsage is a memory usage structure.
+type MemoryUsage struct {
+ Unknown uint64 `json:"Unknown"`
+ System uint64 `json:"System"`
+ Anonymous uint64 `json:"Anonymous"`
+ PageCache uint64 `json:"PageCache"`
+ Mapped uint64 `json:"Mapped"`
+ Tmpfs uint64 `json:"Tmpfs"`
+ Ramdiskfs uint64 `json:"Ramdiskfs"`
+ Total uint64 `json:"Total"`
+}
+
+// MemoryUsageFileOpts contains usage file options.
+type MemoryUsageFileOpts struct {
+ // Version is used to ensure both sides agree on the format of the
+ // shared memory buffer.
+ Version uint64 `json:"Version"`
+}
+
+// MemoryUsageFile contains the file handle to the usage file.
+type MemoryUsageFile struct {
+ urpc.FilePayload
+}
+
+// UsageFD returns the file that tracks the memory usage of the application.
+func (u *Usage) UsageFD(opts *MemoryUsageFileOpts, out *MemoryUsageFile) error {
+ // Only support version 1 for now.
+ if opts.Version != 1 {
+ return fmt.Errorf("unsupported version requested: %d", opts.Version)
+ }
+
+ mf := u.Kernel.MemoryFile()
+ *out = MemoryUsageFile{
+ FilePayload: urpc.FilePayload{
+ Files: []*os.File{
+ usage.MemoryAccounting.File,
+ mf.File(),
+ },
+ },
+ }
+
+ return nil
+}
+
+// Collect returns memory used by the sandboxed application.
+func (u *Usage) Collect(opts *MemoryUsageOpts, out *MemoryUsage) error {
+ if opts.Full {
+ // Ensure everything is up to date.
+ if err := u.Kernel.MemoryFile().UpdateUsage(); err != nil {
+ return err
+ }
+
+ // Copy out a snapshot.
+ snapshot, total := usage.MemoryAccounting.Copy()
+ *out = MemoryUsage{
+ System: snapshot.System,
+ Anonymous: snapshot.Anonymous,
+ PageCache: snapshot.PageCache,
+ Mapped: snapshot.Mapped,
+ Tmpfs: snapshot.Tmpfs,
+ Ramdiskfs: snapshot.Ramdiskfs,
+ Total: total,
+ }
+ } else {
+ // Get total usage from the MemoryFile implementation.
+ total, err := u.Kernel.MemoryFile().TotalUsage()
+ if err != nil {
+ return err
+ }
+
+ // The memory accounting is guaranteed to be accurate only when
+ // UpdateUsage is called. If UpdateUsage is not called, then only Mapped
+ // will be up-to-date.
+ snapshot, _ := usage.MemoryAccounting.Copy()
+ *out = MemoryUsage{
+ Unknown: total,
+ Mapped: snapshot.Mapped,
+ Total: total + snapshot.Mapped,
+ }
+
+ }
+
+ return nil
+}
+
+// UsageReduceOpts contains options to Usage.Reduce().
+type UsageReduceOpts struct {
+ // If Wait is true, Reduce blocks until all activity initiated by
+ // Usage.Reduce() has completed.
+ Wait bool `json:"wait"`
+}
+
+// UsageReduceOutput contains output from Usage.Reduce().
+type UsageReduceOutput struct{}
+
+// Reduce requests that the sentry attempt to reduce its memory usage.
+func (u *Usage) Reduce(opts *UsageReduceOpts, out *UsageReduceOutput) error {
+ mf := u.Kernel.MemoryFile()
+ mf.StartEvictions()
+ if opts.Wait {
+ mf.WaitForEvictions()
+ }
+ return nil
+}
+
+// MemoryUsageRecord contains the mapping and platform memory file.
+type MemoryUsageRecord struct {
+ mmap uintptr
+ stats *usage.RTMemoryStats
+ mf os.File
+}
+
+// NewMemoryUsageRecord creates a new MemoryUsageRecord from usageFile and
+// platformFile.
+func NewMemoryUsageRecord(usageFile, platformFile os.File) (*MemoryUsageRecord, error) {
+ mmap, _, e := unix.RawSyscall6(unix.SYS_MMAP, 0, usage.RTMemoryStatsSize, unix.PROT_READ, unix.MAP_SHARED, usageFile.Fd(), 0)
+ if e != 0 {
+ return nil, fmt.Errorf("mmap returned %d, want 0", e)
+ }
+
+ m := MemoryUsageRecord{
+ mmap: mmap,
+ stats: usage.RTMemoryStatsPointer(mmap),
+ mf: platformFile,
+ }
+
+ runtime.SetFinalizer(&m, finalizer)
+ return &m, nil
+}
+
+func finalizer(m *MemoryUsageRecord) {
+ unix.RawSyscall(unix.SYS_MUNMAP, m.mmap, usage.RTMemoryStatsSize, 0)
+}
+
+// Fetch fetches the usage info from a MemoryUsageRecord.
+func (m *MemoryUsageRecord) Fetch() (mapped, unknown, total uint64, err error) {
+ var stat unix.Stat_t
+ if err := unix.Fstat(int(m.mf.Fd()), &stat); err != nil {
+ return 0, 0, 0, err
+ }
+ fmem := uint64(stat.Blocks) * 512
+ return m.stats.RTMapped, fmem, m.stats.RTMapped + fmem, nil
+}
diff --git a/pkg/sentry/devices/memdev/BUILD b/pkg/sentry/devices/memdev/BUILD
index 4c8604d58..66b9ed523 100644
--- a/pkg/sentry/devices/memdev/BUILD
+++ b/pkg/sentry/devices/memdev/BUILD
@@ -15,6 +15,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/rand",
"//pkg/safemem",
"//pkg/sentry/fsimpl/devtmpfs",
@@ -23,7 +24,6 @@ go_library(
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/devices/memdev/full.go b/pkg/sentry/devices/memdev/full.go
index fece3e762..fc702c9f6 100644
--- a/pkg/sentry/devices/memdev/full.go
+++ b/pkg/sentry/devices/memdev/full.go
@@ -16,8 +16,8 @@ package memdev
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -66,12 +66,12 @@ func (fd *fullFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.Rea
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *fullFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.ENOSPC
+ return 0, linuxerr.ENOSPC
}
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *fullFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.ENOSPC
+ return 0, linuxerr.ENOSPC
}
// Seek implements vfs.FileDescriptionImpl.Seek.
diff --git a/pkg/sentry/devices/quotedev/BUILD b/pkg/sentry/devices/quotedev/BUILD
index d09214e3e..ee946610a 100644
--- a/pkg/sentry/devices/quotedev/BUILD
+++ b/pkg/sentry/devices/quotedev/BUILD
@@ -9,8 +9,8 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/sentry/fsimpl/devtmpfs",
"//pkg/sentry/vfs",
- "//pkg/syserror",
],
)
diff --git a/pkg/sentry/devices/quotedev/quotedev.go b/pkg/sentry/devices/quotedev/quotedev.go
index 6114cb724..140856a4a 100644
--- a/pkg/sentry/devices/quotedev/quotedev.go
+++ b/pkg/sentry/devices/quotedev/quotedev.go
@@ -18,9 +18,9 @@ package quotedev
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -35,7 +35,7 @@ type quoteDevice struct{}
// Open implements vfs.Device.Open.
// TODO(b/157161182): Add support for attestation ioctls.
func (quoteDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
// Register registers all devices implemented by this package in vfsObj.
diff --git a/pkg/sentry/devices/ttydev/BUILD b/pkg/sentry/devices/ttydev/BUILD
index b4b6ca38a..ab4cd0b33 100644
--- a/pkg/sentry/devices/ttydev/BUILD
+++ b/pkg/sentry/devices/ttydev/BUILD
@@ -9,8 +9,8 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/sentry/fsimpl/devtmpfs",
"//pkg/sentry/vfs",
- "//pkg/syserror",
],
)
diff --git a/pkg/sentry/devices/ttydev/ttydev.go b/pkg/sentry/devices/ttydev/ttydev.go
index a287c65ca..29b79b5d6 100644
--- a/pkg/sentry/devices/ttydev/ttydev.go
+++ b/pkg/sentry/devices/ttydev/ttydev.go
@@ -18,9 +18,9 @@ package ttydev
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -36,7 +36,7 @@ type ttyDevice struct{}
// Open implements vfs.Device.Open.
func (ttyDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
// Register registers all devices implemented by this package in vfsObj.
diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD
index 58fe1e77c..4e573d249 100644
--- a/pkg/sentry/fs/BUILD
+++ b/pkg/sentry/fs/BUILD
@@ -68,7 +68,6 @@ go_library(
"//pkg/sentry/usage",
"//pkg/state",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go
index a8591052c..e48bd4dba 100644
--- a/pkg/sentry/fs/copy_up.go
+++ b/pkg/sentry/fs/copy_up.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -195,7 +194,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
attrs, err := next.Inode.overlay.lower.UnstableAttr(ctx)
if err != nil {
log.Warningf("copy up failed to get lower attributes: %v", err)
- return syserror.EIO
+ return linuxerr.EIO
}
var childUpperInode *Inode
@@ -211,7 +210,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
childFile, err := parentUpper.Create(ctx, root, next.name, FileFlags{Read: true, Write: true}, attrs.Perms)
if err != nil {
log.Warningf("copy up failed to create file: %v", err)
- return syserror.EIO
+ return linuxerr.EIO
}
defer childFile.DecRef(ctx)
childUpperInode = childFile.Dirent.Inode
@@ -219,13 +218,13 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
case Directory:
if err := parentUpper.CreateDirectory(ctx, root, next.name, attrs.Perms); err != nil {
log.Warningf("copy up failed to create directory: %v", err)
- return syserror.EIO
+ return linuxerr.EIO
}
childUpper, err := parentUpper.Lookup(ctx, next.name)
if err != nil {
werr := fmt.Errorf("copy up failed to lookup directory: %v", err)
cleanupUpper(ctx, parentUpper, next.name, werr)
- return syserror.EIO
+ return linuxerr.EIO
}
defer childUpper.DecRef(ctx)
childUpperInode = childUpper.Inode
@@ -235,17 +234,17 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
link, err := childLower.Readlink(ctx)
if err != nil {
log.Warningf("copy up failed to read symlink value: %v", err)
- return syserror.EIO
+ return linuxerr.EIO
}
if err := parentUpper.CreateLink(ctx, root, link, next.name); err != nil {
log.Warningf("copy up failed to create symlink: %v", err)
- return syserror.EIO
+ return linuxerr.EIO
}
childUpper, err := parentUpper.Lookup(ctx, next.name)
if err != nil {
werr := fmt.Errorf("copy up failed to lookup symlink: %v", err)
cleanupUpper(ctx, parentUpper, next.name, werr)
- return syserror.EIO
+ return linuxerr.EIO
}
defer childUpper.DecRef(ctx)
childUpperInode = childUpper.Inode
@@ -259,14 +258,14 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
if err := copyAttributesLocked(ctx, childUpperInode, next.Inode.overlay.lower); err != nil {
werr := fmt.Errorf("copy up failed to copy up attributes: %v", err)
cleanupUpper(ctx, parentUpper, next.name, werr)
- return syserror.EIO
+ return linuxerr.EIO
}
// Copy the entire file.
if err := copyContentsLocked(ctx, childUpperInode, next.Inode.overlay.lower, attrs.Size); err != nil {
werr := fmt.Errorf("copy up failed to copy up contents: %v", err)
cleanupUpper(ctx, parentUpper, next.name, werr)
- return syserror.EIO
+ return linuxerr.EIO
}
lowerMappable := next.Inode.overlay.lower.Mappable()
@@ -274,7 +273,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
if lowerMappable != nil && upperMappable == nil {
werr := fmt.Errorf("copy up failed: cannot ensure memory mapping coherence")
cleanupUpper(ctx, parentUpper, next.name, werr)
- return syserror.EIO
+ return linuxerr.EIO
}
// Propagate memory mappings to the upper Inode.
diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD
index e28a8961b..7baf26b24 100644
--- a/pkg/sentry/fs/dev/BUILD
+++ b/pkg/sentry/fs/dev/BUILD
@@ -34,7 +34,6 @@ go_library(
"//pkg/sentry/mm",
"//pkg/sentry/pgalloc",
"//pkg/sentry/socket/netstack",
- "//pkg/syserror",
"//pkg/tcpip/link/tun",
"//pkg/usermem",
"//pkg/waiter",
diff --git a/pkg/sentry/fs/dev/full.go b/pkg/sentry/fs/dev/full.go
index deb9c6ad8..6f0c1fc68 100644
--- a/pkg/sentry/fs/dev/full.go
+++ b/pkg/sentry/fs/dev/full.go
@@ -17,9 +17,9 @@ package dev
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -77,5 +77,5 @@ var _ fs.FileOperations = (*fullFileOperations)(nil)
// Write implements FileOperations.Write.
func (*fullFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
- return 0, syserror.ENOSPC
+ return 0, linuxerr.ENOSPC
}
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go
index ad8ff227e..d300a32e0 100644
--- a/pkg/sentry/fs/dirent.go
+++ b/pkg/sentry/fs/dirent.go
@@ -28,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/uniqueid"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
type globalDirentMap struct {
@@ -963,7 +962,7 @@ func (d *Dirent) isMountPointLocked() bool {
func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err error) {
// Did we race with deletion?
if atomic.LoadInt32(&d.deleted) != 0 {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
// Refuse to mount a symlink.
@@ -998,7 +997,7 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err
func (d *Dirent) unmount(ctx context.Context, replacement *Dirent) error {
// Did we race with deletion?
if atomic.LoadInt32(&d.deleted) != 0 {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// Remount our former child in its place.
diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD
index 5c889c861..9f1fe5160 100644
--- a/pkg/sentry/fs/fdpipe/BUILD
+++ b/pkg/sentry/fs/fdpipe/BUILD
@@ -22,7 +22,6 @@ go_library(
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
@@ -46,7 +45,6 @@ go_test(
"//pkg/hostarch",
"//pkg/sentry/contexttest",
"//pkg/sentry/fs",
- "//pkg/syserror",
"//pkg/usermem",
"@com_github_google_uuid//:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go
index f8a29816b..4370cce33 100644
--- a/pkg/sentry/fs/fdpipe/pipe.go
+++ b/pkg/sentry/fs/fdpipe/pipe.go
@@ -29,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -142,7 +141,7 @@ func (p *pipeOperations) Read(ctx context.Context, file *fs.File, dst usermem.IO
n, err := dst.CopyOutFrom(ctx, safemem.FromIOReader{secio.FullReader{p.file}})
total := int64(bufN) + n
if err != nil && isBlockError(err) {
- return total, syserror.ErrWouldBlock
+ return total, linuxerr.ErrWouldBlock
}
return total, err
}
@@ -151,13 +150,13 @@ func (p *pipeOperations) Read(ctx context.Context, file *fs.File, dst usermem.IO
func (p *pipeOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
n, err := src.CopyInTo(ctx, safemem.FromIOWriter{p.file})
if err != nil && isBlockError(err) {
- return n, syserror.ErrWouldBlock
+ return n, linuxerr.ErrWouldBlock
}
return n, err
}
// isBlockError unwraps os errors and checks if they are caused by EAGAIN or
-// EWOULDBLOCK. This is so they can be transformed into syserror.ErrWouldBlock.
+// EWOULDBLOCK. This is so they can be transformed into linuxerr.ErrWouldBlock.
func isBlockError(err error) bool {
if linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err) {
return true
diff --git a/pkg/sentry/fs/fdpipe/pipe_opener.go b/pkg/sentry/fs/fdpipe/pipe_opener.go
index adda19168..e91e1b5cb 100644
--- a/pkg/sentry/fs/fdpipe/pipe_opener.go
+++ b/pkg/sentry/fs/fdpipe/pipe_opener.go
@@ -21,9 +21,9 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// NonBlockingOpener is a generic host file opener used to retry opening host
@@ -40,7 +40,7 @@ func Open(ctx context.Context, opener NonBlockingOpener, flags fs.FileFlags) (fs
p := &pipeOpenState{}
canceled := false
for {
- if file, err := p.TryOpen(ctx, opener, flags); err != syserror.ErrWouldBlock {
+ if file, err := p.TryOpen(ctx, opener, flags); err != linuxerr.ErrWouldBlock {
return file, err
}
@@ -51,7 +51,7 @@ func Open(ctx context.Context, opener NonBlockingOpener, flags fs.FileFlags) (fs
if p.hostFile != nil {
p.hostFile.Close()
}
- return nil, syserror.ErrInterrupted
+ return nil, linuxerr.ErrInterrupted
}
cancel := ctx.SleepStart()
@@ -106,13 +106,13 @@ func (p *pipeOpenState) TryOpen(ctx context.Context, opener NonBlockingOpener, f
}
return newPipeOperations(ctx, opener, flags, f, nil)
- // Handle opening O_WRONLY blocking: convert ENXIO to syserror.ErrWouldBlock.
+ // Handle opening O_WRONLY blocking: convert ENXIO to linuxerr.ErrWouldBlock.
// See TryOpenWriteOnly for more details.
case flags.Write:
return p.TryOpenWriteOnly(ctx, opener)
default:
- // Handle opening O_RDONLY blocking: convert EOF from read to syserror.ErrWouldBlock.
+ // Handle opening O_RDONLY blocking: convert EOF from read to linuxerr.ErrWouldBlock.
// See TryOpenReadOnly for more details.
return p.TryOpenReadOnly(ctx, opener)
}
@@ -120,7 +120,7 @@ func (p *pipeOpenState) TryOpen(ctx context.Context, opener NonBlockingOpener, f
// TryOpenReadOnly tries to open a host pipe read only but only returns a fs.File when
// there is a coordinating writer. Call TryOpenReadOnly repeatedly on the same pipeOpenState
-// until syserror.ErrWouldBlock is no longer returned.
+// until linuxerr.ErrWouldBlock is no longer returned.
//
// How it works:
//
@@ -150,7 +150,7 @@ func (p *pipeOpenState) TryOpenReadOnly(ctx context.Context, opener NonBlockingO
if n == 0 {
// EOF means that we're not ready yet.
if rerr == nil || rerr == io.EOF {
- return nil, syserror.ErrWouldBlock
+ return nil, linuxerr.ErrWouldBlock
}
// Any error that is not EWOULDBLOCK also means we're not
// ready yet, and probably never will be ready. In this
@@ -175,16 +175,16 @@ func (p *pipeOpenState) TryOpenReadOnly(ctx context.Context, opener NonBlockingO
// TryOpenWriteOnly tries to open a host pipe write only but only returns a fs.File when
// there is a coordinating reader. Call TryOpenWriteOnly repeatedly on the same pipeOpenState
-// until syserror.ErrWouldBlock is no longer returned.
+// until linuxerr.ErrWouldBlock is no longer returned.
//
// How it works:
//
// Opening a pipe write only will return ENXIO until readers are available. Converts the ENXIO
-// to an syserror.ErrWouldBlock, to tell callers to retry.
+// to an linuxerr.ErrWouldBlock, to tell callers to retry.
func (*pipeOpenState) TryOpenWriteOnly(ctx context.Context, opener NonBlockingOpener) (*pipeOperations, error) {
hostFile, err := opener.NonBlockingOpen(ctx, fs.PermMask{Write: true})
if unwrapError(err) == unix.ENXIO {
- return nil, syserror.ErrWouldBlock
+ return nil, linuxerr.ErrWouldBlock
}
if err != nil {
return nil, err
diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go
index 89d8be741..e1587288e 100644
--- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go
+++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go
@@ -30,7 +30,6 @@ import (
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -146,18 +145,18 @@ func TestTryOpen(t *testing.T) {
err: unix.ENOENT,
},
{
- desc: "Blocking Write only returns with syserror.ErrWouldBlock",
+ desc: "Blocking Write only returns with linuxerr.ErrWouldBlock",
makePipe: true,
flags: fs.FileFlags{Write: true},
expectFile: false,
- err: syserror.ErrWouldBlock,
+ err: linuxerr.ErrWouldBlock,
},
{
- desc: "Blocking Read only returns with syserror.ErrWouldBlock",
+ desc: "Blocking Read only returns with linuxerr.ErrWouldBlock",
makePipe: true,
flags: fs.FileFlags{Read: true},
expectFile: false,
- err: syserror.ErrWouldBlock,
+ err: linuxerr.ErrWouldBlock,
},
} {
name := pipename()
@@ -316,7 +315,7 @@ func TestCopiedReadAheadBuffer(t *testing.T) {
// another writer comes along. This means we can open the same pipe write only
// with no problems + write to it, given that opener.Open already tried to open
// the pipe RDONLY and succeeded, which we know happened if TryOpen returns
- // syserror.ErrwouldBlock.
+ // linuxerr.ErrwouldBlock.
//
// This simulates the open(RDONLY) <-> open(WRONLY)+write race we care about, but
// does not cause our test to be racy (which would be terrible).
@@ -328,8 +327,8 @@ func TestCopiedReadAheadBuffer(t *testing.T) {
pipeOps.Release(ctx)
t.Fatalf("open(%s, %o) got file, want nil", name, unix.O_RDONLY)
}
- if err != syserror.ErrWouldBlock {
- t.Fatalf("open(%s, %o) got error %v, want %v", name, unix.O_RDONLY, err, syserror.ErrWouldBlock)
+ if err != linuxerr.ErrWouldBlock {
+ t.Fatalf("open(%s, %o) got error %v, want %v", name, unix.O_RDONLY, err, linuxerr.ErrWouldBlock)
}
// Then open the same pipe write only and write some bytes to it. The next
diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go
index 4c8905a7e..63900e766 100644
--- a/pkg/sentry/fs/fdpipe/pipe_test.go
+++ b/pkg/sentry/fs/fdpipe/pipe_test.go
@@ -28,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -238,7 +237,7 @@ func TestPipeRequest(t *testing.T) {
context: &Readv{Dst: usermem.BytesIOSequence(make([]byte, 10))},
flags: fs.FileFlags{Read: true},
keepOpenPartner: true,
- err: syserror.ErrWouldBlock,
+ err: linuxerr.ErrWouldBlock,
},
{
desc: "Writev on pipe from empty buffer returns nil",
@@ -410,8 +409,8 @@ func TestPipeReadsAccumulate(t *testing.T) {
n, err := p.Read(ctx, file, iov, 0)
total := n
iov = iov.DropFirst64(n)
- if err != syserror.ErrWouldBlock {
- t.Fatalf("Readv got error %v, want %v", err, syserror.ErrWouldBlock)
+ if err != linuxerr.ErrWouldBlock {
+ t.Fatalf("Readv got error %v, want %v", err, linuxerr.ErrWouldBlock)
}
// Write a few more bytes to allow us to read more/accumulate.
@@ -479,8 +478,8 @@ func TestPipeWritesAccumulate(t *testing.T) {
}
iov := usermem.BytesIOSequence(writeBuffer)
n, err := p.Write(ctx, file, iov, 0)
- if err != syserror.ErrWouldBlock {
- t.Fatalf("Writev got error %v, want %v", err, syserror.ErrWouldBlock)
+ if err != linuxerr.ErrWouldBlock {
+ t.Fatalf("Writev got error %v, want %v", err, linuxerr.ErrWouldBlock)
}
if n != int64(pipeSize) {
t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize)
diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go
index 57f904801..df04f044d 100644
--- a/pkg/sentry/fs/file.go
+++ b/pkg/sentry/fs/file.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/amutex"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsmetric"
@@ -27,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/uniqueid"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -195,10 +195,10 @@ func (f *File) EventUnregister(e *waiter.Entry) {
// offset to the value returned by f.FileOperations.Seek if the operation
// is successful.
//
-// Returns syserror.ErrInterrupted if seeking was interrupted.
+// Returns linuxerr.ErrInterrupted if seeking was interrupted.
func (f *File) Seek(ctx context.Context, whence SeekWhence, offset int64) (int64, error) {
if !f.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
defer f.mu.Unlock()
@@ -217,10 +217,10 @@ func (f *File) Seek(ctx context.Context, whence SeekWhence, offset int64) (int64
// Readdir unconditionally updates the access time on the File's Inode,
// see fs/readdir.c:iterate_dir.
//
-// Returns syserror.ErrInterrupted if reading was interrupted.
+// Returns linuxerr.ErrInterrupted if reading was interrupted.
func (f *File) Readdir(ctx context.Context, serializer DentrySerializer) error {
if !f.mu.Lock(ctx) {
- return syserror.ErrInterrupted
+ return linuxerr.ErrInterrupted
}
defer f.mu.Unlock()
@@ -232,13 +232,13 @@ func (f *File) Readdir(ctx context.Context, serializer DentrySerializer) error {
// Readv calls f.FileOperations.Read with f as the File, advancing the file
// offset if f.FileOperations.Read returns bytes read > 0.
//
-// Returns syserror.ErrInterrupted if reading was interrupted.
+// Returns linuxerr.ErrInterrupted if reading was interrupted.
func (f *File) Readv(ctx context.Context, dst usermem.IOSequence) (int64, error) {
start := fsmetric.StartReadWait()
defer fsmetric.FinishReadWait(fsmetric.ReadWait, start)
if !f.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
fsmetric.Reads.Increment()
@@ -260,7 +260,7 @@ func (f *File) Preadv(ctx context.Context, dst usermem.IOSequence, offset int64)
defer fsmetric.FinishReadWait(fsmetric.ReadWait, start)
if !f.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
fsmetric.Reads.Increment()
@@ -276,10 +276,10 @@ func (f *File) Preadv(ctx context.Context, dst usermem.IOSequence, offset int64)
// unavoidably racy for network file systems. Writev also truncates src
// to avoid overrunning the current file size limit if necessary.
//
-// Returns syserror.ErrInterrupted if writing was interrupted.
+// Returns linuxerr.ErrInterrupted if writing was interrupted.
func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error) {
if !f.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
// Handle append mode.
@@ -297,7 +297,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
case ok && limit == 0:
unlockAppendMu()
f.mu.Unlock()
- return 0, syserror.ErrExceedsFileSizeLimit
+ return 0, linuxerr.ErrExceedsFileSizeLimit
case ok:
src = src.TakeFirst64(limit)
}
@@ -335,7 +335,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
limit, ok := f.checkLimit(ctx, offset)
switch {
case ok && limit == 0:
- return 0, syserror.ErrExceedsFileSizeLimit
+ return 0, linuxerr.ErrExceedsFileSizeLimit
case ok:
src = src.TakeFirst64(limit)
}
@@ -352,7 +352,7 @@ func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
if err != nil {
// This is an odd error, we treat it as evidence that
// something is terribly wrong with the filesystem.
- return syserror.EIO
+ return linuxerr.EIO
}
// Update the offset.
@@ -381,10 +381,10 @@ func (f *File) checkLimit(ctx context.Context, offset int64) (int64, bool) {
// Fsync calls f.FileOperations.Fsync with f as the File.
//
-// Returns syserror.ErrInterrupted if syncing was interrupted.
+// Returns linuxerr.ErrInterrupted if syncing was interrupted.
func (f *File) Fsync(ctx context.Context, start int64, end int64, syncType SyncType) error {
if !f.mu.Lock(ctx) {
- return syserror.ErrInterrupted
+ return linuxerr.ErrInterrupted
}
defer f.mu.Unlock()
@@ -393,10 +393,10 @@ func (f *File) Fsync(ctx context.Context, start int64, end int64, syncType SyncT
// Flush calls f.FileOperations.Flush with f as the File.
//
-// Returns syserror.ErrInterrupted if syncing was interrupted.
+// Returns linuxerr.ErrInterrupted if syncing was interrupted.
func (f *File) Flush(ctx context.Context) error {
if !f.mu.Lock(ctx) {
- return syserror.ErrInterrupted
+ return linuxerr.ErrInterrupted
}
defer f.mu.Unlock()
@@ -405,10 +405,10 @@ func (f *File) Flush(ctx context.Context) error {
// ConfigureMMap calls f.FileOperations.ConfigureMMap with f as the File.
//
-// Returns syserror.ErrInterrupted if interrupted.
+// Returns linuxerr.ErrInterrupted if interrupted.
func (f *File) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
if !f.mu.Lock(ctx) {
- return syserror.ErrInterrupted
+ return linuxerr.ErrInterrupted
}
defer f.mu.Unlock()
@@ -417,10 +417,10 @@ func (f *File) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
// UnstableAttr calls f.FileOperations.UnstableAttr with f as the File.
//
-// Returns syserror.ErrInterrupted if interrupted.
+// Returns linuxerr.ErrInterrupted if interrupted.
func (f *File) UnstableAttr(ctx context.Context) (UnstableAttr, error) {
if !f.mu.Lock(ctx) {
- return UnstableAttr{}, syserror.ErrInterrupted
+ return UnstableAttr{}, linuxerr.ErrInterrupted
}
defer f.mu.Unlock()
@@ -495,7 +495,7 @@ type lockedReader struct {
// Read implements io.Reader.Read.
func (r *lockedReader) Read(buf []byte) (int, error) {
if r.Ctx.Interrupted() {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
n, err := r.File.FileOperations.Read(r.Ctx, r.File, usermem.BytesIOSequence(buf), r.Offset)
r.Offset += n
@@ -505,7 +505,7 @@ func (r *lockedReader) Read(buf []byte) (int, error) {
// ReadAt implements io.Reader.ReadAt.
func (r *lockedReader) ReadAt(buf []byte, offset int64) (int, error) {
if r.Ctx.Interrupted() {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
n, err := r.File.FileOperations.Read(r.Ctx, r.File, usermem.BytesIOSequence(buf), offset)
return int(n), err
@@ -530,7 +530,7 @@ type lockedWriter struct {
// Write implements io.Writer.Write.
func (w *lockedWriter) Write(buf []byte) (int, error) {
if w.Ctx.Interrupted() {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
n, err := w.WriteAt(buf, w.Offset)
w.Offset += int64(n)
@@ -549,7 +549,7 @@ func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) {
// contract. Enforce that here.
for written < len(buf) {
if w.Ctx.Interrupted() {
- return written, syserror.ErrInterrupted
+ return written, linuxerr.ErrInterrupted
}
var n int64
n, err = w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf[written:]), offset+int64(written))
diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go
index 6ec721022..ce47c3907 100644
--- a/pkg/sentry/fs/file_operations.go
+++ b/pkg/sentry/fs/file_operations.go
@@ -120,7 +120,7 @@ type FileOperations interface {
// Files with !FileFlags.Pwrite.
//
// If only part of src could be written, Write must return an error
- // indicating why (e.g. syserror.ErrWouldBlock).
+ // indicating why (e.g. linuxerr.ErrWouldBlock).
//
// Write does not check permissions nor flags.
//
diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go
index 06c07c807..031cd33ce 100644
--- a/pkg/sentry/fs/file_overlay.go
+++ b/pkg/sentry/fs/file_overlay.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -246,7 +245,7 @@ func (f *overlayFileOperations) onTop(ctx context.Context, file *File, fn func(*
// Something very wrong; return a generic filesystem
// error to avoid propagating internals.
f.upperMu.Unlock()
- return syserror.EIO
+ return linuxerr.EIO
}
// Save upper file.
diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD
index 6bf2d51cb..1a59800ea 100644
--- a/pkg/sentry/fs/fsutil/BUILD
+++ b/pkg/sentry/fs/fsutil/BUILD
@@ -90,7 +90,6 @@ go_library(
"//pkg/sentry/usage",
"//pkg/state",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go
index 00b3bb29b..3ece73b81 100644
--- a/pkg/sentry/fs/fsutil/file.go
+++ b/pkg/sentry/fs/fsutil/file.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -232,12 +231,12 @@ type FileNoSplice struct{}
// WriteTo implements fs.FileOperations.WriteTo.
func (FileNoSplice) WriteTo(context.Context, *fs.File, io.Writer, int64, bool) (int64, error) {
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// ReadFrom implements fs.FileOperations.ReadFrom.
func (FileNoSplice) ReadFrom(context.Context, *fs.File, io.Reader, int64) (int64, error) {
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// DirFileOperations implements most of fs.FileOperations for directories,
@@ -255,12 +254,12 @@ type DirFileOperations struct {
// Read implements fs.FileOperations.Read
func (*DirFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// Write implements fs.FileOperations.Write.
func (*DirFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// StaticDirFileOperations implements fs.FileOperations for directories with
diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go
index 7c2de04c1..06a994193 100644
--- a/pkg/sentry/fs/fsutil/inode.go
+++ b/pkg/sentry/fs/fsutil/inode.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -167,7 +166,7 @@ func (i *InodeSimpleAttributes) DropLink() {
// StatFS implements fs.InodeOperations.StatFS.
func (i *InodeSimpleAttributes) StatFS(context.Context) (fs.Info, error) {
if i.fsType == 0 {
- return fs.Info{}, syserror.ENOSYS
+ return fs.Info{}, linuxerr.ENOSYS
}
return fs.Info{Type: i.fsType}, nil
}
@@ -294,7 +293,7 @@ type InodeNoStatFS struct{}
// StatFS implements fs.InodeOperations.StatFS.
func (InodeNoStatFS) StatFS(context.Context) (fs.Info, error) {
- return fs.Info{}, syserror.ENOSYS
+ return fs.Info{}, linuxerr.ENOSYS
}
// InodeStaticFileGetter implements GetFile for a file with static contents.
@@ -401,7 +400,7 @@ type InodeIsDirTruncate struct{}
// Truncate implements fs.InodeOperations.Truncate.
func (InodeIsDirTruncate) Truncate(context.Context, *fs.Inode, int64) error {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
// InodeNoopTruncate implements fs.InodeOperations.Truncate as a noop.
@@ -425,7 +424,7 @@ type InodeNotOpenable struct{}
// GetFile implements fs.InodeOperations.GetFile.
func (InodeNotOpenable) GetFile(context.Context, *fs.Dirent, fs.FileFlags) (*fs.File, error) {
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
// InodeNotVirtual can be used by Inodes that are not virtual.
@@ -529,5 +528,5 @@ type InodeIsDirAllocate struct{}
// Allocate implements fs.InodeOperations.Allocate.
func (InodeIsDirAllocate) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD
index c08301d19..ee2f287d9 100644
--- a/pkg/sentry/fs/gofer/BUILD
+++ b/pkg/sentry/fs/gofer/BUILD
@@ -26,6 +26,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors",
"//pkg/errors/linuxerr",
"//pkg/fd",
"//pkg/hostarch",
@@ -48,7 +49,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/unet",
"//pkg/usermem",
"//pkg/waiter",
@@ -63,10 +63,10 @@ go_test(
library = ":gofer",
deps = [
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/p9",
"//pkg/p9/p9test",
"//pkg/sentry/contexttest",
"//pkg/sentry/fs",
- "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go
index 73d80d9b5..62a517cd7 100644
--- a/pkg/sentry/fs/gofer/file.go
+++ b/pkg/sentry/fs/gofer/file.go
@@ -20,6 +20,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/p9"
@@ -28,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/fsmetric"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -226,7 +226,7 @@ func (f *fileOperations) maybeSync(ctx context.Context, file *fs.File, offset, n
func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
if fs.IsDir(file.Dirent.Inode.StableAttr) {
// Not all remote file systems enforce this so this client does.
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
var (
@@ -294,7 +294,7 @@ func (f *fileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IO
if fs.IsDir(file.Dirent.Inode.StableAttr) {
// Not all remote file systems enforce this so this client does.
f.incrementReadCounters(start)
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
if f.inodeOperations.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) {
diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go
index 546ee7d04..4924debeb 100644
--- a/pkg/sentry/fs/gofer/gofer_test.go
+++ b/pkg/sentry/fs/gofer/gofer_test.go
@@ -19,8 +19,8 @@ import (
"testing"
"time"
- "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/p9/p9test"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
@@ -97,7 +97,7 @@ func TestLookup(t *testing.T) {
},
{
name: "mock Walk fails (function fails)",
- want: unix.ENOENT,
+ want: linuxerr.ENOENT,
},
}
@@ -123,7 +123,7 @@ func TestLookup(t *testing.T) {
var newInodeOperations fs.InodeOperations
if dirent != nil {
if dirent.IsNegative() {
- err = unix.ENOENT
+ err = linuxerr.ENOENT
} else {
newInodeOperations = dirent.Inode.InodeOperations
}
@@ -131,9 +131,11 @@ func TestLookup(t *testing.T) {
// Check return values.
if err != test.want {
+ t.Logf("err: %v %T", err, err)
t.Errorf("Lookup got err %v, want %v", err, test.want)
}
if err == nil && newInodeOperations == nil {
+ t.Logf("err: %v %T", err, err)
t.Errorf("Lookup got non-nil err and non-nil node, wanted at least one non-nil")
}
})
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
index 9ff64a8b6..c3856094f 100644
--- a/pkg/sentry/fs/gofer/inode.go
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -20,6 +20,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ gErr "gvisor.dev/gvisor/pkg/errors"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
@@ -32,7 +33,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/host"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// inodeOperations implements fs.InodeOperations.
@@ -719,12 +719,12 @@ func (i *inodeOperations) configureMMap(file *fs.File, opts *memmap.MMapOpts) er
}
func init() {
- syserror.AddErrorUnwrapper(func(err error) (unix.Errno, bool) {
+ linuxerr.AddErrorUnwrapper(func(err error) (*gErr.Error, bool) {
if _, ok := err.(p9.ErrSocket); ok {
// Treat as an I/O error.
- return unix.EIO, true
+ return linuxerr.EIO, true
}
- return 0, false
+ return nil, false
})
}
diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go
index 88d83060c..2f8769f1e 100644
--- a/pkg/sentry/fs/gofer/path.go
+++ b/pkg/sentry/fs/gofer/path.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.dev/gvisor/pkg/syserror"
)
// maxFilenameLen is the maximum length of a filename. This is dictated by 9P's
@@ -60,7 +59,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
if cp.cacheNegativeDirents() {
return fs.NewNegativeDirent(name), nil
}
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
i.readdirMu.Unlock()
}
@@ -74,7 +73,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
// is created over it.
return fs.NewNegativeDirent(name), nil
}
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
return nil, err
}
@@ -169,7 +168,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string
hostFile.Close()
}
unopened.close(ctx)
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
qid := qids[0]
diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD
index 24fc6305c..921612e9c 100644
--- a/pkg/sentry/fs/host/BUILD
+++ b/pkg/sentry/fs/host/BUILD
@@ -52,7 +52,6 @@ go_library(
"//pkg/sentry/uniqueid",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/tcpip",
"//pkg/unet",
"//pkg/usermem",
diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go
index 77c08a7ce..1d0d95634 100644
--- a/pkg/sentry/fs/host/file.go
+++ b/pkg/sentry/fs/host/file.go
@@ -28,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -201,7 +200,7 @@ func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.I
writer := fd.NewReadWriter(f.iops.fileState.FD())
n, err := src.CopyInTo(ctx, safemem.FromIOWriter{writer})
if isBlockError(err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
return n, err
}
@@ -232,7 +231,7 @@ func (f *fileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IO
if n != 0 {
err = nil
} else {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
}
return n, err
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 5f6af2067..92d58e3e9 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -220,7 +219,7 @@ func (i *inodeOperations) Release(context.Context) {
// Lookup implements fs.InodeOperations.Lookup.
func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
// Create implements fs.InodeOperations.Create.
@@ -400,7 +399,7 @@ func (i *inodeOperations) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error
// StatFS implements fs.InodeOperations.StatFS.
func (i *inodeOperations) StatFS(context.Context) (fs.Info, error) {
- return fs.Info{}, syserror.ENOSYS
+ return fs.Info{}, linuxerr.ENOSYS
}
// AddLink implements fs.InodeOperations.AddLink.
diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
index 6f38b25c3..4e561c5ed 100644
--- a/pkg/sentry/fs/host/tty.go
+++ b/pkg/sentry/fs/host/tty.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/unimpl"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -327,7 +326,7 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e
// If the signal is SIGTTIN, then we are attempting to read
// from the TTY. Don't send the signal and return EIO.
if sig == linux.SIGTTIN {
- return syserror.EIO
+ return linuxerr.EIO
}
// Otherwise, we are writing or changing terminal state. This is allowed.
@@ -336,7 +335,7 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e
// If the process group is an orphan, return EIO.
if pg.IsOrphan() {
- return syserror.EIO
+ return linuxerr.EIO
}
// Otherwise, send the signal to the process group and return ERESTARTSYS.
@@ -349,7 +348,7 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e
//
// Linux ignores the result of kill_pgrp().
_ = pg.SendSignal(kernel.SignalInfoPriv(sig))
- return syserror.ERESTARTSYS
+ return linuxerr.ERESTARTSYS
}
// LINT.ThenChange(../../fsimpl/host/tty.go)
diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go
index e7db79189..f2a33cc14 100644
--- a/pkg/sentry/fs/host/util.go
+++ b/pkg/sentry/fs/host/util.go
@@ -96,7 +96,7 @@ type dirInfo struct {
// LINT.IfChange
// isBlockError unwraps os errors and checks if they are caused by EAGAIN or
-// EWOULDBLOCK. This is so they can be transformed into syserror.ErrWouldBlock.
+// EWOULDBLOCK. This is so they can be transformed into linuxerr.ErrWouldBlock.
func isBlockError(err error) bool {
if linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err) {
return true
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index ec204e5cf..2c6b9e9db 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Inode is a file system object that can be simultaneously referenced by different
@@ -357,7 +356,7 @@ func (i *Inode) SetTimestamps(ctx context.Context, d *Dirent, ts TimeSpec) error
// Truncate calls i.InodeOperations.Truncate with i as the Inode.
func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error {
if IsDir(i.StableAttr) {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if i.overlay != nil {
diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go
index 98e9fb2b1..0f8022906 100644
--- a/pkg/sentry/fs/inode_operations.go
+++ b/pkg/sentry/fs/inode_operations.go
@@ -66,7 +66,7 @@ type InodeOperations interface {
//
// * A nil Dirent and a non-nil error. If the reason that Lookup failed
// was because the name does not exist under Inode, then must return
- // syserror.ENOENT.
+ // linuxerr.ENOENT.
//
// * If name does not exist under dir and the file system wishes this
// fact to be cached, a non-nil Dirent containing a nil Inode and a
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index c47b9ce58..21ad7fa69 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.dev/gvisor/pkg/syserror"
)
func overlayHasWhiteout(ctx context.Context, parent *Inode, name string) bool {
@@ -103,7 +102,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
// Upper fs is not OK with a negative Dirent
// being cached in the Dirent tree, so don't
// return one.
- return nil, false, syserror.ENOENT
+ return nil, false, linuxerr.ENOENT
}
entry, err := newOverlayEntry(ctx, upperInode, nil, false)
if err != nil {
@@ -165,7 +164,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
if negativeUpperChild {
return NewNegativeDirent(name), false, nil
}
- return nil, false, syserror.ENOENT
+ return nil, false, linuxerr.ENOENT
}
// Did we find a lower Inode? Remember this because we may decide we don't
diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go
index ee28b0f99..51cd6cd37 100644
--- a/pkg/sentry/fs/inotify.go
+++ b/pkg/sentry/fs/inotify.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/uniqueid"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -141,7 +140,7 @@ func (i *Inotify) Read(ctx context.Context, _ *File, dst usermem.IOSequence, _ i
if i.events.Empty() {
// Nothing to read yet, tell caller to block.
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
var writeLen int64
@@ -179,7 +178,7 @@ func (i *Inotify) Read(ctx context.Context, _ *File, dst usermem.IOSequence, _ i
// WriteTo implements FileOperations.WriteTo.
func (*Inotify) WriteTo(context.Context, *File, io.Writer, int64, bool) (int64, error) {
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// Fsync implements FileOperations.Fsync.
@@ -189,7 +188,7 @@ func (*Inotify) Fsync(context.Context, *File, int64, int64, SyncType) error {
// ReadFrom implements FileOperations.ReadFrom.
func (*Inotify) ReadFrom(context.Context, *File, io.Reader, int64) (int64, error) {
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// Flush implements FileOperations.Flush.
diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD
index e6d74b949..bc75ae505 100644
--- a/pkg/sentry/fs/proc/BUILD
+++ b/pkg/sentry/fs/proc/BUILD
@@ -50,7 +50,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/usage",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/tcpip/header",
"//pkg/tcpip/network/ipv4",
"//pkg/usermem",
diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go
index e90da225a..e68bb46c0 100644
--- a/pkg/sentry/fs/proc/fds.go
+++ b/pkg/sentry/fs/proc/fds.go
@@ -20,12 +20,12 @@ import (
"strconv"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
// LINT.IfChange
@@ -37,7 +37,7 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF
n, err := strconv.ParseUint(p, 10, 64)
if err != nil {
// Not found.
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
var file *fs.File
@@ -48,7 +48,7 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF
}
})
if file == nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
return toInode(file, fdFlags), nil
}
diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go
index 546b57287..dc8211871 100644
--- a/pkg/sentry/fs/proc/proc.go
+++ b/pkg/sentry/fs/proc/proc.go
@@ -28,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
// LINT.IfChange
@@ -125,7 +124,7 @@ func (s *self) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
if t := kernel.TaskFromContext(ctx); t != nil {
tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
if tgid == 0 {
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
return strconv.FormatUint(uint64(tgid), 10), nil
}
@@ -149,7 +148,7 @@ func (s *threadSelf) Readlink(ctx context.Context, inode *fs.Inode) (string, err
tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
tid := s.pidns.IDOfTask(t)
if tid == 0 || tgid == 0 {
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
return fmt.Sprintf("%d/task/%d", tgid, tid), nil
}
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
index edd62b857..89a799b21 100644
--- a/pkg/sentry/fs/proc/task.go
+++ b/pkg/sentry/fs/proc/task.go
@@ -35,7 +35,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -182,7 +181,7 @@ func (f *subtasksFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dentry
tasks := f.t.ThreadGroup().MemberIDs(f.pidns)
if len(tasks) == 0 {
- return offset, syserror.ENOENT
+ return offset, linuxerr.ENOENT
}
if offset == 0 {
@@ -234,15 +233,15 @@ var _ fs.FileOperations = (*subtasksFile)(nil)
func (s *subtasks) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) {
tid, err := strconv.ParseUint(p, 10, 32)
if err != nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
task := s.p.pidns.TaskWithID(kernel.ThreadID(tid))
if task == nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
if task.ThreadGroup() != s.t.ThreadGroup() {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
td := s.p.newTaskDir(ctx, task, dir.MountSource, false)
@@ -479,7 +478,7 @@ func (m *memDataFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen
return int64(n), nil
}
if readErr != nil {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
return 0, nil
}
diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD
index b46567cf8..bfff010c5 100644
--- a/pkg/sentry/fs/ramfs/BUILD
+++ b/pkg/sentry/fs/ramfs/BUILD
@@ -21,7 +21,6 @@ go_library(
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/socket/unix/transport",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
],
diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go
index 33023af77..b1fadee7a 100644
--- a/pkg/sentry/fs/ramfs/dir.go
+++ b/pkg/sentry/fs/ramfs/dir.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// CreateOps represents operations to create different file types.
@@ -284,9 +283,9 @@ func (d *Dir) walkLocked(ctx context.Context, p string) (*fs.Inode, error) {
return inode, nil
}
- // fs.InodeOperations.Lookup returns syserror.ENOENT if p
+ // fs.InodeOperations.Lookup returns linuxerr.ENOENT if p
// does not exist.
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
// createInodeOperationsCommon creates a new child node at this dir by calling
diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go
index fff4befb2..266140f6f 100644
--- a/pkg/sentry/fs/splice.go
+++ b/pkg/sentry/fs/splice.go
@@ -20,7 +20,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Splice moves data to this file, directly from another.
@@ -55,26 +54,26 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
case dst.UniqueID < src.UniqueID:
// Acquire dst first.
if !dst.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
if !src.mu.Lock(ctx) {
dst.mu.Unlock()
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
case dst.UniqueID > src.UniqueID:
// Acquire src first.
if !src.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
if !dst.mu.Lock(ctx) {
src.mu.Unlock()
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
case dst.UniqueID == src.UniqueID:
// Acquire only one lock; it's the same file. This is a
// bit of a edge case, but presumably it's possible.
if !dst.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
srcLock = false // Only need one unlock.
}
@@ -84,13 +83,13 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
case dstLock:
// Acquire only dst.
if !dst.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
opts.DstStart = dst.offset // Safe: locked.
case srcLock:
// Acquire only src.
if !src.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
+ return 0, linuxerr.ErrInterrupted
}
opts.SrcStart = src.offset // Safe: locked.
}
@@ -108,7 +107,7 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
limit, ok := dst.checkLimit(ctx, opts.DstStart)
switch {
case ok && limit == 0:
- err = syserror.ErrExceedsFileSizeLimit
+ err = linuxerr.ErrExceedsFileSizeLimit
case ok && limit < opts.Length:
opts.Length = limit // Cap the write.
}
diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD
index 0148b33cf..e61115932 100644
--- a/pkg/sentry/fs/timerfd/BUILD
+++ b/pkg/sentry/fs/timerfd/BUILD
@@ -14,7 +14,6 @@ go_library(
"//pkg/sentry/fs/anon",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/kernel/time",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go
index 093a14c1f..1c8518d71 100644
--- a/pkg/sentry/fs/timerfd/timerfd.go
+++ b/pkg/sentry/fs/timerfd/timerfd.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/anon"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -134,7 +133,7 @@ func (t *TimerOperations) Read(ctx context.Context, file *fs.File, dst usermem.I
}
return sizeofUint64, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// Write implements fs.FileOperations.Write.
diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD
index 5933cb67b..9e9dc06f3 100644
--- a/pkg/sentry/fs/tty/BUILD
+++ b/pkg/sentry/fs/tty/BUILD
@@ -31,7 +31,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/unimpl",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go
index 3242dcb6a..5716e2ee9 100644
--- a/pkg/sentry/fs/tty/dir.go
+++ b/pkg/sentry/fs/tty/dir.go
@@ -29,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -155,12 +154,12 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str
n, err := strconv.ParseUint(name, 10, 32)
if err != nil {
// Not found.
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
s, ok := d.replicas[uint32(n)]
if !ok {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
s.IncRef()
@@ -235,7 +234,7 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e
n := d.next
if n == math.MaxUint32 {
- return nil, syserror.ENOMEM
+ return nil, linuxerr.ENOMEM
}
if _, ok := d.replicas[n]; ok {
@@ -335,10 +334,10 @@ func (df *dirFileOperations) Readdir(ctx context.Context, file *fs.File, seriali
// Read implements FileOperations.Read
func (df *dirFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// Write implements FileOperations.Write.
func (df *dirFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go
index 3ba02c218..f9fca6d8e 100644
--- a/pkg/sentry/fs/tty/line_discipline.go
+++ b/pkg/sentry/fs/tty/line_discipline.go
@@ -20,10 +20,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -193,7 +193,7 @@ func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSeque
}
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) {
@@ -207,7 +207,7 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ
l.replicaWaiter.Notify(waiter.ReadableEvents)
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, args arch.SyscallArguments) error {
@@ -228,7 +228,7 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ
}
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) {
@@ -242,7 +242,7 @@ func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSeq
l.masterWaiter.Notify(waiter.ReadableEvents)
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// transformer is a helper interface to make it easier to stateify queue.
diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go
index 11d6c15d0..25d3c887e 100644
--- a/pkg/sentry/fs/tty/queue.go
+++ b/pkg/sentry/fs/tty/queue.go
@@ -17,12 +17,12 @@ package tty
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -110,7 +110,7 @@ func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipl
defer q.mu.Unlock()
if !q.readable {
- return 0, false, syserror.ErrWouldBlock
+ return 0, false, linuxerr.ErrWouldBlock
}
if dst.NumBytes() > canonMaxBytes {
@@ -155,7 +155,7 @@ func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscip
room := waitBufMaxBytes - q.waitBufLen
// If out of room, return EAGAIN.
if room == 0 && copyLen > 0 {
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// Cap the size of the wait buffer.
if copyLen > room {
diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD
index 4acc73ee0..23b5508fd 100644
--- a/pkg/sentry/fs/user/BUILD
+++ b/pkg/sentry/fs/user/BUILD
@@ -19,7 +19,6 @@ go_library(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fs/user/path.go b/pkg/sentry/fs/user/path.go
index f6eaab2bd..67a9adfd7 100644
--- a/pkg/sentry/fs/user/path.go
+++ b/pkg/sentry/fs/user/path.go
@@ -28,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// ResolveExecutablePath resolves the given executable name given the working
@@ -81,7 +80,7 @@ func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name s
root := fs.RootFromContext(ctx)
if root == nil {
// Caller has no root. Don't bother traversing anything.
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
defer root.DecRef(ctx)
for _, p := range paths {
@@ -117,7 +116,7 @@ func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name s
}
// Couldn't find it.
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, paths []string, name string) (string, error) {
@@ -156,7 +155,7 @@ func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNam
}
// Couldn't find it.
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
// getPath returns the PATH as a slice of strings given the environment
diff --git a/pkg/sentry/fsimpl/cgroupfs/BUILD b/pkg/sentry/fsimpl/cgroupfs/BUILD
index 4c9c5b344..e5fdcc776 100644
--- a/pkg/sentry/fsimpl/cgroupfs/BUILD
+++ b/pkg/sentry/fsimpl/cgroupfs/BUILD
@@ -32,6 +32,7 @@ go_library(
"//pkg/context",
"//pkg/coverage",
"//pkg/errors/linuxerr",
+ "//pkg/fspath",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
@@ -43,7 +44,6 @@ go_library(
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go
index 4290ffe0d..71bb0a9c8 100644
--- a/pkg/sentry/fsimpl/cgroupfs/base.go
+++ b/pkg/sentry/fsimpl/cgroupfs/base.go
@@ -88,7 +88,6 @@ type controller interface {
// +stateify savable
type cgroupInode struct {
dir
- fs *filesystem
// ts is the list of tasks in this cgroup. The kernel is responsible for
// removing tasks from this list before they're destroyed, so any tasks on
@@ -102,9 +101,10 @@ var _ kernel.CgroupImpl = (*cgroupInode)(nil)
func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
c := &cgroupInode{
- fs: fs,
- ts: make(map[*kernel.Task]struct{}),
+ dir: dir{fs: fs},
+ ts: make(map[*kernel.Task]struct{}),
}
+ c.dir.cgi = c
contents := make(map[string]kernfs.Inode)
contents["cgroup.procs"] = fs.newControllerFile(ctx, creds, &cgroupProcsData{c})
@@ -115,8 +115,7 @@ func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credential
}
c.dir.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555))
- c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
- c.dir.InitRefs()
+ c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true})
c.dir.IncLinks(c.dir.OrderedChildren.Populate(contents))
atomic.AddUint64(&fs.numCgroups, 1)
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
index 22c8b7fda..edc3b50b9 100644
--- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
+++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
@@ -32,7 +32,8 @@
// controllers associated with them.
//
// Since cgroupfs doesn't allow hardlinks, there is a unique mapping between
-// cgroupfs dentries and inodes.
+// cgroupfs dentries and inodes. Thus, cgroupfs inodes don't need to be ref
+// counted and exist until they're unlinked once or the FS is destroyed.
//
// # Synchronization
//
@@ -48,10 +49,11 @@
// Lock order:
//
// kernel.CgroupRegistry.mu
-// cgroupfs.filesystem.mu
-// kernel.TaskSet.mu
-// kernel.Task.mu
-// cgroupfs.filesystem.tasksMu.
+// kernfs.filesystem.mu
+// kernel.TaskSet.mu
+// kernel.Task.mu
+// cgroupfs.filesystem.tasksMu.
+// cgroupfs.dir.OrderedChildren.mu
package cgroupfs
import (
@@ -63,6 +65,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -108,6 +111,7 @@ type FilesystemType struct{}
// +stateify savable
type InternalData struct {
DefaultControlValues map[string]int64
+ InitialCgroupPath string
}
// filesystem implements vfs.FilesystemImpl and kernel.cgroupFS.
@@ -134,6 +138,11 @@ type filesystem struct {
numCgroups uint64 // Protected by atomic ops.
root *kernfs.Dentry
+ // effectiveRoot is the initial cgroup new tasks are created in. Unless
+ // overwritten by internal mount options, root == effectiveRoot. If
+ // effectiveRoot != root, an extra reference is held on effectiveRoot for
+ // the lifetime of the filesystem.
+ effectiveRoot *kernfs.Dentry
// tasksMu serializes task membership changes across all cgroups within a
// filesystem.
@@ -229,6 +238,9 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fs := vfsfs.Impl().(*filesystem)
ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: mounting new view to hierarchy %v", fs.hierarchyID)
fs.root.IncRef()
+ if fs.effectiveRoot != fs.root {
+ fs.effectiveRoot.IncRef()
+ }
return vfsfs, fs.root.VFSDentry(), nil
}
@@ -245,8 +257,8 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
var defaults map[string]int64
if opts.InternalData != nil {
- ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults)
defaults = opts.InternalData.(*InternalData).DefaultControlValues
+ ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults)
}
for _, ty := range wantControllers {
@@ -286,6 +298,14 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
var rootD kernfs.Dentry
rootD.InitRoot(&fs.Filesystem, root)
fs.root = &rootD
+ fs.effectiveRoot = fs.root
+
+ if err := fs.prepareInitialCgroup(ctx, vfsObj, opts); err != nil {
+ ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: failed to prepare initial cgroup: %v", err)
+ rootD.DecRef(ctx)
+ fs.VFSFilesystem().DecRef(ctx)
+ return nil, nil, err
+ }
// Register controllers. The registry may be modified concurrently, so if we
// get an error, we raced with someone else who registered the same
@@ -303,10 +323,47 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return fs.VFSFilesystem(), rootD.VFSDentry(), nil
}
+// prepareInitialCgroup creates the initial cgroup according to opts. An initial
+// cgroup is optional, and if not specified, this function is a no-op.
+func (fs *filesystem) prepareInitialCgroup(ctx context.Context, vfsObj *vfs.VirtualFilesystem, opts vfs.GetFilesystemOptions) error {
+ if opts.InternalData == nil {
+ return nil
+ }
+ initPathStr := opts.InternalData.(*InternalData).InitialCgroupPath
+ if initPathStr == "" {
+ return nil
+ }
+ ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path: %v", initPathStr)
+ initPath := fspath.Parse(initPathStr)
+ if !initPath.Absolute || !initPath.HasComponents() {
+ ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path invalid: %+v", initPath)
+ return linuxerr.EINVAL
+ }
+
+ // Have initial cgroup target, create the tree.
+ cgDir := fs.root.Inode().(*cgroupInode)
+ for pit := initPath.Begin; pit.Ok(); pit = pit.Next() {
+ cgDirI, err := cgDir.NewDir(ctx, pit.String(), vfs.MkdirOptions{})
+ if err != nil {
+ return err
+ }
+ cgDir = cgDirI.(*cgroupInode)
+ }
+
+ // Walk to target dentry.
+ initDentry, err := fs.root.WalkDentryTree(ctx, vfsObj, initPath)
+ if err != nil {
+ ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup dentry not found: %v", err)
+ return linuxerr.ENOENT
+ }
+ fs.effectiveRoot = initDentry // Reference from WalkDentryTree transferred here.
+ return nil
+}
+
func (fs *filesystem) rootCgroup() kernel.Cgroup {
return kernel.Cgroup{
- Dentry: fs.root,
- CgroupImpl: fs.root.Inode().(kernel.CgroupImpl),
+ Dentry: fs.effectiveRoot,
+ CgroupImpl: fs.effectiveRoot.Inode().(kernel.CgroupImpl),
}
}
@@ -320,6 +377,10 @@ func (fs *filesystem) Release(ctx context.Context) {
r.Unregister(fs.hierarchyID)
}
+ if fs.root != fs.effectiveRoot {
+ fs.effectiveRoot.DecRef(ctx)
+ }
+
fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
fs.Filesystem.Release(ctx)
}
@@ -346,15 +407,18 @@ func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error
//
// +stateify savable
type dir struct {
- dirRefs
+ kernfs.InodeNoopRefCount
kernfs.InodeAlwaysValid
kernfs.InodeAttrs
kernfs.InodeNotSymlink
- kernfs.InodeDirectoryNoNewChildren // TODO(b/183137098): Implement mkdir.
+ kernfs.InodeDirectoryNoNewChildren
kernfs.OrderedChildren
implStatFS
locks vfs.FileLocks
+
+ fs *filesystem // Immutable.
+ cgi *cgroupInode // Immutable.
}
// Keep implements kernfs.Inode.Keep.
@@ -378,9 +442,100 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry
return fd.VFSFileDescription(), nil
}
-// DecRef implements kernfs.Inode.DecRef.
-func (d *dir) DecRef(ctx context.Context) {
- d.dirRefs.DecRef(func() { d.Destroy(ctx) })
+// NewDir implements kernfs.Inode.NewDir.
+func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) {
+ // "Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable."
+ // -- Linux, kernel/cgroup.c:cgroup_mkdir().
+ if strings.Contains(name, "\n") {
+ return nil, linuxerr.EINVAL
+ }
+ return d.OrderedChildren.Inserter(name, func() kernfs.Inode {
+ d.IncLinks(1)
+ return d.fs.newCgroupInode(ctx, auth.CredentialsFromContext(ctx))
+ })
+}
+
+// Rename implements kernfs.Inode.Rename. Cgroupfs only allows renaming of
+// cgroup directories, and the rename may only change the name within the same
+// parent. See linux, kernel/cgroup.c:cgroup_rename().
+func (d *dir) Rename(ctx context.Context, oldname, newname string, child, dst kernfs.Inode) error {
+ if _, ok := child.(*cgroupInode); !ok {
+ // Not a cgroup directory. Control files are backed by different types.
+ return linuxerr.ENOTDIR
+ }
+
+ dstCGInode, ok := dst.(*cgroupInode)
+ if !ok {
+ // Not a cgroup inode, so definitely can't be *this* inode.
+ return linuxerr.EIO
+ }
+ // Note: We're intentionally comparing addresses, since two different dirs
+ // could plausibly be identical in memory, but would occupy different
+ // locations in memory.
+ if d != &dstCGInode.dir {
+ // Destination dir is a different cgroup inode. Cross directory renames
+ // aren't allowed.
+ return linuxerr.EIO
+ }
+
+ // Rename moves oldname to newname within d. Proceed.
+ return d.OrderedChildren.Rename(ctx, oldname, newname, child, dst)
+}
+
+// Unlink implements kernfs.Inode.Unlink. Cgroupfs disallows unlink, as the only
+// files in the filesystem are control files, which can't be deleted.
+func (d *dir) Unlink(ctx context.Context, name string, child kernfs.Inode) error {
+ return linuxerr.EPERM
+}
+
+// hasChildrenLocked returns whether the cgroup dir contains any objects that
+// prevent it from being deleted.
+func (d *dir) hasChildrenLocked() bool {
+ // Subdirs take a link on the parent, so checks if there are any direct
+ // children cgroups. Exclude the dir's self link and the link from ".".
+ if d.InodeAttrs.Links()-2 > 0 {
+ return true
+ }
+ return len(d.cgi.ts) > 0
+}
+
+// HasChildren implements kernfs.Inode.HasChildren.
+//
+// The empty check for a cgroupfs directory is unlike a regular directory since
+// a cgroupfs directory will always have control files. A cgroupfs directory can
+// be deleted if cgroup contains no tasks and has no sub-cgroups.
+func (d *dir) HasChildren() bool {
+ d.fs.tasksMu.RLock()
+ defer d.fs.tasksMu.RUnlock()
+ return d.hasChildrenLocked()
+}
+
+// RmDir implements kernfs.Inode.RmDir.
+func (d *dir) RmDir(ctx context.Context, name string, child kernfs.Inode) error {
+ // Unlike a normal directory, we need to recheck if d is empty again, since
+ // vfs/kernfs can't stop tasks from entering or leaving the cgroup.
+ d.fs.tasksMu.RLock()
+ defer d.fs.tasksMu.RUnlock()
+
+ cgi, ok := child.(*cgroupInode)
+ if !ok {
+ return linuxerr.ENOTDIR
+ }
+ if cgi.dir.hasChildrenLocked() {
+ return linuxerr.ENOTEMPTY
+ }
+
+ // Disallow deletion of the effective root cgroup.
+ if cgi == d.fs.effectiveRoot.Inode().(*cgroupInode) {
+ ctx.Warningf("Cannot delete initial cgroup for new tasks %q", d.fs.effectiveRoot.FSLocalPath())
+ return linuxerr.EBUSY
+ }
+
+ err := d.OrderedChildren.RmDir(ctx, name, child)
+ if err == nil {
+ d.InodeAttrs.DecLinks()
+ }
+ return err
}
// controllerFile represents a generic control file that appears within a cgroup
diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD
index f981ff296..e0b879339 100644
--- a/pkg/sentry/fsimpl/devpts/BUILD
+++ b/pkg/sentry/fsimpl/devpts/BUILD
@@ -45,7 +45,6 @@ go_library(
"//pkg/sentry/unimpl",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 7a488e9fd..e711debcb 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -29,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Name is the filesystem name.
@@ -180,7 +179,7 @@ func (i *rootInode) allocateTerminal(ctx context.Context, creds *auth.Credential
i.mu.Lock()
defer i.mu.Unlock()
if i.nextIdx == math.MaxUint32 {
- return nil, syserror.ENOMEM
+ return nil, linuxerr.ENOMEM
}
idx := i.nextIdx
i.nextIdx++
@@ -241,7 +240,7 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (kernfs.Inode, erro
// Not a static entry.
idx, err := strconv.ParseUint(name, 10, 32)
if err != nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
i.mu.Lock()
defer i.mu.Unlock()
@@ -250,7 +249,7 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (kernfs.Inode, erro
return ri, nil
}
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
// IterDirents implements kernfs.Inode.IterDirents.
diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go
index 9cb21e83b..609623f9f 100644
--- a/pkg/sentry/fsimpl/devpts/line_discipline.go
+++ b/pkg/sentry/fsimpl/devpts/line_discipline.go
@@ -20,10 +20,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -203,7 +203,7 @@ func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSeque
} else if notifyEcho {
l.masterWaiter.Notify(waiter.ReadableEvents)
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) {
@@ -220,7 +220,7 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ
l.replicaWaiter.Notify(waiter.ReadableEvents)
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, io usermem.IO, args arch.SyscallArguments) error {
@@ -242,7 +242,7 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ
}
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) {
@@ -257,7 +257,7 @@ func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSeq
l.masterWaiter.Notify(waiter.ReadableEvents)
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// transformer is a helper interface to make it easier to stateify queue.
diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go
index ff1d89955..85aeefa43 100644
--- a/pkg/sentry/fsimpl/devpts/queue.go
+++ b/pkg/sentry/fsimpl/devpts/queue.go
@@ -17,12 +17,12 @@ package devpts
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -110,7 +110,7 @@ func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipl
defer q.mu.Unlock()
if !q.readable {
- return 0, false, false, syserror.ErrWouldBlock
+ return 0, false, false, linuxerr.ErrWouldBlock
}
if dst.NumBytes() > canonMaxBytes {
@@ -156,7 +156,7 @@ func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscip
room := waitBufMaxBytes - q.waitBufLen
// If out of room, return EAGAIN.
if room == 0 && copyLen > 0 {
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// Cap the size of the wait buffer.
if copyLen > room {
diff --git a/pkg/sentry/fsimpl/eventfd/BUILD b/pkg/sentry/fsimpl/eventfd/BUILD
index c09fdc7f9..1cb049a29 100644
--- a/pkg/sentry/fsimpl/eventfd/BUILD
+++ b/pkg/sentry/fsimpl/eventfd/BUILD
@@ -9,11 +9,11 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fdnotifier",
"//pkg/hostarch",
"//pkg/log",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/fsimpl/eventfd/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go
index 4f79cfcb7..af5ba5131 100644
--- a/pkg/sentry/fsimpl/eventfd/eventfd.go
+++ b/pkg/sentry/fsimpl/eventfd/eventfd.go
@@ -22,11 +22,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -149,7 +149,7 @@ func (efd *EventFileDescription) hostReadLocked(ctx context.Context, dst usermem
var buf [8]byte
if _, err := unix.Read(efd.hostfd, buf[:]); err != nil {
if err == unix.EWOULDBLOCK {
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
return err
}
@@ -167,7 +167,7 @@ func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequenc
// We can't complete the read if the value is currently zero.
if efd.val == 0 {
efd.mu.Unlock()
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
// Update the value based on the mode the event is operating in.
@@ -200,7 +200,7 @@ func (efd *EventFileDescription) hostWriteLocked(val uint64) error {
hostarch.ByteOrder.PutUint64(buf[:], val)
_, err := unix.Write(efd.hostfd, buf[:])
if err == unix.EWOULDBLOCK {
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
return err
}
@@ -232,7 +232,7 @@ func (efd *EventFileDescription) Signal(val uint64) error {
// uint64 minus 1.
if val > math.MaxUint64-1-efd.val {
efd.mu.Unlock()
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
efd.val += val
diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/pkg/sentry/fsimpl/ext/BUILD
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 871df5984..05c4fbeb2 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -59,7 +59,6 @@ go_library(
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
@@ -84,7 +83,6 @@ go_test(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index dab1e779d..0f855ac59 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -38,7 +37,7 @@ type fuseDevice struct{}
// Open implements vfs.Device.Open.
func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
if !kernel.FUSEEnabled {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
var fd DeviceFD
@@ -126,7 +125,7 @@ func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset in
return 0, linuxerr.EPERM
}
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// Read implements vfs.FileDescriptionImpl.Read.
@@ -192,7 +191,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
}
if req == nil {
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// We already checked the size: dst must be able to fit the whole request.
@@ -205,7 +204,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
return 0, err
}
if n != len(req.data) {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
if req.hdr.Opcode == linux.FUSE_WRITE {
@@ -214,7 +213,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
return 0, err
}
if written != len(req.payload) {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
n += int(written)
}
@@ -238,7 +237,7 @@ func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset i
return 0, linuxerr.EPERM
}
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// Write implements vfs.FileDescriptionImpl.Write.
@@ -395,7 +394,7 @@ func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64
return 0, linuxerr.EPERM
}
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// sendResponse sends a response to the waiting task (if any).
diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go
index 04250d796..8951b5ba8 100644
--- a/pkg/sentry/fsimpl/fuse/dev_test.go
+++ b/pkg/sentry/fsimpl/fuse/dev_test.go
@@ -20,11 +20,11 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -186,7 +186,7 @@ func ReadTest(serverTask *kernel.Task, fd *vfs.FileDescription, inIOseq usermem.
// "would block".
n, err = dev.Read(serverTask, inIOseq, vfs.ReadOptions{})
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
diff --git a/pkg/sentry/fsimpl/fuse/directory.go b/pkg/sentry/fsimpl/fuse/directory.go
index fcc5d9a2a..9611edd5a 100644
--- a/pkg/sentry/fsimpl/fuse/directory.go
+++ b/pkg/sentry/fsimpl/fuse/directory.go
@@ -19,10 +19,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -32,27 +32,27 @@ type directoryFD struct {
// Allocate implements directoryFD.Allocate.
func (*directoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
// PRead implements vfs.FileDescriptionImpl.PRead.
func (*directoryFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// Read implements vfs.FileDescriptionImpl.Read.
func (*directoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (*directoryFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// Write implements vfs.FileDescriptionImpl.Write.
func (*directoryFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 172cbd88f..af16098d2 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -30,7 +30,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -612,7 +611,7 @@ func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMo
return nil, err
}
if opcode != linux.FUSE_LOOKUP && ((out.Attr.Mode&linux.S_IFMT)^uint32(fileType) != 0 || out.NodeID == 0 || out.NodeID == linux.FUSE_ROOT_ID) {
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
child := i.fs.newInode(ctx, out.NodeID, out.Attr)
return child, nil
diff --git a/pkg/sentry/fsimpl/fuse/read_write.go b/pkg/sentry/fsimpl/fuse/read_write.go
index 35d0ab6f4..fe119aa43 100644
--- a/pkg/sentry/fsimpl/fuse/read_write.go
+++ b/pkg/sentry/fsimpl/fuse/read_write.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/syserror"
)
// ReadInPages sends FUSE_READ requests for the size after round it up to
@@ -221,7 +220,7 @@ func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64,
// Write more than requested? EIO.
if out.Size > toWrite {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
written += out.Size
diff --git a/pkg/sentry/fsimpl/fuse/regular_file.go b/pkg/sentry/fsimpl/fuse/regular_file.go
index 6c4de3507..38cde8208 100644
--- a/pkg/sentry/fsimpl/fuse/regular_file.go
+++ b/pkg/sentry/fsimpl/fuse/regular_file.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -108,7 +107,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
return 0, err
}
if int64(cp) != toCopy {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
copied += toCopy
}
@@ -205,7 +204,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
return 0, offset, err
}
if int64(cp) != srclen {
- return 0, offset, syserror.EIO
+ return 0, offset, linuxerr.EIO
}
n, err := fd.inode().fs.Write(ctx, fd, uint64(offset), uint32(srclen), data)
@@ -216,7 +215,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
if n == 0 {
// We have checked srclen != 0 previously.
// If err == nil, then it's a short write and we return EIO.
- return 0, offset, syserror.EIO
+ return 0, offset, linuxerr.EIO
}
written = int64(n)
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
index 752060044..4244f2cf5 100644
--- a/pkg/sentry/fsimpl/gofer/BUILD
+++ b/pkg/sentry/fsimpl/gofer/BUILD
@@ -79,7 +79,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/unet",
"//pkg/usermem",
"//pkg/waiter",
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 05b776c2e..00228c469 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -33,7 +33,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Sync implements vfs.FilesystemImpl.Sync.
@@ -235,7 +234,7 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s
}
if child, ok := parent.children[name]; ok || parent.isSynthetic() {
if child == nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
return child, nil
}
@@ -349,7 +348,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
return linuxerr.EEXIST
}
if parent.isDeleted() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, name, &ds); err != nil {
return err
@@ -395,7 +394,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
return err
}
if !dir && rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if parent.isSynthetic() {
if createInSyntheticDir == nil {
@@ -463,7 +462,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
}
} else {
if name == "." || name == ".." {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
}
@@ -486,7 +485,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
child, ok = parent.children[name]
if ok && child == nil {
// Hit a negative cached entry, child doesn't exist.
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
} else {
child, _, err = fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
@@ -552,7 +551,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
// child must be a non-directory file.
if child != nil && child.isDir() {
vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if rp.MustBeDir() {
if child != nil {
@@ -563,7 +562,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
}
if parent.isSynthetic() {
if child == nil {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
} else if child == nil || !child.isSynthetic() {
err = parent.file.unlinkAt(ctx, name, flags)
@@ -674,7 +673,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return err
}
if d.nlink == 0 {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if d.nlink == math.MaxUint32 {
return linuxerr.EMLINK
@@ -811,7 +810,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if rp.Done() {
// Reject attempts to open mount root directory with O_CREAT.
if mayCreate && rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if mustCreate {
return nil, linuxerr.EEXIST
@@ -841,7 +840,7 @@ afterTrailingSymlink:
}
// Reject attempts to open directories with O_CREAT.
if mayCreate && rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, rp.Component(), &ds); err != nil {
return nil, err
@@ -922,11 +921,11 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
case linux.S_IFDIR:
// Can't open directories with O_CREAT.
if opts.Flags&linux.O_CREAT != 0 {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
// Can't open directories writably.
if ats&vfs.MayWrite != 0 {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if opts.Flags&linux.O_DIRECT != 0 {
return nil, linuxerr.EINVAL
@@ -1054,7 +1053,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
return nil, err
}
if d.isDeleted() {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
mnt := rp.Mount()
if err := mnt.CheckBeginWrite(); err != nil {
@@ -1268,7 +1267,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
defer newParent.dirMu.Unlock()
}
if newParent.isDeleted() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
replaced, err := fs.getChildLocked(ctx, newParent, newName, &ds)
if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
@@ -1282,7 +1281,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
replacedVFSD = &replaced.vfsd
if replaced.isDir() {
if !renamed.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if genericIsAncestorDentry(replaced, renamed) {
return linuxerr.ENOTEMPTY
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 25d2e39d6..bd6b30397 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -62,7 +62,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/unet"
)
@@ -865,11 +864,11 @@ func dentryAttrMask() p9.AttrMask {
func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, mask p9.AttrMask, attr *p9.Attr) (*dentry, error) {
if !mask.Mode {
ctx.Warningf("can't create gofer.dentry without file type")
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
if attr.Mode.FileType() == p9.ModeRegular && !mask.Size {
ctx.Warningf("can't create regular file gofer.dentry without file size")
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
d := &dentry{
@@ -1112,7 +1111,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs
case linux.S_IFREG:
// ok
case linux.S_IFDIR:
- return syserror.EISDIR
+ return linuxerr.EISDIR
default:
return linuxerr.EINVAL
}
diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go
index 5c57f6fea..02540a754 100644
--- a/pkg/sentry/fsimpl/gofer/handle.go
+++ b/pkg/sentry/fsimpl/gofer/handle.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
+ "gvisor.dev/gvisor/pkg/sync"
)
// handle represents a remote "open file descriptor", consisting of an opened
@@ -130,3 +131,43 @@ func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, o
}
return uint64(n), cperr
}
+
+type handleReadWriter struct {
+ ctx context.Context
+ h *handle
+ off uint64
+}
+
+var handleReadWriterPool = sync.Pool{
+ New: func() interface{} {
+ return &handleReadWriter{}
+ },
+}
+
+func getHandleReadWriter(ctx context.Context, h *handle, offset int64) *handleReadWriter {
+ rw := handleReadWriterPool.Get().(*handleReadWriter)
+ rw.ctx = ctx
+ rw.h = h
+ rw.off = uint64(offset)
+ return rw
+}
+
+func putHandleReadWriter(rw *handleReadWriter) {
+ rw.ctx = nil
+ rw.h = nil
+ handleReadWriterPool.Put(rw)
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks.
+func (rw *handleReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+ n, err := rw.h.readToBlocksAt(rw.ctx, dsts, rw.off)
+ rw.off += n
+ return n, err
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+func (rw *handleReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+ n, err := rw.h.writeFromBlocksAt(rw.ctx, srcs, rw.off)
+ rw.off += n
+ return n, err
+}
diff --git a/pkg/sentry/fsimpl/gofer/host_named_pipe.go b/pkg/sentry/fsimpl/gofer/host_named_pipe.go
index 398288ee3..505916a57 100644
--- a/pkg/sentry/fsimpl/gofer/host_named_pipe.go
+++ b/pkg/sentry/fsimpl/gofer/host_named_pipe.go
@@ -22,7 +22,6 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Global pipe used by blockUntilNonblockingPipeHasWriter since we can't create
@@ -109,6 +108,6 @@ func sleepBetweenNamedPipeOpenChecks(ctx context.Context) error {
return nil
case <-cancel:
ctx.SleepFinish(false)
- return syserror.ErrInterrupted
+ return linuxerr.ErrInterrupted
}
}
diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go
index b0a429d42..5a3ddfc9d 100644
--- a/pkg/sentry/fsimpl/gofer/p9file.go
+++ b/pkg/sentry/fsimpl/gofer/p9file.go
@@ -16,9 +16,9 @@ package gofer
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/p9"
- "gvisor.dev/gvisor/pkg/syserror"
)
// p9file is a wrapper around p9.File that provides methods that are
@@ -59,7 +59,7 @@ func (f p9file) walkGetAttrOne(ctx context.Context, name string) (p9.QID, p9file
if newfile != nil {
p9file{newfile}.close(ctx)
}
- return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, syserror.EIO
+ return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, linuxerr.EIO
}
return qids[0], p9file{newfile}, attrMask, attr, nil
}
diff --git a/pkg/sentry/fsimpl/gofer/save_restore.go b/pkg/sentry/fsimpl/gofer/save_restore.go
index e67422a2f..8dcbc61ed 100644
--- a/pkg/sentry/fsimpl/gofer/save_restore.go
+++ b/pkg/sentry/fsimpl/gofer/save_restore.go
@@ -158,6 +158,10 @@ func (d *dentryPlatformFile) afterLoad() {
// afterLoad is invoked by stateify.
func (fd *specialFileFD) afterLoad() {
fd.handle.fd = -1
+ if fd.hostFileMapper.IsInited() {
+ // Ensure that we don't call fd.hostFileMapper.Init() again.
+ fd.hostFileMapperInitOnce.Do(func() {})
+ }
}
// CompleteRestore implements
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index 4b59c1c3c..a8d47b65b 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -22,13 +22,15 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/fsmetric"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -76,6 +78,16 @@ type specialFileFD struct {
bufMu sync.Mutex `state:"nosave"`
haveBuf uint32
buf []byte
+
+ // If handle.fd >= 0, hostFileMapper caches mappings of handle.fd, and
+ // hostFileMapperInitOnce is used to initialize it on first use.
+ hostFileMapperInitOnce sync.Once `state:"nosave"`
+ hostFileMapper fsutil.HostFileMapper
+
+ // If handle.fd >= 0, fileRefs counts references on memmap.File offsets.
+ // fileRefs is protected by fileRefsMu.
+ fileRefsMu sync.Mutex `state:"nosave"`
+ fileRefs fsutil.FrameRefSet
}
func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) {
@@ -230,23 +242,13 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
}
}
- // Going through dst.CopyOutFrom() would hold MM locks around file
- // operations of unknown duration. For regularFileFD, doing so is necessary
- // to support mmap due to lock ordering; MM locks precede dentry.dataMu.
- // That doesn't hold here since specialFileFD doesn't client-cache data.
- // Just buffer the read instead.
- buf := make([]byte, dst.NumBytes())
- n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
+ rw := getHandleReadWriter(ctx, &fd.handle, offset)
+ n, err := dst.CopyOutFrom(ctx, rw)
+ putHandleReadWriter(rw)
if linuxerr.Equals(linuxerr.EAGAIN, err) {
- err = syserror.ErrWouldBlock
- }
- if n == 0 {
- return bufN, err
+ err = linuxerr.ErrWouldBlock
}
- if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil {
- return bufN + int64(cp), cperr
- }
- return bufN + int64(n), err
+ return bufN + n, err
}
// Read implements vfs.FileDescriptionImpl.Read.
@@ -317,20 +319,15 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
}
}
- // Do a buffered write. See rationale in PRead.
- buf := make([]byte, src.NumBytes())
- copied, copyErr := src.CopyIn(ctx, buf)
- if copied == 0 && copyErr != nil {
- // Only return the error if we didn't get any data.
- return 0, offset, copyErr
- }
- n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:copied])), uint64(offset))
+ rw := getHandleReadWriter(ctx, &fd.handle, offset)
+ n, err := src.CopyInTo(ctx, rw)
+ putHandleReadWriter(rw)
if linuxerr.Equals(linuxerr.EAGAIN, err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
// Update offset if the offset is valid.
if offset >= 0 {
- offset += int64(n)
+ offset += n
}
// Update file size for regular files.
if fd.isRegularFile {
@@ -341,10 +338,7 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
atomic.StoreUint64(&d.size, uint64(offset))
}
}
- if err != nil {
- return int64(n), offset, err
- }
- return int64(n), offset, copyErr
+ return int64(n), offset, err
}
// Write implements vfs.FileDescriptionImpl.Write.
@@ -412,3 +406,85 @@ func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error
}
return nil
}
+
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+func (fd *specialFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+ if fd.handle.fd < 0 || fd.filesystem().opts.forcePageCache {
+ return linuxerr.ENODEV
+ }
+ // After this point, fd may be used as a memmap.Mappable and memmap.File.
+ fd.hostFileMapperInitOnce.Do(fd.hostFileMapper.Init)
+ return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (fd *specialFileFD) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
+ fd.hostFileMapper.IncRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
+ return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+func (fd *specialFileFD) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
+ fd.hostFileMapper.DecRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+func (fd *specialFileFD) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
+ return fd.AddMapping(ctx, ms, dstAR, offset, writable)
+}
+
+// Translate implements memmap.Mappable.Translate.
+func (fd *specialFileFD) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
+ mr := optional
+ if fd.filesystem().opts.limitHostFDTranslation {
+ mr = maxFillRange(required, optional)
+ }
+ return []memmap.Translation{
+ {
+ Source: mr,
+ File: fd,
+ Offset: mr.Start,
+ Perms: hostarch.AnyAccess,
+ },
+ }, nil
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+func (fd *specialFileFD) InvalidateUnsavable(ctx context.Context) error {
+ return nil
+}
+
+// IncRef implements memmap.File.IncRef.
+func (fd *specialFileFD) IncRef(fr memmap.FileRange) {
+ fd.fileRefsMu.Lock()
+ defer fd.fileRefsMu.Unlock()
+ fd.fileRefs.IncRefAndAccount(fr)
+}
+
+// DecRef implements memmap.File.DecRef.
+func (fd *specialFileFD) DecRef(fr memmap.FileRange) {
+ fd.fileRefsMu.Lock()
+ defer fd.fileRefsMu.Unlock()
+ fd.fileRefs.DecRefAndAccount(fr)
+}
+
+// MapInternal implements memmap.File.MapInternal.
+func (fd *specialFileFD) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
+ fd.requireHostFD()
+ return fd.hostFileMapper.MapInternal(fr, int(fd.handle.fd), at.Write)
+}
+
+// FD implements memmap.File.FD.
+func (fd *specialFileFD) FD() int {
+ fd.requireHostFD()
+ return int(fd.handle.fd)
+}
+
+func (fd *specialFileFD) requireHostFD() {
+ if fd.handle.fd < 0 {
+ // This is possible if fd was successfully mmapped before saving, then
+ // was restored without a host FD. This is unrecoverable: without a
+ // host FD, we can't mmap this file post-restore.
+ panic("gofer.specialFileFD can no longer be memory-mapped without a host FD")
+ }
+}
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 476545d00..180a35583 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -70,7 +70,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/tcpip",
"//pkg/unet",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 89aa7b3d9..984c6e8ee 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -37,7 +37,6 @@ import (
unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -712,7 +711,7 @@ func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts
if total != 0 {
err = nil
} else {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
}
return total, err
@@ -766,7 +765,7 @@ func (f *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opt
if !i.seekable {
n, err := f.writeToHostFD(ctx, src, -1, opts.Flags)
if isBlockError(err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
return n, err
}
diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go
index 7f6ce4ee5..04ac73255 100644
--- a/pkg/sentry/fsimpl/host/tty.go
+++ b/pkg/sentry/fsimpl/host/tty.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/unimpl"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -346,7 +345,7 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
// If the signal is SIGTTIN, then we are attempting to read
// from the TTY. Don't send the signal and return EIO.
if sig == linux.SIGTTIN {
- return syserror.EIO
+ return linuxerr.EIO
}
// Otherwise, we are writing or changing terminal state. This is allowed.
@@ -355,7 +354,7 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
// If the process group is an orphan, return EIO.
if pg.IsOrphan() {
- return syserror.EIO
+ return linuxerr.EIO
}
// Otherwise, send the signal to the process group and return ERESTARTSYS.
@@ -368,5 +367,5 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
//
// Linux ignores the result of kill_pgrp().
_ = pg.SendSignal(kernel.SignalInfoPriv(sig))
- return syserror.ERESTARTSYS
+ return linuxerr.ERESTARTSYS
}
diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go
index 95d7ebe2e..9850f3f41 100644
--- a/pkg/sentry/fsimpl/host/util.go
+++ b/pkg/sentry/fsimpl/host/util.go
@@ -42,7 +42,7 @@ func timespecToStatxTimestamp(ts unix.Timespec) linux.StatxTimestamp {
}
// isBlockError checks if an error is EAGAIN or EWOULDBLOCK.
-// If so, they can be transformed into syserror.ErrWouldBlock.
+// If so, they can be transformed into linuxerr.ErrWouldBlock.
func isBlockError(err error) bool {
return linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err)
}
diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD
index d53937db6..4b577ea43 100644
--- a/pkg/sentry/fsimpl/kernfs/BUILD
+++ b/pkg/sentry/fsimpl/kernfs/BUILD
@@ -119,7 +119,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
@@ -137,6 +136,7 @@ go_test(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/errors/linuxerr",
+ "//pkg/fspath",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
@@ -144,7 +144,6 @@ go_test(
"//pkg/sentry/fsimpl/testutil",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
"@com_github_google_go_cmp//cmp:go_default_library",
],
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index 8b008dc10..7db1473c4 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -99,7 +98,7 @@ func NewGenericDirectoryFD(m *vfs.Mount, d *Dentry, children *OrderedChildren, l
func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions, fdOpts GenericDirectoryFDOptions) error {
if vfs.AccessTypesForOpenFlags(opts)&vfs.MayWrite != 0 {
// Can't open directories for writing.
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
fd.LockFD.Init(locks)
fd.seekEnd = fdOpts.SeekEnd
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index a97473f7d..363ebc466 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// stepExistingLocked resolves rp.Component() in parent directory vfsd.
@@ -224,7 +223,7 @@ func checkCreateLocked(ctx context.Context, creds *auth.Credentials, name string
return linuxerr.EEXIST
}
if parent.VFSDentry().IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parent.inode.CheckPermissions(ctx, creds, vfs.MayWrite); err != nil {
return err
@@ -241,7 +240,7 @@ func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) er
return linuxerr.EBUSY
}
if parent.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
return err
@@ -362,7 +361,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return err
}
if rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if rp.Mount() != vd.Mount() {
return linuxerr.EXDEV
@@ -443,7 +442,7 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
if rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := rp.Mount().CheckBeginWrite(); err != nil {
return err
@@ -509,7 +508,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
defer unlock()
if rp.Done() {
if rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if mustCreate {
return nil, linuxerr.EEXIST
@@ -536,11 +535,11 @@ afterTrailingSymlink:
}
// Reject attempts to open directories with O_CREAT.
if rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
pc := rp.Component()
if pc == "." || pc == ".." {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if len(pc) > linux.NAME_MAX {
return nil, linuxerr.ENAMETOOLONG
@@ -861,7 +860,7 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
return err
}
if rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := rp.Mount().CheckBeginWrite(); err != nil {
return err
@@ -895,7 +894,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
if d.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
virtfs := rp.VirtualFilesystem()
parentDentry := d.parent
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index a42fc79b4..b96dc9ef7 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -26,7 +26,6 @@ import (
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// InodeNoopRefCount partially implements the Inode interface, specifically the
@@ -234,6 +233,11 @@ func (a *InodeAttrs) Mode() linux.FileMode {
return linux.FileMode(atomic.LoadUint32(&a.mode))
}
+// Links returns the link count.
+func (a *InodeAttrs) Links() uint32 {
+ return atomic.LoadUint32(&a.nlink)
+}
+
// TouchAtime updates a.atime to the current time.
func (a *InodeAttrs) TouchAtime(ctx context.Context, mnt *vfs.Mount) {
if mnt.Flags.NoATime || mnt.ReadOnly() {
@@ -289,7 +293,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut
return linuxerr.EPERM
}
if opts.Stat.Mask&linux.STATX_SIZE != 0 && a.Mode().IsDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if err := vfs.CheckSetStat(ctx, creds, &opts, a.Mode(), auth.KUID(atomic.LoadUint32(&a.uid)), auth.KGID(atomic.LoadUint32(&a.gid))); err != nil {
return err
@@ -475,7 +479,7 @@ func (o *OrderedChildren) Lookup(ctx context.Context, name string) (Inode, error
s, ok := o.set[name]
if !ok {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
s.inode.IncRef() // This ref is passed to the dentry upon creation via Init.
@@ -502,6 +506,30 @@ func (o *OrderedChildren) Insert(name string, child Inode) error {
return o.insert(name, child, false)
}
+// Inserter is like Insert, but obtains the child to insert by calling
+// makeChild. makeChild is only called if the insert will succeed. This allows
+// the caller to atomically check and insert a child without having to
+// clean up the child on failure.
+func (o *OrderedChildren) Inserter(name string, makeChild func() Inode) (Inode, error) {
+ o.mu.Lock()
+ defer o.mu.Unlock()
+ if _, ok := o.set[name]; ok {
+ return nil, linuxerr.EEXIST
+ }
+
+ // Note: We must not fail after we call makeChild().
+
+ child := makeChild()
+ s := &slot{
+ name: name,
+ inode: child,
+ static: false,
+ }
+ o.order.PushBack(s)
+ o.set[name] = s
+ return child, nil
+}
+
// insert inserts child into o.
//
// Precondition: Caller must be holding a ref on child if static is true.
@@ -559,7 +587,7 @@ func (o *OrderedChildren) replaceChildLocked(ctx context.Context, name string, n
func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error {
s, ok := o.set[name]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if s.inode != child {
panic(fmt.Sprintf("Inode doesn't match what kernfs thinks! OrderedChild: %+v, kernfs: %+v", s.inode, child))
@@ -746,5 +774,5 @@ type InodeNoStatFS struct{}
// StatFS implements Inode.StatFS.
func (*InodeNoStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
- return linux.Statfs{}, syserror.ENOSYS
+ return linux.Statfs{}, linuxerr.ENOSYS
}
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 0e2867d49..544698694 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -61,6 +61,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/refsvfs2"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -542,6 +543,63 @@ func (d *Dentry) FSLocalPath() string {
return b.String()
}
+// WalkDentryTree traverses p in the dentry tree for this filesystem. Note that
+// this only traverses the dentry tree and is not a general path traversal. No
+// symlinks and dynamic children are resolved, and no permission checks are
+// performed. The caller is responsible for ensuring the returned Dentry exists
+// for an appropriate lifetime.
+//
+// p is interpreted starting at d, and may be absolute or relative (absolute vs
+// relative paths both refer to the same target here, since p is absolute from
+// d). p may contain "." and "..", but will not allow traversal above d (similar
+// to ".." at the root dentry).
+//
+// This is useful for filesystem internals, where the filesystem may not be
+// mounted yet. For a mounted filesystem, use GetDentryAt.
+func (d *Dentry) WalkDentryTree(ctx context.Context, vfsObj *vfs.VirtualFilesystem, p fspath.Path) (*Dentry, error) {
+ d.fs.mu.RLock()
+ defer d.fs.processDeferredDecRefs(ctx)
+ defer d.fs.mu.RUnlock()
+
+ target := d
+
+ for pit := p.Begin; pit.Ok(); pit = pit.Next() {
+ pc := pit.String()
+
+ switch {
+ case target == nil:
+ return nil, linuxerr.ENOENT
+ case pc == ".":
+ // No-op, consume component and continue.
+ case pc == "..":
+ if target == d {
+ // Don't let .. traverse above the start point of the walk.
+ continue
+ }
+ target = target.parent
+ // Parent doesn't need revalidation since we revalidated it on the
+ // way to the child, and we're still holding fs.mu.
+ default:
+ var err error
+
+ d.dirMu.Lock()
+ target, err = d.fs.revalidateChildLocked(ctx, vfsObj, target, pc, target.children[pc])
+ d.dirMu.Unlock()
+
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ if target == nil {
+ return nil, linuxerr.ENOENT
+ }
+
+ target.IncRef()
+ return target, nil
+}
+
// The Inode interface maps filesystem-level operations that operate on paths to
// equivalent operations on specific filesystem nodes.
//
@@ -667,12 +725,15 @@ type inodeDirectory interface {
// RmDir removes an empty child directory from this directory
// inode. Implementations must update the parent directory's link count,
// if required. Implementations are not responsible for checking that child
- // is a directory, checking for an empty directory.
+ // is a directory, or checking for an empty directory.
RmDir(ctx context.Context, name string, child Inode) error
// Rename is called on the source directory containing an inode being
- // renamed. child should point to the resolved child in the source
- // directory.
+ // renamed. child points to the resolved child in the source directory.
+ // dstDir is guaranteed to be a directory inode.
+ //
+ // On a successful call to Rename, the caller updates the dentry tree to
+ // reflect the name change.
//
// Precondition: Caller must serialize concurrent calls to Rename.
Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index 609887943..a2aba9321 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
@@ -346,3 +347,63 @@ func TestDirFDIterDirents(t *testing.T) {
"file1": linux.DT_REG,
})
}
+
+func TestDirWalkDentryTree(t *testing.T) {
+ sys := newTestSystem(t, func(ctx context.Context, creds *auth.Credentials, fs *filesystem) kernfs.Inode {
+ return fs.newDir(ctx, creds, 0755, map[string]kernfs.Inode{
+ "dir1": fs.newDir(ctx, creds, 0755, nil),
+ "dir2": fs.newDir(ctx, creds, 0755, map[string]kernfs.Inode{
+ "file1": fs.newFile(ctx, creds, staticFileContent),
+ "dir3": fs.newDir(ctx, creds, 0755, nil),
+ }),
+ })
+ })
+ defer sys.Destroy()
+
+ testWalk := func(from *kernfs.Dentry, getDentryPath, walkPath string, expectedErr error) {
+ var d *kernfs.Dentry
+ if getDentryPath != "" {
+ pop := sys.PathOpAtRoot(getDentryPath)
+ vd := sys.GetDentryOrDie(pop)
+ defer vd.DecRef(sys.Ctx)
+ d = vd.Dentry().Impl().(*kernfs.Dentry)
+ }
+
+ match, err := from.WalkDentryTree(sys.Ctx, sys.VFS, fspath.Parse(walkPath))
+ if err == nil {
+ defer match.DecRef(sys.Ctx)
+ }
+
+ if err != expectedErr {
+ t.Fatalf("WalkDentryTree from %q to %q (with expected error: %v) unexpected error, want: %v, got: %v", from.FSLocalPath(), walkPath, expectedErr, expectedErr, err)
+ }
+ if expectedErr != nil {
+ return
+ }
+
+ if d != match {
+ t.Fatalf("WalkDentryTree from %q to %q (with expected error: %v) found unexpected dentry; want: %v, got: %v", from.FSLocalPath(), walkPath, expectedErr, d, match)
+ }
+ }
+
+ rootD := sys.Root.Dentry().Impl().(*kernfs.Dentry)
+
+ testWalk(rootD, "dir1", "/dir1", nil)
+ testWalk(rootD, "", "/dir-non-existent", linuxerr.ENOENT)
+ testWalk(rootD, "", "/dir1/child-non-existent", linuxerr.ENOENT)
+ testWalk(rootD, "", "/dir2/inner-non-existent/dir3", linuxerr.ENOENT)
+
+ testWalk(rootD, "dir2/dir3", "/dir2/../dir2/dir3", nil)
+ testWalk(rootD, "dir2/dir3", "/dir2/././dir3", nil)
+ testWalk(rootD, "dir2/dir3", "/dir2/././dir3/.././dir3", nil)
+
+ pop := sys.PathOpAtRoot("dir2")
+ dir2VD := sys.GetDentryOrDie(pop)
+ defer dir2VD.DecRef(sys.Ctx)
+ dir2D := dir2VD.Dentry().Impl().(*kernfs.Dentry)
+
+ testWalk(dir2D, "dir2/dir3", "/dir3", nil)
+ testWalk(dir2D, "dir2/dir3", "/../../../dir3", nil)
+ testWalk(dir2D, "dir2/file1", "/file1", nil)
+ testWalk(dir2D, "dir2/file1", "file1", nil)
+}
diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD
index ed730e215..d16dfef9b 100644
--- a/pkg/sentry/fsimpl/overlay/BUILD
+++ b/pkg/sentry/fsimpl/overlay/BUILD
@@ -42,7 +42,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go
index 1f85a1f0d..618092ef1 100644
--- a/pkg/sentry/fsimpl/overlay/copy_up.go
+++ b/pkg/sentry/fsimpl/overlay/copy_up.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
func (d *dentry) isCopiedUp() bool {
@@ -72,7 +71,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
if d.vfsd.IsDead() {
// Raced with deletion of d.
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// Obtain settable timestamps from the lower layer.
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 5e89928c5..c04c80590 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -28,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// _OVL_XATTR_PREFIX is an extended attribute key prefix to identify overlayfs
@@ -314,7 +313,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
}
if !topLookupLayer.existsInOverlay() {
child.destroyLocked(ctx)
- return nil, topLookupLayer, syserror.ENOENT
+ return nil, topLookupLayer, linuxerr.ENOENT
}
// Device and inode numbers were copied from the topmost layer above. Remap
@@ -483,7 +482,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
return linuxerr.EEXIST
}
if parent.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
@@ -506,7 +505,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
}
if !dir && rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
mnt := rp.Mount()
@@ -780,7 +779,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
start := rp.Start().Impl().(*dentry)
if rp.Done() {
if mayCreate && rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if mustCreate {
return nil, linuxerr.EEXIST
@@ -807,7 +806,7 @@ afterTrailingSymlink:
}
// Reject attempts to open directories with O_CREAT.
if mayCreate && rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
// Determine whether or not we need to create a file.
parent.dirMu.Lock()
@@ -865,11 +864,11 @@ func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *
if ftype == linux.S_IFDIR {
// Can't open directories with O_CREAT.
if opts.Flags&linux.O_CREAT != 0 {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
// Can't open directories writably.
if ats.MayWrite() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if opts.Flags&linux.O_DIRECT != 0 {
return nil, linuxerr.EINVAL
@@ -919,7 +918,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
return nil, err
}
if parent.vfsd.IsDead() {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
mnt := rp.Mount()
if err := mnt.CheckBeginWrite(); err != nil {
@@ -1086,7 +1085,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
defer newParent.dirMu.Unlock()
}
if newParent.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
var (
replaced *dentry
@@ -1105,7 +1104,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
replacedVFSD = &replaced.vfsd
if replaced.isDir() {
if !renamed.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if genericIsAncestorDentry(replaced, renamed) {
return linuxerr.ENOTEMPTY
@@ -1533,7 +1532,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
defer rp.Mount().EndWrite()
name := rp.Component()
if name == "." || name == ".." {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if rp.MustBeDir() {
return linuxerr.ENOTDIR
@@ -1557,7 +1556,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
if child.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if err := parent.mayDelete(rp.Credentials(), child); err != nil {
return err
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
index 1d3d2d95f..95cfbdc42 100644
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -102,7 +102,6 @@ go_library(
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/tcpip/header",
"//pkg/tcpip/network/ipv4",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index d99f90b36..e04ae6660 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// subtasksInode represents the inode for /proc/[pid]/task/ directory.
@@ -71,15 +70,15 @@ func (fs *filesystem) newSubtasks(ctx context.Context, task *kernel.Task, pidns
func (i *subtasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
tid, err := strconv.ParseUint(name, 10, 32)
if err != nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
subTask := i.pidns.TaskWithID(kernel.ThreadID(tid))
if subTask == nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
if subTask.ThreadGroup() != i.task.ThreadGroup() {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
return i.fs.newTaskInode(ctx, subTask, i.pidns, false, i.cgroupControllers)
}
@@ -88,7 +87,7 @@ func (i *subtasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode,
func (i *subtasksInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
tasks := i.task.ThreadGroup().MemberIDs(i.pidns)
if len(tasks) == 0 {
- return offset, syserror.ENOENT
+ return offset, linuxerr.ENOENT
}
if relOffset >= int64(len(tasks)) {
return offset, nil
@@ -124,7 +123,7 @@ type subtasksFD struct {
func (fd *subtasksFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
if fd.task.ExitState() >= kernel.TaskExitZombie {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
return fd.GenericDirectoryFD.IterDirents(ctx, cb)
}
@@ -132,7 +131,7 @@ func (fd *subtasksFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallbac
// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *subtasksFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
if fd.task.ExitState() >= kernel.TaskExitZombie {
- return 0, syserror.ENOENT
+ return 0, linuxerr.ENOENT
}
return fd.GenericDirectoryFD.Seek(ctx, offset, whence)
}
@@ -140,7 +139,7 @@ func (fd *subtasksFD) Seek(ctx context.Context, offset int64, whence int32) (int
// Stat implements vfs.FileDescriptionImpl.Stat.
func (fd *subtasksFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
if fd.task.ExitState() >= kernel.TaskExitZombie {
- return linux.Statx{}, syserror.ENOENT
+ return linux.Statx{}, linuxerr.ENOENT
}
return fd.GenericDirectoryFD.Stat(ctx, opts)
}
@@ -148,7 +147,7 @@ func (fd *subtasksFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Sta
// SetStat implements vfs.FileDescriptionImpl.SetStat.
func (fd *subtasksFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
if fd.task.ExitState() >= kernel.TaskExitZombie {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
return fd.GenericDirectoryFD.SetStat(ctx, opts)
}
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index dfc0a924e..5c6412fc0 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -22,11 +22,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
func getTaskFD(t *kernel.Task, fd int32) (*vfs.FileDescription, kernel.FDFlags) {
@@ -142,11 +142,11 @@ func (i *fdDirInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.Ite
func (i *fdDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
fdInt, err := strconv.ParseInt(name, 10, 32)
if err != nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
fd := int32(fdInt)
if !taskFDExists(ctx, i.fs, i.task, fd) {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
return i.fs.newFDSymlink(ctx, i.task, fd, i.fs.NextIno()), nil
}
@@ -218,7 +218,7 @@ func (fs *filesystem) newFDSymlink(ctx context.Context, task *kernel.Task, fd in
func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
file, _ := getTaskFD(s.task, s.fd)
if file == nil {
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
defer s.fs.SafeDecRefFD(ctx, file)
root := vfs.RootFromContext(ctx)
@@ -231,7 +231,7 @@ func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error)
func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
file, _ := getTaskFD(s.task, s.fd)
if file == nil {
- return vfs.VirtualDentry{}, "", syserror.ENOENT
+ return vfs.VirtualDentry{}, "", linuxerr.ENOENT
}
defer s.fs.SafeDecRefFD(ctx, file)
vd := file.VirtualDentry()
@@ -278,11 +278,11 @@ func (fs *filesystem) newFDInfoDirInode(ctx context.Context, task *kernel.Task)
func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
fdInt, err := strconv.ParseInt(name, 10, 32)
if err != nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
fd := int32(fdInt)
if !taskFDExists(ctx, i.fs, i.task, fd) {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
data := &fdInfoData{
fs: i.fs,
@@ -330,7 +330,7 @@ var _ dynamicInode = (*fdInfoData)(nil)
func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
file, descriptorFlags := getTaskFD(d.task, d.fd)
if file == nil {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
defer d.fs.SafeDecRefFD(ctx, file)
// TODO(b/121266871): Include pos, locks, and other data. For now we only
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 0ce3ed797..34b0c4f63 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -33,7 +33,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -491,7 +490,7 @@ func (fd *memFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64
return int64(n), nil
}
if readErr != nil {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
return 0, nil
}
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index cf905fae4..26d44744b 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -21,11 +21,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -116,12 +116,12 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, err
case threadSelfName:
return i.newThreadSelfSymlink(ctx, root), nil
}
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
task := i.pidns.TaskWithID(kernel.ThreadID(tid))
if task == nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers)
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
index 03bed22a3..4d3a2f7e6 100644
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -29,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// +stateify savable
@@ -58,7 +57,7 @@ func (s *selfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error
}
tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
if tgid == 0 {
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
return strconv.FormatUint(uint64(tgid), 10), nil
}
@@ -100,7 +99,7 @@ func (s *threadSelfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string,
tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
tid := s.pidns.IDOfTask(t)
if tid == 0 || tgid == 0 {
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
return fmt.Sprintf("%d/task/%d", tgid, tid), nil
}
diff --git a/pkg/sentry/fsimpl/signalfd/BUILD b/pkg/sentry/fsimpl/signalfd/BUILD
index adb610213..403c6f254 100644
--- a/pkg/sentry/fsimpl/signalfd/BUILD
+++ b/pkg/sentry/fsimpl/signalfd/BUILD
@@ -9,10 +9,10 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/sentry/kernel",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/fsimpl/signalfd/signalfd.go b/pkg/sentry/fsimpl/signalfd/signalfd.go
index a7f5928b7..bdb03ef96 100644
--- a/pkg/sentry/fsimpl/signalfd/signalfd.go
+++ b/pkg/sentry/fsimpl/signalfd/signalfd.go
@@ -18,10 +18,10 @@ package signalfd
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -91,7 +91,7 @@ func (sfd *SignalFileDescription) Read(ctx context.Context, dst usermem.IOSequen
info, err := sfd.target.Sigtimedwait(sfd.Mask(), 0)
if err != nil {
// There must be no signal available.
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// Copy out the signal info using the specified format.
diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD
index 1af0a5cbc..ab21f028e 100644
--- a/pkg/sentry/fsimpl/sys/BUILD
+++ b/pkg/sentry/fsimpl/sys/BUILD
@@ -36,7 +36,6 @@ go_library(
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/timerfd/BUILD b/pkg/sentry/fsimpl/timerfd/BUILD
index e6980a314..2b83d7d9a 100644
--- a/pkg/sentry/fsimpl/timerfd/BUILD
+++ b/pkg/sentry/fsimpl/timerfd/BUILD
@@ -12,7 +12,6 @@ go_library(
"//pkg/hostarch",
"//pkg/sentry/kernel/time",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go
index 655a1c76a..68b785791 100644
--- a/pkg/sentry/fsimpl/timerfd/timerfd.go
+++ b/pkg/sentry/fsimpl/timerfd/timerfd.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/hostarch"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -82,7 +81,7 @@ func (tfd *TimerFileDescription) Read(ctx context.Context, dst usermem.IOSequenc
}
return sizeofUint64, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// Clock returns the timer fd's Clock.
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index dc8b9bfeb..94486bb63 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -82,7 +82,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sentry/vfs/memxattr",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
@@ -125,7 +124,6 @@ go_test(
"//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 8b04df038..e067f136e 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Sync implements vfs.FilesystemImpl.Sync.
@@ -75,7 +74,7 @@ afterSymlink:
}
child, ok := dir.childMap[name]
if !ok {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
return nil, err
@@ -171,12 +170,12 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
return linuxerr.EEXIST
}
if !dir && rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// tmpfs never calls VFS.InvalidateDentry(), so parentDir.dentry can only
// be dead if it was deleted.
if parentDir.dentry.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
mnt := rp.Mount()
if err := mnt.CheckBeginWrite(); err != nil {
@@ -258,7 +257,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return err
}
if i.nlink == 0 {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if i.nlink == maxLinks {
return linuxerr.EMLINK
@@ -345,7 +344,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if rp.Done() {
// Reject attempts to open mount root directory with O_CREAT.
if rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if mustCreate {
return nil, linuxerr.EEXIST
@@ -366,11 +365,11 @@ afterTrailingSymlink:
}
// Reject attempts to open directories with O_CREAT.
if rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
name := rp.Component()
if name == "." || name == ".." {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if len(name) > linux.NAME_MAX {
return nil, linuxerr.ENAMETOOLONG
@@ -457,7 +456,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
case *directory:
// Can't open directories writably.
if ats&vfs.MayWrite != 0 {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
var fd directoryFD
fd.LockFD.Init(&d.inode.locks)
@@ -532,7 +531,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
renamed, ok := oldParentDir.childMap[oldName]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := oldParentDir.mayDelete(rp.Credentials(), renamed); err != nil {
return err
@@ -567,7 +566,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
replacedDir, ok := replaced.inode.impl.(*directory)
if ok {
if !renamed.inode.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if len(replacedDir.childMap) != 0 {
return linuxerr.ENOTEMPTY
@@ -588,7 +587,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
// tmpfs never calls VFS.InvalidateDentry(), so newParentDir.dentry can
// only be dead if it was deleted.
if newParentDir.dentry.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// Linux places this check before some of those above; we do it here for
@@ -654,7 +653,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
}
child, ok := parentDir.childMap[name]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parentDir.mayDelete(rp.Credentials(), child); err != nil {
return err
@@ -754,17 +753,17 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
}
name := rp.Component()
if name == "." || name == ".." {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
child, ok := parentDir.childMap[name]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parentDir.mayDelete(rp.Credentials(), child); err != nil {
return err
}
if child.inode.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if rp.MustBeDir() {
return linuxerr.ENOTDIR
diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
index 418c7994e..99afd9817 100644
--- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -202,7 +201,7 @@ func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) {
readData := make([]byte, 1)
dst := usermem.BytesIOSequence(readData)
bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{})
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err)
}
if bytesRead != 0 {
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
index 4393cc13b..cb7711b39 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
@@ -21,10 +21,10 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -146,7 +146,7 @@ func TestLocks(t *testing.T) {
if err := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, nil); err != nil {
t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
}
- if got, want := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil), syserror.ErrWouldBlock; got != want {
+ if got, want := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil), linuxerr.ErrWouldBlock; got != want {
t.Fatalf("fd.Impl().LockBSD failed: got = %v, want = %v", got, want)
}
if err := fd.Impl().UnlockBSD(ctx, uid1); err != nil {
@@ -165,7 +165,7 @@ func TestLocks(t *testing.T) {
if err := fd.Impl().LockPOSIX(ctx, uid1, 0 /* ownerPID */, lock.WriteLock, lock.LockRange{Start: 0, End: 1}, nil); err != nil {
t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
}
- if got, want := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil), syserror.ErrWouldBlock; got != want {
+ if got, want := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil), linuxerr.ErrWouldBlock; got != want {
t.Fatalf("fd.Impl().LockPOSIX failed: got = %v, want = %v", got, want)
}
if err := fd.Impl().UnlockPOSIX(ctx, uid1, lock.LockRange{Start: 0, End: 1}); err != nil {
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index f2250c025..feafb06e4 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -44,7 +44,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sentry/vfs/memxattr"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Name is the default filesystem name.
@@ -556,7 +555,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs.
needsCtimeBump = true
}
case *directory:
- return syserror.EISDIR
+ return linuxerr.EISDIR
default:
return linuxerr.EINVAL
}
diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD
index 1d855234c..c12abdf33 100644
--- a/pkg/sentry/fsimpl/verity/BUILD
+++ b/pkg/sentry/fsimpl/verity/BUILD
@@ -1,10 +1,24 @@
load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
licenses(["notice"])
+go_template_instance(
+ name = "dentry_list",
+ out = "dentry_list.go",
+ package = "verity",
+ prefix = "dentry",
+ template = "//pkg/ilist:generic_list",
+ types = {
+ "Element": "*dentry",
+ "Linker": "*dentry",
+ },
+)
+
go_library(
name = "verity",
srcs = [
+ "dentry_list.go",
"filesystem.go",
"save_restore.go",
"verity.go",
@@ -28,7 +42,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
index 930016a3e..52d47994d 100644
--- a/pkg/sentry/fsimpl/verity/filesystem.go
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -32,7 +32,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -67,40 +66,23 @@ func putDentrySlice(ds *[]*dentry) {
dentrySlicePool.Put(ds)
}
-// renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls
-// dentry.checkDropLocked on all dentries in *ds with fs.renameMu locked for
+// renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls
+// dentry.checkCachingLocked on all dentries in *ds with fs.renameMu locked for
// writing.
//
// ds is a pointer-to-pointer since defer evaluates its arguments immediately,
// but dentry slices are allocated lazily, and it's much easier to say "defer
-// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
-// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
+// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
+// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
// +checklocksrelease:fs.renameMu
-func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
+func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
fs.renameMu.RUnlock()
if *ds == nil {
return
}
- if len(**ds) != 0 {
- fs.renameMu.Lock()
- for _, d := range **ds {
- d.checkDropLocked(ctx)
- }
- fs.renameMu.Unlock()
- }
- putDentrySlice(*ds)
-}
-
-// +checklocksrelease:fs.renameMu
-func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
- if *ds == nil {
- fs.renameMu.Unlock()
- return
- }
for _, d := range **ds {
- d.checkDropLocked(ctx)
+ d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
}
- fs.renameMu.Unlock()
putDentrySlice(*ds)
}
@@ -166,7 +148,7 @@ afterSymlink:
// verifyChildLocked verifies the hash of child against the already verified
// hash of the parent to ensure the child is expected. verifyChild triggers a
// sentry panic if unexpected modifications to the file system are detected. In
-// ErrorOnViolation mode it returns a syserror instead.
+// ErrorOnViolation mode it returns a linuxerr instead.
//
// Preconditions:
// * fs.renameMu must be locked.
@@ -547,7 +529,7 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry,
if parent.verityEnabled() {
if _, ok := parent.childrenNames[name]; !ok {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
}
@@ -595,23 +577,6 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry,
}
}
- // Clear the Merkle tree file if they are to be generated at runtime.
- // TODO(b/182315468): Optimize the Merkle tree generate process to
- // allow only updating certain files/directories.
- if fs.allowRuntimeEnable {
- childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
- Root: childMerkleVD,
- Start: childMerkleVD,
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR | linux.O_TRUNC,
- Mode: 0644,
- })
- if err != nil {
- return nil, err
- }
- childMerkleFD.DecRef(ctx)
- }
-
// The dentry needs to be cleaned up if any error occurs. IncRef will be
// called if a verity child dentry is successfully created.
defer childMerkleVD.DecRef(ctx)
@@ -718,7 +683,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds
}
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return err
@@ -730,7 +695,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds
func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return nil, err
@@ -751,7 +716,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
start := rp.Start().Impl().(*dentry)
d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
if err != nil {
@@ -788,7 +753,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
start := rp.Start().Impl().(*dentry)
if rp.Done() {
@@ -970,7 +935,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return "", err
@@ -1000,7 +965,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return linux.Statx{}, err
@@ -1046,7 +1011,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
if _, err := fs.resolveLocked(ctx, rp, &ds); err != nil {
return nil, err
}
@@ -1057,7 +1022,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return nil, err
@@ -1073,7 +1038,7 @@ func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return "", err
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index c5fa9855b..d2526263c 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -23,10 +23,12 @@
// Lock order:
//
// filesystem.renameMu
-// dentry.dirMu
-// fileDescription.mu
-// filesystem.verityMu
-// dentry.hashMu
+// dentry.cachingMu
+// filesystem.cacheMu
+// dentry.dirMu
+// fileDescription.mu
+// filesystem.verityMu
+// dentry.hashMu
//
// Locking dentry.dirMu in multiple dentries requires that parent dentries are
// locked before child dentries, and that filesystem.renameMu is locked to
@@ -60,7 +62,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -97,6 +98,9 @@ const (
// sizeOfStringInt32 is the size for a 32 bit integer stored as string in
// extended attributes. The maximum value of a 32 bit integer has 10 digits.
sizeOfStringInt32 = 10
+
+ // defaultMaxCachedDentries is the default limit of dentry cache.
+ defaultMaxCachedDentries = uint64(1000)
)
var (
@@ -107,9 +111,10 @@ var (
// Mount option names for verityfs.
const (
- moptLowerPath = "lower_path"
- moptRootHash = "root_hash"
- moptRootName = "root_name"
+ moptLowerPath = "lower_path"
+ moptRootHash = "root_hash"
+ moptRootName = "root_name"
+ moptDentryCacheLimit = "dentry_cache_limit"
)
// HashAlgorithm is a type specifying the algorithm used to hash the file
@@ -189,6 +194,17 @@ type filesystem struct {
// dentries.
renameMu sync.RWMutex `state:"nosave"`
+ // cachedDentries contains all dentries with 0 references. (Due to race
+ // conditions, it may also contain dentries with non-zero references.)
+ // cachedDentriesLen is the number of dentries in cachedDentries. These
+ // fields are protected by cacheMu.
+ cacheMu sync.Mutex `state:"nosave"`
+ cachedDentries dentryList
+ cachedDentriesLen uint64
+
+ // maxCachedDentries is the maximum size of filesystem.cachedDentries.
+ maxCachedDentries uint64
+
// verityMu synchronizes enabling verity files, protects files or
// directories from being enabled by different threads simultaneously.
// It also ensures that verity does not access files that are being
@@ -199,6 +215,10 @@ type filesystem struct {
// is for the whole file system to ensure that no more than one file is
// enabled the same time.
verityMu sync.RWMutex `state:"nosave"`
+
+ // released is nonzero once filesystem.Release has been called. It is accessed
+ // with atomic memory operations.
+ released int32
}
// InternalFilesystemOptions may be passed as
@@ -239,7 +259,7 @@ func (FilesystemType) Release(ctx context.Context) {}
// mode, it returns EIO, otherwise it panic.
func (fs *filesystem) alertIntegrityViolation(msg string) error {
if fs.action == ErrorOnViolation {
- return syserror.EIO
+ return linuxerr.EIO
}
panic(msg)
}
@@ -267,6 +287,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
delete(mopts, moptRootName)
rootName = root
}
+ maxCachedDentries := defaultMaxCachedDentries
+ if str, ok := mopts[moptDentryCacheLimit]; ok {
+ delete(mopts, moptDentryCacheLimit)
+ maxCD, err := strconv.ParseUint(str, 10, 64)
+ if err != nil {
+ ctx.Warningf("verity.FilesystemType.GetFilesystem: invalid dentry cache limit: %s=%s", moptDentryCacheLimit, str)
+ return nil, nil, linuxerr.EINVAL
+ }
+ maxCachedDentries = maxCD
+ }
// Check for unparsed options.
if len(mopts) != 0 {
@@ -340,12 +370,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
action: iopts.Action,
opts: opts.Data,
allowRuntimeEnable: iopts.AllowRuntimeEnable,
+ maxCachedDentries: maxCachedDentries,
}
fs.vfsfs.Init(vfsObj, &fstype, fs)
// Construct the root dentry.
d := fs.newDentry()
- d.refs = 1
+ // Set the root's reference count to 2. One reference is returned to
+ // the caller, and the other is held by fs to prevent the root from
+ // being "cached" and subsequently evicted.
+ d.refs = 2
lowerVD := vfs.MakeVirtualDentry(lowerMount, lowerMount.Root())
lowerVD.IncRef()
d.lowerVD = lowerVD
@@ -520,7 +554,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// Release implements vfs.FilesystemImpl.Release.
func (fs *filesystem) Release(ctx context.Context) {
+ atomic.StoreInt32(&fs.released, 1)
fs.lowerMount.DecRef(ctx)
+
+ fs.renameMu.Lock()
+ fs.evictAllCachedDentriesLocked(ctx)
+ fs.renameMu.Unlock()
+
+ // An extra reference was held by the filesystem on the root to prevent
+ // it from being cached/evicted.
+ fs.rootDentry.DecRef(ctx)
}
// MountOptions implements vfs.FilesystemImpl.MountOptions.
@@ -534,6 +577,11 @@ func (fs *filesystem) MountOptions() string {
type dentry struct {
vfsd vfs.Dentry
+ // refs is the reference count. Each dentry holds a reference on its
+ // parent, even if disowned. When refs reaches 0, the dentry may be
+ // added to the cache or destroyed. If refs == -1, the dentry has
+ // already been destroyed. refs is accessed using atomic memory
+ // operations.
refs int64
// fs is the owning filesystem. fs is immutable.
@@ -588,13 +636,23 @@ type dentry struct {
// is protected by hashMu.
hashMu sync.RWMutex `state:"nosave"`
hash []byte
+
+ // cachingMu is used to synchronize concurrent dentry caching attempts on
+ // this dentry.
+ cachingMu sync.Mutex `state:"nosave"`
+
+ // If cached is true, dentryEntry links dentry into
+ // filesystem.cachedDentries. cached and dentryEntry are protected by
+ // cachingMu.
+ cached bool
+ dentryEntry
}
// newDentry creates a new dentry representing the given verity file. The
-// dentry initially has no references; it is the caller's responsibility to set
-// the dentry's reference count and/or call dentry.destroy() as appropriate.
-// The dentry is initially invalid in that it contains no underlying dentry;
-// the caller is responsible for setting them.
+// dentry initially has no references, but is not cached; it is the caller's
+// responsibility to set the dentry's reference count and/or call
+// dentry.destroy() as appropriate. The dentry is initially invalid in that it
+// contains no underlying dentry; the caller is responsible for setting them.
func (fs *filesystem) newDentry() *dentry {
d := &dentry{
fs: fs,
@@ -630,42 +688,23 @@ func (d *dentry) TryIncRef() bool {
// DecRef implements vfs.DentryImpl.DecRef.
func (d *dentry) DecRef(ctx context.Context) {
- r := atomic.AddInt64(&d.refs, -1)
- if d.LogRefs() {
- refsvfs2.LogDecRef(d, r)
- }
- if r == 0 {
- d.fs.renameMu.Lock()
- d.checkDropLocked(ctx)
- d.fs.renameMu.Unlock()
- } else if r < 0 {
- panic("verity.dentry.DecRef() called without holding a reference")
+ if d.decRefNoCaching() == 0 {
+ d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
}
}
-func (d *dentry) decRefLocked(ctx context.Context) {
+// decRefNoCaching decrements d's reference count without calling
+// d.checkCachingLocked, even if d's reference count reaches 0; callers are
+// responsible for ensuring that d.checkCachingLocked will be called later.
+func (d *dentry) decRefNoCaching() int64 {
r := atomic.AddInt64(&d.refs, -1)
if d.LogRefs() {
refsvfs2.LogDecRef(d, r)
}
- if r == 0 {
- d.checkDropLocked(ctx)
- } else if r < 0 {
- panic("verity.dentry.decRefLocked() called without holding a reference")
+ if r < 0 {
+ panic("verity.dentry.decRefNoCaching() called without holding a reference")
}
-}
-
-// checkDropLocked should be called after d's reference count becomes 0 or it
-// becomes deleted.
-func (d *dentry) checkDropLocked(ctx context.Context) {
- // Dentries with a positive reference count must be retained. Dentries
- // with a negative reference count have already been destroyed.
- if atomic.LoadInt64(&d.refs) != 0 {
- return
- }
- // Refs is still zero; destroy it.
- d.destroyLocked(ctx)
- return
+ return r
}
// destroyLocked destroys the dentry.
@@ -684,6 +723,12 @@ func (d *dentry) destroyLocked(ctx context.Context) {
panic("verity.dentry.destroyLocked() called with references on the dentry")
}
+ // Drop the reference held by d on its parent without recursively
+ // locking d.fs.renameMu.
+ if d.parent != nil && d.parent.decRefNoCaching() == 0 {
+ d.parent.checkCachingLocked(ctx, true /* renameMuWriteLocked */)
+ }
+
if d.lowerVD.Ok() {
d.lowerVD.DecRef(ctx)
}
@@ -696,7 +741,6 @@ func (d *dentry) destroyLocked(ctx context.Context) {
delete(d.parent.children, d.name)
}
d.parent.dirMu.Unlock()
- d.parent.decRefLocked(ctx)
}
refsvfs2.Unregister(d)
}
@@ -735,6 +779,140 @@ func (d *dentry) OnZeroWatches(context.Context) {
//TODO(b/159261227): Implement OnZeroWatches.
}
+// checkCachingLocked should be called after d's reference count becomes 0 or
+// it becomes disowned.
+//
+// For performance, checkCachingLocked can also be called after d's reference
+// count becomes non-zero, so that d can be removed from the LRU cache. This
+// may help in reducing the size of the cache and hence reduce evictions. Note
+// that this is not necessary for correctness.
+//
+// It may be called on a destroyed dentry. For example,
+// renameMu[R]UnlockAndCheckCaching may call checkCachingLocked multiple times
+// for the same dentry when the dentry is visited more than once in the same
+// operation. One of the calls may destroy the dentry, so subsequent calls will
+// do nothing.
+//
+// Preconditions: d.fs.renameMu must be locked for writing if
+// renameMuWriteLocked is true; it may be temporarily unlocked.
+func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked bool) {
+ d.cachingMu.Lock()
+ refs := atomic.LoadInt64(&d.refs)
+ if refs == -1 {
+ // Dentry has already been destroyed.
+ d.cachingMu.Unlock()
+ return
+ }
+ if refs > 0 {
+ // fs.cachedDentries is permitted to contain dentries with non-zero refs,
+ // which are skipped by fs.evictCachedDentryLocked() upon reaching the end
+ // of the LRU. But it is still beneficial to remove d from the cache as we
+ // are already holding d.cachingMu. Keeping a cleaner cache also reduces
+ // the number of evictions (which is expensive as it acquires fs.renameMu).
+ d.removeFromCacheLocked()
+ d.cachingMu.Unlock()
+ return
+ }
+
+ if atomic.LoadInt32(&d.fs.released) != 0 {
+ d.cachingMu.Unlock()
+ if !renameMuWriteLocked {
+ // Need to lock d.fs.renameMu to access d.parent. Lock it for writing as
+ // needed by d.destroyLocked() later.
+ d.fs.renameMu.Lock()
+ defer d.fs.renameMu.Unlock()
+ }
+ if d.parent != nil {
+ d.parent.dirMu.Lock()
+ delete(d.parent.children, d.name)
+ d.parent.dirMu.Unlock()
+ }
+ d.destroyLocked(ctx) // +checklocksforce: see above.
+ return
+ }
+
+ d.fs.cacheMu.Lock()
+ // If d is already cached, just move it to the front of the LRU.
+ if d.cached {
+ d.fs.cachedDentries.Remove(d)
+ d.fs.cachedDentries.PushFront(d)
+ d.fs.cacheMu.Unlock()
+ d.cachingMu.Unlock()
+ return
+ }
+ // Cache the dentry, then evict the least recently used cached dentry if
+ // the cache becomes over-full.
+ d.fs.cachedDentries.PushFront(d)
+ d.fs.cachedDentriesLen++
+ d.cached = true
+ shouldEvict := d.fs.cachedDentriesLen > d.fs.maxCachedDentries
+ d.fs.cacheMu.Unlock()
+ d.cachingMu.Unlock()
+
+ if shouldEvict {
+ if !renameMuWriteLocked {
+ // Need to lock d.fs.renameMu for writing as needed by
+ // d.evictCachedDentryLocked().
+ d.fs.renameMu.Lock()
+ defer d.fs.renameMu.Unlock()
+ }
+ d.fs.evictCachedDentryLocked(ctx) // +checklocksforce: see above.
+ }
+}
+
+// Preconditions: d.cachingMu must be locked.
+func (d *dentry) removeFromCacheLocked() {
+ if d.cached {
+ d.fs.cacheMu.Lock()
+ d.fs.cachedDentries.Remove(d)
+ d.fs.cachedDentriesLen--
+ d.fs.cacheMu.Unlock()
+ d.cached = false
+ }
+}
+
+// Precondition: fs.renameMu must be locked for writing; it may be temporarily
+// unlocked.
+// +checklocks:fs.renameMu
+func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) {
+ for fs.cachedDentriesLen != 0 {
+ fs.evictCachedDentryLocked(ctx)
+ }
+}
+
+// Preconditions:
+// * fs.renameMu must be locked for writing; it may be temporarily unlocked.
+// +checklocks:fs.renameMu
+func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
+ fs.cacheMu.Lock()
+ victim := fs.cachedDentries.Back()
+ fs.cacheMu.Unlock()
+ if victim == nil {
+ // fs.cachedDentries may have become empty between when it was
+ // checked and when we locked fs.cacheMu.
+ return
+ }
+
+ victim.cachingMu.Lock()
+ victim.removeFromCacheLocked()
+ // victim.refs may have become non-zero from an earlier path resolution
+ // since it was inserted into fs.cachedDentries.
+ if atomic.LoadInt64(&victim.refs) != 0 {
+ victim.cachingMu.Unlock()
+ return
+ }
+ if victim.parent != nil {
+ victim.parent.dirMu.Lock()
+ // Note that victim can't be a mount point (in any mount
+ // namespace), since VFS holds references on mount points.
+ fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, &victim.vfsd)
+ delete(victim.parent.children, victim.name)
+ victim.parent.dirMu.Unlock()
+ }
+ victim.cachingMu.Unlock()
+ victim.destroyLocked(ctx) // +checklocksforce: owned as precondition, victim.fs == fs.
+}
+
func (d *dentry) isSymlink() bool {
return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
}
@@ -1091,6 +1269,21 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) {
return 0, fd.d.fs.alertIntegrityViolation("Unexpected verity fd: missing expected underlying fds")
}
+ // Populate children names here. We cannot rely on the children
+ // dentries to populate parent dentry's children names, because the
+ // parent dentry may be destroyed before users enable verity if its ref
+ // count drops to zero.
+ if fd.d.isDir() {
+ if err := fd.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error {
+ if dirent.Name != "." && dirent.Name != ".." {
+ fd.d.childrenNames[dirent.Name] = struct{}{}
+ }
+ return nil
+ })); err != nil {
+ return 0, err
+ }
+ }
+
hash, dataSize, err := fd.generateMerkleLocked(ctx)
if err != nil {
return 0, err
@@ -1118,9 +1311,6 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) {
}); err != nil {
return 0, err
}
-
- // Add the current child's name to parent's childrenNames.
- fd.d.parent.childrenNames[fd.d.name] = struct{}{}
}
// Record the size of the data being hashed for fd.
@@ -1215,7 +1405,7 @@ func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.
case linux.FS_IOC_GETFLAGS:
return fd.verityFlags(ctx, args[2].Pointer())
default:
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
}
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index e4e0dc04f..816e60329 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -281,7 +281,6 @@ go_library(
"//pkg/state/wire",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/tcpip",
"//pkg/tcpip/stack",
"//pkg/usermem",
diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD
index 7a1a36454..9aa03f506 100644
--- a/pkg/sentry/kernel/auth/BUILD
+++ b/pkg/sentry/kernel/auth/BUILD
@@ -66,6 +66,5 @@ go_library(
"//pkg/errors/linuxerr",
"//pkg/log",
"//pkg/sync",
- "//pkg/syserror",
],
)
diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go
index c93ef6ac1..a0e291f58 100644
--- a/pkg/sentry/kernel/cgroup.go
+++ b/pkg/sentry/kernel/cgroup.go
@@ -196,6 +196,7 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files
// uniqueness of controllers enforced by Register, drop the
// dying hierarchy now. The eventual unregister by the FS
// teardown will become a no-op.
+ r.unregisterLocked(h.id)
return nil
}
return h.fs
diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD
index 564c3d42e..f240a68aa 100644
--- a/pkg/sentry/kernel/eventfd/BUILD
+++ b/pkg/sentry/kernel/eventfd/BUILD
@@ -9,13 +9,13 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fdnotifier",
"//pkg/hostarch",
"//pkg/sentry/fs",
"//pkg/sentry/fs/anon",
"//pkg/sentry/fs/fsutil",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go
index 4466fbc9d..5ea44a2c2 100644
--- a/pkg/sentry/kernel/eventfd/eventfd.go
+++ b/pkg/sentry/kernel/eventfd/eventfd.go
@@ -22,13 +22,13 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/anon"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -145,7 +145,7 @@ func (e *EventOperations) hostRead(ctx context.Context, dst usermem.IOSequence)
if _, err := unix.Read(e.hostfd, buf[:]); err != nil {
if err == unix.EWOULDBLOCK {
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
return err
}
@@ -165,7 +165,7 @@ func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) erro
// We can't complete the read if the value is currently zero.
if e.val == 0 {
e.mu.Unlock()
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
// Update the value based on the mode the event is operating in.
@@ -198,7 +198,7 @@ func (e *EventOperations) hostWrite(val uint64) error {
hostarch.ByteOrder.PutUint64(buf[:], val)
_, err := unix.Write(e.hostfd, buf[:])
if err == unix.EWOULDBLOCK {
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
return err
}
@@ -230,7 +230,7 @@ func (e *EventOperations) Signal(val uint64) error {
// uint64 minus 1.
if val > math.MaxUint64-1-e.val {
e.mu.Unlock()
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
e.val += val
diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD
index cfdea5cf7..c897e3a5f 100644
--- a/pkg/sentry/kernel/futex/BUILD
+++ b/pkg/sentry/kernel/futex/BUILD
@@ -42,7 +42,6 @@ go_library(
"//pkg/log",
"//pkg/sentry/memmap",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go
index f5c364c96..2c9ea65aa 100644
--- a/pkg/sentry/kernel/futex/futex.go
+++ b/pkg/sentry/kernel/futex/futex.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// KeyKind indicates the type of a Key.
@@ -166,7 +165,7 @@ func atomicOp(t Target, addr hostarch.Addr, opIn uint32) (bool, error) {
case linux.FUTEX_OP_XOR:
newVal = oldVal ^ opArg
default:
- return false, syserror.ENOSYS
+ return false, linuxerr.ENOSYS
}
prev, err := t.CompareAndSwapUint32(addr, oldVal, newVal)
if err != nil {
@@ -192,7 +191,7 @@ func atomicOp(t Target, addr hostarch.Addr, opIn uint32) (bool, error) {
case linux.FUTEX_OP_CMP_GE:
return oldVal >= cmpArg, nil
default:
- return false, syserror.ENOSYS
+ return false, linuxerr.ENOSYS
}
}
diff --git a/pkg/sentry/kernel/ipc/object.go b/pkg/sentry/kernel/ipc/object.go
index 387b35e7e..facd157c7 100644
--- a/pkg/sentry/kernel/ipc/object.go
+++ b/pkg/sentry/kernel/ipc/object.go
@@ -19,6 +19,8 @@ package ipc
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
@@ -113,3 +115,36 @@ func (o *Object) CheckPermissions(creds *auth.Credentials, req fs.PermMask) bool
}
return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, o.UserNS)
}
+
+// Set modifies attributes for an IPC object. See *ctl(IPC_SET).
+//
+// Precondition: Mechanism.mu must be held.
+func (o *Object) Set(ctx context.Context, perm *linux.IPCPerm) error {
+ creds := auth.CredentialsFromContext(ctx)
+ uid := creds.UserNamespace.MapToKUID(auth.UID(perm.UID))
+ gid := creds.UserNamespace.MapToKGID(auth.GID(perm.GID))
+ if !uid.Ok() || !gid.Ok() {
+ // The man pages don't specify an errno for invalid uid/gid, but EINVAL
+ // is generally used for invalid arguments.
+ return linuxerr.EINVAL
+ }
+
+ if !o.CheckOwnership(creds) {
+ // "The argument cmd has the value IPC_SET or IPC_RMID, but the
+ // effective user ID of the calling process is not the creator (as
+ // found in msg_perm.cuid) or the owner (as found in msg_perm.uid)
+ // of the message queue, and the caller is not privileged (Linux:
+ // does not have the CAP_SYS_ADMIN capability)."
+ return linuxerr.EPERM
+ }
+
+ // User may only modify the lower 9 bits of the mode. All the other bits are
+ // always 0 for the underlying inode.
+ mode := linux.FileMode(perm.Mode & 0x1ff)
+
+ o.Perms = fs.FilePermsFromMode(mode)
+ o.Owner.UID = uid
+ o.Owner.GID = gid
+
+ return nil
+}
diff --git a/pkg/sentry/kernel/msgqueue/msgqueue.go b/pkg/sentry/kernel/msgqueue/msgqueue.go
index fab396d7c..7c459d076 100644
--- a/pkg/sentry/kernel/msgqueue/msgqueue.go
+++ b/pkg/sentry/kernel/msgqueue/msgqueue.go
@@ -206,6 +206,48 @@ func (r *Registry) FindByID(id ipc.ID) (*Queue, error) {
return mech.(*Queue), nil
}
+// IPCInfo reports global parameters for message queues. See msgctl(IPC_INFO).
+func (r *Registry) IPCInfo(ctx context.Context) *linux.MsgInfo {
+ return &linux.MsgInfo{
+ MsgPool: linux.MSGPOOL,
+ MsgMap: linux.MSGMAP,
+ MsgMax: linux.MSGMAX,
+ MsgMnb: linux.MSGMNB,
+ MsgMni: linux.MSGMNI,
+ MsgSsz: linux.MSGSSZ,
+ MsgTql: linux.MSGTQL,
+ MsgSeg: linux.MSGSEG,
+ }
+}
+
+// MsgInfo reports global parameters for message queues. See msgctl(MSG_INFO).
+func (r *Registry) MsgInfo(ctx context.Context) *linux.MsgInfo {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ var messages, bytes uint64
+ r.reg.ForAllObjects(
+ func(o ipc.Mechanism) {
+ q := o.(*Queue)
+ q.mu.Lock()
+ messages += q.messageCount
+ bytes += q.byteCount
+ q.mu.Unlock()
+ },
+ )
+
+ return &linux.MsgInfo{
+ MsgPool: int32(r.reg.ObjectCount()),
+ MsgMap: int32(messages),
+ MsgTql: int32(bytes),
+ MsgMax: linux.MSGMAX,
+ MsgMnb: linux.MSGMNB,
+ MsgMni: linux.MSGMNI,
+ MsgSsz: linux.MSGSSZ,
+ MsgSeg: linux.MSGSEG,
+ }
+}
+
// Send appends a message to the message queue, and returns an error if sending
// fails. See msgsnd(2).
func (q *Queue) Send(ctx context.Context, m Message, b Blocker, wait bool, pid int32) error {
@@ -465,6 +507,73 @@ func (q *Queue) msgAtIndex(mType int64) *Message {
return msg
}
+// Set modifies some values of the queue. See msgctl(IPC_SET).
+func (q *Queue) Set(ctx context.Context, ds *linux.MsqidDS) error {
+ q.mu.Lock()
+ defer q.mu.Unlock()
+
+ creds := auth.CredentialsFromContext(ctx)
+ if ds.MsgQbytes > maxQueueBytes && !creds.HasCapabilityIn(linux.CAP_SYS_RESOURCE, q.obj.UserNS) {
+ // "An attempt (IPC_SET) was made to increase msg_qbytes beyond the
+ // system parameter MSGMNB, but the caller is not privileged (Linux:
+ // does not have the CAP_SYS_RESOURCE capability)."
+ return linuxerr.EPERM
+ }
+
+ if err := q.obj.Set(ctx, &ds.MsgPerm); err != nil {
+ return err
+ }
+
+ q.maxBytes = ds.MsgQbytes
+ q.changeTime = ktime.NowFromContext(ctx)
+ return nil
+}
+
+// Stat returns a MsqidDS object filled with information about the queue. See
+// msgctl(IPC_STAT) and msgctl(MSG_STAT).
+func (q *Queue) Stat(ctx context.Context) (*linux.MsqidDS, error) {
+ return q.stat(ctx, fs.PermMask{Read: true})
+}
+
+// StatAny is similar to Queue.Stat, but doesn't require read permission. See
+// msgctl(MSG_STAT_ANY).
+func (q *Queue) StatAny(ctx context.Context) (*linux.MsqidDS, error) {
+ return q.stat(ctx, fs.PermMask{})
+}
+
+// stat returns a MsqidDS object filled with information about the queue. An
+// error is returned if the user doesn't have the specified permissions.
+func (q *Queue) stat(ctx context.Context, mask fs.PermMask) (*linux.MsqidDS, error) {
+ q.mu.Lock()
+ defer q.mu.Unlock()
+
+ creds := auth.CredentialsFromContext(ctx)
+ if !q.obj.CheckPermissions(creds, mask) {
+ // "The caller must have read permission on the message queue."
+ return nil, linuxerr.EACCES
+ }
+
+ return &linux.MsqidDS{
+ MsgPerm: linux.IPCPerm{
+ Key: uint32(q.obj.Key),
+ UID: uint32(creds.UserNamespace.MapFromKUID(q.obj.Owner.UID)),
+ GID: uint32(creds.UserNamespace.MapFromKGID(q.obj.Owner.GID)),
+ CUID: uint32(creds.UserNamespace.MapFromKUID(q.obj.Creator.UID)),
+ CGID: uint32(creds.UserNamespace.MapFromKGID(q.obj.Creator.GID)),
+ Mode: uint16(q.obj.Perms.LinuxMode()),
+ Seq: 0, // IPC sequences not supported.
+ },
+ MsgStime: q.sendTime.TimeT(),
+ MsgRtime: q.receiveTime.TimeT(),
+ MsgCtime: q.changeTime.TimeT(),
+ MsgCbytes: q.byteCount,
+ MsgQnum: q.messageCount,
+ MsgQbytes: q.maxBytes,
+ MsgLspid: q.sendPID,
+ MsgLrpid: q.receivePID,
+ }, nil
+}
+
// Lock implements ipc.Mechanism.Lock.
func (q *Queue) Lock() {
q.mu.Lock()
diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD
index 94ebac7c5..5b2bac783 100644
--- a/pkg/sentry/kernel/pipe/BUILD
+++ b/pkg/sentry/kernel/pipe/BUILD
@@ -31,7 +31,6 @@ go_library(
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
@@ -51,7 +50,6 @@ go_test(
"//pkg/errors/linuxerr",
"//pkg/sentry/contexttest",
"//pkg/sentry/fs",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go
index 08786d704..615591507 100644
--- a/pkg/sentry/kernel/pipe/node.go
+++ b/pkg/sentry/kernel/pipe/node.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// inodeOperations implements fs.InodeOperations for pipes.
@@ -95,7 +94,7 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi
if i.p.isNamed && !flags.NonBlocking && !i.p.HasWriters() {
if !waitFor(&i.mu, &i.wWakeup, ctx) {
r.DecRef(ctx)
- return nil, syserror.ErrInterrupted
+ return nil, linuxerr.ErrInterrupted
}
}
@@ -118,7 +117,7 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi
if !waitFor(&i.mu, &i.rWakeup, ctx) {
w.DecRef(ctx)
- return nil, syserror.ErrInterrupted
+ return nil, linuxerr.ErrInterrupted
}
}
return w, nil
diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go
index d25cf658e..31bd7910a 100644
--- a/pkg/sentry/kernel/pipe/node_test.go
+++ b/pkg/sentry/kernel/pipe/node_test.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/syserror"
)
type sleeper struct {
@@ -240,7 +239,7 @@ func TestBlockedOpenIsCancellable(t *testing.T) {
// If the cancel on the sleeper didn't work, the open for read would never
// return.
res := <-done
- if res.error != syserror.ErrInterrupted {
+ if res.error != linuxerr.ErrInterrupted {
t.Fatalf("Cancellation didn't cause GetFile to return fs.ErrInterrupted, got %v.",
res.error)
}
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index 85e3ce9f4..86beee6fe 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -27,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -201,7 +200,7 @@ func (p *Pipe) peekLocked(count int64, f func(safemem.BlockSeq) (uint64, error))
if !p.HasWriters() {
return 0, io.EOF
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
count = p.size
}
@@ -250,7 +249,7 @@ func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)
avail := p.max - p.size
if avail == 0 {
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
short := false
if count > avail {
@@ -258,7 +257,7 @@ func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)
// (PIPE_BUF) be atomic, but requires no atomicity for writes
// larger than this.
if count <= atomicIOBytes {
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
count = avail
short = true
@@ -307,7 +306,7 @@ func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)
// If we shortened the write, adjust the returned error appropriately.
if short {
- return done, syserror.ErrWouldBlock
+ return done, linuxerr.ErrWouldBlock
}
return done, nil
diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go
index 867f4a76b..aa3ab305d 100644
--- a/pkg/sentry/kernel/pipe/pipe_test.go
+++ b/pkg/sentry/kernel/pipe/pipe_test.go
@@ -18,8 +18,8 @@ import (
"bytes"
"testing"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -51,8 +51,8 @@ func TestPipeReadBlock(t *testing.T) {
defer w.DecRef(ctx)
n, err := r.Readv(ctx, usermem.BytesIOSequence(make([]byte, 1)))
- if n != 0 || err != syserror.ErrWouldBlock {
- t.Fatalf("Readv: got (%d, %v), wanted (0, %v)", n, err, syserror.ErrWouldBlock)
+ if n != 0 || err != linuxerr.ErrWouldBlock {
+ t.Fatalf("Readv: got (%d, %v), wanted (0, %v)", n, err, linuxerr.ErrWouldBlock)
}
}
@@ -67,7 +67,7 @@ func TestPipeWriteBlock(t *testing.T) {
msg := make([]byte, capacity+1)
n, err := w.Writev(ctx, usermem.BytesIOSequence(msg))
- if wantN, wantErr := int64(capacity), syserror.ErrWouldBlock; n != wantN || err != wantErr {
+ if wantN, wantErr := int64(capacity), linuxerr.ErrWouldBlock; n != wantN || err != wantErr {
t.Fatalf("Writev: got (%d, %v), wanted (%d, %v)", n, err, wantN, wantErr)
}
}
@@ -102,7 +102,7 @@ func TestPipeWriteUntilEnd(t *testing.T) {
for {
n, err := r.Readv(ctx, dst)
dst = dst.DropFirst64(n)
- if err == syserror.ErrWouldBlock {
+ if err == linuxerr.ErrWouldBlock {
select {
case <-ch:
continue
@@ -129,7 +129,7 @@ func TestPipeWriteUntilEnd(t *testing.T) {
for src.NumBytes() != 0 {
n, err := w.Writev(ctx, src)
src = src.DropFirst64(n)
- if err == syserror.ErrWouldBlock {
+ if err == linuxerr.ErrWouldBlock {
<-ch
continue
}
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index 077d5fd7f..a6f1989f5 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -121,7 +120,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s
// writer, we have to wait for a writer to open the other end.
if vp.pipe.isNamed && statusFlags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() && !waitFor(&vp.mu, &vp.wWakeup, ctx) {
fd.DecRef(ctx)
- return nil, syserror.EINTR
+ return nil, linuxerr.EINTR
}
case writable:
@@ -137,7 +136,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s
// Wait for a reader to open the other end.
if !waitFor(&vp.mu, &vp.rWakeup, ctx) {
fd.DecRef(ctx)
- return nil, syserror.EINTR
+ return nil, linuxerr.EINTR
}
}
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index 079294f81..717c9a6b3 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/mm"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -465,7 +464,7 @@ func (t *Task) ptraceUnfreezeLocked() {
// stop.
func (t *Task) ptraceUnstop(mode ptraceSyscallMode, singlestep bool, sig linux.Signal) error {
if sig != 0 && !sig.IsValid() {
- return syserror.EIO
+ return linuxerr.EIO
}
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
@@ -532,7 +531,7 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
}
if seize {
if err := target.ptraceSetOptionsLocked(opts); err != nil {
- return syserror.EIO
+ return linuxerr.EIO
}
}
target.ptraceTracer.Store(t)
@@ -569,7 +568,7 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
// ptrace stop.
func (t *Task) ptraceDetach(target *Task, sig linux.Signal) error {
if sig != 0 && !sig.IsValid() {
- return syserror.EIO
+ return linuxerr.EIO
}
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
@@ -967,7 +966,7 @@ func (t *Task) ptraceInterrupt(target *Task) error {
return linuxerr.ESRCH
}
if !target.ptraceSeized {
- return syserror.EIO
+ return linuxerr.EIO
}
target.tg.signalHandlers.mu.Lock()
defer target.tg.signalHandlers.mu.Unlock()
@@ -1030,7 +1029,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
if req == linux.PTRACE_ATTACH || req == linux.PTRACE_SEIZE {
seize := req == linux.PTRACE_SEIZE
if seize && addr != 0 {
- return syserror.EIO
+ return linuxerr.EIO
}
return t.ptraceAttach(target, seize, uintptr(data))
}
@@ -1120,13 +1119,13 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
t.tg.pidns.owner.mu.RLock()
defer t.tg.pidns.owner.mu.RUnlock()
if !target.ptraceSeized {
- return syserror.EIO
+ return linuxerr.EIO
}
if target.ptraceSiginfo == nil {
- return syserror.EIO
+ return linuxerr.EIO
}
if target.ptraceSiginfo.Code>>8 != linux.PTRACE_EVENT_STOP {
- return syserror.EIO
+ return linuxerr.EIO
}
target.tg.signalHandlers.mu.Lock()
defer target.tg.signalHandlers.mu.Unlock()
diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go
index 63422e155..564add01b 100644
--- a/pkg/sentry/kernel/ptrace_amd64.go
+++ b/pkg/sentry/kernel/ptrace_amd64.go
@@ -19,8 +19,8 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -88,6 +88,6 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) err
return err
default:
- return syserror.EIO
+ return linuxerr.EIO
}
}
diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go
index 27514d67b..7c2b94339 100644
--- a/pkg/sentry/kernel/ptrace_arm64.go
+++ b/pkg/sentry/kernel/ptrace_arm64.go
@@ -18,11 +18,11 @@
package kernel
import (
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
- "gvisor.dev/gvisor/pkg/syserror"
)
// ptraceArch implements arch-specific ptrace commands.
func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) error {
- return syserror.EIO
+ return linuxerr.EIO
}
diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go
index 54ca43c2e..0d66648c3 100644
--- a/pkg/sentry/kernel/seccomp.go
+++ b/pkg/sentry/kernel/seccomp.go
@@ -18,9 +18,9 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/syserror"
)
const maxSyscallFilterInstructions = 1 << 15
@@ -176,7 +176,7 @@ func (t *Task) AppendSyscallFilter(p bpf.Program, syncAll bool) error {
}
if totalLength > maxSyscallFilterInstructions {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
newFilters = append(newFilters, p)
diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD
index 2ae08ed12..6aa74219e 100644
--- a/pkg/sentry/kernel/semaphore/BUILD
+++ b/pkg/sentry/kernel/semaphore/BUILD
@@ -31,7 +31,6 @@ go_library(
"//pkg/sentry/kernel/ipc",
"//pkg/sentry/kernel/time",
"//pkg/sync",
- "//pkg/syserror",
],
)
@@ -43,9 +42,9 @@ go_test(
deps = [
"//pkg/abi/linux", # keep
"//pkg/context", # keep
+ "//pkg/errors/linuxerr", #keep
"//pkg/sentry/contexttest", # keep
"//pkg/sentry/kernel/auth", # keep
"//pkg/sentry/kernel/ipc", # keep
- "//pkg/syserror", # keep
],
)
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index 8610d3fc1..28e466948 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -151,10 +150,10 @@ func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, m
// Map reg.objects and map indexes in a registry are of the same size,
// check map reg.objects only here for the system limit.
if r.reg.ObjectCount() >= setsMax {
- return nil, syserror.ENOSPC
+ return nil, linuxerr.ENOSPC
}
if r.totalSems() > int(semsTotalMax-nsems) {
- return nil, syserror.ENOSPC
+ return nil, linuxerr.ENOSPC
}
// Finally create a new set.
@@ -337,19 +336,15 @@ func (s *Set) Size() int {
return len(s.sems)
}
-// Change changes some fields from the set atomically.
-func (s *Set) Change(ctx context.Context, creds *auth.Credentials, owner fs.FileOwner, perms fs.FilePermissions) error {
+// Set modifies attributes for a semaphore set. See semctl(IPC_SET).
+func (s *Set) Set(ctx context.Context, ds *linux.SemidDS) error {
s.mu.Lock()
defer s.mu.Unlock()
- // "The effective UID of the calling process must match the owner or creator
- // of the semaphore set, or the caller must be privileged."
- if !s.obj.CheckOwnership(creds) {
- return linuxerr.EACCES
+ if err := s.obj.Set(ctx, &ds.SemPerm); err != nil {
+ return err
}
- s.obj.Owner = owner
- s.obj.Perms = perms
s.changeTime = ktime.NowFromContext(ctx)
return nil
}
@@ -549,7 +544,7 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr
// Did it race with a removal operation?
if s.dead {
- return nil, 0, syserror.EIDRM
+ return nil, 0, linuxerr.EIDRM
}
// Validate the operations.
@@ -588,7 +583,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch
if tmpVals[op.SemNum] != 0 {
// Semaphore isn't 0, must wait.
if op.SemFlg&linux.IPC_NOWAIT != 0 {
- return nil, 0, syserror.ErrWouldBlock
+ return nil, 0, linuxerr.ErrWouldBlock
}
w := newWaiter(op.SemOp)
@@ -604,7 +599,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch
if -op.SemOp > tmpVals[op.SemNum] {
// Not enough resources, must wait.
if op.SemFlg&linux.IPC_NOWAIT != 0 {
- return nil, 0, syserror.ErrWouldBlock
+ return nil, 0, linuxerr.ErrWouldBlock
}
w := newWaiter(op.SemOp)
diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go
index 2e4ab8121..59ac92ef1 100644
--- a/pkg/sentry/kernel/semaphore/semaphore_test.go
+++ b/pkg/sentry/kernel/semaphore/semaphore_test.go
@@ -19,10 +19,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
- "gvisor.dev/gvisor/pkg/syserror"
)
func executeOps(ctx context.Context, t *testing.T, set *Set, ops []linux.Sembuf, block bool) chan struct{} {
@@ -124,14 +124,14 @@ func TestNoWait(t *testing.T) {
ops[0].SemOp = -2
ops[0].SemFlg = linux.IPC_NOWAIT
- if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock {
- t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock)
+ if _, _, err := set.executeOps(ctx, ops, 123); err != linuxerr.ErrWouldBlock {
+ t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, linuxerr.ErrWouldBlock)
}
ops[0].SemOp = 0
ops[0].SemFlg = linux.IPC_NOWAIT
- if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock {
- t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock)
+ if _, _, err := set.executeOps(ctx, ops, 123); err != linuxerr.ErrWouldBlock {
+ t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, linuxerr.ErrWouldBlock)
}
}
diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD
index 4e8deac4c..2547957ba 100644
--- a/pkg/sentry/kernel/shm/BUILD
+++ b/pkg/sentry/kernel/shm/BUILD
@@ -42,7 +42,6 @@ go_library(
"//pkg/sentry/pgalloc",
"//pkg/sentry/usage",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index 2abf467d7..ab938fa3c 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -49,7 +49,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Registry tracks all shared memory segments in an IPC namespace. The registry
@@ -151,7 +150,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz
if r.reg.ObjectCount() >= linux.SHMMNI {
// "All possible shared memory IDs have been taken (SHMMNI) ..."
// - man shmget(2)
- return nil, syserror.ENOSPC
+ return nil, linuxerr.ENOSPC
}
if !private {
@@ -184,7 +183,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz
// "... allocating a segment of the requested size would cause the
// system to exceed the system-wide limit on shared memory (SHMALL)."
// - man shmget(2)
- return nil, syserror.ENOSPC
+ return nil, linuxerr.ENOSPC
}
// Need to create a new segment.
@@ -521,7 +520,7 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts Atta
s.mu.Lock()
defer s.mu.Unlock()
if s.pendingDestruction && s.ReadRefs() == 0 {
- return memmap.MMapOpts{}, syserror.EIDRM
+ return memmap.MMapOpts{}, linuxerr.EIDRM
}
creds := auth.CredentialsFromContext(ctx)
@@ -619,25 +618,10 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error {
s.mu.Lock()
defer s.mu.Unlock()
- creds := auth.CredentialsFromContext(ctx)
- if !s.obj.CheckOwnership(creds) {
- return linuxerr.EPERM
- }
-
- uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID))
- gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID))
- if !uid.Ok() || !gid.Ok() {
- return linuxerr.EINVAL
+ if err := s.obj.Set(ctx, &ds.ShmPerm); err != nil {
+ return err
}
- // User may only modify the lower 9 bits of the mode. All the other bits are
- // always 0 for the underlying inode.
- mode := linux.FileMode(ds.ShmPerm.Mode & 0x1ff)
- s.obj.Perms = fs.FilePermsFromMode(mode)
-
- s.obj.Owner.UID = uid
- s.obj.Owner.GID = gid
-
s.changeTime = ktime.NowFromContext(ctx)
return nil
}
diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD
index 1110ecca5..4180ca28e 100644
--- a/pkg/sentry/kernel/signalfd/BUILD
+++ b/pkg/sentry/kernel/signalfd/BUILD
@@ -15,7 +15,6 @@ go_library(
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/kernel",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go
index 47958e2d4..9c5e6698c 100644
--- a/pkg/sentry/kernel/signalfd/signalfd.go
+++ b/pkg/sentry/kernel/signalfd/signalfd.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -99,7 +98,7 @@ func (s *SignalOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
info, err := s.target.Sigtimedwait(s.Mask(), 0)
if err != nil {
// There must be no signal available.
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// Copy out the signal info using the specified format.
diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go
index b2520eecf..9bfc155e4 100644
--- a/pkg/sentry/kernel/task_block.go
+++ b/pkg/sentry/kernel/task_block.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// BlockWithTimeout blocks t until an event is received from C, the application
@@ -33,7 +32,7 @@ import (
// and is unspecified if haveTimeout is false.
//
// - An error which is nil if an event is received from C, ETIMEDOUT if the timeout
-// expired, and syserror.ErrInterrupted if t is interrupted.
+// expired, and linuxerr.ErrInterrupted if t is interrupted.
//
// Preconditions: The caller must be running on the task goroutine.
func (t *Task) BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time.Duration) (time.Duration, error) {
@@ -67,7 +66,7 @@ func (t *Task) BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time.
// application monotonic clock indicates a time of deadline (only if
// haveDeadline is true), or t is interrupted. It returns nil if an event is
// received from C, ETIMEDOUT if the deadline expired, and
-// syserror.ErrInterrupted if t is interrupted.
+// linuxerr.ErrInterrupted if t is interrupted.
//
// Preconditions: The caller must be running on the task goroutine.
func (t *Task) BlockWithDeadline(C <-chan struct{}, haveDeadline bool, deadline ktime.Time) error {
@@ -95,7 +94,7 @@ func (t *Task) BlockWithDeadline(C <-chan struct{}, haveDeadline bool, deadline
// BlockWithTimer blocks t until an event is received from C or tchan, or t is
// interrupted. It returns nil if an event is received from C, ETIMEDOUT if an
-// event is received from tchan, and syserror.ErrInterrupted if t is
+// event is received from tchan, and linuxerr.ErrInterrupted if t is
// interrupted.
//
// Most clients should use BlockWithDeadline or BlockWithTimeout instead.
@@ -106,7 +105,7 @@ func (t *Task) BlockWithTimer(C <-chan struct{}, tchan <-chan struct{}) error {
}
// Block blocks t until an event is received from C or t is interrupted. It
-// returns nil if an event is received from C and syserror.ErrInterrupted if t
+// returns nil if an event is received from C and linuxerr.ErrInterrupted if t
// is interrupted.
//
// Preconditions: The caller must be running on the task goroutine.
@@ -157,7 +156,7 @@ func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error {
region.End()
t.SleepFinish(false)
// Return the indicated error on interrupt.
- return syserror.ErrInterrupted
+ return linuxerr.ErrInterrupted
case <-timerChan:
region.End()
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index cf8571262..9175b911c 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -66,10 +66,10 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// execStop is a TaskStop that a task sets on itself when it wants to execve
@@ -97,7 +97,7 @@ func (t *Task) Execve(newImage *TaskImage) (*SyscallControl, error) {
// We lost to a racing group-exit, kill, or exec from another thread
// and should just exit.
newImage.release()
- return nil, syserror.EINTR
+ return nil, linuxerr.EINTR
}
// Cancel any racing group stops.
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index fbfcc19e5..342e5debe 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -32,7 +32,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -859,7 +859,7 @@ func (t *Task) Wait(opts *WaitOptions) (*WaitResult, error) {
return wr, err
}
if err := t.Block(ch); err != nil {
- return wr, syserror.ConvertIntr(err, opts.BlockInterruptErr)
+ return wr, syserr.ConvertIntr(err, opts.BlockInterruptErr)
}
}
}
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index 054ff212f..7b336a46b 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -22,6 +22,7 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/goid"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -29,7 +30,6 @@ import (
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/syserror"
)
// A taskRunState is a reified state in the task state machine. See README.md
@@ -197,8 +197,8 @@ func (app *runApp) execute(t *Task) taskRunState {
// a pending signal, causing another interruption, but that signal should
// not interact with the interrupted syscall.)
if t.haveSyscallReturn {
- if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok {
- if sre == syserror.ERESTART_RESTARTBLOCK {
+ if sre, ok := linuxerr.SyscallRestartErrorFromReturn(t.Arch().Return()); ok {
+ if sre == linuxerr.ERESTART_RESTARTBLOCK {
t.Debugf("Restarting syscall %d with restart block after errno %d: not interrupted by handled signal", t.Arch().SyscallNo(), sre)
t.Arch().RestartSyscallWithRestartBlock()
} else {
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index 7065ac79c..eeb3c5e69 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -28,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -161,7 +160,7 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu
sigact := computeAction(sig, act)
if t.haveSyscallReturn {
- if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok {
+ if sre, ok := linuxerr.SyscallRestartErrorFromReturn(t.Arch().Return()); ok {
// Signals that are ignored, cause a thread group stop, or
// terminate the thread group do not interact with interrupted
// syscalls; in Linux terms, they are never returned to the signal
@@ -170,13 +169,13 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu
// signal that is actually handled (by userspace).
if sigact == SignalActionHandler {
switch {
- case sre == syserror.ERESTARTNOHAND:
+ case sre == linuxerr.ERESTARTNOHAND:
fallthrough
- case sre == syserror.ERESTART_RESTARTBLOCK:
+ case sre == linuxerr.ERESTART_RESTARTBLOCK:
fallthrough
- case (sre == syserror.ERESTARTSYS && act.Flags&linux.SA_RESTART == 0):
+ case (sre == linuxerr.ERESTARTSYS && act.Flags&linux.SA_RESTART == 0):
t.Debugf("Not restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo)
- t.Arch().SetReturn(uintptr(-ExtractErrno(syserror.EINTR, -1)))
+ t.Arch().SetReturn(uintptr(-ExtractErrno(linuxerr.EINTR, -1)))
default:
t.Debugf("Restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo)
t.Arch().RestartSyscall()
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 0565059c1..217c6f531 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/sched"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// TaskConfig defines the configuration of a new Task (see below).
@@ -170,7 +169,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
// doesn't matter too much since the caller will exit before it returns
// to userspace. If the caller isn't in the same thread group, then
// we're in uncharted territory and can return whatever we want.
- return nil, syserror.EINTR
+ return nil, linuxerr.EINTR
}
if err := ts.assignTIDsLocked(t); err != nil {
return nil, err
@@ -268,7 +267,7 @@ func (ns *PIDNamespace) allocateTID() (ThreadID, error) {
// fail with the error ENOMEM; it is not possible to create a new
// processes [sic] in a PID namespace whose init process has
// terminated." - pid_namespaces(7)
- return 0, syserror.ENOMEM
+ return 0, linuxerr.ENOMEM
}
tid := ns.last
for {
diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go
index 0586c9def..2b1d7e114 100644
--- a/pkg/sentry/kernel/task_syscall.go
+++ b/pkg/sentry/kernel/task_syscall.go
@@ -29,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/syserror"
)
// SyscallRestartBlock represents the restart block for a syscall restartable
@@ -383,8 +382,6 @@ func ExtractErrno(err error, sysno int) int {
return int(err)
case *errors.Error:
return int(err.Errno())
- case syserror.SyscallRestartErrno:
- return int(err)
case *memmap.BusError:
// Bus errors may generate SIGBUS, but for syscalls they still
// return EFAULT. See case in task_run.go where the fault is
@@ -397,8 +394,8 @@ func ExtractErrno(err error, sysno int) int {
case *os.SyscallError:
return ExtractErrno(err.Err, sysno)
default:
- if errno, ok := syserror.TranslateError(err); ok {
- return int(errno)
+ if errno, ok := linuxerr.TranslateError(err); ok {
+ return int(errno.Errno())
}
}
panic(fmt.Sprintf("Unknown syscall %d error: %v", sysno, err))
diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go
index 8e2c36598..bff226a11 100644
--- a/pkg/sentry/kernel/task_usermem.go
+++ b/pkg/sentry/kernel/task_usermem.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/mm"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -105,7 +104,7 @@ func (t *Task) CopyInVector(addr hostarch.Addr, maxElemSize, maxTotalSize int) (
// Each string has a zero terminating byte counted, so copying out a string
// requires at least one byte of space. Also, see the calculation below.
if maxTotalSize <= 0 {
- return nil, syserror.ENOMEM
+ return nil, linuxerr.ENOMEM
}
thisMax := maxElemSize
if maxTotalSize < thisMax {
@@ -148,7 +147,7 @@ func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) erro
}
default:
- return syserror.ENOSYS
+ return linuxerr.ENOSYS
}
return nil
@@ -220,7 +219,7 @@ func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRan
}
default:
- return hostarch.AddrRangeSeq{}, syserror.ENOSYS
+ return hostarch.AddrRangeSeq{}, linuxerr.ENOSYS
}
// Truncate to MAX_RW_COUNT.
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 0f741602d..5814a4eca 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -511,14 +511,14 @@ func (tg *ThreadGroup) SetForegroundProcessGroup(tty *TTY, pgid ProcessGroupID)
return -1, linuxerr.EPERM
}
- signalAction:= tg.signalHandlers.actions[linux.SIGTTOU]
+ signalAction := tg.signalHandlers.actions[linux.SIGTTOU]
// If the calling process is a member of a background group, a SIGTTOU
// signal is sent to all members of this background process group.
// We need also need to check whether it is ignoring or blocking SIGTTOU.
- ignored:= signalAction.Handler == linux.SIG_IGN
- blocked:= tg.leader.signalMask == linux.SignalSetOf(linux.SIGTTOU)
- if tg.processGroup.id != tg.processGroup.session.foreground.id && !ignored && !blocked{
- tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGTTOU),true)
+ ignored := signalAction.Handler == linux.SIG_IGN
+ blocked := tg.leader.signalMask == linux.SignalSetOf(linux.SIGTTOU)
+ if tg.processGroup.id != tg.processGroup.session.foreground.id && !ignored && !blocked {
+ tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGTTOU), true)
}
tg.processGroup.session.foreground.id = pgid
diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD
index 54bfed644..560a0f33c 100644
--- a/pkg/sentry/loader/BUILD
+++ b/pkg/sentry/loader/BUILD
@@ -37,7 +37,6 @@ go_library(
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go
index 577374fa4..fb213d109 100644
--- a/pkg/sentry/loader/elf.go
+++ b/pkg/sentry/loader/elf.go
@@ -32,7 +32,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/mm"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -116,7 +115,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) {
log.Infof("Error reading ELF ident: %v", err)
// The entire ident array always exists.
if err == io.EOF || err == io.ErrUnexpectedEOF {
- err = syserror.ENOEXEC
+ err = linuxerr.ENOEXEC
}
return elfInfo{}, err
}
@@ -124,22 +123,22 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) {
// Only some callers pre-check the ELF magic.
if !bytes.Equal(ident[:len(elfMagic)], []byte(elfMagic)) {
log.Infof("File is not an ELF")
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
// We only support 64-bit, little endian binaries
if class := elf.Class(ident[elf.EI_CLASS]); class != elf.ELFCLASS64 {
log.Infof("Unsupported ELF class: %v", class)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
if endian := elf.Data(ident[elf.EI_DATA]); endian != elf.ELFDATA2LSB {
log.Infof("Unsupported ELF endianness: %v", endian)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
if version := elf.Version(ident[elf.EI_VERSION]); version != elf.EV_CURRENT {
log.Infof("Unsupported ELF version: %v", version)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
// EI_OSABI is ignored by Linux, which is the only OS supported.
os := abi.Linux
@@ -151,7 +150,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) {
log.Infof("Error reading ELF header: %v", err)
// The entire header always exists.
if err == io.EOF || err == io.ErrUnexpectedEOF {
- err = syserror.ENOEXEC
+ err = linuxerr.ENOEXEC
}
return elfInfo{}, err
}
@@ -166,7 +165,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) {
a = arch.ARM64
default:
log.Infof("Unsupported ELF machine %d", machine)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
var sharedObject bool
@@ -178,25 +177,25 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) {
sharedObject = true
default:
log.Infof("Unsupported ELF type %v", elfType)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
if int(hdr.Phentsize) != prog64Size {
log.Infof("Unsupported phdr size %d", hdr.Phentsize)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
totalPhdrSize := prog64Size * int(hdr.Phnum)
if totalPhdrSize < prog64Size {
log.Warningf("No phdrs or total phdr size overflows: prog64Size: %d phnum: %d", prog64Size, int(hdr.Phnum))
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
if totalPhdrSize > maxTotalPhdrSize {
log.Infof("Too many phdrs (%d): total size %d > %d", hdr.Phnum, totalPhdrSize, maxTotalPhdrSize)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
if int64(hdr.Phoff) < 0 || int64(hdr.Phoff+uint64(totalPhdrSize)) < 0 {
ctx.Infof("Unsupported phdr offset %d", hdr.Phoff)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
phdrBuf := make([]byte, totalPhdrSize)
@@ -205,7 +204,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) {
log.Infof("Error reading ELF phdrs: %v", err)
// If phdrs were specified, they should all exist.
if err == io.EOF || err == io.ErrUnexpectedEOF {
- err = syserror.ENOEXEC
+ err = linuxerr.ENOEXEC
}
return elfInfo{}, err
}
@@ -248,19 +247,19 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr
if !ok {
// If offset != 0 we should have ensured this would fit.
ctx.Warningf("Computed segment load address overflows: %#x + %#x", phdr.Vaddr, offset)
- return syserror.ENOEXEC
+ return linuxerr.ENOEXEC
}
addr -= hostarch.Addr(adjust)
fileSize := phdr.Filesz + adjust
if fileSize < phdr.Filesz {
ctx.Infof("Computed segment file size overflows: %#x + %#x", phdr.Filesz, adjust)
- return syserror.ENOEXEC
+ return linuxerr.ENOEXEC
}
ms, ok := hostarch.Addr(fileSize).RoundUp()
if !ok {
ctx.Infof("fileSize %#x too large", fileSize)
- return syserror.ENOEXEC
+ return linuxerr.ENOEXEC
}
mapSize := uint64(ms)
@@ -321,7 +320,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr
memSize := phdr.Memsz + adjust
if memSize < phdr.Memsz {
ctx.Infof("Computed segment mem size overflows: %#x + %#x", phdr.Memsz, adjust)
- return syserror.ENOEXEC
+ return linuxerr.ENOEXEC
}
// Allocate more anonymous pages if necessary.
@@ -333,7 +332,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr
anonSize, ok := hostarch.Addr(memSize - mapSize).RoundUp()
if !ok {
ctx.Infof("extra anon pages too large: %#x", memSize-mapSize)
- return syserror.ENOEXEC
+ return linuxerr.ENOEXEC
}
// N.B. Linux uses vm_brk_flags to map these pages, which only
@@ -423,27 +422,27 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in
// NOTE(b/37474556): Linux allows out-of-order
// segments, in violation of the spec.
ctx.Infof("PT_LOAD headers out-of-order. %#x < %#x", vaddr, end)
- return loadedELF{}, syserror.ENOEXEC
+ return loadedELF{}, linuxerr.ENOEXEC
}
var ok bool
end, ok = vaddr.AddLength(phdr.Memsz)
if !ok {
ctx.Infof("PT_LOAD header size overflows. %#x + %#x", vaddr, phdr.Memsz)
- return loadedELF{}, syserror.ENOEXEC
+ return loadedELF{}, linuxerr.ENOEXEC
}
case elf.PT_INTERP:
if phdr.Filesz < 2 {
ctx.Infof("PT_INTERP path too small: %v", phdr.Filesz)
- return loadedELF{}, syserror.ENOEXEC
+ return loadedELF{}, linuxerr.ENOEXEC
}
if phdr.Filesz > linux.PATH_MAX {
ctx.Infof("PT_INTERP path too big: %v", phdr.Filesz)
- return loadedELF{}, syserror.ENOEXEC
+ return loadedELF{}, linuxerr.ENOEXEC
}
if int64(phdr.Off) < 0 || int64(phdr.Off+phdr.Filesz) < 0 {
ctx.Infof("Unsupported PT_INTERP offset %d", phdr.Off)
- return loadedELF{}, syserror.ENOEXEC
+ return loadedELF{}, linuxerr.ENOEXEC
}
path := make([]byte, phdr.Filesz)
@@ -451,12 +450,12 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in
if err != nil {
// If an interpreter was specified, it should exist.
ctx.Infof("Error reading PT_INTERP path: %v", err)
- return loadedELF{}, syserror.ENOEXEC
+ return loadedELF{}, linuxerr.ENOEXEC
}
if path[len(path)-1] != 0 {
ctx.Infof("PT_INTERP path not NUL-terminated: %v", path)
- return loadedELF{}, syserror.ENOEXEC
+ return loadedELF{}, linuxerr.ENOEXEC
}
// Strip NUL-terminator and everything beyond from
@@ -498,7 +497,7 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in
totalSize, ok := totalSize.RoundUp()
if !ok {
ctx.Infof("ELF PT_LOAD segments too big")
- return loadedELF{}, syserror.ENOEXEC
+ return loadedELF{}, linuxerr.ENOEXEC
}
var err error
@@ -592,7 +591,7 @@ func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureS
// Check Image Compatibility.
if arch.Host != info.arch {
ctx.Warningf("Found mismatch for platform %s with ELF type %s", arch.Host.String(), info.arch.String())
- return loadedELF{}, nil, syserror.ENOEXEC
+ return loadedELF{}, nil, linuxerr.ENOEXEC
}
// Create the arch.Context now so we can prepare the mmap layout before
@@ -681,7 +680,7 @@ func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error
if interp.interpreter != "" {
// No recursive interpreters!
ctx.Infof("Interpreter requires an interpreter")
- return loadedELF{}, nil, syserror.ENOEXEC
+ return loadedELF{}, nil, linuxerr.ENOEXEC
}
}
diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go
index 3e302d92c..1ec0d7019 100644
--- a/pkg/sentry/loader/interpreter.go
+++ b/pkg/sentry/loader/interpreter.go
@@ -19,8 +19,8 @@ import (
"io"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -43,14 +43,14 @@ func parseInterpreterScript(ctx context.Context, filename string, f fsbridge.Fil
// Short read is OK.
if err != nil && err != io.ErrUnexpectedEOF {
if err == io.EOF {
- err = syserror.ENOEXEC
+ err = linuxerr.ENOEXEC
}
return "", []string{}, err
}
line = line[:n]
if !bytes.Equal(line[:2], []byte(interpreterScriptMagic)) {
- return "", []string{}, syserror.ENOEXEC
+ return "", []string{}, linuxerr.ENOEXEC
}
// Ignore #!.
line = line[2:]
@@ -82,7 +82,7 @@ func parseInterpreterScript(ctx context.Context, filename string, f fsbridge.Fil
if string(interp) == "" {
ctx.Infof("Interpreter script contains no interpreter: %v", line)
- return "", []string{}, syserror.ENOEXEC
+ return "", []string{}, linuxerr.ENOEXEC
}
// Build the new argument list:
diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go
index 86d0c54cd..6a356779c 100644
--- a/pkg/sentry/loader/loader.go
+++ b/pkg/sentry/loader/loader.go
@@ -35,7 +35,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -91,7 +90,7 @@ type LoadArgs struct {
func openPath(ctx context.Context, args LoadArgs) (fsbridge.File, error) {
if args.Filename == "" {
ctx.Infof("cannot open empty name")
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
// TODO(gvisor.dev/issue/160): Linux requires only execute permission,
@@ -172,7 +171,7 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context
// (e.g., #!a).
if err != nil && err != io.ErrUnexpectedEOF {
if err == io.EOF {
- err = syserror.ENOEXEC
+ err = linuxerr.ENOEXEC
}
return loadedELF{}, nil, nil, nil, err
}
@@ -190,7 +189,7 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context
case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)):
if args.CloseOnExec {
- return loadedELF{}, nil, nil, nil, syserror.ENOENT
+ return loadedELF{}, nil, nil, nil, linuxerr.ENOENT
}
args.Filename, args.Argv, err = parseInterpreterScript(ctx, args.Filename, args.File, args.Argv)
if err != nil {
@@ -202,7 +201,7 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context
default:
ctx.Infof("Unknown magic: %v", hdr)
- return loadedELF{}, nil, nil, nil, syserror.ENOEXEC
+ return loadedELF{}, nil, nil, nil, linuxerr.ENOEXEC
}
// Set to nil in case we loop on a Interpreter Script.
args.File = nil
diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go
index 054ef1723..3abd2ee7d 100644
--- a/pkg/sentry/loader/vdso.go
+++ b/pkg/sentry/loader/vdso.go
@@ -34,7 +34,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/uniqueid"
"gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -102,14 +101,14 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro
first = &info.phdrs[i]
if phdr.Off != 0 {
log.Warningf("First PT_LOAD segment has non-zero file offset")
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
}
memoryOffset := phdr.Vaddr - first.Vaddr
if memoryOffset != phdr.Off {
log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
// memsz larger than filesz means that extra zeroed space should be
@@ -118,24 +117,24 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro
// zeroes.
if phdr.Memsz != phdr.Filesz {
log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
start := hostarch.Addr(memoryOffset)
end, ok := start.AddLength(phdr.Memsz)
if !ok {
log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, end)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
if uint64(end) > size {
log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
if prev != nil {
if start < prevEnd {
log.Warningf("PT_LOAD segments out of order")
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
// We mprotect entire pages, so each segment must be in
@@ -144,7 +143,7 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro
startPage := start.RoundDown()
if prevEndPage >= startPage {
log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage)
- return elfInfo{}, syserror.ENOEXEC
+ return elfInfo{}, linuxerr.ENOEXEC
}
}
prev = &info.phdrs[i]
@@ -271,11 +270,11 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (hostarch.Addr, error) {
if v.os != bin.os {
ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os)
- return 0, syserror.ENOEXEC
+ return 0, linuxerr.ENOEXEC
}
if v.arch != bin.arch {
ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch)
- return 0, syserror.ENOEXEC
+ return 0, linuxerr.ENOEXEC
}
// Reserve address space for the VDSO and its parameter page, which is
@@ -348,35 +347,35 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF)
segAddr, ok := vdsoAddr.AddLength(memoryOffset)
if !ok {
ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", segAddr, memoryOffset)
- return 0, syserror.ENOEXEC
+ return 0, linuxerr.ENOEXEC
}
segPage := segAddr.RoundDown()
segSize := hostarch.Addr(phdr.Memsz)
segSize, ok = segSize.AddLength(segAddr.PageOffset())
if !ok {
ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset())
- return 0, syserror.ENOEXEC
+ return 0, linuxerr.ENOEXEC
}
segSize, ok = segSize.RoundUp()
if !ok {
ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset())
- return 0, syserror.ENOEXEC
+ return 0, linuxerr.ENOEXEC
}
segEnd, ok := segPage.AddLength(uint64(segSize))
if !ok {
ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segAddr, segSize)
- return 0, syserror.ENOEXEC
+ return 0, linuxerr.ENOEXEC
}
if segEnd > vdsoEnd {
ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd)
- return 0, syserror.ENOEXEC
+ return 0, linuxerr.ENOEXEC
}
perms := progFlagsAsPerms(phdr.Flags)
if perms != hostarch.Read {
if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil {
ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err)
- return 0, syserror.ENOEXEC
+ return 0, linuxerr.ENOEXEC
}
}
}
diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD
index c30e88725..a89bfa680 100644
--- a/pkg/sentry/memmap/BUILD
+++ b/pkg/sentry/memmap/BUILD
@@ -54,7 +54,6 @@ go_library(
"//pkg/hostarch",
"//pkg/log",
"//pkg/safemem",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD
index 69aff21b6..b7d782b7f 100644
--- a/pkg/sentry/mm/BUILD
+++ b/pkg/sentry/mm/BUILD
@@ -144,7 +144,6 @@ go_library(
"//pkg/sentry/platform",
"//pkg/sentry/usage",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/tcpip/buffer",
"//pkg/usermem",
],
diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go
index b7f765cd7..d71d64580 100644
--- a/pkg/sentry/mm/aio_context.go
+++ b/pkg/sentry/mm/aio_context.go
@@ -77,15 +77,6 @@ func (mm *MemoryManager) destroyAIOContextLocked(ctx context.Context, id uint64)
return nil
}
- // Only unmaps after it assured that the address is a valid aio context to
- // prevent random memory from been unmapped.
- //
- // Note: It's possible to unmap this address and map something else into
- // the same address. Then it would be unmapping memory that it doesn't own.
- // This is, however, the way Linux implements AIO. Keeps the same [weird]
- // semantics in case anyone relies on it.
- mm.MUnmap(ctx, hostarch.Addr(id), aioRingBufferSize)
-
delete(mm.aioManager.contexts, id)
aioCtx.destroy()
return aioCtx
@@ -411,6 +402,15 @@ func (mm *MemoryManager) DestroyAIOContext(ctx context.Context, id uint64) *AIOC
return nil
}
+ // Only unmaps after it assured that the address is a valid aio context to
+ // prevent random memory from been unmapped.
+ //
+ // Note: It's possible to unmap this address and map something else into
+ // the same address. Then it would be unmapping memory that it doesn't own.
+ // This is, however, the way Linux implements AIO. Keeps the same [weird]
+ // semantics in case anyone relies on it.
+ mm.MUnmap(ctx, hostarch.Addr(id), aioRingBufferSize)
+
mm.aioManager.mu.Lock()
defer mm.aioManager.mu.Unlock()
return mm.destroyAIOContextLocked(ctx, id)
diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go
index 256eb4afb..9e00c2cec 100644
--- a/pkg/sentry/mm/syscalls.go
+++ b/pkg/sentry/mm/syscalls.go
@@ -27,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/syserror"
)
// HandleUserFault handles an application page fault. sp is the faulting
@@ -79,7 +78,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostar
}
length, ok := hostarch.Addr(opts.Length).RoundUp()
if !ok {
- return 0, syserror.ENOMEM
+ return 0, linuxerr.ENOMEM
}
opts.Length = uint64(length)
@@ -90,7 +89,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostar
}
// Offset + length must not overflow.
if end := opts.Offset + opts.Length; end < opts.Offset {
- return 0, syserror.ENOMEM
+ return 0, linuxerr.ENOMEM
}
} else {
opts.Offset = 0
@@ -253,7 +252,7 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, erro
ctx.Warningf("Capping stack size from RLIMIT_STACK of %v down to %v.", sz, maxStackSize)
sz = maxStackSize
} else if sz == 0 {
- return hostarch.AddrRange{}, syserror.ENOMEM
+ return hostarch.AddrRange{}, linuxerr.ENOMEM
}
szaddr := hostarch.Addr(sz)
ctx.Debugf("Allocating stack with size of %v bytes", sz)
@@ -262,7 +261,7 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, erro
// randomization can't be disabled.
stackEnd := mm.layout.MaxAddr - hostarch.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown()
if stackEnd < szaddr {
- return hostarch.AddrRange{}, syserror.ENOMEM
+ return hostarch.AddrRange{}, linuxerr.ENOMEM
}
stackStart := stackEnd - szaddr
mm.mappingMu.Lock()
@@ -500,7 +499,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS
// Check against RLIMIT_AS.
newUsageAS := mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length())
if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS {
- return 0, syserror.ENOMEM
+ return 0, linuxerr.ENOMEM
}
if vma := vseg.ValuePtr(); vma.mappable != nil {
@@ -599,11 +598,11 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h
}
rlength, ok := hostarch.Addr(length).RoundUp()
if !ok {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
ar, ok := addr.ToRange(uint64(rlength))
if !ok {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
effectivePerms := realPerms.Effective()
@@ -616,19 +615,19 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h
// the non-growsDown case.
vseg := mm.vmas.LowerBoundSegment(ar.Start)
if !vseg.Ok() {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
if growsDown {
if !vseg.ValuePtr().growsDown {
return linuxerr.EINVAL
}
if ar.End <= vseg.Start() {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
ar.Start = vseg.Start()
} else {
if ar.Start < vseg.Start() {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
}
@@ -688,7 +687,7 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h
}
vseg, _ = vseg.NextNonEmpty()
if !vseg.Ok() {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
}
}
@@ -724,7 +723,7 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch.
if uint64(addr-mm.brk.Start) > limits.FromContext(ctx).Get(limits.Data).Cur {
addr = mm.brk.End
mm.mappingMu.Unlock()
- return addr, syserror.ENOMEM
+ return addr, linuxerr.ENOMEM
}
oldbrkpg, _ := mm.brk.End.RoundUp()
@@ -798,7 +797,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u
}
if newLockedAS := mm.lockedAS + uint64(ar.Length()) - mm.mlockedBytesRangeLocked(ar); newLockedAS > mlockLimit {
mm.mappingMu.Unlock()
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
}
}
@@ -835,7 +834,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u
mm.vmas.MergeAdjacent(ar)
if unmapped {
mm.mappingMu.Unlock()
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
if mode == memmap.MLockEager {
@@ -850,7 +849,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u
// case, which is converted to ENOMEM by mlock.
mm.activeMu.Unlock()
mm.mappingMu.RUnlock()
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
_, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), hostarch.NoAccess)
if err != nil {
@@ -858,7 +857,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u
mm.mappingMu.RUnlock()
// Linux: mm/mlock.c:__mlock_posix_error_return()
if linuxerr.Equals(linuxerr.EFAULT, err) {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
if linuxerr.Equals(linuxerr.ENOMEM, err) {
return linuxerr.EAGAIN
@@ -917,7 +916,7 @@ func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error
}
if uint64(mm.vmas.Span()) > mlockLimit {
mm.mappingMu.Unlock()
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
}
}
@@ -1040,7 +1039,7 @@ func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork
}
if mm.vmas.SpanRange(ar) != ar.Length() {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
return nil
}
@@ -1099,7 +1098,7 @@ func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error {
// to the rest (but returns ENOMEM from the system call, as it should)." -
// madvise(2)
if mm.vmas.SpanRange(ar) != ar.Length() {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
return nil
}
@@ -1123,11 +1122,11 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u
}
la, ok := hostarch.Addr(length).RoundUp()
if !ok {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
ar, ok := addr.ToRange(uint64(la))
if !ok {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
mm.mappingMu.RLock()
@@ -1135,7 +1134,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u
vseg := mm.vmas.LowerBoundSegment(ar.Start)
if !vseg.Ok() {
mm.mappingMu.RUnlock()
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
var unmapped bool
lastEnd := ar.Start
@@ -1184,7 +1183,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u
}
if unmapped {
- return syserror.ENOMEM
+ return linuxerr.ENOMEM
}
return nil
}
diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go
index 5f8ab7ca3..e34b7a2f7 100644
--- a/pkg/sentry/mm/vma.go
+++ b/pkg/sentry/mm/vma.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Preconditions:
@@ -59,7 +58,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
newUsageAS -= uint64(mm.vmas.SpanRange(ar))
}
if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS {
- return vmaIterator{}, hostarch.AddrRange{}, syserror.ENOMEM
+ return vmaIterator{}, hostarch.AddrRange{}, linuxerr.ENOMEM
}
if opts.MLockMode != memmap.MLockNone {
@@ -178,7 +177,7 @@ func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOp
// Fixed mappings accept only the requested address.
if opts.Fixed {
- return 0, syserror.ENOMEM
+ return 0, linuxerr.ENOMEM
}
// Prefer hugepage alignment if a hugepage or more is requested.
@@ -216,7 +215,7 @@ func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bou
return gr.Start, nil
}
}
- return 0, syserror.ENOMEM
+ return 0, linuxerr.ENOMEM
}
// Preconditions: mm.mappingMu must be locked.
@@ -236,7 +235,7 @@ func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bo
return start, nil
}
}
- return 0, syserror.ENOMEM
+ return 0, linuxerr.ENOMEM
}
// Preconditions: mm.mappingMu must be locked.
diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD
index d351869ef..496a9fd97 100644
--- a/pkg/sentry/pgalloc/BUILD
+++ b/pkg/sentry/pgalloc/BUILD
@@ -97,7 +97,6 @@ go_library(
"//pkg/state",
"//pkg/state/wire",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go
index 0c8542485..68e17d343 100644
--- a/pkg/sentry/pgalloc/pgalloc.go
+++ b/pkg/sentry/pgalloc/pgalloc.go
@@ -39,7 +39,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// MemoryFile is a memmap.File whose pages may be allocated to arbitrary
@@ -404,7 +403,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (memmap.File
// Find a range in the underlying file.
fr, ok := findAvailableRange(&f.usage, f.fileSize, length, alignment)
if !ok {
- return memmap.FileRange{}, syserror.ENOMEM
+ return memmap.FileRange{}, linuxerr.ENOMEM
}
// Expand the file if needed.
diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go
index 8fd8287b3..7a3c97c5a 100644
--- a/pkg/sentry/platform/kvm/bluepill_fault.go
+++ b/pkg/sentry/platform/kvm/bluepill_fault.go
@@ -55,11 +55,7 @@ func calculateBluepillFault(physical uintptr, phyRegions []physicalRegion) (virt
}
// Adjust the block to match our size.
- physicalStart = alignedPhysical & faultBlockMask
- if physicalStart < pr.physical {
- // Bound the starting point to the start of the region.
- physicalStart = pr.physical
- }
+ physicalStart = pr.physical + (alignedPhysical-pr.physical)&faultBlockMask
virtualStart = pr.virtual + (physicalStart - pr.physical)
physicalEnd := physicalStart + faultBlockSize
if physicalEnd > end {
diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD
index 3950caa0f..4ea89f9d0 100644
--- a/pkg/sentry/socket/hostinet/BUILD
+++ b/pkg/sentry/socket/hostinet/BUILD
@@ -38,7 +38,6 @@ go_library(
"//pkg/sentry/socket/control",
"//pkg/sentry/vfs",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/tcpip",
"//pkg/tcpip/stack",
"//pkg/usermem",
diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go
index 38cb2c99c..1c1e501ba 100644
--- a/pkg/sentry/socket/hostinet/socket.go
+++ b/pkg/sentry/socket/hostinet/socket.go
@@ -35,7 +35,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/control"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -288,7 +287,7 @@ func (s *socketOpsCommon) Accept(t *kernel.Task, peerRequested bool, flags int,
fd, syscallErr := accept4(s.fd, peerAddrPtr, peerAddrlenPtr, unix.SOCK_NONBLOCK|unix.SOCK_CLOEXEC)
if blocking {
var ch chan struct{}
- for syscallErr == syserror.ErrWouldBlock {
+ for syscallErr == linuxerr.ErrWouldBlock {
if ch != nil {
if syscallErr = t.Block(ch); syscallErr != nil {
break
@@ -535,7 +534,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
n, err := copyToDst()
// recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT.
if flags&(unix.MSG_DONTWAIT|unix.MSG_ERRQUEUE) == 0 {
- for err == syserror.ErrWouldBlock {
+ for err == linuxerr.ErrWouldBlock {
// We only expect blocking to come from the actual syscall, in which
// case it can't have returned any data.
if n != 0 {
@@ -707,7 +706,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b
var ch chan struct{}
n, err := src.CopyInTo(t, sendmsgFromBlocks)
if flags&unix.MSG_DONTWAIT == 0 {
- for err == syserror.ErrWouldBlock {
+ for err == linuxerr.ErrWouldBlock {
// We only expect blocking to come from the actual syscall, in which
// case it can't have returned any data.
if n != 0 {
@@ -716,7 +715,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b
if ch != nil {
if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
break
}
@@ -735,7 +734,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b
func translateIOSyscallError(err error) error {
if err == unix.EAGAIN || err == unix.EWOULDBLOCK {
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
return err
}
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD
index ed85404da..9710a15ee 100644
--- a/pkg/sentry/socket/netlink/BUILD
+++ b/pkg/sentry/socket/netlink/BUILD
@@ -36,7 +36,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/tcpip",
"//pkg/usermem",
"//pkg/waiter",
diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go
index 5c3ae26f8..ed5fa9c38 100644
--- a/pkg/sentry/socket/netlink/socket.go
+++ b/pkg/sentry/socket/netlink/socket.go
@@ -39,7 +39,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
@@ -530,7 +529,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
}
}
- if n, err := doRead(); err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 {
+ if n, err := doRead(); err != linuxerr.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 {
var mflags int
if n < int64(r.MsgSize) {
mflags |= linux.MSG_TRUNC
@@ -548,7 +547,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
defer s.EventUnregister(&e)
for {
- if n, err := doRead(); err != syserror.ErrWouldBlock {
+ if n, err := doRead(); err != linuxerr.ErrWouldBlock {
var mflags int
if n < int64(r.MsgSize) {
mflags |= linux.MSG_TRUNC
diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD
index e828982eb..e347442e7 100644
--- a/pkg/sentry/socket/netstack/BUILD
+++ b/pkg/sentry/socket/netstack/BUILD
@@ -42,7 +42,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/tcpip",
"//pkg/tcpip/header",
"//pkg/tcpip/link/tun",
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 9b844b0c0..2f9462cee 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -56,7 +56,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/unimpl"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
@@ -459,7 +458,7 @@ func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
}
n, _, _, _, _, err := s.nonBlockingRead(ctx, dst, false, false, false)
if err == syserr.ErrWouldBlock {
- return int64(n), syserror.ErrWouldBlock
+ return int64(n), linuxerr.ErrWouldBlock
}
if err != nil {
return 0, err.ToError()
@@ -492,14 +491,14 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO
r := src.Reader(ctx)
n, err := s.Endpoint.Write(r, tcpip.WriteOptions{})
if _, ok := err.(*tcpip.ErrWouldBlock); ok {
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
if err != nil {
return 0, syserr.TranslateNetstackError(err).ToError()
}
if n < src.NumBytes() {
- return n, syserror.ErrWouldBlock
+ return n, linuxerr.ErrWouldBlock
}
return n, nil
@@ -2951,7 +2950,7 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy
s.readMu.Lock()
defer s.readMu.Unlock()
if !s.timestampValid {
- return 0, syserror.ENOENT
+ return 0, linuxerr.ENOENT
}
tv := linux.NsecToTimeval(s.timestampNS)
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index edc160b1b..3cdf29b80 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -27,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
@@ -113,7 +112,7 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.
}
n, _, _, _, _, err := s.nonBlockingRead(ctx, dst, false, false, false)
if err == syserr.ErrWouldBlock {
- return int64(n), syserror.ErrWouldBlock
+ return int64(n), linuxerr.ErrWouldBlock
}
if err != nil {
return 0, err.ToError()
@@ -132,14 +131,14 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
r := src.Reader(ctx)
n, err := s.Endpoint.Write(r, tcpip.WriteOptions{})
if _, ok := err.(*tcpip.ErrWouldBlock); ok {
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
if err != nil {
return 0, syserr.TranslateNetstackError(err).ToError()
}
if n < src.NumBytes() {
- return n, syserror.ErrWouldBlock
+ return n, linuxerr.ErrWouldBlock
}
return n, nil
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index 5c3cdef6a..7b546c04d 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -62,7 +62,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/tcpip",
"//pkg/usermem",
"//pkg/waiter",
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index 33f9aeb06..9a398c3b5 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -129,9 +129,9 @@ func newConnectioned(ctx context.Context, stype linux.SockType, uid UniqueIDProv
stype: stype,
}
+ ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits)
ep.ops.SetSendBufferSize(defaultBufferSize, false /* notify */)
ep.ops.SetReceiveBufferSize(defaultBufferSize, false /* notify */)
- ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits)
return ep
}
@@ -517,3 +517,6 @@ func (e *connectionedEndpoint) OnSetSendBufferSize(v int64) (newSz int64) {
}
return v
}
+
+// WakeupWriters implements tcpip.SocketOptionsHandler.WakeupWriters.
+func (e *connectionedEndpoint) WakeupWriters() {}
diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go
index 61338728a..61311718e 100644
--- a/pkg/sentry/socket/unix/transport/connectionless.go
+++ b/pkg/sentry/socket/unix/transport/connectionless.go
@@ -44,9 +44,9 @@ func NewConnectionless(ctx context.Context) Endpoint {
q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: defaultBufferSize}
q.InitRefs()
ep.receiver = &queueReceiver{readQueue: &q}
+ ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits)
ep.ops.SetSendBufferSize(defaultBufferSize, false /* notify */)
ep.ops.SetReceiveBufferSize(defaultBufferSize, false /* notify */)
- ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits)
return ep
}
@@ -227,3 +227,6 @@ func (e *connectionlessEndpoint) OnSetSendBufferSize(v int64) (newSz int64) {
}
return v
}
+
+// WakeupWriters implements tcpip.SocketOptionsHandler.WakeupWriters.
+func (e *connectionlessEndpoint) WakeupWriters() {}
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 8ccdadae9..e9e482017 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -38,7 +38,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
@@ -494,7 +493,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b
}
n, err := src.CopyInTo(t, &w)
- if err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 {
+ if err != linuxerr.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 {
return int(n), syserr.FromError(err)
}
@@ -514,13 +513,13 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b
n, err = src.CopyInTo(t, &w)
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
break
}
@@ -648,7 +647,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
}
var total int64
- if n, err := doRead(); err != syserror.ErrWouldBlock || dontWait {
+ if n, err := doRead(); err != linuxerr.ErrWouldBlock || dontWait {
var from linux.SockAddr
var fromLen uint32
if r.From != nil && len([]byte(r.From.Addr)) != 0 {
@@ -683,7 +682,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
defer s.EventUnregister(&e)
for {
- if n, err := doRead(); err != syserror.ErrWouldBlock {
+ if n, err := doRead(); err != linuxerr.ErrWouldBlock {
var from linux.SockAddr
var fromLen uint32
if r.From != nil {
diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD
index f2c55588f..7a7c80ac6 100644
--- a/pkg/sentry/syscalls/BUILD
+++ b/pkg/sentry/syscalls/BUILD
@@ -16,7 +16,6 @@ go_library(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/epoll",
"//pkg/sentry/kernel/time",
- "//pkg/syserror",
"//pkg/waiter",
],
)
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index b5a371d9a..394396cde 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -104,7 +104,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go
index 76389fbe3..f4d549a3f 100644
--- a/pkg/sentry/syscalls/linux/error.go
+++ b/pkg/sentry/syscalls/linux/error.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
var (
@@ -90,9 +89,9 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr er
}
// Translate error, if possible, to consolidate errors from other packages
- // into a smaller set of errors from syserror package.
+ // into a smaller set of errors from linuxerr package.
translatedErr := errOrig
- if errno, ok := syserror.TranslateError(errOrig); ok {
+ if errno, ok := linuxerr.TranslateError(errOrig); ok {
translatedErr = errno
}
switch {
@@ -167,10 +166,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr er
// files. Since we have a partial read/write, we consume
// ErrWouldBlock, returning the partial result.
return true, nil
- }
-
- switch errOrig.(type) {
- case syserror.SyscallRestartErrno:
+ case linuxerr.IsRestartError(translatedErr):
// Identical to the EINTR case.
return true, nil
}
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 1ead3c7e8..2046a48b9 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/syscalls"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -124,7 +123,7 @@ var AMD64 = &kernel.SyscallTable{
68: syscalls.Supported("msgget", Msgget),
69: syscalls.Supported("msgsnd", Msgsnd),
70: syscalls.Supported("msgrcv", Msgrcv),
- 71: syscalls.PartiallySupported("msgctl", Msgctl, "Only supports IPC_RMID option.", []string{"gvisor.dev/issue/135"}),
+ 71: syscalls.Supported("msgctl", Msgctl),
72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil),
73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil),
74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil),
@@ -175,8 +174,8 @@ var AMD64 = &kernel.SyscallTable{
119: syscalls.Supported("setresgid", Setresgid),
120: syscalls.Supported("getresgid", Getresgid),
121: syscalls.Supported("getpgid", Getpgid),
- 122: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
- 123: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 122: syscalls.ErrorWithEvent("setfsuid", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 123: syscalls.ErrorWithEvent("setfsgid", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
124: syscalls.Supported("getsid", Getsid),
125: syscalls.Supported("capget", Capget),
126: syscalls.Supported("capset", Capset),
@@ -187,12 +186,12 @@ var AMD64 = &kernel.SyscallTable{
131: syscalls.Supported("sigaltstack", Sigaltstack),
132: syscalls.Supported("utime", Utime),
133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. Only regular file and FIFO creation are supported.", nil),
- 134: syscalls.Error("uselib", syserror.ENOSYS, "Obsolete", nil),
+ 134: syscalls.Error("uselib", linuxerr.ENOSYS, "Obsolete", nil),
135: syscalls.ErrorWithEvent("personality", linuxerr.EINVAL, "Unable to change personality.", nil),
- 136: syscalls.ErrorWithEvent("ustat", syserror.ENOSYS, "Needs filesystem support.", nil),
+ 136: syscalls.ErrorWithEvent("ustat", linuxerr.ENOSYS, "Needs filesystem support.", nil),
137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil),
138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil),
- 139: syscalls.ErrorWithEvent("sysfs", syserror.ENOSYS, "", []string{"gvisor.dev/issue/165"}),
+ 139: syscalls.ErrorWithEvent("sysfs", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/165"}),
140: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil),
141: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil),
142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil),
@@ -230,15 +229,15 @@ var AMD64 = &kernel.SyscallTable{
174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil),
175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil),
176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil),
- 177: syscalls.Error("get_kernel_syms", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil),
- 178: syscalls.Error("query_module", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil),
+ 177: syscalls.Error("get_kernel_syms", linuxerr.ENOSYS, "Not supported in Linux > 2.6.", nil),
+ 178: syscalls.Error("query_module", linuxerr.ENOSYS, "Not supported in Linux > 2.6.", nil),
179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations
- 180: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil),
- 181: syscalls.Error("getpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil),
- 182: syscalls.Error("putpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil),
- 183: syscalls.Error("afs_syscall", syserror.ENOSYS, "Not implemented in Linux.", nil),
- 184: syscalls.Error("tuxcall", syserror.ENOSYS, "Not implemented in Linux.", nil),
- 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 180: syscalls.Error("nfsservctl", linuxerr.ENOSYS, "Removed after Linux 3.1.", nil),
+ 181: syscalls.Error("getpmsg", linuxerr.ENOSYS, "Not implemented in Linux.", nil),
+ 182: syscalls.Error("putpmsg", linuxerr.ENOSYS, "Not implemented in Linux.", nil),
+ 183: syscalls.Error("afs_syscall", linuxerr.ENOSYS, "Not implemented in Linux.", nil),
+ 184: syscalls.Error("tuxcall", linuxerr.ENOSYS, "Not implemented in Linux.", nil),
+ 185: syscalls.Error("security", linuxerr.ENOSYS, "Not implemented in Linux.", nil),
186: syscalls.Supported("gettid", Gettid),
187: syscalls.Supported("readahead", Readahead),
188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil),
@@ -258,18 +257,18 @@ var AMD64 = &kernel.SyscallTable{
202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil),
203: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil),
204: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil),
- 205: syscalls.Error("set_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
+ 205: syscalls.Error("set_thread_area", linuxerr.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
206: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
207: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
208: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
209: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
210: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 211: syscalls.Error("get_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
+ 211: syscalls.Error("get_thread_area", linuxerr.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
213: syscalls.Supported("epoll_create", EpollCreate),
- 214: syscalls.ErrorWithEvent("epoll_ctl_old", syserror.ENOSYS, "Deprecated.", nil),
- 215: syscalls.ErrorWithEvent("epoll_wait_old", syserror.ENOSYS, "Deprecated.", nil),
- 216: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil),
+ 214: syscalls.ErrorWithEvent("epoll_ctl_old", linuxerr.ENOSYS, "Deprecated.", nil),
+ 215: syscalls.ErrorWithEvent("epoll_wait_old", linuxerr.ENOSYS, "Deprecated.", nil),
+ 216: syscalls.ErrorWithEvent("remap_file_pages", linuxerr.ENOSYS, "Deprecated since Linux 3.16.", nil),
217: syscalls.Supported("getdents64", Getdents64),
218: syscalls.Supported("set_tid_address", SetTidAddress),
219: syscalls.Supported("restart_syscall", RestartSyscall),
@@ -289,16 +288,16 @@ var AMD64 = &kernel.SyscallTable{
233: syscalls.Supported("epoll_ctl", EpollCtl),
234: syscalls.Supported("tgkill", Tgkill),
235: syscalls.Supported("utimes", Utimes),
- 236: syscalls.Error("vserver", syserror.ENOSYS, "Not implemented by Linux", nil),
+ 236: syscalls.Error("vserver", linuxerr.ENOSYS, "Not implemented by Linux", nil),
237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}),
238: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil),
239: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil),
- 240: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 241: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 242: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 243: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 244: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 245: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 240: syscalls.ErrorWithEvent("mq_open", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 241: syscalls.ErrorWithEvent("mq_unlink", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 242: syscalls.ErrorWithEvent("mq_timedsend", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 243: syscalls.ErrorWithEvent("mq_timedreceive", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 244: syscalls.ErrorWithEvent("mq_notify", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 245: syscalls.ErrorWithEvent("mq_getsetattr", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil),
247: syscalls.Supported("waitid", Waitid),
248: syscalls.Error("add_key", linuxerr.EACCES, "Not available to user.", nil),
@@ -331,7 +330,7 @@ var AMD64 = &kernel.SyscallTable{
275: syscalls.Supported("splice", Splice),
276: syscalls.Supported("tee", Tee),
277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil),
- 278: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 278: syscalls.ErrorWithEvent("vmsplice", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly)
280: syscalls.Supported("utimensat", Utimensat),
281: syscalls.Supported("epoll_pwait", EpollPwait),
@@ -353,8 +352,8 @@ var AMD64 = &kernel.SyscallTable{
297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo),
298: syscalls.ErrorWithEvent("perf_event_open", linuxerr.ENODEV, "No support for perf counters", nil),
299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil),
- 300: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
- 301: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 300: syscalls.ErrorWithEvent("fanotify_init", linuxerr.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 301: syscalls.ErrorWithEvent("fanotify_mark", linuxerr.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
302: syscalls.Supported("prlimit64", Prlimit64),
303: syscalls.Error("name_to_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
304: syscalls.Error("open_by_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
@@ -363,48 +362,48 @@ var AMD64 = &kernel.SyscallTable{
307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil),
308: syscalls.ErrorWithEvent("setns", linuxerr.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995)
309: syscalls.Supported("getcpu", Getcpu),
- 310: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
- 311: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 310: syscalls.ErrorWithEvent("process_vm_readv", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 311: syscalls.ErrorWithEvent("process_vm_writev", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil),
313: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil),
- 314: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
- 315: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
- 316: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
+ 314: syscalls.ErrorWithEvent("sched_setattr", linuxerr.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 315: syscalls.ErrorWithEvent("sched_getattr", linuxerr.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 316: syscalls.ErrorWithEvent("renameat2", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
317: syscalls.Supported("seccomp", Seccomp),
318: syscalls.Supported("getrandom", GetRandom),
319: syscalls.Supported("memfd_create", MemfdCreate),
320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil),
321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
322: syscalls.Supported("execveat", Execveat),
- 323: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
+ 323: syscalls.ErrorWithEvent("userfaultfd", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
324: syscalls.PartiallySupported("membarrier", Membarrier, "Not supported on all platforms.", nil),
325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
// Syscalls implemented after 325 are "backports" from versions
// of Linux after 4.4.
- 326: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil),
+ 326: syscalls.ErrorWithEvent("copy_file_range", linuxerr.ENOSYS, "", nil),
327: syscalls.Supported("preadv2", Preadv2),
328: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil),
- 329: syscalls.ErrorWithEvent("pkey_mprotect", syserror.ENOSYS, "", nil),
- 330: syscalls.ErrorWithEvent("pkey_alloc", syserror.ENOSYS, "", nil),
- 331: syscalls.ErrorWithEvent("pkey_free", syserror.ENOSYS, "", nil),
+ 329: syscalls.ErrorWithEvent("pkey_mprotect", linuxerr.ENOSYS, "", nil),
+ 330: syscalls.ErrorWithEvent("pkey_alloc", linuxerr.ENOSYS, "", nil),
+ 331: syscalls.ErrorWithEvent("pkey_free", linuxerr.ENOSYS, "", nil),
332: syscalls.Supported("statx", Statx),
- 333: syscalls.ErrorWithEvent("io_pgetevents", syserror.ENOSYS, "", nil),
+ 333: syscalls.ErrorWithEvent("io_pgetevents", linuxerr.ENOSYS, "", nil),
334: syscalls.PartiallySupported("rseq", RSeq, "Not supported on all platforms.", nil),
// Linux skips ahead to syscall 424 to sync numbers between arches.
- 424: syscalls.ErrorWithEvent("pidfd_send_signal", syserror.ENOSYS, "", nil),
- 425: syscalls.ErrorWithEvent("io_uring_setup", syserror.ENOSYS, "", nil),
- 426: syscalls.ErrorWithEvent("io_uring_enter", syserror.ENOSYS, "", nil),
- 427: syscalls.ErrorWithEvent("io_uring_register", syserror.ENOSYS, "", nil),
- 428: syscalls.ErrorWithEvent("open_tree", syserror.ENOSYS, "", nil),
- 429: syscalls.ErrorWithEvent("move_mount", syserror.ENOSYS, "", nil),
- 430: syscalls.ErrorWithEvent("fsopen", syserror.ENOSYS, "", nil),
- 431: syscalls.ErrorWithEvent("fsconfig", syserror.ENOSYS, "", nil),
- 432: syscalls.ErrorWithEvent("fsmount", syserror.ENOSYS, "", nil),
- 433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil),
- 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil),
- 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil),
+ 424: syscalls.ErrorWithEvent("pidfd_send_signal", linuxerr.ENOSYS, "", nil),
+ 425: syscalls.ErrorWithEvent("io_uring_setup", linuxerr.ENOSYS, "", nil),
+ 426: syscalls.ErrorWithEvent("io_uring_enter", linuxerr.ENOSYS, "", nil),
+ 427: syscalls.ErrorWithEvent("io_uring_register", linuxerr.ENOSYS, "", nil),
+ 428: syscalls.ErrorWithEvent("open_tree", linuxerr.ENOSYS, "", nil),
+ 429: syscalls.ErrorWithEvent("move_mount", linuxerr.ENOSYS, "", nil),
+ 430: syscalls.ErrorWithEvent("fsopen", linuxerr.ENOSYS, "", nil),
+ 431: syscalls.ErrorWithEvent("fsconfig", linuxerr.ENOSYS, "", nil),
+ 432: syscalls.ErrorWithEvent("fsmount", linuxerr.ENOSYS, "", nil),
+ 433: syscalls.ErrorWithEvent("fspick", linuxerr.ENOSYS, "", nil),
+ 434: syscalls.ErrorWithEvent("pidfd_open", linuxerr.ENOSYS, "", nil),
+ 435: syscalls.ErrorWithEvent("clone3", linuxerr.ENOSYS, "", nil),
441: syscalls.Supported("epoll_pwait2", EpollPwait2),
},
Emulate: map[hostarch.Addr]uintptr{
@@ -414,7 +413,7 @@ var AMD64 = &kernel.SyscallTable{
},
Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
t.Kernel().EmitUnimplementedEvent(t)
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
},
}
@@ -472,7 +471,7 @@ var ARM64 = &kernel.SyscallTable{
39: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil),
40: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil),
41: syscalls.Error("pivot_root", linuxerr.EPERM, "", nil),
- 42: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil),
+ 42: syscalls.Error("nfsservctl", linuxerr.ENOSYS, "Removed after Linux 3.1.", nil),
43: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil),
44: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil),
45: syscalls.Supported("truncate", Truncate),
@@ -505,7 +504,7 @@ var ARM64 = &kernel.SyscallTable{
72: syscalls.Supported("pselect", Pselect),
73: syscalls.Supported("ppoll", Ppoll),
74: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}),
- 75: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 75: syscalls.ErrorWithEvent("vmsplice", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
76: syscalls.Supported("splice", Splice),
77: syscalls.Supported("tee", Tee),
78: syscalls.Supported("readlinkat", Readlinkat),
@@ -581,8 +580,8 @@ var ARM64 = &kernel.SyscallTable{
148: syscalls.Supported("getresuid", Getresuid),
149: syscalls.Supported("setresgid", Setresgid),
150: syscalls.Supported("getresgid", Getresgid),
- 151: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
- 152: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 151: syscalls.ErrorWithEvent("setfsuid", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 152: syscalls.ErrorWithEvent("setfsgid", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
153: syscalls.Supported("times", Times),
154: syscalls.Supported("setpgid", Setpgid),
155: syscalls.Supported("getpgid", Getpgid),
@@ -610,14 +609,14 @@ var ARM64 = &kernel.SyscallTable{
177: syscalls.Supported("getegid", Getegid),
178: syscalls.Supported("gettid", Gettid),
179: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil),
- 180: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 181: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 182: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 183: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 184: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 185: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 180: syscalls.ErrorWithEvent("mq_open", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 181: syscalls.ErrorWithEvent("mq_unlink", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 182: syscalls.ErrorWithEvent("mq_timedsend", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 183: syscalls.ErrorWithEvent("mq_timedreceive", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 184: syscalls.ErrorWithEvent("mq_notify", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 185: syscalls.ErrorWithEvent("mq_getsetattr", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
186: syscalls.Supported("msgget", Msgget),
- 187: syscalls.PartiallySupported("msgctl", Msgctl, "Only supports IPC_RMID option.", []string{"gvisor.dev/issue/135"}),
+ 187: syscalls.Supported("msgctl", Msgctl),
188: syscalls.Supported("msgrcv", Msgrcv),
189: syscalls.Supported("msgsnd", Msgsnd),
190: syscalls.Supported("semget", Semget),
@@ -664,7 +663,7 @@ var ARM64 = &kernel.SyscallTable{
231: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
232: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. Reports all mapped pages are resident.", nil),
233: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil),
- 234: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil),
+ 234: syscalls.ErrorWithEvent("remap_file_pages", linuxerr.ENOSYS, "Deprecated since Linux 3.16.", nil),
235: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}),
236: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil),
237: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil),
@@ -676,60 +675,60 @@ var ARM64 = &kernel.SyscallTable{
243: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil),
260: syscalls.Supported("wait4", Wait4),
261: syscalls.Supported("prlimit64", Prlimit64),
- 262: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
- 263: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 262: syscalls.ErrorWithEvent("fanotify_init", linuxerr.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 263: syscalls.ErrorWithEvent("fanotify_mark", linuxerr.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
264: syscalls.Error("name_to_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
265: syscalls.Error("open_by_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
266: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil),
267: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil),
268: syscalls.ErrorWithEvent("setns", linuxerr.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995)
269: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil),
- 270: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
- 271: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 270: syscalls.ErrorWithEvent("process_vm_readv", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 271: syscalls.ErrorWithEvent("process_vm_writev", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
272: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil),
273: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil),
- 274: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
- 275: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
- 276: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
+ 274: syscalls.ErrorWithEvent("sched_setattr", linuxerr.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 275: syscalls.ErrorWithEvent("sched_getattr", linuxerr.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 276: syscalls.ErrorWithEvent("renameat2", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
277: syscalls.Supported("seccomp", Seccomp),
278: syscalls.Supported("getrandom", GetRandom),
279: syscalls.Supported("memfd_create", MemfdCreate),
280: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
281: syscalls.Supported("execveat", Execveat),
- 282: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
+ 282: syscalls.ErrorWithEvent("userfaultfd", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
283: syscalls.PartiallySupported("membarrier", Membarrier, "Not supported on all platforms.", nil),
284: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
// Syscalls after 284 are "backports" from versions of Linux after 4.4.
- 285: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil),
+ 285: syscalls.ErrorWithEvent("copy_file_range", linuxerr.ENOSYS, "", nil),
286: syscalls.Supported("preadv2", Preadv2),
287: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil),
- 288: syscalls.ErrorWithEvent("pkey_mprotect", syserror.ENOSYS, "", nil),
- 289: syscalls.ErrorWithEvent("pkey_alloc", syserror.ENOSYS, "", nil),
- 290: syscalls.ErrorWithEvent("pkey_free", syserror.ENOSYS, "", nil),
+ 288: syscalls.ErrorWithEvent("pkey_mprotect", linuxerr.ENOSYS, "", nil),
+ 289: syscalls.ErrorWithEvent("pkey_alloc", linuxerr.ENOSYS, "", nil),
+ 290: syscalls.ErrorWithEvent("pkey_free", linuxerr.ENOSYS, "", nil),
291: syscalls.Supported("statx", Statx),
- 292: syscalls.ErrorWithEvent("io_pgetevents", syserror.ENOSYS, "", nil),
+ 292: syscalls.ErrorWithEvent("io_pgetevents", linuxerr.ENOSYS, "", nil),
293: syscalls.PartiallySupported("rseq", RSeq, "Not supported on all platforms.", nil),
// Linux skips ahead to syscall 424 to sync numbers between arches.
- 424: syscalls.ErrorWithEvent("pidfd_send_signal", syserror.ENOSYS, "", nil),
- 425: syscalls.ErrorWithEvent("io_uring_setup", syserror.ENOSYS, "", nil),
- 426: syscalls.ErrorWithEvent("io_uring_enter", syserror.ENOSYS, "", nil),
- 427: syscalls.ErrorWithEvent("io_uring_register", syserror.ENOSYS, "", nil),
- 428: syscalls.ErrorWithEvent("open_tree", syserror.ENOSYS, "", nil),
- 429: syscalls.ErrorWithEvent("move_mount", syserror.ENOSYS, "", nil),
- 430: syscalls.ErrorWithEvent("fsopen", syserror.ENOSYS, "", nil),
- 431: syscalls.ErrorWithEvent("fsconfig", syserror.ENOSYS, "", nil),
- 432: syscalls.ErrorWithEvent("fsmount", syserror.ENOSYS, "", nil),
- 433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil),
- 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil),
- 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil),
+ 424: syscalls.ErrorWithEvent("pidfd_send_signal", linuxerr.ENOSYS, "", nil),
+ 425: syscalls.ErrorWithEvent("io_uring_setup", linuxerr.ENOSYS, "", nil),
+ 426: syscalls.ErrorWithEvent("io_uring_enter", linuxerr.ENOSYS, "", nil),
+ 427: syscalls.ErrorWithEvent("io_uring_register", linuxerr.ENOSYS, "", nil),
+ 428: syscalls.ErrorWithEvent("open_tree", linuxerr.ENOSYS, "", nil),
+ 429: syscalls.ErrorWithEvent("move_mount", linuxerr.ENOSYS, "", nil),
+ 430: syscalls.ErrorWithEvent("fsopen", linuxerr.ENOSYS, "", nil),
+ 431: syscalls.ErrorWithEvent("fsconfig", linuxerr.ENOSYS, "", nil),
+ 432: syscalls.ErrorWithEvent("fsmount", linuxerr.ENOSYS, "", nil),
+ 433: syscalls.ErrorWithEvent("fspick", linuxerr.ENOSYS, "", nil),
+ 434: syscalls.ErrorWithEvent("pidfd_open", linuxerr.ENOSYS, "", nil),
+ 435: syscalls.ErrorWithEvent("clone3", linuxerr.ENOSYS, "", nil),
441: syscalls.Supported("epoll_pwait2", EpollPwait2),
},
Emulate: map[hostarch.Addr]uintptr{},
Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
t.Kernel().EmitUnimplementedEvent(t)
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
},
}
diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go
index 9dea78085..373948991 100644
--- a/pkg/sentry/syscalls/linux/sigset.go
+++ b/pkg/sentry/syscalls/linux/sigset.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
// CopyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and
@@ -67,6 +66,6 @@ func copyInSigSetWithSize(t *kernel.Task, addr hostarch.Addr) (hostarch.Addr, ui
maskSize := uint(hostarch.ByteOrder.Uint64(in[8:]))
return maskAddr, maskSize, nil
default:
- return 0, 0, syserror.ENOSYS
+ return 0, 0, linuxerr.ENOSYS
}
}
diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go
index 4ce3430e2..2f00c3783 100644
--- a/pkg/sentry/syscalls/linux/sys_aio.go
+++ b/pkg/sentry/syscalls/linux/sys_aio.go
@@ -26,7 +26,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/eventfd"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/mm"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -138,7 +138,7 @@ func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
if count > 0 || linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
return uintptr(count), nil, nil
}
- return 0, nil, syserror.ConvertIntr(err, syserror.EINTR)
+ return 0, nil, syserr.ConvertIntr(err, linuxerr.EINTR)
}
}
@@ -216,7 +216,7 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error)
// It is not presently supported (ENOSYS indicates no support on this
// architecture).
func IoCancel(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
// LINT.IfChange
@@ -355,7 +355,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
cbAddr = hostarch.Addr(cbAddrP)
default:
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
// Copy in this callback.
diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go
index daa151bb4..6c807124c 100644
--- a/pkg/sentry/syscalls/linux/sys_epoll.go
+++ b/pkg/sentry/syscalls/linux/sys_epoll.go
@@ -22,7 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
"gvisor.dev/gvisor/pkg/sentry/syscalls"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -109,7 +109,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
func waitEpoll(t *kernel.Task, fd int32, eventsAddr hostarch.Addr, max int, timeoutInNanos int64) (uintptr, *kernel.SyscallControl, error) {
r, err := syscalls.WaitEpoll(t, fd, max, timeoutInNanos)
if err != nil {
- return 0, nil, syserror.ConvertIntr(err, syserror.EINTR)
+ return 0, nil, syserr.ConvertIntr(err, linuxerr.EINTR)
}
if len(r) != 0 {
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 3528d325f..e79b92fb6 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -30,7 +30,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/fasync"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/limits"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
)
// fileOpAt performs an operation on the second last component in the path.
@@ -122,7 +122,7 @@ func copyInPath(t *kernel.Task, addr hostarch.Addr, allowEmpty bool) (path strin
return "", false, err
}
if path == "" && !allowEmpty {
- return "", false, syserror.ENOENT
+ return "", false, linuxerr.ENOENT
}
// If the path ends with a /, then checks must be enforced in various
@@ -162,7 +162,7 @@ func openAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint) (fd uin
if fs.IsDir(d.Inode.StableAttr) {
// Don't allow directories to be opened writable.
if fileFlags.Write {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
} else {
// If O_DIRECTORY is set, but the file is not a directory, then fail.
@@ -177,7 +177,7 @@ func openAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint) (fd uin
file, err := d.Inode.GetFile(t, d, fileFlags)
if err != nil {
- return syserror.ConvertIntr(err, syserror.ERESTARTSYS)
+ return syserr.ConvertIntr(err, linuxerr.ERESTARTSYS)
}
defer file.DecRef(t)
@@ -215,7 +215,7 @@ func mknodAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMod
return err
}
if dirPath {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error {
@@ -308,7 +308,7 @@ func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode
return 0, err
}
if dirPath {
- return 0, syserror.ENOENT
+ return 0, linuxerr.ENOENT
}
fileFlags := linuxToFlags(flags)
@@ -416,7 +416,7 @@ func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode
// Create a new fs.File.
newFile, err = found.Inode.GetFile(t, found, fileFlags)
if err != nil {
- return syserror.ConvertIntr(err, syserror.ERESTARTSYS)
+ return syserr.ConvertIntr(err, linuxerr.ERESTARTSYS)
}
defer newFile.DecRef(t)
case linuxerr.Equals(linuxerr.ENOENT, err):
@@ -795,7 +795,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
defer file.DecRef(t)
err := file.Flush(t)
- return 0, nil, handleIOError(t, false /* partial */, err, syserror.EINTR, "close", file)
+ return 0, nil, handleIOError(t, false /* partial */, err, linuxerr.EINTR, "close", file)
}
// Dup implements linux syscall dup(2).
@@ -1020,7 +1020,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
} else {
// Blocking lock, pass in the task to satisfy the lock.Blocker interface.
if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, t) {
- return 0, nil, syserror.EINTR
+ return 0, nil, linuxerr.EINTR
}
}
return 0, nil, nil
@@ -1036,7 +1036,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
} else {
// Blocking lock, pass in the task to satisfy the lock.Blocker interface.
if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, t) {
- return 0, nil, syserror.EINTR
+ return 0, nil, linuxerr.EINTR
}
}
return 0, nil, nil
@@ -1263,7 +1263,7 @@ func symlinkAt(t *kernel.Task, dirFD int32, newAddr hostarch.Addr, oldAddr hosta
return err
}
if dirPath {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// The oldPath is copied in verbatim. This is because the symlink
@@ -1273,7 +1273,7 @@ func symlinkAt(t *kernel.Task, dirFD int32, newAddr hostarch.Addr, oldAddr hosta
return err
}
if oldPath == "" {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error {
@@ -1352,7 +1352,7 @@ func linkAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int3
return err
}
if dirPath {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if allowEmpty && oldPath == "" {
@@ -1439,7 +1439,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
allowEmpty := flags&linux.AT_EMPTY_PATH == linux.AT_EMPTY_PATH
if allowEmpty && !t.HasCapabilityIn(linux.CAP_DAC_READ_SEARCH, t.UserNamespace().Root()) {
- return 0, nil, syserror.ENOENT
+ return 0, nil, linuxerr.ENOENT
}
return 0, nil, linkAt(t, oldDirFD, oldAddr, newDirFD, newAddr, resolve, allowEmpty)
@@ -1455,7 +1455,7 @@ func readlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, bufAddr hostarc
return 0, err
}
if dirPath {
- return 0, syserror.ENOENT
+ return 0, linuxerr.ENOENT
}
err = fileOpOn(t, dirFD, path, false /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
@@ -1579,7 +1579,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
if fs.IsDir(d.Inode.StableAttr) {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
// In contrast to open(O_TRUNC), truncate(2) is only valid for file
// types.
@@ -2131,7 +2131,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, linuxerr.ESPIPE
}
if fs.IsDir(file.Dirent.Inode.StableAttr) {
- return 0, nil, syserror.EISDIR
+ return 0, nil, linuxerr.EISDIR
}
if !fs.IsRegular(file.Dirent.Inode.StableAttr) {
return 0, nil, linuxerr.ENODEV
@@ -2189,7 +2189,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
} else {
// Because we're blocking we will pass the task to satisfy the lock.Blocker interface.
if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, t) {
- return 0, nil, syserror.EINTR
+ return 0, nil, linuxerr.EINTR
}
}
case linux.LOCK_SH:
@@ -2201,7 +2201,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
} else {
// Because we're blocking we will pass the task to satisfy the lock.Blocker interface.
if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, t) {
- return 0, nil, syserror.EINTR
+ return 0, nil, linuxerr.EINTR
}
}
case linux.LOCK_UN:
diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go
index 717cec04d..bcdd7b633 100644
--- a/pkg/sentry/syscalls/linux/sys_futex.go
+++ b/pkg/sentry/syscalls/linux/sys_futex.go
@@ -23,7 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
)
// futexWaitRestartBlock encapsulates the state required to restart futex(2)
@@ -75,7 +75,7 @@ func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, fo
}
t.Futex().WaitComplete(w, t)
- return 0, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
+ return 0, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS)
}
// futexWaitDuration performs a FUTEX_WAIT, blocking until the wait is
@@ -103,7 +103,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add
// The wait was unsuccessful for some reason other than interruption. Simply
// forward the error.
- if err != syserror.ErrInterrupted {
+ if err != linuxerr.ErrInterrupted {
return 0, err
}
@@ -111,7 +111,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add
// The wait duration was absolute, restart with the original arguments.
if forever {
- return 0, syserror.ERESTARTSYS
+ return 0, linuxerr.ERESTARTSYS
}
// The wait duration was relative, restart with the remaining duration.
@@ -122,7 +122,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add
val: val,
mask: mask,
})
- return 0, syserror.ERESTART_RESTARTBLOCK
+ return 0, linuxerr.ERESTART_RESTARTBLOCK
}
func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr hostarch.Addr, private bool) error {
@@ -150,7 +150,7 @@ func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr hostarch.
}
t.Futex().WaitComplete(w, t)
- return syserror.ConvertIntr(err, syserror.ERESTARTSYS)
+ return syserr.ConvertIntr(err, linuxerr.ERESTARTSYS)
}
func tryLockPI(t *kernel.Task, addr hostarch.Addr, private bool) error {
@@ -280,11 +280,11 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
case linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI:
t.Kernel().EmitUnimplementedEvent(t)
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
default:
// We don't even know about this command.
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
}
diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go
index 917717e31..9f7a5ae8a 100644
--- a/pkg/sentry/syscalls/linux/sys_getdents.go
+++ b/pkg/sentry/syscalls/linux/sys_getdents.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -83,7 +82,7 @@ func getdents(t *kernel.Task, fd int32, addr hostarch.Addr, size int, f func(*di
ds := newDirentSerializer(f, w, t.Arch(), size)
rerr := dir.Readdir(t, ds)
- switch err := handleIOError(t, ds.Written() > 0, rerr, syserror.ERESTARTSYS, "getdents", dir); err {
+ switch err := handleIOError(t, ds.Written() > 0, rerr, linuxerr.ERESTARTSYS, "getdents", dir); err {
case nil:
dir.Dirent.InotifyEvent(linux.IN_ACCESS, 0)
return uintptr(ds.Written()), nil
diff --git a/pkg/sentry/syscalls/linux/sys_lseek.go b/pkg/sentry/syscalls/linux/sys_lseek.go
index bf71a9af3..4a5712a29 100644
--- a/pkg/sentry/syscalls/linux/sys_lseek.go
+++ b/pkg/sentry/syscalls/linux/sys_lseek.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
// LINT.IfChange
@@ -49,7 +48,7 @@ func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
offset, serr := file.Seek(t, sw, offset)
- err := handleIOError(t, false /* partialResult */, serr, syserror.ERESTARTSYS, "lseek", file)
+ err := handleIOError(t, false /* partialResult */, serr, linuxerr.ERESTARTSYS, "lseek", file)
if err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go
index cee621791..7efd17d40 100644
--- a/pkg/sentry/syscalls/linux/sys_mmap.go
+++ b/pkg/sentry/syscalls/linux/sys_mmap.go
@@ -24,7 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/mm"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
)
// Brk implements linux syscall brk(2).
@@ -211,7 +211,7 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
case linux.MADV_REMOVE:
// These "suggestions" have application-visible side effects, so we
// have to indicate that we don't support them.
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
case linux.MADV_HWPOISON:
// Only privileged processes are allowed to poison pages.
return 0, nil, linuxerr.EPERM
@@ -235,18 +235,18 @@ func Mincore(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// rounded up to the next multiple of the page size." - mincore(2)
la, ok := hostarch.Addr(length).RoundUp()
if !ok {
- return 0, nil, syserror.ENOMEM
+ return 0, nil, linuxerr.ENOMEM
}
ar, ok := addr.ToRange(uint64(la))
if !ok {
- return 0, nil, syserror.ENOMEM
+ return 0, nil, linuxerr.ENOMEM
}
// Pretend that all mapped pages are "resident in core".
mapped := t.MemoryManager().VirtualMemorySizeRange(ar)
// "ENOMEM: addr to addr + length contained unmapped memory."
if mapped != uint64(la) {
- return 0, nil, syserror.ENOMEM
+ return 0, nil, linuxerr.ENOMEM
}
resident := bytes.Repeat([]byte{1}, int(mapped/hostarch.PageSize))
_, err := t.CopyOutBytes(vec, resident)
@@ -277,7 +277,7 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
})
// MSync calls fsync, the same interrupt conversion rules apply, see
// mm/msync.c, fsync POSIX.1-2008.
- return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
+ return 0, nil, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS)
}
// Mlock implements linux syscall mlock(2).
diff --git a/pkg/sentry/syscalls/linux/sys_msgqueue.go b/pkg/sentry/syscalls/linux/sys_msgqueue.go
index 5259ade90..60b989ee7 100644
--- a/pkg/sentry/syscalls/linux/sys_msgqueue.go
+++ b/pkg/sentry/syscalls/linux/sys_msgqueue.go
@@ -130,12 +130,63 @@ func receive(t *kernel.Task, id ipc.ID, mType int64, maxSize int64, msgCopy, wai
func Msgctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
id := ipc.ID(args[0].Int())
cmd := args[1].Int()
+ buf := args[2].Pointer()
creds := auth.CredentialsFromContext(t)
+ r := t.IPCNamespace().MsgqueueRegistry()
+
switch cmd {
+ case linux.IPC_INFO:
+ info := r.IPCInfo(t)
+ _, err := info.CopyOut(t, buf)
+ return 0, nil, err
+ case linux.MSG_INFO:
+ msgInfo := r.MsgInfo(t)
+ _, err := msgInfo.CopyOut(t, buf)
+ return 0, nil, err
case linux.IPC_RMID:
- return 0, nil, t.IPCNamespace().MsgqueueRegistry().Remove(id, creds)
+ return 0, nil, r.Remove(id, creds)
+ }
+
+ // Remaining commands use a queue.
+ queue, err := r.FindByID(id)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ switch cmd {
+ case linux.MSG_STAT:
+ // Technically, we should be treating id as "an index into the kernel's
+ // internal array that maintains information about all shared memory
+ // segments on the system". Since we don't track segments in an array,
+ // we'll just pretend the msqid is the index and do the same thing as
+ // IPC_STAT. Linux also uses the index as the msqid.
+ fallthrough
+ case linux.IPC_STAT:
+ stat, err := queue.Stat(t)
+ if err != nil {
+ return 0, nil, err
+ }
+ _, err = stat.CopyOut(t, buf)
+ return 0, nil, err
+
+ case linux.MSG_STAT_ANY:
+ stat, err := queue.StatAny(t)
+ if err != nil {
+ return 0, nil, err
+ }
+ _, err = stat.CopyOut(t, buf)
+ return 0, nil, err
+
+ case linux.IPC_SET:
+ var ds linux.MsqidDS
+ if _, err := ds.CopyIn(t, buf); err != nil {
+ return 0, nil, linuxerr.EINVAL
+ }
+ err := queue.Set(t, &ds)
+ return 0, nil, err
+
default:
return 0, nil, linuxerr.EINVAL
}
diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go
index a80c84fcd..ee4dbbc64 100644
--- a/pkg/sentry/syscalls/linux/sys_poll.go
+++ b/pkg/sentry/syscalls/linux/sys_poll.go
@@ -25,7 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/limits"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -185,7 +185,7 @@ func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration
pfd[i].Events |= linux.POLLHUP | linux.POLLERR
}
remainingTimeout, n, err := pollBlock(t, pfd, timeout)
- err = syserror.ConvertIntr(err, syserror.EINTR)
+ err = syserr.ConvertIntr(err, linuxerr.EINTR)
// The poll entries are copied out regardless of whether
// any are set or not. This aligns with the Linux behavior.
@@ -295,7 +295,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Ad
// Do the syscall, then count the number of bits set.
if _, _, err = pollBlock(t, pfd, timeout); err != nil {
- return 0, syserror.ConvertIntr(err, syserror.EINTR)
+ return 0, syserr.ConvertIntr(err, linuxerr.EINTR)
}
// r, w, and e are currently event mask bitsets; unset bits corresponding
@@ -411,7 +411,7 @@ func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duratio
nfds: nfds,
timeout: remainingTimeout,
})
- return 0, syserror.ERESTART_RESTARTBLOCK
+ return 0, linuxerr.ERESTART_RESTARTBLOCK
}
return n, err
}
@@ -465,7 +465,7 @@ func Ppoll(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Note that this means that if err is nil but copyErr is not, copyErr is
// ignored. This is consistent with Linux.
if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil {
- err = syserror.ERESTARTNOHAND
+ err = linuxerr.ERESTARTNOHAND
}
return n, nil, err
}
@@ -495,7 +495,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
copyErr := copyOutTimevalRemaining(t, startNs, timeout, timevalAddr)
// See comment in Ppoll.
if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil {
- err = syserror.ERESTARTNOHAND
+ err = linuxerr.ERESTARTNOHAND
}
return n, nil, err
}
@@ -540,7 +540,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
copyErr := copyOutTimespecRemaining(t, startNs, timeout, timespecAddr)
// See comment in Ppoll.
if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil {
- err = syserror.ERESTARTNOHAND
+ err = linuxerr.ERESTARTNOHAND
}
return n, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go
index b54a3a11f..18ea23913 100644
--- a/pkg/sentry/syscalls/linux/sys_read.go
+++ b/pkg/sentry/syscalls/linux/sys_read.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/socket"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -72,7 +71,7 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
n, err := readv(t, file, dst)
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "read", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "read", file)
}
// Readahead implements readahead(2).
@@ -152,7 +151,7 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
n, err := preadv(t, file, dst, offset)
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pread64", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pread64", file)
}
// Readv implements linux syscall readv(2).
@@ -182,7 +181,7 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
n, err := readv(t, file, dst)
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "readv", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "readv", file)
}
// Preadv implements linux syscall preadv(2).
@@ -223,7 +222,7 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
n, err := preadv(t, file, dst, offset)
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv", file)
}
// Preadv2 implements linux syscall preadv2(2).
@@ -281,17 +280,17 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
if offset == -1 {
n, err := readv(t, file, dst)
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv2", file)
}
n, err := preadv(t, file, dst, offset)
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv2", file)
}
func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) {
n, err := f.Readv(t, dst)
- if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
+ if err != linuxerr.ErrWouldBlock || f.Flags().NonBlocking {
if n > 0 {
// Queue notification if we read anything.
f.Dirent.InotifyEvent(linux.IN_ACCESS, 0)
@@ -304,7 +303,7 @@ func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) {
var deadline ktime.Time
if s, ok := f.FileOperations.(socket.Socket); ok {
dl := s.RecvTimeout()
- if dl < 0 && err == syserror.ErrWouldBlock {
+ if dl < 0 && err == linuxerr.ErrWouldBlock {
return n, err
}
if dl > 0 {
@@ -326,14 +325,14 @@ func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) {
// other than "would block".
n, err = f.Readv(t, dst)
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
// Wait for a notification that we should retry.
if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
break
}
@@ -351,7 +350,7 @@ func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) {
func preadv(t *kernel.Task, f *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
n, err := f.Preadv(t, dst, offset)
- if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
+ if err != linuxerr.ErrWouldBlock || f.Flags().NonBlocking {
if n > 0 {
// Queue notification if we read anything.
f.Dirent.InotifyEvent(linux.IN_ACCESS, 0)
@@ -372,7 +371,7 @@ func preadv(t *kernel.Task, f *fs.File, dst usermem.IOSequence, offset int64) (i
// other than "would block".
n, err = f.Preadv(t, dst, offset+total)
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go
index a12e1c915..7210333d2 100644
--- a/pkg/sentry/syscalls/linux/sys_rlimit.go
+++ b/pkg/sentry/syscalls/linux/sys_rlimit.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/limits"
- "gvisor.dev/gvisor/pkg/syserror"
)
// rlimit describes an implementation of 'struct rlimit', which may vary from
@@ -44,7 +43,7 @@ func newRlimit(t *kernel.Task) (rlimit, error) {
// On 64-bit system, struct rlimit and struct rlimit64 are identical.
return &rlimit64{}, nil
default:
- return nil, syserror.ENOSYS
+ return nil, linuxerr.ENOSYS
}
}
diff --git a/pkg/sentry/syscalls/linux/sys_rseq.go b/pkg/sentry/syscalls/linux/sys_rseq.go
index 5fe196647..8328a3742 100644
--- a/pkg/sentry/syscalls/linux/sys_rseq.go
+++ b/pkg/sentry/syscalls/linux/sys_rseq.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
// RSeq implements syscall rseq(2).
@@ -33,7 +32,7 @@ func RSeq(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// Event for applications that want rseq on a configuration
// that doesn't support them.
t.Kernel().EmitUnimplementedEvent(t)
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
switch flags {
diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go
index f61cc466c..5a119b21c 100644
--- a/pkg/sentry/syscalls/linux/sys_sem.go
+++ b/pkg/sentry/syscalls/linux/sys_sem.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
@@ -166,8 +165,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, err
}
- perms := fs.FilePermsFromMode(linux.FileMode(s.SemPerm.Mode & 0777))
- return 0, nil, ipcSet(t, id, auth.UID(s.SemPerm.UID), auth.GID(s.SemPerm.GID), perms)
+ return 0, nil, ipcSet(t, id, &s)
case linux.GETPID:
v, err := getPID(t, id, num)
@@ -243,24 +241,13 @@ func remove(t *kernel.Task, id ipc.ID) error {
return r.Remove(id, creds)
}
-func ipcSet(t *kernel.Task, id ipc.ID, uid auth.UID, gid auth.GID, perms fs.FilePermissions) error {
+func ipcSet(t *kernel.Task, id ipc.ID, ds *linux.SemidDS) error {
r := t.IPCNamespace().SemaphoreRegistry()
set := r.FindByID(id)
if set == nil {
return linuxerr.EINVAL
}
-
- creds := auth.CredentialsFromContext(t)
- kuid := creds.UserNamespace.MapToKUID(uid)
- if !kuid.Ok() {
- return linuxerr.EINVAL
- }
- kgid := creds.UserNamespace.MapToKGID(gid)
- if !kgid.Ok() {
- return linuxerr.EINVAL
- }
- owner := fs.FileOwner{UID: kuid, GID: kgid}
- return set.Change(t, creds, owner, perms)
+ return set.Set(t, ds)
}
func ipcStat(t *kernel.Task, id ipc.ID) (*linux.SemidDS, error) {
diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go
index 45608f3fa..03871d713 100644
--- a/pkg/sentry/syscalls/linux/sys_signal.go
+++ b/pkg/sentry/syscalls/linux/sys_signal.go
@@ -25,7 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/signalfd"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
)
// "For a process to have permission to send a signal it must
@@ -348,7 +348,7 @@ func Sigaltstack(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// Pause implements linux syscall pause(2).
func Pause(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- return 0, nil, syserror.ConvertIntr(t.Block(nil), syserror.ERESTARTNOHAND)
+ return 0, nil, syserr.ConvertIntr(t.Block(nil), linuxerr.ERESTARTNOHAND)
}
// RtSigpending implements linux syscall rt_sigpending(2).
@@ -496,7 +496,7 @@ func RtSigsuspend(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.
t.SetSavedSignalMask(oldmask)
// Perform the wait.
- return 0, nil, syserror.ConvertIntr(t.Block(nil), syserror.ERESTARTNOHAND)
+ return 0, nil, syserr.ConvertIntr(t.Block(nil), linuxerr.ERESTARTNOHAND)
}
// RestartSyscall implements the linux syscall restart_syscall(2).
@@ -512,7 +512,7 @@ func RestartSyscall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
// function is never null by (re)initializing it with one that translates
// the restart into EINTR. We'll emulate that behaviour.
t.Debugf("Restart block missing in restart_syscall(2). Did ptrace inject a return value of ERESTART_RESTARTBLOCK?")
- return 0, nil, syserror.EINTR
+ return 0, nil, linuxerr.EINTR
}
// sharedSignalfd is shared between the two calls.
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go
index 06eb8f319..50ddbc142 100644
--- a/pkg/sentry/syscalls/linux/sys_socket.go
+++ b/pkg/sentry/syscalls/linux/sys_socket.go
@@ -30,7 +30,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/control"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -260,7 +259,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Capture address and call syscall implementation.
@@ -270,7 +269,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
}
blocking := !file.Flags().NonBlocking
- return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), syserror.ERESTARTSYS)
+ return 0, nil, syserr.ConvertIntr(s.Connect(t, a, blocking).ToError(), linuxerr.ERESTARTSYS)
}
// accept is the implementation of the accept syscall. It is called by accept
@@ -291,7 +290,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr,
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, syserror.ENOTSOCK
+ return 0, linuxerr.ENOTSOCK
}
// Call the syscall implementation for this socket, then copy the
@@ -301,7 +300,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr,
peerRequested := addrLen != 0
nfd, peer, peerLen, e := s.Accept(t, peerRequested, flags, blocking)
if e != nil {
- return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
+ return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS)
}
if peerRequested {
// NOTE(magi): Linux does not give you an error if it can't
@@ -350,7 +349,7 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Capture address and call syscall implementation.
@@ -377,7 +376,7 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
if backlog > maxListenBacklog {
@@ -415,7 +414,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Validate how, then call syscall implementation.
@@ -446,7 +445,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Read the length. Reject negative values.
@@ -527,7 +526,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
if optLen < 0 {
@@ -565,7 +564,7 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Get the socket name and copy it to the caller.
@@ -593,7 +592,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Get the socket peer name and copy it to the caller.
@@ -626,7 +625,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Reject flags that we don't handle yet.
@@ -683,7 +682,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
if file.Flags().NonBlocking {
@@ -763,7 +762,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags
if msg.ControlLen == 0 && msg.NameLen == 0 {
n, mflags, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0)
if err != nil {
- return 0, syserror.ConvertIntr(err.ToError(), syserror.ERESTARTSYS)
+ return 0, syserr.ConvertIntr(err.ToError(), linuxerr.ERESTARTSYS)
}
if !cms.Unix.Empty() {
mflags |= linux.MSG_CTRUNC
@@ -785,7 +784,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags
}
n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen)
if e != nil {
- return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
+ return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS)
}
defer cms.Release(t)
@@ -848,7 +847,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, fla
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, syserror.ENOTSOCK
+ return 0, linuxerr.ENOTSOCK
}
if file.Flags().NonBlocking {
@@ -874,7 +873,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, fla
n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0)
cm.Release(t)
if e != nil {
- return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
+ return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS)
}
// Copy the address to the caller.
@@ -921,7 +920,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Reject flags that we don't handle yet.
@@ -963,7 +962,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Reject flags that we don't handle yet.
@@ -1060,7 +1059,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr hostar
// Call the syscall implementation.
n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages)
- err = handleIOError(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendmsg", file)
+ err = handleIOError(t, n != 0, e.ToError(), linuxerr.ERESTARTSYS, "sendmsg", file)
// Control messages should be released on error as well as for zero-length
// messages, which are discarded by the receiver.
if n == 0 || err != nil {
@@ -1087,7 +1086,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, syserror.ENOTSOCK
+ return 0, linuxerr.ENOTSOCK
}
if file.Flags().NonBlocking {
@@ -1122,7 +1121,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags
// Call the syscall implementation.
n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, socket.ControlMessages{Unix: control.New(t, s, nil)})
- return uintptr(n), handleIOError(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendto", file)
+ return uintptr(n), handleIOError(t, n != 0, e.ToError(), linuxerr.ERESTARTSYS, "sendto", file)
}
// SendTo implements the linux syscall sendto(2).
diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go
index 34d87ac1f..8c8847efa 100644
--- a/pkg/sentry/syscalls/linux/sys_splice.go
+++ b/pkg/sentry/syscalls/linux/sys_splice.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -46,9 +45,9 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB
for {
n, err = fs.Splice(t, outFile, inFile, opts)
- if n != 0 || err != syserror.ErrWouldBlock {
+ if n != 0 || err != linuxerr.ErrWouldBlock {
break
- } else if err == syserror.ErrWouldBlock && nonBlocking {
+ } else if err == linuxerr.ErrWouldBlock && nonBlocking {
break
}
@@ -177,7 +176,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// We can only pass a single file to handleIOError, so pick inFile
// arbitrarily. This is used only for debugging purposes.
- return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "sendfile", inFile)
+ return uintptr(n), nil, handleIOError(t, false, err, linuxerr.ERESTARTSYS, "sendfile", inFile)
}
// Splice implements splice(2).
@@ -287,7 +286,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
}
// See above; inFile is chosen arbitrarily here.
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "splice", inFile)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "splice", inFile)
}
// Tee imlements tee(2).
@@ -340,5 +339,5 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
}
// See above; inFile is chosen arbitrarily here.
- return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "tee", inFile)
+ return uintptr(n), nil, handleIOError(t, false, err, linuxerr.ERESTARTSYS, "tee", inFile)
}
diff --git a/pkg/sentry/syscalls/linux/sys_sync.go b/pkg/sentry/syscalls/linux/sys_sync.go
index 6278bef21..0c22599bf 100644
--- a/pkg/sentry/syscalls/linux/sys_sync.go
+++ b/pkg/sentry/syscalls/linux/sys_sync.go
@@ -20,7 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
)
// LINT.IfChange
@@ -58,7 +58,7 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
defer file.DecRef(t)
err := file.Fsync(t, 0, fs.FileMaxOffset, fs.SyncAll)
- return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
+ return 0, nil, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS)
}
// Fdatasync implements linux syscall fdatasync(2).
@@ -74,7 +74,7 @@ func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
defer file.DecRef(t)
err := file.Fsync(t, 0, fs.FileMaxOffset, fs.SyncData)
- return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
+ return 0, nil, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS)
}
// SyncFileRange implements linux syscall sync_file_rage(2)
@@ -112,7 +112,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
if uflags&linux.SYNC_FILE_RANGE_WAIT_BEFORE != 0 &&
uflags&linux.SYNC_FILE_RANGE_WAIT_AFTER == 0 {
t.Kernel().EmitUnimplementedEvent(t)
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
// SYNC_FILE_RANGE_WRITE initiates write-out of all dirty pages in the
@@ -137,7 +137,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
err = file.Fsync(t, offset, fs.FileMaxOffset, fs.SyncData)
}
- return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
+ return 0, nil, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS)
}
// LINT.ThenChange(vfs2/sync.go)
diff --git a/pkg/sentry/syscalls/linux/sys_syslog.go b/pkg/sentry/syscalls/linux/sys_syslog.go
index ba372f9e3..15acb2b8b 100644
--- a/pkg/sentry/syscalls/linux/sys_syslog.go
+++ b/pkg/sentry/syscalls/linux/sys_syslog.go
@@ -18,7 +18,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -57,6 +56,6 @@ func Syslog(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
case _SYSLOG_ACTION_SIZE_BUFFER:
return logBufLen, nil, nil
default:
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
}
diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go
index 981cdd985..d74173c56 100644
--- a/pkg/sentry/syscalls/linux/sys_thread.go
+++ b/pkg/sentry/syscalls/linux/sys_thread.go
@@ -27,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/sched"
"gvisor.dev/gvisor/pkg/sentry/loader"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -111,7 +110,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr host
}
atEmptyPath := flags&linux.AT_EMPTY_PATH != 0
if !atEmptyPath && len(pathname) == 0 {
- return 0, nil, syserror.ENOENT
+ return 0, nil, linuxerr.ENOENT
}
resolveFinal := flags&linux.AT_SYMLINK_NOFOLLOW == 0
@@ -244,7 +243,7 @@ func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error {
wopts.Events |= kernel.EventGroupContinue
}
if options&linux.WNOHANG == 0 {
- wopts.BlockInterruptErr = syserror.ERESTARTSYS
+ wopts.BlockInterruptErr = linuxerr.ERESTARTSYS
}
if options&linux.WNOTHREAD == 0 {
wopts.SiblingChildren = true
diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go
index 674e74f82..4adc8b8a4 100644
--- a/pkg/sentry/syscalls/linux/sys_time.go
+++ b/pkg/sentry/syscalls/linux/sys_time.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/syserror"
)
// The most significant 29 bits hold either a pid or a file descriptor.
@@ -214,7 +213,7 @@ func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, end ktime.Time, rem host
case linuxerr.Equals(linuxerr.ETIMEDOUT, err):
// Slept for entire timeout.
return nil
- case err == syserror.ErrInterrupted:
+ case err == linuxerr.ErrInterrupted:
// Interrupted.
remaining := end.Sub(c.Now())
if remaining <= 0 {
@@ -235,9 +234,9 @@ func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, end ktime.Time, rem host
end: end,
rem: rem,
})
- return syserror.ERESTART_RESTARTBLOCK
+ return linuxerr.ERESTART_RESTARTBLOCK
}
- return syserror.ERESTARTNOHAND
+ return linuxerr.ERESTARTNOHAND
default:
panic(fmt.Sprintf("Impossible BlockWithTimer error %v", err))
}
diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go
index 45eef4feb..d39a0a6f5 100644
--- a/pkg/sentry/syscalls/linux/sys_timer.go
+++ b/pkg/sentry/syscalls/linux/sys_timer.go
@@ -18,9 +18,9 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
const nsecPerSec = int64(time.Second)
@@ -29,7 +29,7 @@ const nsecPerSec = int64(time.Second)
func Getitimer(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
if t.Arch().Width() != 8 {
// Definition of linux.ItimerVal assumes 64-bit architecture.
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
timerID := args[0].Int()
@@ -51,7 +51,7 @@ func Getitimer(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
func Setitimer(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
if t.Arch().Width() != 8 {
// Definition of linux.ItimerVal assumes 64-bit architecture.
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
timerID := args[0].Int()
diff --git a/pkg/sentry/syscalls/linux/sys_tls_amd64.go b/pkg/sentry/syscalls/linux/sys_tls_amd64.go
index 8c6cd7511..bde672d67 100644
--- a/pkg/sentry/syscalls/linux/sys_tls_amd64.go
+++ b/pkg/sentry/syscalls/linux/sys_tls_amd64.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
// ArchPrctl implements linux syscall arch_prctl(2).
@@ -39,7 +38,7 @@ func ArchPrctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, err
}
default:
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
case linux.ARCH_SET_FS:
fsbase := args[1].Uint64()
diff --git a/pkg/sentry/syscalls/linux/sys_tls_arm64.go b/pkg/sentry/syscalls/linux/sys_tls_arm64.go
index ff4ac4d6d..dfa684387 100644
--- a/pkg/sentry/syscalls/linux/sys_tls_arm64.go
+++ b/pkg/sentry/syscalls/linux/sys_tls_arm64.go
@@ -18,12 +18,12 @@
package linux
import (
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
// ArchPrctl is not defined for ARM64.
func ArchPrctl(*kernel.Task, arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go
index 872168606..4a4ef5046 100644
--- a/pkg/sentry/syscalls/linux/sys_write.go
+++ b/pkg/sentry/syscalls/linux/sys_write.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/socket"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -72,7 +71,7 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
n, err := writev(t, file, src)
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "write", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "write", file)
}
// Pwrite64 implements linux syscall pwrite64(2).
@@ -119,7 +118,7 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
n, err := pwritev(t, file, src, offset)
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwrite64", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pwrite64", file)
}
// Writev implements linux syscall writev(2).
@@ -149,7 +148,7 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
n, err := writev(t, file, src)
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "writev", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "writev", file)
}
// Pwritev implements linux syscall pwritev(2).
@@ -190,7 +189,7 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
n, err := pwritev(t, file, src, offset)
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev", file)
}
// Pwritev2 implements linux syscall pwritev2(2).
@@ -251,17 +250,17 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
if offset == -1 {
n, err := writev(t, file, src)
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev2", file)
}
n, err := pwritev(t, file, src, offset)
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file)
+ return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev2", file)
}
func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) {
n, err := f.Writev(t, src)
- if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
+ if err != linuxerr.ErrWouldBlock || f.Flags().NonBlocking {
if n > 0 {
// Queue notification if we wrote anything.
f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
@@ -274,7 +273,7 @@ func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) {
var deadline ktime.Time
if s, ok := f.FileOperations.(socket.Socket); ok {
dl := s.SendTimeout()
- if dl < 0 && err == syserror.ErrWouldBlock {
+ if dl < 0 && err == linuxerr.ErrWouldBlock {
return n, err
}
if dl > 0 {
@@ -296,14 +295,14 @@ func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) {
// anything other than "would block".
n, err = f.Writev(t, src)
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
// Wait for a notification that we should retry.
if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
break
}
@@ -321,7 +320,7 @@ func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) {
func pwritev(t *kernel.Task, f *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
n, err := f.Pwritev(t, src, offset)
- if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
+ if err != linuxerr.ErrWouldBlock || f.Flags().NonBlocking {
if n > 0 {
// Queue notification if we wrote anything.
f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
@@ -342,7 +341,7 @@ func pwritev(t *kernel.Task, f *fs.File, src usermem.IOSequence, offset int64) (
// anything other than "would block".
n, err = f.Pwritev(t, src, offset+total)
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
diff --git a/pkg/sentry/syscalls/linux/timespec.go b/pkg/sentry/syscalls/linux/timespec.go
index b327e27d6..d90652a3f 100644
--- a/pkg/sentry/syscalls/linux/timespec.go
+++ b/pkg/sentry/syscalls/linux/timespec.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
// copyTimespecIn copies a Timespec from the untrusted app range to the kernel.
@@ -38,7 +37,7 @@ func copyTimespecIn(t *kernel.Task, addr hostarch.Addr) (linux.Timespec, error)
ts.Nsec = int64(hostarch.ByteOrder.Uint64(in[8:]))
return ts, nil
default:
- return linux.Timespec{}, syserror.ENOSYS
+ return linux.Timespec{}, linuxerr.ENOSYS
}
}
@@ -52,7 +51,7 @@ func copyTimespecOut(t *kernel.Task, addr hostarch.Addr, ts *linux.Timespec) err
_, err := t.CopyOutBytes(addr, out)
return err
default:
- return syserror.ENOSYS
+ return linuxerr.ENOSYS
}
}
@@ -70,7 +69,7 @@ func copyTimevalIn(t *kernel.Task, addr hostarch.Addr) (linux.Timeval, error) {
tv.Usec = int64(hostarch.ByteOrder.Uint64(in[8:]))
return tv, nil
default:
- return linux.Timeval{}, syserror.ENOSYS
+ return linux.Timeval{}, linuxerr.ENOSYS
}
}
@@ -84,7 +83,7 @@ func copyTimevalOut(t *kernel.Task, addr hostarch.Addr, tv *linux.Timeval) error
_, err := t.CopyOutBytes(addr, out)
return err
default:
- return syserror.ENOSYS
+ return linuxerr.ENOSYS
}
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD
index a73f096ff..1e3bd2a50 100644
--- a/pkg/sentry/syscalls/linux/vfs2/BUILD
+++ b/pkg/sentry/syscalls/linux/vfs2/BUILD
@@ -73,7 +73,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go
index a8fa86cdc..0b57c0f7c 100644
--- a/pkg/sentry/syscalls/linux/vfs2/aio.go
+++ b/pkg/sentry/syscalls/linux/vfs2/aio.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/mm"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -56,7 +55,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
cbAddr = hostarch.Addr(cbAddrP)
default:
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
// Copy in this callback.
diff --git a/pkg/sentry/syscalls/linux/vfs2/execve.go b/pkg/sentry/syscalls/linux/vfs2/execve.go
index 38818c175..fcf2e25de 100644
--- a/pkg/sentry/syscalls/linux/vfs2/execve.go
+++ b/pkg/sentry/syscalls/linux/vfs2/execve.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/loader"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Execve implements linux syscall execve(2).
@@ -83,7 +82,7 @@ func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr host
// do_open_execat(fd=AT_FDCWD)), and the loader package is currently
// incapable of handling this correctly.
if !path.HasComponents() && flags&linux.AT_EMPTY_PATH == 0 {
- return 0, nil, syserror.ENOENT
+ return 0, nil, linuxerr.ENOENT
}
dirfile, dirfileFlags := t.FDTable().GetVFS2(dirfd)
if dirfile == nil {
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index 2cfb12cad..2198aa065 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Close implements Linux syscall close(2).
@@ -42,7 +41,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
defer file.DecRef(t)
err := file.OnClose(t)
- return 0, nil, slinux.HandleIOErrorVFS2(t, false /* partial */, err, syserror.EINTR, "close", file)
+ return 0, nil, slinux.HandleIOErrorVFS2(t, false /* partial */, err, linuxerr.EINTR, "close", file)
}
// Dup implements Linux syscall dup(2).
diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
index 534355237..f19f0fd41 100644
--- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go
+++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Link implements Linux syscall link(2).
@@ -46,7 +45,7 @@ func linkat(t *kernel.Task, olddirfd int32, oldpathAddr hostarch.Addr, newdirfd
return linuxerr.EINVAL
}
if flags&linux.AT_EMPTY_PATH != 0 && !t.HasCapability(linux.CAP_DAC_READ_SEARCH) {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
oldpath, err := copyInPath(t, oldpathAddr)
@@ -320,7 +319,7 @@ func symlinkat(t *kernel.Task, targetAddr hostarch.Addr, newdirfd int32, linkpat
return err
}
if len(target) == 0 {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
linkpath, err := copyInPath(t, linkpathAddr)
if err != nil {
diff --git a/pkg/sentry/syscalls/linux/vfs2/path.go b/pkg/sentry/syscalls/linux/vfs2/path.go
index 2bb783a85..38796d4db 100644
--- a/pkg/sentry/syscalls/linux/vfs2/path.go
+++ b/pkg/sentry/syscalls/linux/vfs2/path.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
func copyInPath(t *kernel.Task, addr hostarch.Addr) (fspath.Path, error) {
@@ -44,7 +43,7 @@ func getTaskPathOperation(t *kernel.Task, dirfd int32, path fspath.Path, shouldA
if !path.Absolute {
if !path.HasComponents() && !bool(shouldAllowEmptyPath) {
root.DecRef(t)
- return taskPathOperation{}, syserror.ENOENT
+ return taskPathOperation{}, linuxerr.ENOENT
}
if dirfd == linux.AT_FDCWD {
start = t.FSContext().WorkingDirectoryVFS2()
diff --git a/pkg/sentry/syscalls/linux/vfs2/poll.go b/pkg/sentry/syscalls/linux/vfs2/poll.go
index 042aa4c97..204051cd0 100644
--- a/pkg/sentry/syscalls/linux/vfs2/poll.go
+++ b/pkg/sentry/syscalls/linux/vfs2/poll.go
@@ -20,15 +20,14 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/waiter"
-
- "gvisor.dev/gvisor/pkg/hostarch"
)
// fileCap is the maximum allowable files for poll & select. This has no
@@ -189,7 +188,7 @@ func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration
pfd[i].Events |= linux.POLLHUP | linux.POLLERR
}
remainingTimeout, n, err := pollBlock(t, pfd, timeout)
- err = syserror.ConvertIntr(err, syserror.EINTR)
+ err = syserr.ConvertIntr(err, linuxerr.EINTR)
// The poll entries are copied out regardless of whether
// any are set or not. This aligns with the Linux behavior.
@@ -299,7 +298,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Ad
// Do the syscall, then count the number of bits set.
if _, _, err = pollBlock(t, pfd, timeout); err != nil {
- return 0, syserror.ConvertIntr(err, syserror.EINTR)
+ return 0, syserr.ConvertIntr(err, linuxerr.EINTR)
}
// r, w, and e are currently event mask bitsets; unset bits corresponding
@@ -417,7 +416,7 @@ func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duratio
nfds: nfds,
timeout: remainingTimeout,
})
- return 0, syserror.ERESTART_RESTARTBLOCK
+ return 0, linuxerr.ERESTART_RESTARTBLOCK
}
return n, err
}
@@ -464,7 +463,7 @@ func Ppoll(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Note that this means that if err is nil but copyErr is not, copyErr is
// ignored. This is consistent with Linux.
if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil {
- err = syserror.ERESTARTNOHAND
+ err = linuxerr.ERESTARTNOHAND
}
return n, nil, err
}
@@ -494,7 +493,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
copyErr := copyOutTimevalRemaining(t, startNs, timeout, timevalAddr)
// See comment in Ppoll.
if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil {
- err = syserror.ERESTARTNOHAND
+ err = linuxerr.ERESTARTNOHAND
}
return n, nil, err
}
@@ -541,7 +540,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
copyErr := copyOutTimespecRemaining(t, startNs, timeout, timespecAddr)
// See comment in Ppoll.
if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil {
- err = syserror.ERESTARTNOHAND
+ err = linuxerr.ERESTARTNOHAND
}
return n, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go
index fe8aa06da..4e7dc5080 100644
--- a/pkg/sentry/syscalls/linux/vfs2/read_write.go
+++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -63,7 +62,7 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
n, err := read(t, file, dst, vfs.ReadOptions{})
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "read", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "read", file)
}
// Readv implements Linux syscall readv(2).
@@ -88,12 +87,12 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
n, err := read(t, file, dst, vfs.ReadOptions{})
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "readv", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "readv", file)
}
func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
n, err := file.Read(t, dst, opts)
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
return n, err
}
@@ -115,14 +114,14 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt
// "would block".
n, err = file.Read(t, dst, opts)
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
// Wait for a notification that we should retry.
if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil {
if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
break
}
@@ -166,7 +165,7 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
n, err := pread(t, file, dst, offset, vfs.ReadOptions{})
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pread64", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "pread64", file)
}
// Preadv implements Linux syscall preadv(2).
@@ -197,7 +196,7 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
n, err := pread(t, file, dst, offset, vfs.ReadOptions{})
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "preadv", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv", file)
}
// Preadv2 implements Linux syscall preadv2(2).
@@ -243,12 +242,12 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
n, err = pread(t, file, dst, offset, opts)
}
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv2", file)
}
func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
n, err := file.PRead(t, dst, offset, opts)
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
return n, err
}
@@ -270,14 +269,14 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of
// "would block".
n, err = file.PRead(t, dst, offset+total, opts)
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
// Wait for a notification that we should retry.
if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil {
if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
break
}
@@ -314,7 +313,7 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
n, err := write(t, file, src, vfs.WriteOptions{})
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "write", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "write", file)
}
// Writev implements Linux syscall writev(2).
@@ -339,12 +338,12 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
n, err := write(t, file, src, vfs.WriteOptions{})
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "writev", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "writev", file)
}
func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
n, err := file.Write(t, src, opts)
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
return n, err
}
@@ -366,14 +365,14 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op
// "would block".
n, err = file.Write(t, src, opts)
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
// Wait for a notification that we should retry.
if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil {
if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
break
}
@@ -416,7 +415,7 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
n, err := pwrite(t, file, src, offset, vfs.WriteOptions{})
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pwrite64", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "pwrite64", file)
}
// Pwritev implements Linux syscall pwritev(2).
@@ -447,7 +446,7 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
n, err := pwrite(t, file, src, offset, vfs.WriteOptions{})
t.IOUsage().AccountReadSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pwritev", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev", file)
}
// Pwritev2 implements Linux syscall pwritev2(2).
@@ -493,12 +492,12 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
n, err = pwrite(t, file, src, offset, opts)
}
t.IOUsage().AccountWriteSyscall(n)
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev2", file)
}
func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
n, err := file.PWrite(t, src, offset, opts)
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
return n, err
}
@@ -520,14 +519,14 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o
// "would block".
n, err = file.PWrite(t, src, offset+total, opts)
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
// Wait for a notification that we should retry.
if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil {
if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
break
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go
index b5a3b92c5..e608572b4 100644
--- a/pkg/sentry/syscalls/linux/vfs2/setstat.go
+++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
const chmodMask = 0777 | linux.S_ISUID | linux.S_ISGID | linux.S_ISVTX
@@ -432,7 +431,7 @@ func setstatat(t *kernel.Task, dirfd int32, path fspath.Path, shouldAllowEmptyPa
start := root
if !path.Absolute {
if !path.HasComponents() && !bool(shouldAllowEmptyPath) {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if dirfd == linux.AT_FDCWD {
start = t.FSContext().WorkingDirectoryVFS2()
@@ -465,7 +464,7 @@ func setstatat(t *kernel.Task, dirfd int32, path fspath.Path, shouldAllowEmptyPa
}
func handleSetSizeError(t *kernel.Task, err error) error {
- if err == syserror.ErrExceedsFileSizeLimit {
+ if err == linuxerr.ErrExceedsFileSizeLimit {
// Convert error to EFBIG and send a SIGXFSZ per setrlimit(2).
t.SendSignal(kernel.SignalInfoNoInfo(linux.SIGXFSZ, t, t))
return linuxerr.EFBIG
diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go
index 0c2e0720b..48be5a88d 100644
--- a/pkg/sentry/syscalls/linux/vfs2/socket.go
+++ b/pkg/sentry/syscalls/linux/vfs2/socket.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -30,10 +31,7 @@ import (
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
-
- "gvisor.dev/gvisor/pkg/hostarch"
)
// maxAddrLen is the maximum socket address length we're willing to accept.
@@ -264,7 +262,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Capture address and call syscall implementation.
@@ -274,7 +272,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
}
blocking := (file.StatusFlags() & linux.SOCK_NONBLOCK) == 0
- return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), syserror.ERESTARTSYS)
+ return 0, nil, syserr.ConvertIntr(s.Connect(t, a, blocking).ToError(), linuxerr.ERESTARTSYS)
}
// accept is the implementation of the accept syscall. It is called by accept
@@ -295,7 +293,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr,
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, syserror.ENOTSOCK
+ return 0, linuxerr.ENOTSOCK
}
// Call the syscall implementation for this socket, then copy the
@@ -305,7 +303,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr,
peerRequested := addrLen != 0
nfd, peer, peerLen, e := s.Accept(t, peerRequested, flags, blocking)
if e != nil {
- return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
+ return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS)
}
if peerRequested {
// NOTE(magi): Linux does not give you an error if it can't
@@ -354,7 +352,7 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Capture address and call syscall implementation.
@@ -381,7 +379,7 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
if backlog > maxListenBacklog {
@@ -419,7 +417,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Validate how, then call syscall implementation.
@@ -450,7 +448,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Read the length. Reject negative values.
@@ -531,7 +529,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
if optLen < 0 {
@@ -569,7 +567,7 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Get the socket name and copy it to the caller.
@@ -597,7 +595,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Get the socket peer name and copy it to the caller.
@@ -630,7 +628,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Reject flags that we don't handle yet.
@@ -687,7 +685,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 {
@@ -767,7 +765,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, fl
if msg.ControlLen == 0 && msg.NameLen == 0 {
n, mflags, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0)
if err != nil {
- return 0, syserror.ConvertIntr(err.ToError(), syserror.ERESTARTSYS)
+ return 0, syserr.ConvertIntr(err.ToError(), linuxerr.ERESTARTSYS)
}
if !cms.Unix.Empty() {
mflags |= linux.MSG_CTRUNC
@@ -789,7 +787,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, fl
}
n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen)
if e != nil {
- return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
+ return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS)
}
defer cms.Release(t)
@@ -852,7 +850,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, fla
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, syserror.ENOTSOCK
+ return 0, linuxerr.ENOTSOCK
}
if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 {
@@ -878,7 +876,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, fla
n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0)
cm.Release(t)
if e != nil {
- return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
+ return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS)
}
// Copy the address to the caller.
@@ -925,7 +923,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Reject flags that we don't handle yet.
@@ -967,7 +965,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, nil, syserror.ENOTSOCK
+ return 0, nil, linuxerr.ENOTSOCK
}
// Reject flags that we don't handle yet.
@@ -1064,7 +1062,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio
// Call the syscall implementation.
n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages)
- err = slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendmsg", file)
+ err = slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), linuxerr.ERESTARTSYS, "sendmsg", file)
// Control messages should be released on error as well as for zero-length
// messages, which are discarded by the receiver.
if n == 0 || err != nil {
@@ -1091,7 +1089,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags
// Extract the socket.
s, ok := file.Impl().(socket.SocketVFS2)
if !ok {
- return 0, syserror.ENOTSOCK
+ return 0, linuxerr.ENOTSOCK
}
if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 {
@@ -1126,7 +1124,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags
// Call the syscall implementation.
n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, socket.ControlMessages{Unix: control.New(t, s, nil)})
- return uintptr(n), slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendto", file)
+ return uintptr(n), slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), linuxerr.ERESTARTSYS, "sendto", file)
}
// SendTo implements the linux syscall sendto(2).
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index d8009123f..0205f09e0 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -151,7 +150,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
panic("at least one end of splice must be a pipe")
}
- if n != 0 || err != syserror.ErrWouldBlock || nonBlock {
+ if n != 0 || err != linuxerr.ErrWouldBlock || nonBlock {
break
}
if err = dw.waitForBoth(t); err != nil {
@@ -173,7 +172,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// We can only pass a single file to handleIOError, so pick inFile arbitrarily.
// This is used only for debugging purposes.
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "splice", outFile)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "splice", outFile)
}
// Tee implements Linux syscall tee(2).
@@ -241,7 +240,7 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
defer dw.destroy()
for {
n, err = pipe.Tee(t, outPipeFD, inPipeFD, count)
- if n != 0 || err != syserror.ErrWouldBlock || nonBlock {
+ if n != 0 || err != linuxerr.ErrWouldBlock || nonBlock {
break
}
if err = dw.waitForBoth(t); err != nil {
@@ -251,7 +250,7 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
if n != 0 {
// If a partial write is completed, the error is dropped. Log it here.
- if err != nil && err != io.EOF && err != syserror.ErrWouldBlock {
+ if err != nil && err != io.EOF && err != linuxerr.ErrWouldBlock {
log.Debugf("tee completed a partial write with error: %v", err)
err = nil
}
@@ -259,7 +258,7 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
// We can only pass a single file to handleIOError, so pick inFile arbitrarily.
// This is used only for debugging purposes.
- return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "tee", inFile)
+ return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "tee", inFile)
}
// Sendfile implements linux system call sendfile(2).
@@ -360,10 +359,10 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
break
}
if err == nil && t.Interrupted() {
- err = syserror.ErrInterrupted
+ err = linuxerr.ErrInterrupted
break
}
- if err == syserror.ErrWouldBlock && !nonBlock {
+ if err == linuxerr.ErrWouldBlock && !nonBlock {
err = dw.waitForBoth(t)
}
if err != nil {
@@ -389,7 +388,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
var writeN int64
writeN, err = outFile.Write(t, usermem.BytesIOSequence(wbuf), vfs.WriteOptions{})
wbuf = wbuf[writeN:]
- if err == syserror.ErrWouldBlock && !nonBlock {
+ if err == linuxerr.ErrWouldBlock && !nonBlock {
err = dw.waitForOut(t)
}
if err != nil {
@@ -420,10 +419,10 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
break
}
if err == nil && t.Interrupted() {
- err = syserror.ErrInterrupted
+ err = linuxerr.ErrInterrupted
break
}
- if err == syserror.ErrWouldBlock && !nonBlock {
+ if err == linuxerr.ErrWouldBlock && !nonBlock {
err = dw.waitForBoth(t)
}
if err != nil {
@@ -441,7 +440,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
if total != 0 {
- if err != nil && err != io.EOF && err != syserror.ErrWouldBlock {
+ if err != nil && err != io.EOF && err != linuxerr.ErrWouldBlock {
// If a partial write is completed, the error is dropped. Log it here.
log.Debugf("sendfile completed a partial write with error: %v", err)
err = nil
@@ -450,7 +449,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// We can only pass a single file to handleIOError, so pick inFile arbitrarily.
// This is used only for debugging purposes.
- return uintptr(total), nil, slinux.HandleIOErrorVFS2(t, total != 0, err, syserror.ERESTARTSYS, "sendfile", inFile)
+ return uintptr(total), nil, slinux.HandleIOErrorVFS2(t, total != 0, err, linuxerr.ERESTARTSYS, "sendfile", inFile)
}
// dualWaiter is used to wait on one or both vfs.FileDescriptions. It is not
diff --git a/pkg/sentry/syscalls/linux/vfs2/stat.go b/pkg/sentry/syscalls/linux/vfs2/stat.go
index ba1d30823..adaf8db3f 100644
--- a/pkg/sentry/syscalls/linux/vfs2/stat.go
+++ b/pkg/sentry/syscalls/linux/vfs2/stat.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Stat implements Linux syscall stat(2).
@@ -70,7 +69,7 @@ func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr hostarch.Addr, flag
start := root
if !path.Absolute {
if !path.HasComponents() && flags&linux.AT_EMPTY_PATH == 0 {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if dirfd == linux.AT_FDCWD {
start = t.FSContext().WorkingDirectoryVFS2()
@@ -182,7 +181,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
start := root
if !path.Absolute {
if !path.HasComponents() && flags&linux.AT_EMPTY_PATH == 0 {
- return 0, nil, syserror.ENOENT
+ return 0, nil, linuxerr.ENOENT
}
if dirfd == linux.AT_FDCWD {
start = t.FSContext().WorkingDirectoryVFS2()
diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go
index d0ffc7c32..cfc693422 100644
--- a/pkg/sentry/syscalls/linux/vfs2/sync.go
+++ b/pkg/sentry/syscalls/linux/vfs2/sync.go
@@ -19,7 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserr"
)
// Sync implements Linux syscall sync(2).
@@ -108,12 +108,12 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
if flags&linux.SYNC_FILE_RANGE_WAIT_BEFORE != 0 &&
flags&linux.SYNC_FILE_RANGE_WAIT_AFTER == 0 {
t.Kernel().EmitUnimplementedEvent(t)
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
}
if flags&linux.SYNC_FILE_RANGE_WAIT_AFTER != 0 {
if err := file.Sync(t); err != nil {
- return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
+ return 0, nil, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS)
}
}
return 0, nil, nil
diff --git a/pkg/sentry/syscalls/syscalls.go b/pkg/sentry/syscalls/syscalls.go
index 511fb8b28..cfcc21271 100644
--- a/pkg/sentry/syscalls/syscalls.go
+++ b/pkg/sentry/syscalls/syscalls.go
@@ -31,7 +31,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Supported returns a syscall that is fully supported.
@@ -103,10 +102,10 @@ func CapError(name string, c linux.Capability, note string, urls []string) kerne
return 0, nil, linuxerr.EPERM
}
t.Kernel().EmitUnimplementedEvent(t)
- return 0, nil, syserror.ENOSYS
+ return 0, nil, linuxerr.ENOSYS
},
SupportLevel: kernel.SupportUnimplemented,
- Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise.", note, linuxerr.EPERM, c.String(), syserror.ENOSYS),
+ Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise.", note, linuxerr.EPERM, c.String(), linuxerr.ENOSYS),
URLs: urls,
}
}
diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD
index 36d999c47..c21971322 100644
--- a/pkg/sentry/time/BUILD
+++ b/pkg/sentry/time/BUILD
@@ -39,7 +39,6 @@ go_library(
"//pkg/log",
"//pkg/metric",
"//pkg/sync",
- "//pkg/syserror",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index a2032162d..914574543 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -116,7 +116,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/uniqueid",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
@@ -137,7 +136,6 @@ go_test(
"//pkg/errors/linuxerr",
"//pkg/sentry/contexttest",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md
index 5aad31b78..82ee2c521 100644
--- a/pkg/sentry/vfs/README.md
+++ b/pkg/sentry/vfs/README.md
@@ -1,9 +1,5 @@
# The gVisor Virtual Filesystem
-THIS PACKAGE IS CURRENTLY EXPERIMENTAL AND NOT READY OR ENABLED FOR PRODUCTION
-USE. For the filesystem implementation currently used by gVisor, see the `fs`
-package.
-
## Implementation Notes
### Reference Counting
diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go
index befe3ca25..04bc4d10c 100644
--- a/pkg/sentry/vfs/epoll.go
+++ b/pkg/sentry/vfs/epoll.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -259,7 +258,7 @@ func (ep *EpollInstance) ModifyInterest(file *FileDescription, num int32, event
num: num,
}]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// Update epi for the next call to ep.ReadEvents().
@@ -295,7 +294,7 @@ func (ep *EpollInstance) DeleteInterest(file *FileDescription, num int32) error
num: num,
}]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// Unregister from the file so that epi will no longer be readied.
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index a875fdeca..5dab069ed 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -25,7 +25,6 @@ import (
fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -56,7 +55,7 @@ func (FileDescriptionDefaultImpl) OnClose(ctx context.Context) error {
// StatFS implements FileDescriptionImpl.StatFS analogously to
// super_operations::statfs == NULL in Linux.
func (FileDescriptionDefaultImpl) StatFS(ctx context.Context) (linux.Statfs, error) {
- return linux.Statfs{}, syserror.ENOSYS
+ return linux.Statfs{}, linuxerr.ENOSYS
}
// Allocate implements FileDescriptionImpl.Allocate analogously to
@@ -175,27 +174,27 @@ type DirectoryFileDescriptionDefaultImpl struct{}
// Allocate implements DirectoryFileDescriptionDefaultImpl.Allocate.
func (DirectoryFileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
// PRead implements FileDescriptionImpl.PRead.
func (DirectoryFileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// Read implements FileDescriptionImpl.Read.
func (DirectoryFileDescriptionDefaultImpl) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// PWrite implements FileDescriptionImpl.PWrite.
func (DirectoryFileDescriptionDefaultImpl) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// Write implements FileDescriptionImpl.Write.
func (DirectoryFileDescriptionDefaultImpl) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// DentryMetadataFileDescriptionImpl may be embedded by implementations of
@@ -368,7 +367,7 @@ func (fd *DynamicBytesFileDescriptionImpl) pwriteLocked(ctx context.Context, src
writable, ok := fd.data.(WritableDynamicBytesSource)
if !ok {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
n, err := writable.Write(ctx, src, offset)
if err != nil {
diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go
index 3423dede1..e34a8c11b 100644
--- a/pkg/sentry/vfs/file_description_impl_util_test.go
+++ b/pkg/sentry/vfs/file_description_impl_util_test.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -157,10 +156,10 @@ func TestGenCountFD(t *testing.T) {
// Write and PWrite fails.
if _, err := fd.Write(ctx, ioseq, WriteOptions{}); !linuxerr.Equals(linuxerr.EIO, err) {
- t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO)
+ t.Errorf("Write: got err %v, wanted %v", err, linuxerr.EIO)
}
if _, err := fd.PWrite(ctx, ioseq, 0, WriteOptions{}); !linuxerr.Equals(linuxerr.EIO, err) {
- t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO)
+ t.Errorf("Write: got err %v, wanted %v", err, linuxerr.EIO)
}
}
diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go
index 088beb8e2..17d94b341 100644
--- a/pkg/sentry/vfs/inotify.go
+++ b/pkg/sentry/vfs/inotify.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/uniqueid"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -209,7 +208,7 @@ func (i *Inotify) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOpt
if i.events.Empty() {
// Nothing to read yet, tell caller to block.
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
var writeLen int64
diff --git a/pkg/sentry/vfs/lock.go b/pkg/sentry/vfs/lock.go
index cbe4d8c2d..1853cdca0 100644
--- a/pkg/sentry/vfs/lock.go
+++ b/pkg/sentry/vfs/lock.go
@@ -17,8 +17,8 @@ package vfs
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
- "gvisor.dev/gvisor/pkg/syserror"
)
// FileLocks supports POSIX and BSD style locks, which correspond to fcntl(2)
@@ -47,9 +47,9 @@ func (fl *FileLocks) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerID i
// Return an appropriate error for the unsuccessful lock attempt, depending on
// whether this is a blocking or non-blocking operation.
if block == nil {
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
- return syserror.ERESTARTSYS
+ return linuxerr.ERESTARTSYS
}
// UnlockBSD releases a BSD-style lock on the entire file.
@@ -69,9 +69,9 @@ func (fl *FileLocks) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPI
// Return an appropriate error for the unsuccessful lock attempt, depending on
// whether this is a blocking or non-blocking operation.
if block == nil {
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
- return syserror.ERESTARTSYS
+ return linuxerr.ERESTARTSYS
}
// UnlockPOSIX releases a POSIX-style lock on a file region.
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 4d6b59a26..05a416775 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -27,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/refsvfs2"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/syserror"
)
// A Mount is a replacement of a Dentry (Mount.key.point) from one Filesystem
@@ -225,7 +224,7 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr
vdDentry.mu.Unlock()
vfs.mountMu.Unlock()
vd.DecRef(ctx)
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// vd might have been mounted over between vfs.GetDentryAt() and
// vfs.mountMu.Lock().
diff --git a/pkg/sentry/vfs/pathname.go b/pkg/sentry/vfs/pathname.go
index e4da15009..7cc68a157 100644
--- a/pkg/sentry/vfs/pathname.go
+++ b/pkg/sentry/vfs/pathname.go
@@ -16,9 +16,9 @@ package vfs
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
var fspathBuilderPool = sync.Pool{
@@ -137,7 +137,7 @@ loop:
// Linux's sys_getcwd().
func (vfs *VirtualFilesystem) PathnameForGetcwd(ctx context.Context, vfsroot, vd VirtualDentry) (string, error) {
if vd.dentry.IsDead() {
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
b := getFSPathBuilder()
diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go
index 4744514bd..953d31876 100644
--- a/pkg/sentry/vfs/permissions.go
+++ b/pkg/sentry/vfs/permissions.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/limits"
- "gvisor.dev/gvisor/pkg/syserror"
)
// AccessTypes is a bitmask of Unix file permissions.
@@ -195,7 +194,7 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOpt
return err
}
if limit < int64(stat.Size) {
- return syserror.ErrExceedsFileSizeLimit
+ return linuxerr.ErrExceedsFileSizeLimit
}
}
if stat.Mask&linux.STATX_MODE != 0 {
@@ -282,7 +281,7 @@ func CheckLimit(ctx context.Context, offset, size int64) (int64, error) {
return size, nil
}
if offset >= int64(fileSizeLimit) {
- return 0, syserror.ErrExceedsFileSizeLimit
+ return 0, linuxerr.ErrExceedsFileSizeLimit
}
remaining := int64(fileSizeLimit) - offset
if remaining < size {
diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go
index 6f58f33ce..7fd7f000d 100644
--- a/pkg/sentry/vfs/resolving_path.go
+++ b/pkg/sentry/vfs/resolving_path.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// ResolvingPath represents the state of an in-progress path resolution, shared
@@ -331,7 +330,7 @@ func (rp *ResolvingPath) HandleSymlink(target string) error {
return linuxerr.ELOOP
}
if len(target) == 0 {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
rp.symlinks++
targetPath := fspath.Parse(target)
diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index eb3c60610..1b2a668c0 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -48,7 +48,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// A VirtualFilesystem (VFS for short) combines Filesystems in trees of Mounts.
@@ -281,7 +280,7 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential
if newpop.Path.Absolute {
return linuxerr.EEXIST
}
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if newpop.FollowFinalSymlink {
oldVD.DecRef(ctx)
@@ -318,7 +317,7 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia
if pop.Path.Absolute {
return linuxerr.EEXIST
}
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if pop.FollowFinalSymlink {
ctx.Warningf("VirtualFilesystem.MkdirAt: file creation paths can't follow final symlink")
@@ -348,7 +347,7 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia
}
// MknodAt creates a file of the given mode at the given path. It returns an
-// error from the syserror package.
+// error from the linuxerr package.
func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MknodOptions) error {
if !pop.Path.Begin.Ok() {
// pop.Path should not be empty in operations that create/delete files.
@@ -356,7 +355,7 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia
if pop.Path.Absolute {
return linuxerr.EEXIST
}
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if pop.FollowFinalSymlink {
ctx.Warningf("VirtualFilesystem.MknodAt: file creation paths can't follow final symlink")
@@ -494,7 +493,7 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti
if oldpop.Path.Absolute {
return linuxerr.EBUSY
}
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if oldpop.FollowFinalSymlink {
ctx.Warningf("VirtualFilesystem.RenameAt: source path can't follow final symlink")
@@ -515,7 +514,7 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti
if newpop.Path.Absolute {
return linuxerr.EBUSY
}
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if newpop.FollowFinalSymlink {
oldParentVD.DecRef(ctx)
@@ -556,7 +555,7 @@ func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentia
if pop.Path.Absolute {
return linuxerr.EBUSY
}
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if pop.FollowFinalSymlink {
ctx.Warningf("VirtualFilesystem.RmdirAt: file deletion paths can't follow final symlink")
@@ -639,7 +638,7 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent
if pop.Path.Absolute {
return linuxerr.EEXIST
}
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if pop.FollowFinalSymlink {
ctx.Warningf("VirtualFilesystem.SymlinkAt: file creation paths can't follow final symlink")
@@ -673,7 +672,7 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti
if pop.Path.Absolute {
return linuxerr.EBUSY
}
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if pop.FollowFinalSymlink {
ctx.Warningf("VirtualFilesystem.UnlinkAt: file deletion paths can't follow final symlink")
diff --git a/pkg/syserr/BUILD b/pkg/syserr/BUILD
index ceee494fc..1cd5d641d 100644
--- a/pkg/syserr/BUILD
+++ b/pkg/syserr/BUILD
@@ -14,7 +14,6 @@ go_library(
"//pkg/abi/linux/errno",
"//pkg/errors",
"//pkg/errors/linuxerr",
- "//pkg/syserror",
"//pkg/tcpip",
"@org_golang_x_sys//unix:go_default_library",
],
diff --git a/pkg/syserr/syserr.go b/pkg/syserr/syserr.go
index 558240008..a5e386e38 100644
--- a/pkg/syserr/syserr.go
+++ b/pkg/syserr/syserr.go
@@ -24,7 +24,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux/errno"
"gvisor.dev/gvisor/pkg/errors"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Error represents an internal error.
@@ -52,12 +51,12 @@ func New(message string, linuxTranslation errno.Errno) *Error {
}
e := error(unix.Errno(err.errno))
- // syserror.ErrWouldBlock gets translated to linuxerr.EWOULDBLOCK and
+ // linuxerr.ErrWouldBlock gets translated to linuxerr.EWOULDBLOCK and
// enables proper blocking semantics. This should temporary address the
// class of blocking bugs that keep popping up with the current state of
// the error space.
if err.errno == linuxerr.EWOULDBLOCK.Errno() {
- e = syserror.ErrWouldBlock
+ e = linuxerr.ErrWouldBlock
}
linuxBackwardsTranslations[err.errno] = linuxBackwardsTranslation{err: e, ok: true}
@@ -287,8 +286,14 @@ func FromError(err error) *Error {
return FromHost(unix.Errno(linuxErr.Errno()))
}
- if errno, ok := syserror.TranslateError(err); ok {
- return FromHost(errno)
- }
panic("unknown error: " + err.Error())
}
+
+// ConvertIntr converts the provided error code (err) to another one (intr) if
+// the first error corresponds to an interrupted operation.
+func ConvertIntr(err, intr error) error {
+ if err == linuxerr.ErrInterrupted {
+ return intr
+ }
+ return err
+}
diff --git a/pkg/syserror/BUILD b/pkg/syserror/BUILD
deleted file mode 100644
index 76bee5a64..000000000
--- a/pkg/syserror/BUILD
+++ /dev/null
@@ -1,10 +0,0 @@
-load("//tools:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "syserror",
- srcs = ["syserror.go"],
- visibility = ["//visibility:public"],
- deps = ["@org_golang_x_sys//unix:go_default_library"],
-)
diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go
deleted file mode 100644
index b24edb364..000000000
--- a/pkg/syserror/syserror.go
+++ /dev/null
@@ -1,180 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package syserror contains syscall error codes exported as error interface
-// instead of Errno. This allows for fast comparison and returns when the
-// comparand or return value is of type error because there is no need to
-// convert from Errno to an interface, i.e., runtime.convT2I isn't called.
-package syserror
-
-import (
- "errors"
-
- "golang.org/x/sys/unix"
-)
-
-// The following variables have the same meaning as their syscall equivalent.
-var (
- EIDRM = error(unix.EIDRM)
- EINTR = error(unix.EINTR)
- EIO = error(unix.EIO)
- EISDIR = error(unix.EISDIR)
- ENOENT = error(unix.ENOENT)
- ENOEXEC = error(unix.ENOEXEC)
- ENOMEM = error(unix.ENOMEM)
- ENOTSOCK = error(unix.ENOTSOCK)
- ENOSPC = error(unix.ENOSPC)
- ENOSYS = error(unix.ENOSYS)
-)
-
-var (
- // ErrWouldBlock is an internal error used to indicate that an operation
- // cannot be satisfied immediately, and should be retried at a later
- // time, possibly when the caller has received a notification that the
- // operation may be able to complete. It is used by implementations of
- // the kio.File interface.
- ErrWouldBlock = errors.New("request would block")
-
- // ErrInterrupted is returned if a request is interrupted before it can
- // complete.
- ErrInterrupted = errors.New("request was interrupted")
-
- // ErrExceedsFileSizeLimit is returned if a request would exceed the
- // file's size limit.
- ErrExceedsFileSizeLimit = errors.New("exceeds file size limit")
-)
-
-// errorMap is the map used to convert generic errors into errnos.
-var errorMap = map[error]unix.Errno{}
-
-// errorUnwrappers is an array of unwrap functions to extract typed errors.
-var errorUnwrappers = []func(error) (unix.Errno, bool){}
-
-// AddErrorTranslation allows modules to populate the error map by adding their
-// own translations during initialization. Returns if the error translation is
-// accepted or not. A pre-existing translation will not be overwritten by the
-// new translation.
-func AddErrorTranslation(from error, to unix.Errno) bool {
- if _, ok := errorMap[from]; ok {
- return false
- }
-
- errorMap[from] = to
- return true
-}
-
-// AddErrorUnwrapper registers an unwrap method that can extract a concrete error
-// from a typed, but not initialized, error.
-func AddErrorUnwrapper(unwrap func(e error) (unix.Errno, bool)) {
- errorUnwrappers = append(errorUnwrappers, unwrap)
-}
-
-// TranslateError translates errors to errnos, it will return false if
-// the error was not registered.
-func TranslateError(from error) (unix.Errno, bool) {
- if err, ok := errorMap[from]; ok {
- return err, true
- }
- // Try to unwrap the error if we couldn't match an error
- // exactly. This might mean that a package has its own
- // error type.
- for _, unwrap := range errorUnwrappers {
- if err, ok := unwrap(from); ok {
- return err, true
- }
- }
- return 0, false
-}
-
-// ConvertIntr converts the provided error code (err) to another one (intr) if
-// the first error corresponds to an interrupted operation.
-func ConvertIntr(err, intr error) error {
- if err == ErrInterrupted {
- return intr
- }
- return err
-}
-
-// SyscallRestartErrno represents a ERESTART* errno defined in the Linux's kernel
-// include/linux/errno.h. These errnos are never returned to userspace
-// directly, but are used to communicate the expected behavior of an
-// interrupted syscall from the syscall to signal handling.
-type SyscallRestartErrno int
-
-// These numeric values are significant because ptrace syscall exit tracing can
-// observe them.
-//
-// For all of the following errnos, if the syscall is not interrupted by a
-// signal delivered to a user handler, the syscall is restarted.
-const (
- // ERESTARTSYS is returned by an interrupted syscall to indicate that it
- // should be converted to EINTR if interrupted by a signal delivered to a
- // user handler without SA_RESTART set, and restarted otherwise.
- ERESTARTSYS = SyscallRestartErrno(512)
-
- // ERESTARTNOINTR is returned by an interrupted syscall to indicate that it
- // should always be restarted.
- ERESTARTNOINTR = SyscallRestartErrno(513)
-
- // ERESTARTNOHAND is returned by an interrupted syscall to indicate that it
- // should be converted to EINTR if interrupted by a signal delivered to a
- // user handler, and restarted otherwise.
- ERESTARTNOHAND = SyscallRestartErrno(514)
-
- // ERESTART_RESTARTBLOCK is returned by an interrupted syscall to indicate
- // that it should be restarted using a custom function. The interrupted
- // syscall must register a custom restart function by calling
- // Task.SetRestartSyscallFn.
- ERESTART_RESTARTBLOCK = SyscallRestartErrno(516)
-)
-
-// Error implements error.Error.
-func (e SyscallRestartErrno) Error() string {
- // Descriptions are borrowed from strace.
- switch e {
- case ERESTARTSYS:
- return "to be restarted if SA_RESTART is set"
- case ERESTARTNOINTR:
- return "to be restarted"
- case ERESTARTNOHAND:
- return "to be restarted if no handler"
- case ERESTART_RESTARTBLOCK:
- return "interrupted by signal"
- default:
- return "(unknown interrupt error)"
- }
-}
-
-// SyscallRestartErrnoFromReturn returns the SyscallRestartErrno represented by
-// rv, the value in a syscall return register.
-func SyscallRestartErrnoFromReturn(rv uintptr) (SyscallRestartErrno, bool) {
- switch int(rv) {
- case -int(ERESTARTSYS):
- return ERESTARTSYS, true
- case -int(ERESTARTNOINTR):
- return ERESTARTNOINTR, true
- case -int(ERESTARTNOHAND):
- return ERESTARTNOHAND, true
- case -int(ERESTART_RESTARTBLOCK):
- return ERESTART_RESTARTBLOCK, true
- default:
- return 0, false
- }
-}
-
-func init() {
- AddErrorTranslation(ErrWouldBlock, unix.EWOULDBLOCK)
- AddErrorTranslation(ErrInterrupted, unix.EINTR)
- AddErrorTranslation(ErrExceedsFileSizeLimit, unix.EFBIG)
-}
diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go
index e0dfe5813..2f34bf8dd 100644
--- a/pkg/tcpip/checker/checker.go
+++ b/pkg/tcpip/checker/checker.go
@@ -729,7 +729,7 @@ func TCPTimestampChecker(wantTS bool, wantTSVal uint32, wantTSEcr uint32) Transp
return
}
l := int(opts[i+1])
- if i < 2 || i+l > limit {
+ if l < 2 || i+l > limit {
return
}
i += l
diff --git a/pkg/tcpip/header/eth.go b/pkg/tcpip/header/eth.go
index 95ade0e5c..1f18213e5 100644
--- a/pkg/tcpip/header/eth.go
+++ b/pkg/tcpip/header/eth.go
@@ -49,9 +49,9 @@ const (
// EthernetAddressSize is the size, in bytes, of an ethernet address.
EthernetAddressSize = 6
- // unspecifiedEthernetAddress is the unspecified ethernet address
+ // UnspecifiedEthernetAddress is the unspecified ethernet address
// (all bits set to 0).
- unspecifiedEthernetAddress = tcpip.LinkAddress("\x00\x00\x00\x00\x00\x00")
+ UnspecifiedEthernetAddress = tcpip.LinkAddress("\x00\x00\x00\x00\x00\x00")
// EthernetBroadcastAddress is an ethernet address that addresses every node
// on a local link.
@@ -134,7 +134,7 @@ func IsValidUnicastEthernetAddress(addr tcpip.LinkAddress) bool {
return false
}
- if addr == unspecifiedEthernetAddress {
+ if addr == UnspecifiedEthernetAddress {
return false
}
diff --git a/pkg/tcpip/header/eth_test.go b/pkg/tcpip/header/eth_test.go
index bf9ccbf1a..adc04e855 100644
--- a/pkg/tcpip/header/eth_test.go
+++ b/pkg/tcpip/header/eth_test.go
@@ -44,7 +44,7 @@ func TestIsValidUnicastEthernetAddress(t *testing.T) {
},
{
"Unspecified",
- unspecifiedEthernetAddress,
+ UnspecifiedEthernetAddress,
false,
},
{
@@ -91,7 +91,7 @@ func TestIsMulticastEthernetAddress(t *testing.T) {
},
{
"Unspecified",
- unspecifiedEthernetAddress,
+ UnspecifiedEthernetAddress,
false,
},
{
diff --git a/pkg/tcpip/link/ethernet/ethernet.go b/pkg/tcpip/link/ethernet/ethernet.go
index b427c6170..b9db273d0 100644
--- a/pkg/tcpip/link/ethernet/ethernet.go
+++ b/pkg/tcpip/link/ethernet/ethernet.go
@@ -42,6 +42,14 @@ type Endpoint struct {
nested.Endpoint
}
+// LinkAddress implements stack.LinkEndpoint.
+func (e *Endpoint) LinkAddress() tcpip.LinkAddress {
+ if l := e.Endpoint.LinkAddress(); len(l) != 0 {
+ return l
+ }
+ return header.UnspecifiedEthernetAddress
+}
+
// DeliverNetworkPacket implements stack.NetworkDispatcher.
func (e *Endpoint) DeliverNetworkPacket(_, _ tcpip.LinkAddress, _ tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
hdr, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize)
@@ -57,18 +65,22 @@ func (e *Endpoint) DeliverNetworkPacket(_, _ tcpip.LinkAddress, _ tcpip.NetworkP
// Capabilities implements stack.LinkEndpoint.
func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities {
- return stack.CapabilityResolutionRequired | e.Endpoint.Capabilities()
+ c := e.Endpoint.Capabilities()
+ if c&stack.CapabilityLoopback == 0 {
+ c |= stack.CapabilityResolutionRequired
+ }
+ return c
}
// WritePacket implements stack.LinkEndpoint.
func (e *Endpoint) WritePacket(r stack.RouteInfo, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error {
- e.AddHeader(e.Endpoint.LinkAddress(), r.RemoteLinkAddress, proto, pkt)
+ e.AddHeader(e.LinkAddress(), r.RemoteLinkAddress, proto, pkt)
return e.Endpoint.WritePacket(r, proto, pkt)
}
// WritePackets implements stack.LinkEndpoint.
func (e *Endpoint) WritePackets(r stack.RouteInfo, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, tcpip.Error) {
- linkAddr := e.Endpoint.LinkAddress()
+ linkAddr := e.LinkAddress()
for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
e.AddHeader(linkAddr, r.RemoteLinkAddress, proto, pkt)
@@ -83,7 +95,10 @@ func (e *Endpoint) MaxHeaderLength() uint16 {
}
// ARPHardwareType implements stack.LinkEndpoint.
-func (*Endpoint) ARPHardwareType() header.ARPHardwareType {
+func (e *Endpoint) ARPHardwareType() header.ARPHardwareType {
+ if a := e.Endpoint.ARPHardwareType(); a != header.ARPHardwareNone {
+ return a
+ }
return header.ARPHardwareEther
}
diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD
index 4758a99ad..c3e4c3455 100644
--- a/pkg/tcpip/link/tun/BUILD
+++ b/pkg/tcpip/link/tun/BUILD
@@ -31,7 +31,6 @@ go_library(
"//pkg/refs",
"//pkg/refsvfs2",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go
index d23210503..fa2131c28 100644
--- a/pkg/tcpip/link/tun/device.go
+++ b/pkg/tcpip/link/tun/device.go
@@ -20,7 +20,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -174,7 +173,7 @@ func (d *Device) Write(data []byte) (int64, error) {
return 0, linuxerr.EBADFD
}
if !endpoint.IsAttached() {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
dataLen := int64(len(data))
@@ -249,7 +248,7 @@ func (d *Device) Read() ([]byte, error) {
for {
info, ok := endpoint.Read()
if !ok {
- return nil, syserror.ErrWouldBlock
+ return nil, linuxerr.ErrWouldBlock
}
v, ok := d.encodePkt(&info)
diff --git a/pkg/tcpip/socketops.go b/pkg/tcpip/socketops.go
index 6bce3af04..34ac62444 100644
--- a/pkg/tcpip/socketops.go
+++ b/pkg/tcpip/socketops.go
@@ -57,6 +57,11 @@ type SocketOptionsHandler interface {
// OnSetReceiveBufferSize is invoked by SO_RCVBUF and SO_RCVBUFFORCE.
OnSetReceiveBufferSize(v, oldSz int64) (newSz int64)
+
+ // WakeupWriters is invoked when the send buffer size for an endpoint is
+ // changed. The handler notifies the writers if the send buffer size is
+ // increased with setsockopt(2) for TCP endpoints.
+ WakeupWriters()
}
// DefaultSocketOptionsHandler is an embeddable type that implements no-op
@@ -98,6 +103,9 @@ func (*DefaultSocketOptionsHandler) OnSetSendBufferSize(v int64) (newSz int64) {
return v
}
+// WakeupWriters implements SocketOptionsHandler.WakeupWriters.
+func (*DefaultSocketOptionsHandler) WakeupWriters() {}
+
// OnSetReceiveBufferSize implements SocketOptionsHandler.OnSetReceiveBufferSize.
func (*DefaultSocketOptionsHandler) OnSetReceiveBufferSize(v, oldSz int64) (newSz int64) {
return v
@@ -626,6 +634,9 @@ func (so *SocketOptions) SetSendBufferSize(sendBufferSize int64, notify bool) {
sendBufferSize = so.handler.OnSetSendBufferSize(sendBufferSize)
}
so.sendBufferSize.Store(sendBufferSize)
+ if notify {
+ so.handler.WakeupWriters()
+ }
}
// GetReceiveBufferSize gets value for SO_RCVBUF option.
diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go
index 9192d8433..29c22bfd4 100644
--- a/pkg/tcpip/stack/packet_buffer.go
+++ b/pkg/tcpip/stack/packet_buffer.go
@@ -282,14 +282,12 @@ func (pk *PacketBuffer) headerView(typ headerType) tcpipbuffer.View {
return v
}
-// Clone makes a shallow copy of pk.
-//
-// Clone should be called in such cases so that no modifications is done to
-// underlying packet payload.
+// Clone makes a semi-deep copy of pk. The underlying packet payload is
+// shared. Hence, no modifications is done to underlying packet payload.
func (pk *PacketBuffer) Clone() *PacketBuffer {
return &PacketBuffer{
PacketBufferEntry: pk.PacketBufferEntry,
- buf: pk.buf,
+ buf: pk.buf.Clone(),
reserved: pk.reserved,
pushed: pk.pushed,
consumed: pk.consumed,
@@ -321,14 +319,14 @@ func (pk *PacketBuffer) Network() header.Network {
}
}
-// CloneToInbound makes a shallow copy of the packet buffer to be used as an
-// inbound packet.
+// CloneToInbound makes a semi-deep copy of the packet buffer (similar to
+// Clone) to be used as an inbound packet.
//
// See PacketBuffer.Data for details about how a packet buffer holds an inbound
// packet.
func (pk *PacketBuffer) CloneToInbound() *PacketBuffer {
newPk := &PacketBuffer{
- buf: pk.buf,
+ buf: pk.buf.Clone(),
// Treat unfilled header portion as reserved.
reserved: pk.AvailableHeaderBytes(),
}
diff --git a/pkg/tcpip/stack/packet_buffer_test.go b/pkg/tcpip/stack/packet_buffer_test.go
index a8da34992..87b023445 100644
--- a/pkg/tcpip/stack/packet_buffer_test.go
+++ b/pkg/tcpip/stack/packet_buffer_test.go
@@ -123,6 +123,32 @@ func TestPacketHeaderPush(t *testing.T) {
}
}
+func TestPacketBufferClone(t *testing.T) {
+ data := concatViews(makeView(20), makeView(30), makeView(40))
+ pk := NewPacketBuffer(PacketBufferOptions{
+ // Make a copy of data to make sure our truth data won't be taint by
+ // PacketBuffer.
+ Data: buffer.NewViewFromBytes(data).ToVectorisedView(),
+ })
+
+ bytesToDelete := 30
+ originalSize := data.Size()
+
+ clonedPks := []*PacketBuffer{
+ pk.Clone(),
+ pk.CloneToInbound(),
+ }
+ pk.Data().DeleteFront(bytesToDelete)
+ if got, want := pk.Data().Size(), originalSize-bytesToDelete; got != want {
+ t.Errorf("original packet was not changed: size expected = %d, got = %d", want, got)
+ }
+ for _, clonedPk := range clonedPks {
+ if got := clonedPk.Data().Size(); got != originalSize {
+ t.Errorf("cloned packet should not be modified: expected size = %d, got = %d", originalSize, got)
+ }
+ }
+}
+
func TestPacketHeaderConsume(t *testing.T) {
for _, test := range []struct {
name string
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index c73890c4c..8e5c6edbf 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -119,8 +119,7 @@ type Stack struct {
// by the stack.
icmpRateLimiter *ICMPRateLimiter
- // seed is a one-time random value initialized at stack startup
- // and is used to seed the TCP port picking on active connections
+ // seed is a one-time random value initialized at stack startup.
//
// TODO(gvisor.dev/issue/940): S/R this field.
seed uint32
@@ -161,6 +160,10 @@ type Stack struct {
// This is required to prevent potential ACK loops.
// Setting this to 0 will disable all rate limiting.
tcpInvalidRateLimit time.Duration
+
+ // tsOffsetSecret is the secret key for generating timestamp offsets
+ // initialized at stack startup.
+ tsOffsetSecret uint32
}
// UniqueID is an abstract generator of unique identifiers.
@@ -384,6 +387,7 @@ func New(opts Options) *Stack {
Max: DefaultMaxBufferSize,
},
tcpInvalidRateLimit: defaultTCPInvalidRateLimit,
+ tsOffsetSecret: randomGenerator.Uint32(),
}
// Add specified network protocols.
@@ -1819,14 +1823,6 @@ func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocol
return nic.setNUDConfigs(proto, c)
}
-// Seed returns a 32 bit value that can be used as a seed value for port
-// picking, ISN generation etc.
-//
-// NOTE: The seed is generated once during stack initialization only.
-func (s *Stack) Seed() uint32 {
- return s.seed
-}
-
// Rand returns a reference to a pseudo random generator that can be used
// to generate random numbers as required.
func (s *Stack) Rand() *rand.Rand {
diff --git a/pkg/tcpip/stack/tcp.go b/pkg/tcpip/stack/tcp.go
index 90a8ba6cf..93ea83cdc 100644
--- a/pkg/tcpip/stack/tcp.go
+++ b/pkg/tcpip/stack/tcp.go
@@ -386,6 +386,12 @@ type TCPSndBufState struct {
// SndMTU is the smallest MTU seen in the control packets received.
SndMTU int
+
+ // AutoTuneSndBufDisabled indicates that the auto tuning of send buffer
+ // is disabled.
+ //
+ // Must be accessed using atomic operations.
+ AutoTuneSndBufDisabled uint32
}
// TCPEndpointStateInner contains the members of TCPEndpointState used directly
diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go
index dda57e225..824cf6526 100644
--- a/pkg/tcpip/stack/transport_demuxer.go
+++ b/pkg/tcpip/stack/transport_demuxer.go
@@ -479,7 +479,7 @@ func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocol
if !ok {
epsByNIC = &endpointsByNIC{
endpoints: make(map[tcpip.NICID]*multiPortEndpoint),
- seed: d.stack.Seed(),
+ seed: d.stack.seed,
}
}
if err := epsByNIC.registerEndpoint(d, netProto, protocol, ep, flags, bindToDevice); err != nil {
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index b3d8951ff..55854ba59 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -321,28 +321,26 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
}
defer route.Release()
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: int(route.MaxHeaderLength()),
+ Data: buffer.View(payloadBytes).ToVectorisedView(),
+ })
+ pkt.Owner = owner
+
if e.ops.GetHeaderIncluded() {
- pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- Data: buffer.View(payloadBytes).ToVectorisedView(),
- })
if err := route.WriteHeaderIncludedPacket(pkt); err != nil {
return 0, err
}
- } else {
- pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- ReserveHeaderBytes: int(route.MaxHeaderLength()),
- Data: buffer.View(payloadBytes).ToVectorisedView(),
- })
- pkt.Owner = owner
- if err := route.WritePacket(stack.NetworkHeaderParams{
- Protocol: e.TransProto,
- TTL: route.DefaultTTL(),
- TOS: stack.DefaultTOS,
- }, pkt); err != nil {
- return 0, err
- }
+ return int64(len(payloadBytes)), nil
}
+ if err := route.WritePacket(stack.NetworkHeaderParams{
+ Protocol: e.TransProto,
+ TTL: route.DefaultTTL(),
+ TOS: stack.DefaultTOS,
+ }, pkt); err != nil {
+ return 0, err
+ }
return int64(len(payloadBytes)), nil
}
diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index 8436d2cf0..c3922bbe5 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -96,6 +96,7 @@ go_test(
"//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/checker",
+ "//pkg/tcpip/faketime",
"//pkg/tcpip/header",
"//pkg/tcpip/link/loopback",
"//pkg/tcpip/link/sniffer",
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go
index aa413ad05..9560ed43c 100644
--- a/pkg/tcpip/transport/tcp/accept.go
+++ b/pkg/tcpip/transport/tcp/accept.go
@@ -72,7 +72,8 @@ func encodeMSS(mss uint16) uint32 {
// and must not be accessed or have its methods called concurrently as they
// may mutate the stored objects.
type listenContext struct {
- stack *stack.Stack
+ stack *stack.Stack
+ protocol *protocol
// rcvWnd is the receive window that is sent by this listening context
// in the initial SYN-ACK.
@@ -119,9 +120,10 @@ func timeStamp(clock tcpip.Clock) uint32 {
}
// newListenContext creates a new listen context.
-func newListenContext(stk *stack.Stack, listenEP *endpoint, rcvWnd seqnum.Size, v6Only bool, netProto tcpip.NetworkProtocolNumber) *listenContext {
+func newListenContext(stk *stack.Stack, protocol *protocol, listenEP *endpoint, rcvWnd seqnum.Size, v6Only bool, netProto tcpip.NetworkProtocolNumber) *listenContext {
l := &listenContext{
stack: stk,
+ protocol: protocol,
rcvWnd: rcvWnd,
hasher: sha1.New(),
v6Only: v6Only,
@@ -213,7 +215,7 @@ func (l *listenContext) createConnectingEndpoint(s *segment, rcvdSynOpts *header
return nil, err
}
- n := newEndpoint(l.stack, netProto, queue)
+ n := newEndpoint(l.stack, l.protocol, netProto, queue)
n.ops.SetV6Only(l.v6Only)
n.TransportEndpointInfo.ID = s.id
n.boundNICID = s.nicID
@@ -247,7 +249,7 @@ func (l *listenContext) createConnectingEndpoint(s *segment, rcvdSynOpts *header
func (l *listenContext) startHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*handshake, tcpip.Error) {
// Create new endpoint.
irs := s.sequenceNumber
- isn := generateSecureISN(s.id, l.stack.Clock(), l.stack.Seed())
+ isn := generateSecureISN(s.id, l.stack.Clock(), l.protocol.seqnumSecret)
ep, err := l.createConnectingEndpoint(s, opts, queue)
if err != nil {
return nil, err
@@ -600,7 +602,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
synOpts := header.TCPSynOptions{
WS: -1,
TS: opts.TS,
- TSVal: tcpTimeStamp(e.stack.Clock().NowMonotonic(), timeStampOffset(e.stack.Rand())),
+ TSVal: tcpTimeStamp(e.stack.Clock().NowMonotonic(), timeStampOffset(e.protocol.tsOffsetSecret, s.dstAddr, s.srcAddr)),
TSEcr: opts.TSVal,
MSS: calculateAdvertisedMSS(e.userMSS, route),
}
@@ -726,24 +728,24 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
n.isRegistered = true
- // clear the tsOffset for the newly created
- // endpoint as the Timestamp was already
- // randomly offset when the original SYN-ACK was
- // sent above.
- n.TSOffset = 0
+ // Reset the tsOffset for the newly created endpoint to the one
+ // that we have used in SYN-ACK in order to calculate RTT.
+ n.TSOffset = timeStampOffset(e.protocol.tsOffsetSecret, s.dstAddr, s.srcAddr)
// Switch state to connected.
n.isConnectNotified = true
- n.transitionToStateEstablishedLocked(&handshake{
- ep: n,
- iss: iss,
- ackNum: irs + 1,
- rcvWnd: seqnum.Size(n.initialReceiveWindow()),
- sndWnd: s.window,
- rcvWndScale: e.rcvWndScaleForHandshake(),
- sndWndScale: rcvdSynOptions.WS,
- mss: rcvdSynOptions.MSS,
- })
+ h := &handshake{
+ ep: n,
+ iss: iss,
+ ackNum: irs + 1,
+ rcvWnd: seqnum.Size(n.initialReceiveWindow()),
+ sndWnd: s.window,
+ rcvWndScale: e.rcvWndScaleForHandshake(),
+ sndWndScale: rcvdSynOptions.WS,
+ mss: rcvdSynOptions.MSS,
+ sampleRTTWithTSOnly: true,
+ }
+ h.transitionToStateEstablishedLocked(s)
// Requeue the segment if the ACK completing the handshake has more info
// to be procesed by the newly established endpoint.
@@ -779,7 +781,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) {
e.mu.Lock()
v6Only := e.ops.GetV6Only()
- ctx := newListenContext(e.stack, e, rcvWnd, v6Only, e.NetProto)
+ ctx := newListenContext(e.stack, e.protocol, e, rcvWnd, v6Only, e.NetProto)
defer func() {
// Mark endpoint as closed. This will prevent goroutines running
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 93ed161f9..f85775a48 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -105,6 +105,11 @@ type handshake struct {
// sendSYNOpts is the cached values for the SYN options to be sent.
sendSYNOpts header.TCPSynOptions
+
+ // sampleRTTWithTSOnly is true when the segment was retransmitted or we can't
+ // tell; then RTT can only be sampled when the incoming segment has timestamp
+ // options enabled.
+ sampleRTTWithTSOnly bool
}
func (e *endpoint) newHandshake() *handshake {
@@ -117,6 +122,8 @@ func (e *endpoint) newHandshake() *handshake {
h.resetState()
// Store reference to handshake state in endpoint.
e.h = h
+ // By the time handshake is created, e.ID is already initialized.
+ e.TSOffset = timeStampOffset(e.protocol.tsOffsetSecret, e.ID.LocalAddress, e.ID.RemoteAddress)
return h
}
@@ -150,7 +157,7 @@ func (h *handshake) resetState() {
h.flags = header.TCPFlagSyn
h.ackNum = 0
h.mss = 0
- h.iss = generateSecureISN(h.ep.TransportEndpointInfo.ID, h.ep.stack.Clock(), h.ep.stack.Seed())
+ h.iss = generateSecureISN(h.ep.TransportEndpointInfo.ID, h.ep.stack.Clock(), h.ep.protocol.seqnumSecret)
}
// generateSecureISN generates a secure Initial Sequence number based on the
@@ -266,8 +273,7 @@ func (h *handshake) synSentState(s *segment) tcpip.Error {
// and the handshake is completed.
if s.flags.Contains(header.TCPFlagAck) {
h.state = handshakeCompleted
-
- h.ep.transitionToStateEstablishedLocked(h)
+ h.transitionToStateEstablishedLocked(s)
h.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck, h.iss+1, h.ackNum, h.rcvWnd>>h.effectiveRcvWndScale())
return nil
@@ -402,9 +408,10 @@ func (h *handshake) synRcvdState(s *segment) tcpip.Error {
if h.ep.SendTSOk && s.parsedOptions.TS {
h.ep.updateRecentTimestamp(s.parsedOptions.TSVal, h.ackNum, s.sequenceNumber)
}
+
h.state = handshakeCompleted
- h.ep.transitionToStateEstablishedLocked(h)
+ h.transitionToStateEstablishedLocked(s)
// Requeue the segment if the ACK completing the handshake has more info
// to be procesed by the newly established endpoint.
@@ -557,6 +564,10 @@ func (h *handshake) complete() tcpip.Error {
ack: h.ackNum,
rcvWnd: h.rcvWnd,
}, h.sendSYNOpts)
+ // If we have ever retransmitted the SYN-ACK or
+ // SYN segment, we should only measure RTT if
+ // TS option is present.
+ h.sampleRTTWithTSOnly = true
}
case wakerForNotification:
@@ -600,6 +611,38 @@ func (h *handshake) complete() tcpip.Error {
return nil
}
+// transitionToStateEstablisedLocked transitions the endpoint of the handshake
+// to an established state given the last segment received from peer. It also
+// initializes sender/receiver.
+func (h *handshake) transitionToStateEstablishedLocked(s *segment) {
+ // Transfer handshake state to TCP connection. We disable
+ // receive window scaling if the peer doesn't support it
+ // (indicated by a negative send window scale).
+ h.ep.snd = newSender(h.ep, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale)
+
+ var rtt time.Duration
+ if h.ep.SendTSOk && s.parsedOptions.TSEcr != 0 {
+ rtt = time.Duration(h.ep.timestamp()-s.parsedOptions.TSEcr) * time.Millisecond
+ }
+ if !h.sampleRTTWithTSOnly && rtt == 0 {
+ rtt = h.ep.stack.Clock().NowMonotonic().Sub(h.startTime)
+ }
+
+ if rtt > 0 {
+ h.ep.snd.updateRTO(rtt)
+ }
+
+ h.ep.rcvQueueInfo.rcvQueueMu.Lock()
+ h.ep.rcv = newReceiver(h.ep, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale())
+ // Bootstrap the auto tuning algorithm. Starting at zero will
+ // result in a really large receive window after the first auto
+ // tuning adjustment.
+ h.ep.rcvQueueInfo.RcvAutoParams.PrevCopiedBytes = int(h.rcvWnd)
+ h.ep.rcvQueueInfo.rcvQueueMu.Unlock()
+
+ h.ep.setEndpointState(StateEstablished)
+}
+
type backoffTimer struct {
timeout time.Duration
maxTimeout time.Duration
@@ -965,26 +1008,6 @@ func (e *endpoint) completeWorkerLocked() {
}
}
-// transitionToStateEstablisedLocked transitions a given endpoint
-// to an established state using the handshake parameters provided.
-// It also initializes sender/receiver.
-func (e *endpoint) transitionToStateEstablishedLocked(h *handshake) {
- // Transfer handshake state to TCP connection. We disable
- // receive window scaling if the peer doesn't support it
- // (indicated by a negative send window scale).
- e.snd = newSender(e, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale)
-
- e.rcvQueueInfo.rcvQueueMu.Lock()
- e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale())
- // Bootstrap the auto tuning algorithm. Starting at zero will
- // result in a really large receive window after the first auto
- // tuning adjustment.
- e.rcvQueueInfo.RcvAutoParams.PrevCopiedBytes = int(h.rcvWnd)
- e.rcvQueueInfo.rcvQueueMu.Unlock()
-
- e.setEndpointState(StateEstablished)
-}
-
// transitionToStateCloseLocked ensures that the endpoint is
// cleaned up from the transport demuxer, "before" moving to
// StateClose. This will ensure that no packet will be
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 044123185..4937d126f 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -20,7 +20,6 @@ import (
"fmt"
"io"
"math"
- "math/rand"
"runtime"
"strings"
"sync/atomic"
@@ -378,6 +377,7 @@ type endpoint struct {
// The following fields are initialized at creation time and do not
// change throughout the lifetime of the endpoint.
stack *stack.Stack `state:"manual"`
+ protocol *protocol `state:"manual"`
waiterQueue *waiter.Queue `state:"wait"`
uniqueID uint64
@@ -803,9 +803,10 @@ type keepalive struct {
waker sleep.Waker `state:"nosave"`
}
-func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) *endpoint {
+func newEndpoint(s *stack.Stack, protocol *protocol, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) *endpoint {
e := &endpoint{
- stack: s,
+ stack: s,
+ protocol: protocol,
TransportEndpointInfo: stack.TransportEndpointInfo{
NetProto: netProto,
TransProto: header.TCPProtocolNumber,
@@ -874,7 +875,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
}
e.segmentQueue.ep = e
- e.TSOffset = timeStampOffset(e.stack.Rand())
+
e.acceptCond = sync.NewCond(&e.acceptMu)
e.keepalive.timer.init(e.stack.Clock(), &e.keepalive.waker)
@@ -1717,6 +1718,27 @@ func (e *endpoint) OnSetReceiveBufferSize(rcvBufSz, oldSz int64) (newSz int64) {
return rcvBufSz
}
+// OnSetSendBufferSize implements tcpip.SocketOptionsHandler.OnSetSendBufferSize.
+func (e *endpoint) OnSetSendBufferSize(sz int64) int64 {
+ atomic.StoreUint32(&e.sndQueueInfo.TCPSndBufState.AutoTuneSndBufDisabled, 1)
+ return sz
+}
+
+// WakeupWriters implements tcpip.SocketOptionsHandler.WakeupWriters.
+func (e *endpoint) WakeupWriters() {
+ e.LockUser()
+ defer e.UnlockUser()
+
+ sendBufferSize := e.getSendBufferSize()
+ e.sndQueueInfo.sndQueueMu.Lock()
+ notify := (sendBufferSize - e.sndQueueInfo.SndBufUsed) >= e.sndQueueInfo.SndBufUsed>>1
+ e.sndQueueInfo.sndQueueMu.Unlock()
+
+ if notify {
+ e.waiterQueue.Notify(waiter.WritableEvents)
+ }
+}
+
// SetSockOptInt sets a socket option.
func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
// Lower 2 bits represents ECN bits. RFC 3168, section 23.1
@@ -2177,7 +2199,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) tcp
portBuf := make([]byte, 2)
binary.LittleEndian.PutUint16(portBuf, e.ID.RemotePort)
- h := jenkins.Sum32(e.stack.Seed())
+ h := jenkins.Sum32(e.protocol.portOffsetSecret)
for _, s := range [][]byte{
[]byte(e.ID.LocalAddress),
[]byte(e.ID.RemoteAddress),
@@ -2329,6 +2351,9 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) tcp
e.segmentQueue.mu.Unlock()
e.snd.updateMaxPayloadSize(int(e.route.MTU()), 0)
e.setEndpointState(StateEstablished)
+ // Set the new auto tuned send buffer size after entering
+ // established state.
+ e.ops.SetSendBufferSize(e.computeTCPSendBufferSize(), false /* notify */)
}
if run {
@@ -2763,13 +2788,20 @@ func (e *endpoint) updateSndBufferUsage(v int) {
e.sndQueueInfo.sndQueueMu.Lock()
notify := e.sndQueueInfo.SndBufUsed >= sendBufferSize>>1
e.sndQueueInfo.SndBufUsed -= v
+
+ // Get the new send buffer size with auto tuning, but do not set it
+ // unless we decide to notify the writers.
+ newSndBufSz := e.computeTCPSendBufferSize()
+
// We only notify when there is half the sendBufferSize available after
// a full buffer event occurs. This ensures that we don't wake up
// writers to queue just 1-2 segments and go back to sleep.
- notify = notify && e.sndQueueInfo.SndBufUsed < sendBufferSize>>1
+ notify = notify && e.sndQueueInfo.SndBufUsed < int(newSndBufSz)>>1
e.sndQueueInfo.sndQueueMu.Unlock()
if notify {
+ // Set the new send buffer size calculated from auto tuning.
+ e.ops.SetSendBufferSize(newSndBufSz, false /* notify */)
e.waiterQueue.Notify(waiter.WritableEvents)
}
}
@@ -2896,17 +2928,22 @@ func tcpTimeStamp(curTime tcpip.MonotonicTime, offset uint32) uint32 {
// timeStampOffset returns a randomized timestamp offset to be used when sending
// timestamp values in a timestamp option for a TCP segment.
-func timeStampOffset(rng *rand.Rand) uint32 {
+func timeStampOffset(secret uint32, src, dst tcpip.Address) uint32 {
// Initialize a random tsOffset that will be added to the recentTS
// everytime the timestamp is sent when the Timestamp option is enabled.
//
// See https://tools.ietf.org/html/rfc7323#section-5.4 for details on
// why this is required.
//
- // NOTE: This is not completely to spec as normally this should be
- // initialized in a manner analogous to how sequence numbers are
- // randomized per connection basis. But for now this is sufficient.
- return rng.Uint32()
+ // TODO(https://gvisor.dev/issues/6473): This is not really secure as
+ // it does not use the recommended algorithm linked above.
+ h := jenkins.Sum32(secret)
+ // Per hash.Hash.Writer:
+ //
+ // It never returns an error.
+ _, _ = h.Write([]byte(src))
+ _, _ = h.Write([]byte(dst))
+ return h.Sum32()
}
// maybeEnableSACKPermitted marks the SACKPermitted option enabled for this endpoint
@@ -3091,3 +3128,36 @@ func GetTCPReceiveBufferLimits(s tcpip.StackHandler) tcpip.ReceiveBufferSizeOpti
Max: ss.Max,
}
}
+
+// computeTCPSendBufferSize implements auto tuning of send buffer size and
+// returns the new send buffer size.
+func (e *endpoint) computeTCPSendBufferSize() int64 {
+ curSndBufSz := int64(e.getSendBufferSize())
+
+ // Auto tuning is disabled when the user explicitly sets the send
+ // buffer size with SO_SNDBUF option.
+ if disabled := atomic.LoadUint32(&e.sndQueueInfo.TCPSndBufState.AutoTuneSndBufDisabled); disabled == 1 {
+ return curSndBufSz
+ }
+
+ const packetOverheadFactor = 2
+ curMSS := e.snd.MaxPayloadSize
+ numSeg := InitialCwnd
+ if numSeg < e.snd.SndCwnd {
+ numSeg = e.snd.SndCwnd
+ }
+
+ // SndCwnd indicates the number of segments that can be sent. This means
+ // that the sender can send upto #SndCwnd segments and the send buffer
+ // size should be set to SndCwnd*MSS to accommodate sending of all the
+ // segments.
+ newSndBufSz := int64(numSeg * curMSS * packetOverheadFactor)
+ if newSndBufSz < curSndBufSz {
+ return curSndBufSz
+ }
+ if ss := GetTCPSendBufferLimits(e.stack); int64(ss.Max) < newSndBufSz {
+ newSndBufSz = int64(ss.Max)
+ }
+
+ return newSndBufSz
+}
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index 952ccacdd..f2e8b3840 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -170,6 +170,7 @@ func (e *endpoint) Resume(s *stack.Stack) {
snd.probeTimer.init(s.Clock(), &snd.probeWaker)
}
e.stack = s
+ e.protocol = protocolFromStack(s)
e.ops.InitHandler(e, e.stack, GetTCPSendBufferLimits, GetTCPReceiveBufferLimits)
e.segmentQueue.thaw()
epState := EndpointState(e.origEndpointState)
diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go
index 2e709ed78..78745ea86 100644
--- a/pkg/tcpip/transport/tcp/forwarder.go
+++ b/pkg/tcpip/transport/tcp/forwarder.go
@@ -54,7 +54,7 @@ func NewForwarder(s *stack.Stack, rcvWnd, maxInFlight int, handler func(*Forward
maxInFlight: maxInFlight,
handler: handler,
inFlight: make(map[stack.TransportEndpointID]struct{}),
- listen: newListenContext(s, nil /* listenEP */, seqnum.Size(rcvWnd), true, 0),
+ listen: newListenContext(s, protocolFromStack(s), nil /* listenEP */, seqnum.Size(rcvWnd), true, 0),
}
}
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 18b834243..00a083dbe 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -96,6 +96,11 @@ type protocol struct {
maxRetries uint32
synRetries uint8
dispatcher dispatcher
+
+ // The following secrets are initialized once and stay unchanged after.
+ seqnumSecret uint32
+ portOffsetSecret uint32
+ tsOffsetSecret uint32
}
// Number returns the tcp protocol number.
@@ -105,7 +110,7 @@ func (*protocol) Number() tcpip.TransportProtocolNumber {
// NewEndpoint creates a new tcp endpoint.
func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
- return newEndpoint(p.stack, netProto, waiterQueue), nil
+ return newEndpoint(p.stack, p, netProto, waiterQueue), nil
}
// NewRawEndpoint creates a new raw TCP endpoint. Raw TCP sockets are currently
@@ -292,22 +297,26 @@ func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) tcpip
case *tcpip.TCPMinRTOOption:
p.mu.Lock()
+ defer p.mu.Unlock()
if *v < 0 {
p.minRTO = MinRTO
+ } else if minRTO := time.Duration(*v); minRTO <= p.maxRTO {
+ p.minRTO = minRTO
} else {
- p.minRTO = time.Duration(*v)
+ return &tcpip.ErrInvalidOptionValue{}
}
- p.mu.Unlock()
return nil
case *tcpip.TCPMaxRTOOption:
p.mu.Lock()
+ defer p.mu.Unlock()
if *v < 0 {
p.maxRTO = MaxRTO
+ } else if maxRTO := time.Duration(*v); maxRTO >= p.minRTO {
+ p.maxRTO = maxRTO
} else {
- p.maxRTO = time.Duration(*v)
+ return &tcpip.ErrInvalidOptionValue{}
}
- p.mu.Unlock()
return nil
case *tcpip.TCPMaxRetriesOption:
@@ -479,7 +488,15 @@ func NewProtocol(s *stack.Stack) stack.TransportProtocol {
maxRTO: MaxRTO,
maxRetries: MaxRetries,
recovery: tcpip.TCPRACKLossDetection,
+ seqnumSecret: s.Rand().Uint32(),
+ portOffsetSecret: s.Rand().Uint32(),
+ tsOffsetSecret: s.Rand().Uint32(),
}
p.dispatcher.init(s.Rand(), runtime.GOMAXPROCS(0))
return &p
}
+
+// protocolFromStack retrieves the tcp.protocol instance from stack s.
+func protocolFromStack(s *stack.Stack) *protocol {
+ return s.TransportProtocolInstance(ProtocolNumber).(*protocol)
+}
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 92a66f17e..a1f1c4e59 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -382,6 +382,9 @@ func (s *sender) updateRTO(rtt time.Duration) {
if s.RTO < s.minRTO {
s.RTO = s.minRTO
}
+ if s.RTO > s.maxRTO {
+ s.RTO = s.maxRTO
+ }
}
// resendSegment resends the first unacknowledged segment.
@@ -1415,9 +1418,6 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
ackLeft -= datalen
}
- // Update the send buffer usage and notify potential waiters.
- s.ep.updateSndBufferUsage(int(acked))
-
// Clear SACK information for all acked data.
s.ep.scoreboard.Delete(s.SndUna)
@@ -1437,6 +1437,9 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
}
}
+ // Update the send buffer usage and notify potential waiters.
+ s.ep.updateSndBufferUsage(int(acked))
+
// It is possible for s.outstanding to drop below zero if we get
// a retransmit timeout, reset outstanding to zero but later
// get an ack that cover previously sent data.
diff --git a/pkg/tcpip/transport/tcp/tcp_rack_test.go b/pkg/tcpip/transport/tcp/tcp_rack_test.go
index 89e9fb886..c35db7c95 100644
--- a/pkg/tcpip/transport/tcp/tcp_rack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_rack_test.go
@@ -33,7 +33,6 @@ const (
tsOptionSize = 12
maxTCPOptionSize = 40
mtu = header.TCPMinimumSize + header.IPv4MinimumSize + maxTCPOptionSize + maxPayload
- latency = 5 * time.Millisecond
)
func setStackTCPRecovery(t *testing.T, c *context.Context, recovery int) {
@@ -163,7 +162,10 @@ func sendAndReceiveWithSACK(t *testing.T, c *context.Context, numPackets int, en
if !enableRACK {
setStackTCPRecovery(t, c, 0)
}
- createConnectedWithSACKAndTS(c)
+ // The delay should be below initial RTO (1s) otherwise retransimission
+ // will start. Choose a relatively large value so that estimated RTT
+ // keeps high even after a few rounds of undelayed RTT samples.
+ c.CreateConnectedWithOptions(header.TCPSynOptions{SACKPermitted: c.SACKEnabled(), TS: true}, 800*time.Millisecond /* delay */)
data := make([]byte, numPackets*maxPayload)
for i := range data {
@@ -181,9 +183,6 @@ func sendAndReceiveWithSACK(t *testing.T, c *context.Context, numPackets int, en
for i := 0; i < numPackets; i++ {
c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
bytesRead += maxPayload
- // This delay is added to increase RTT as low RTT can cause TLP
- // before sending ACK.
- time.Sleep(latency)
}
return data
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index 83e0653b9..6255355bb 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -35,13 +35,13 @@ import (
// SACKPermitted option enabled if the stack in the context has the SACK support
// enabled.
func createConnectedWithSACKPermittedOption(c *context.Context) *context.RawEndpoint {
- return c.CreateConnectedWithOptions(header.TCPSynOptions{SACKPermitted: c.SACKEnabled()})
+ return c.CreateConnectedWithOptionsNoDelay(header.TCPSynOptions{SACKPermitted: c.SACKEnabled()})
}
// createConnectedWithSACKAndTS creates and connects c.ep with the SACK & TS
// option enabled if the stack in the context has SACK and TS enabled.
func createConnectedWithSACKAndTS(c *context.Context) *context.RawEndpoint {
- return c.CreateConnectedWithOptions(header.TCPSynOptions{SACKPermitted: c.SACKEnabled(), TS: true})
+ return c.CreateConnectedWithOptionsNoDelay(header.TCPSynOptions{SACKPermitted: c.SACKEnabled(), TS: true})
}
func setStackSACKPermitted(t *testing.T, c *context.Context, enable bool) {
@@ -108,7 +108,7 @@ func TestSackDisabledConnect(t *testing.T) {
setStackSACKPermitted(t, c, sackEnabled)
setStackTCPRecovery(t, c, 0)
- rep := c.CreateConnectedWithOptions(header.TCPSynOptions{})
+ rep := c.CreateConnectedWithOptionsNoDelay(header.TCPSynOptions{})
data := []byte{1, 2, 3}
@@ -170,7 +170,7 @@ func TestSackPermittedAccept(t *testing.T) {
setStackSACKPermitted(t, c, sackEnabled)
setStackTCPRecovery(t, c, 0)
- rep := c.AcceptWithOptions(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS, SACKPermitted: tc.sackPermitted})
+ rep := c.AcceptWithOptionsNoDelay(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS, SACKPermitted: tc.sackPermitted})
// Now verify no SACK blocks are
// received when sack is disabled.
data := []byte{1, 2, 3}
@@ -244,7 +244,7 @@ func TestSackDisabledAccept(t *testing.T) {
setStackSACKPermitted(t, c, sackEnabled)
setStackTCPRecovery(t, c, 0)
- rep := c.AcceptWithOptions(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS})
+ rep := c.AcceptWithOptionsNoDelay(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS})
// Now verify no SACK blocks are
// received when sack is disabled.
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 031f01357..bf726e86a 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -28,6 +28,7 @@ import (
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/checker"
+ "gvisor.dev/gvisor/pkg/tcpip/faketime"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
@@ -2143,7 +2144,7 @@ func TestSmallSegReceiveWindowAdvertisement(t *testing.T) {
t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
}
- c.AcceptWithOptions(tcp.FindWndScale(seqnum.Size(opt.Default)), header.TCPSynOptions{MSS: defaultIPv4MSS})
+ c.AcceptWithOptionsNoDelay(tcp.FindWndScale(seqnum.Size(opt.Default)), header.TCPSynOptions{MSS: defaultIPv4MSS})
// Bump up the receive buffer size such that, when the receive window grows,
// the scaled window exceeds maxUint16.
@@ -2535,7 +2536,7 @@ func TestScaledWindowAccept(t *testing.T) {
// Do 3-way handshake.
// wndScale expected is 3 as 65535 * 3 * 2 < 65535 * 2^3 but > 65535 *2 *2
- c.PassiveConnectWithOptions(100, 3 /* wndScale */, header.TCPSynOptions{MSS: defaultIPv4MSS})
+ c.PassiveConnectWithOptions(100, 3 /* wndScale */, header.TCPSynOptions{MSS: defaultIPv4MSS}, 0 /* delay */)
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
@@ -3532,6 +3533,12 @@ func TestMaxRetransmitsTimeout(t *testing.T) {
t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
}
+ // Wait for the connection to timeout after MaxRetries retransmits.
+ initRTO := time.Second
+ minRTOOpt := tcpip.TCPMinRTOOption(initRTO)
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &minRTOOpt); err != nil {
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, minRTOOpt, minRTOOpt, err)
+ }
c.CreateConnected(context.TestInitialSequenceNumber, 30000 /* rcvWnd */, -1 /* epRcvBuf */)
waitEntry, notifyCh := waiter.NewChannelEntry(nil)
@@ -3554,8 +3561,6 @@ func TestMaxRetransmitsTimeout(t *testing.T) {
),
)
}
- // Wait for the connection to timeout after MaxRetries retransmits.
- initRTO := 1 * time.Second
select {
case <-notifyCh:
case <-time.After((2 << numRetries) * initRTO):
@@ -3590,9 +3595,13 @@ func TestMaxRTO(t *testing.T) {
defer c.Cleanup()
rto := 1 * time.Second
- opt := tcpip.TCPMaxRTOOption(rto)
- if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
+ minRTOOpt := tcpip.TCPMinRTOOption(rto / 2)
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &minRTOOpt); err != nil {
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, minRTOOpt, minRTOOpt, err)
+ }
+ maxRTOOpt := tcpip.TCPMaxRTOOption(rto)
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &maxRTOOpt); err != nil {
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, maxRTOOpt, maxRTOOpt, err)
}
c.CreateConnected(context.TestInitialSequenceNumber, 30000 /* rcvWnd */, -1 /* epRcvBuf */)
@@ -3618,8 +3627,8 @@ func TestMaxRTO(t *testing.T) {
checker.TCPFlagsMatch(header.TCPFlagAck, ^header.TCPFlagPsh),
),
)
- if time.Since(start).Round(time.Second).Seconds() != rto.Seconds() {
- t.Errorf("Retransmit interval not capped to MaxRTO.\n")
+ if elapsed := time.Since(start); elapsed.Round(time.Second).Seconds() != rto.Seconds() {
+ t.Errorf("Retransmit interval not capped to MaxRTO(%s). %s", rto, elapsed)
}
}
}
@@ -3670,6 +3679,10 @@ func TestRetransmitIPv4IDUniqueness(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
+ minRTOOpt := tcpip.TCPMinRTOOption(time.Second)
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &minRTOOpt); err != nil {
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, minRTOOpt, minRTOOpt, err)
+ }
c.CreateConnected(context.TestInitialSequenceNumber, 30000 /* rcvWnd */, -1 /* epRcvBuf */)
// Disabling PMTU discovery causes all packets sent from this socket to
@@ -6304,7 +6317,7 @@ func TestEndpointBindListenAcceptState(t *testing.T) {
t.Errorf("unexpected endpoint state: want %s, got %s", want, got)
}
- c.PassiveConnectWithOptions(100, 5, header.TCPSynOptions{MSS: defaultIPv4MSS})
+ c.PassiveConnectWithOptions(100, 5, header.TCPSynOptions{MSS: defaultIPv4MSS}, 0 /* delay */)
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
@@ -6385,7 +6398,7 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
// maximum buffer size defined above.
c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize))
- rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4})
+ rawEP := c.CreateConnectedWithOptionsNoDelay(header.TCPSynOptions{TS: true, WS: 4})
// NOTE: The timestamp values in the sent packets are meaningless to the
// peer so we just increment the timestamp value by 1 every batch as we
@@ -6515,7 +6528,7 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
// maximum buffer size used by stack.
c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize))
- rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4})
+ rawEP := c.CreateConnectedWithOptionsNoDelay(header.TCPSynOptions{TS: true, WS: 4})
tsVal := rawEP.TSVal
rawEP.NextSeqNum--
rawEP.SendPacketWithTS(nil, tsVal)
@@ -7430,6 +7443,11 @@ func TestTCPUserTimeout(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
+ initRTO := 1 * time.Second
+ minRTOOpt := tcpip.TCPMinRTOOption(initRTO)
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &minRTOOpt); err != nil {
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, minRTOOpt, minRTOOpt, err)
+ }
c.CreateConnected(context.TestInitialSequenceNumber, 30000, -1 /* epRcvBuf */)
waitEntry, notifyCh := waiter.NewChannelEntry(nil)
@@ -7440,7 +7458,6 @@ func TestTCPUserTimeout(t *testing.T) {
// Ensure that on the next retransmit timer fire, the user timeout has
// expired.
- initRTO := 1 * time.Second
userTimeout := initRTO / 2
v := tcpip.TCPUserTimeoutOption(userTimeout)
if err := c.EP.SetSockOpt(&v); err != nil {
@@ -7954,6 +7971,151 @@ func TestSetStackTimeWaitReuse(t *testing.T) {
}
}
+func TestHandshakeRTT(t *testing.T) {
+ type testCase struct {
+ connect bool
+ tsEnabled bool
+ useCookie bool
+ retrans bool
+ delay time.Duration
+ wantRTT time.Duration
+ }
+ var testCases []testCase
+ for _, connect := range []bool{false, true} {
+ for _, tsEnabled := range []bool{false, true} {
+ for _, useCookie := range []bool{false, true} {
+ for _, retrans := range []bool{false, true} {
+ if connect && useCookie {
+ continue
+ }
+ delay := 800 * time.Millisecond
+ if retrans {
+ delay = 1200 * time.Millisecond
+ }
+ wantRTT := delay
+ // If syncookie is enabled, sample RTT only when TS option is enabled.
+ if !retrans && useCookie && !tsEnabled {
+ wantRTT = 0
+ }
+ // If retransmitted, sample RTT only when TS option is enabled.
+ if retrans && !tsEnabled {
+ wantRTT = 0
+ }
+ testCases = append(testCases, testCase{connect, tsEnabled, useCookie, retrans, delay, wantRTT})
+ }
+ }
+ }
+ }
+ for _, tt := range testCases {
+ tt := tt
+ t.Run(fmt.Sprintf("connect=%t,TS=%t,cookie=%t,retrans=%t)", tt.connect, tt.tsEnabled, tt.useCookie, tt.retrans), func(t *testing.T) {
+ t.Parallel()
+ c := context.New(t, defaultMTU)
+ if tt.useCookie {
+ opt := tcpip.TCPAlwaysUseSynCookies(true)
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
+ }
+ }
+ synOpts := header.TCPSynOptions{}
+ if tt.tsEnabled {
+ synOpts.TS = true
+ synOpts.TSVal = 42
+ }
+ if tt.connect {
+ c.CreateConnectedWithOptions(synOpts, tt.delay)
+ } else {
+ synOpts.MSS = defaultIPv4MSS
+ synOpts.WS = -1
+ c.AcceptWithOptions(-1, synOpts, tt.delay)
+ }
+ var info tcpip.TCPInfoOption
+ if err := c.EP.GetSockOpt(&info); err != nil {
+ t.Fatalf("c.EP.GetSockOpt(&%T) = %s", info, err)
+ }
+ if got := time.Duration(info.RTT).Round(tt.wantRTT); got != tt.wantRTT {
+ t.Fatalf("got info.RTT=%s, expect %s", got, tt.wantRTT)
+ }
+ if info.RTTVar != 0 && tt.wantRTT == 0 {
+ t.Fatalf("got info.RTTVar=%s, expect 0", info.RTTVar)
+ }
+ if info.RTTVar == 0 && tt.wantRTT != 0 {
+ t.Fatalf("got info.RTTVar=0, expect non zero")
+ }
+ })
+ }
+}
+
+func TestSetRTO(t *testing.T) {
+ c := context.New(t, defaultMTU)
+ minRTO, maxRTO := tcpRTOMinMax(t, c)
+ for _, tt := range []struct {
+ name string
+ RTO time.Duration
+ minRTO time.Duration
+ maxRTO time.Duration
+ err tcpip.Error
+ }{
+ {
+ name: "invalid minRTO",
+ minRTO: maxRTO + time.Second,
+ err: &tcpip.ErrInvalidOptionValue{},
+ },
+ {
+ name: "invalid maxRTO",
+ maxRTO: minRTO - time.Millisecond,
+ err: &tcpip.ErrInvalidOptionValue{},
+ },
+ {
+ name: "valid minRTO",
+ minRTO: maxRTO - time.Second,
+ },
+ {
+ name: "valid maxRTO",
+ maxRTO: minRTO + time.Millisecond,
+ },
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ c := context.New(t, defaultMTU)
+ var opt tcpip.SettableTransportProtocolOption
+ if tt.minRTO > 0 {
+ min := tcpip.TCPMinRTOOption(tt.minRTO)
+ opt = &min
+ }
+ if tt.maxRTO > 0 {
+ max := tcpip.TCPMaxRTOOption(tt.maxRTO)
+ opt = &max
+ }
+ err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, opt)
+ if got, want := err, tt.err; got != want {
+ t.Fatalf("c.Stack().SetTransportProtocolOption(TCP, &%T(%v)) = %v, want = %v", opt, opt, got, want)
+ }
+ if tt.err == nil {
+ minRTO, maxRTO := tcpRTOMinMax(t, c)
+ if tt.minRTO > 0 && tt.minRTO != minRTO {
+ t.Fatalf("got minRTO = %s, want %s", minRTO, tt.minRTO)
+ }
+ if tt.maxRTO > 0 && tt.maxRTO != maxRTO {
+ t.Fatalf("got maxRTO = %s, want %s", maxRTO, tt.maxRTO)
+ }
+ }
+ })
+ }
+}
+
+func tcpRTOMinMax(t *testing.T, c *context.Context) (time.Duration, time.Duration) {
+ t.Helper()
+ var minOpt tcpip.TCPMinRTOOption
+ var maxOpt tcpip.TCPMaxRTOOption
+ if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &minOpt); err != nil {
+ t.Fatalf("c.Stack().TransportProtocolOption(TCP, %T): %s", minOpt, err)
+ }
+ if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &maxOpt); err != nil {
+ t.Fatalf("c.Stack().TransportProtocolOption(TCP, %T): %s", maxOpt, err)
+ }
+ return time.Duration(minOpt), time.Duration(maxOpt)
+}
+
// generateRandomPayload generates a random byte slice of the specified length
// causing a fatal test failure if it is unable to do so.
func generateRandomPayload(t *testing.T, n int) []byte {
@@ -7964,3 +8126,185 @@ func generateRandomPayload(t *testing.T, n int) []byte {
}
return buf
}
+
+func TestSendBufferTuning(t *testing.T) {
+ const maxPayload = 536
+ const mtu = header.TCPMinimumSize + header.IPv4MinimumSize + maxTCPOptionSize + maxPayload
+ const packetOverheadFactor = 2
+
+ testCases := []struct {
+ name string
+ autoTuningDisabled bool
+ }{
+ {"autoTuningDisabled", true},
+ {"autoTuningEnabled", false},
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ c := context.New(t, mtu)
+ defer c.Cleanup()
+
+ // Set the stack option for send buffer size.
+ const defaultSndBufSz = maxPayload * tcp.InitialCwnd
+ const maxSndBufSz = defaultSndBufSz * 10
+ {
+ opt := tcpip.TCPSendBufferSizeRangeOption{Min: 1, Default: defaultSndBufSz, Max: maxSndBufSz}
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+ t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
+ }
+ }
+
+ c.CreateConnected(context.TestInitialSequenceNumber, 30000, -1 /* epRcvBuf */)
+
+ oldSz := c.EP.SocketOptions().GetSendBufferSize()
+ if oldSz != defaultSndBufSz {
+ t.Fatalf("Wrong send buffer size got %d want %d", oldSz, defaultSndBufSz)
+ }
+
+ if tc.autoTuningDisabled {
+ c.EP.SocketOptions().SetSendBufferSize(defaultSndBufSz, true /* notify */)
+ }
+
+ data := make([]byte, maxPayload)
+ for i := range data {
+ data[i] = byte(i)
+ }
+
+ w, ch := waiter.NewChannelEntry(nil)
+ c.WQ.EventRegister(&w, waiter.WritableEvents)
+ defer c.WQ.EventUnregister(&w)
+
+ bytesRead := 0
+ for {
+ // Packets will be sent till the send buffer
+ // size is reached.
+ var r bytes.Reader
+ r.Reset(data[bytesRead : bytesRead+maxPayload])
+ _, err := c.EP.Write(&r, tcpip.WriteOptions{})
+ if cmp.Equal(&tcpip.ErrWouldBlock{}, err) {
+ break
+ }
+
+ c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, 0)
+ bytesRead += maxPayload
+ data = append(data, data...)
+ }
+
+ // Send an ACK and wait for connection to become writable again.
+ c.SendAck(seqnum.Value(context.TestInitialSequenceNumber).Add(1), bytesRead)
+ select {
+ case <-ch:
+ if err := c.EP.LastError(); err != nil {
+ t.Fatalf("Write failed: %s", err)
+ }
+ case <-time.After(1 * time.Second):
+ t.Fatalf("Timed out waiting for connection")
+ }
+
+ outSz := int64(defaultSndBufSz)
+ if !tc.autoTuningDisabled {
+ // Calculate the new auto tuned send buffer.
+ var info tcpip.TCPInfoOption
+ if err := c.EP.GetSockOpt(&info); err != nil {
+ t.Fatalf("GetSockOpt failed: %v", err)
+ }
+ outSz = (int64(info.SndCwnd) * packetOverheadFactor * (maxPayload))
+ }
+
+ if newSz := c.EP.SocketOptions().GetSendBufferSize(); newSz != outSz {
+ t.Fatalf("Wrong send buffer size, got %d want %d", newSz, outSz)
+ }
+ })
+ }
+}
+
+func TestTimestampSynCookies(t *testing.T) {
+ clock := faketime.NewManualClock()
+ c := context.NewWithOpts(t, context.Options{
+ EnableV4: true,
+ EnableV6: true,
+ MTU: defaultMTU,
+ Clock: clock,
+ })
+ defer c.Cleanup()
+ opt := tcpip.TCPAlwaysUseSynCookies(true)
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
+ }
+ wq := &waiter.Queue{}
+ ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
+ if err != nil {
+ t.Fatalf("NewEndpoint failed: %s", err)
+ }
+ defer ep.Close()
+
+ tcpOpts := [12]byte{header.TCPOptionNOP, header.TCPOptionNOP}
+ header.EncodeTSOption(42, 0, tcpOpts[2:])
+ if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
+ t.Fatalf("Bind failed: %s", err)
+ }
+ if err := ep.Listen(10); err != nil {
+ t.Fatalf("Listen failed: %s", err)
+ }
+ iss := seqnum.Value(context.TestInitialSequenceNumber)
+ c.SendPacket(nil, &context.Headers{
+ SrcPort: context.TestPort,
+ DstPort: context.StackPort,
+ Flags: header.TCPFlagSyn,
+ RcvWnd: seqnum.Size(512),
+ SeqNum: iss,
+ TCPOpts: tcpOpts[:],
+ })
+ // Get the TSVal of SYN-ACK.
+ b := c.GetPacket()
+ tcpHdr := header.TCP(header.IPv4(b).Payload())
+ c.IRS = seqnum.Value(tcpHdr.SequenceNumber())
+ initialTSVal := tcpHdr.ParsedOptions().TSVal
+
+ header.EncodeTSOption(420, initialTSVal, tcpOpts[2:])
+ c.SendPacket(nil, &context.Headers{
+ SrcPort: context.TestPort,
+ DstPort: context.StackPort,
+ Flags: header.TCPFlagAck,
+ RcvWnd: seqnum.Size(512),
+ SeqNum: iss + 1,
+ AckNum: c.IRS + 1,
+ TCPOpts: tcpOpts[:],
+ })
+ c.EP, _, err = ep.Accept(nil)
+ // Try to accept the connection.
+ we, ch := waiter.NewChannelEntry(nil)
+ wq.EventRegister(&we, waiter.ReadableEvents)
+ defer wq.EventUnregister(&we)
+ if cmp.Equal(&tcpip.ErrWouldBlock{}, err) {
+ // Wait for connection to be established.
+ select {
+ case <-ch:
+ c.EP, _, err = ep.Accept(nil)
+ if err != nil {
+ t.Fatalf("Accept failed: %s", err)
+ }
+
+ case <-time.After(1 * time.Second):
+ t.Fatalf("Timed out waiting for accept")
+ }
+ } else if err != nil {
+ t.Fatalf("failed to accept: %s", err)
+ }
+
+ const elapsed = 200 * time.Millisecond
+ clock.Advance(elapsed)
+ data := []byte{1, 2, 3}
+ var r bytes.Reader
+ r.Reset(data)
+ if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil {
+ t.Fatalf("Write failed: %s", err)
+ }
+
+ // The endpoint should have a correct TSOffset so that the received TSVal
+ // should match our expectation.
+ if got, want := header.TCP(header.IPv4(c.GetPacket()).Payload()).ParsedOptions().TSVal, initialTSVal+uint32(elapsed.Milliseconds()); got != want {
+ t.Fatalf("got TSVal = %d, want %d", got, want)
+ }
+}
diff --git a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
index 1deb1fe4d..65925daa5 100644
--- a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
@@ -32,7 +32,7 @@ import (
// createConnectedWithTimestampOption creates and connects c.ep with the
// timestamp option enabled.
func createConnectedWithTimestampOption(c *context.Context) *context.RawEndpoint {
- return c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, TSVal: 1})
+ return c.CreateConnectedWithOptionsNoDelay(header.TCPSynOptions{TS: true, TSVal: 1})
}
// TestTimeStampEnabledConnect tests that netstack sends the timestamp option on
@@ -131,7 +131,7 @@ func TestTimeStampDisabledConnect(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
- c.CreateConnectedWithOptions(header.TCPSynOptions{})
+ c.CreateConnectedWithOptionsNoDelay(header.TCPSynOptions{})
}
func timeStampEnabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wndSize uint16) {
@@ -147,7 +147,7 @@ func timeStampEnabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wndS
t.Logf("Test w/ CookieEnabled = %v", cookieEnabled)
tsVal := rand.Uint32()
- c.AcceptWithOptions(wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS, TS: true, TSVal: tsVal})
+ c.AcceptWithOptionsNoDelay(wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS, TS: true, TSVal: tsVal})
// Now send some data and validate that timestamp is echoed correctly in the ACK.
data := []byte{1, 2, 3}
@@ -209,7 +209,7 @@ func timeStampDisabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wnd
}
t.Logf("Test w/ CookieEnabled = %v", cookieEnabled)
- c.AcceptWithOptions(wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS})
+ c.AcceptWithOptionsNoDelay(wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS})
// Now send some data with the accepted connection endpoint and validate
// that no timestamp option is sent in the TCP segment.
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 96e4849d2..6e55a7a32 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -122,6 +122,9 @@ type Options struct {
// MTU indicates the maximum transmission unit on the link layer.
MTU uint32
+
+ // Clock that is used by Stack.
+ Clock tcpip.Clock
}
// Context provides an initialized Network stack and a link layer endpoint
@@ -182,6 +185,7 @@ func NewWithOpts(t *testing.T, opts Options) *Context {
stackOpts := stack.Options{
TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
+ Clock: opts.Clock,
}
if opts.EnableV4 {
stackOpts.NetworkProtocols = append(stackOpts.NetworkProtocols, ipv4.NewProtocol)
@@ -879,13 +883,21 @@ func (r *RawEndpoint) VerifyACKHasSACK(sackBlocks []header.SACKBlock) {
)
}
+// CreateConnectedWithOptionsNoDelay just calls CreateConnectedWithOptions
+// without delay.
+func (c *Context) CreateConnectedWithOptionsNoDelay(wantOptions header.TCPSynOptions) *RawEndpoint {
+ return c.CreateConnectedWithOptions(wantOptions, 0 /* delay */)
+}
+
// CreateConnectedWithOptions creates and connects c.ep with the specified TCP
// options enabled and returns a RawEndpoint which represents the other end of
-// the connection.
+// the connection. It delays before a SYNACK is sent. This makes c.EP have a
+// higher RTT estimate so that spurious TLPs aren't sent in tests, which helps
+// reduce flakiness.
//
// It also verifies where required(eg.Timestamp) that the ACK to the SYN-ACK
// does not carry an option that was not requested.
-func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *RawEndpoint {
+func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions, delay time.Duration) *RawEndpoint {
var err tcpip.Error
c.EP, err = c.s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
@@ -911,18 +923,17 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *
// TS value.
mss := uint16(c.linkEP.MTU() - header.IPv4MinimumSize - header.TCPMinimumSize)
- checker.IPv4(c.t, b,
- checker.TCP(
- checker.DstPort(TestPort),
- checker.TCPFlags(header.TCPFlagSyn),
- checker.TCPSynOptions(header.TCPSynOptions{
- MSS: mss,
- TS: true,
- WS: int(c.WindowScale),
- SACKPermitted: c.SACKEnabled(),
- }),
- ),
+ synChecker := checker.TCP(
+ checker.DstPort(TestPort),
+ checker.TCPFlags(header.TCPFlagSyn),
+ checker.TCPSynOptions(header.TCPSynOptions{
+ MSS: mss,
+ TS: true,
+ WS: int(c.WindowScale),
+ SACKPermitted: c.SACKEnabled(),
+ }),
)
+ checker.IPv4(c.t, b, synChecker)
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateSynSent; got != want {
c.t.Fatalf("Unexpected endpoint state: want %v, got %v", want, got)
}
@@ -948,6 +959,10 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *
// Build SYN-ACK.
c.IRS = seqnum.Value(tcpSeg.SequenceNumber())
iss := seqnum.Value(TestInitialSequenceNumber)
+ if delay > 0 {
+ // Sleep so that RTT is increased.
+ time.Sleep(delay)
+ }
c.SendPacket(nil, &Headers{
SrcPort: tcpSeg.DestinationPort(),
DstPort: tcpSeg.SourcePort(),
@@ -959,7 +974,17 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *
})
// Read ACK.
- ackPacket := c.GetPacket()
+ var ackPacket []byte
+ // Ignore retransimitted SYN packets.
+ for {
+ packet := c.GetPacket()
+ if header.TCP(header.IPv4(packet).Payload()).Flags()&header.TCPFlagSyn != 0 {
+ checker.IPv4(c.t, packet, synChecker)
+ } else {
+ ackPacket = packet
+ break
+ }
+ }
// Verify TCP header fields.
tcpCheckers := []checker.TransportChecker{
@@ -1016,13 +1041,19 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *
}
}
-// AcceptWithOptions initializes a listening endpoint and connects to it with the
-// provided options enabled. It also verifies that the SYN-ACK has the expected
-// values for the provided options.
+// AcceptWithOptionsNoDelay delegates call to AcceptWithOptions without delay.
+func (c *Context) AcceptWithOptionsNoDelay(wndScale int, synOptions header.TCPSynOptions) *RawEndpoint {
+ return c.AcceptWithOptions(wndScale, synOptions, 0 /* delay */)
+}
+
+// AcceptWithOptions initializes a listening endpoint and connects to it with
+// the provided options enabled. It delays before the final ACK of the 3WHS is
+// sent. It also verifies that the SYN-ACK has the expected values for the
+// provided options.
//
// The function returns a RawEndpoint representing the other end of the accepted
// endpoint.
-func (c *Context) AcceptWithOptions(wndScale int, synOptions header.TCPSynOptions) *RawEndpoint {
+func (c *Context) AcceptWithOptions(wndScale int, synOptions header.TCPSynOptions, delay time.Duration) *RawEndpoint {
// Create EP and start listening.
wq := &waiter.Queue{}
ep, err := c.s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
@@ -1045,7 +1076,7 @@ func (c *Context) AcceptWithOptions(wndScale int, synOptions header.TCPSynOption
c.t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
- rep := c.PassiveConnectWithOptions(100, wndScale, synOptions)
+ rep := c.PassiveConnectWithOptions(100, wndScale, synOptions, delay)
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
@@ -1077,13 +1108,14 @@ func (c *Context) AcceptWithOptions(wndScale int, synOptions header.TCPSynOption
// PassiveConnectWithOptions.
func (c *Context) PassiveConnect(maxPayload, wndScale int, synOptions header.TCPSynOptions) {
synOptions.WS = -1
- c.PassiveConnectWithOptions(maxPayload, wndScale, synOptions)
+ c.PassiveConnectWithOptions(maxPayload, wndScale, synOptions, 0 /* delay */)
}
// PassiveConnectWithOptions initiates a new connection (with the specified TCP
// options enabled) to the port on which the Context.ep is listening for new
// connections. It also validates that the SYN-ACK has the expected values for
-// the enabled options.
+// the enabled options. The final ACK of the handshake is delayed by specified
+// duration.
//
// NOTE: MSS is not a negotiated option and it can be asymmetric
// in each direction. This function uses the maxPayload to set the MSS to be
@@ -1093,7 +1125,7 @@ func (c *Context) PassiveConnect(maxPayload, wndScale int, synOptions header.TCP
// wndScale is the expected window scale in the SYN-ACK and synOptions.WS is the
// value of the window scaling option to be sent in the SYN. If synOptions.WS >
// 0 then we send the WindowScale option.
-func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions header.TCPSynOptions) *RawEndpoint {
+func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions header.TCPSynOptions, delay time.Duration) *RawEndpoint {
c.t.Helper()
opts := make([]byte, header.TCPOptionsMaximumSize)
offset := 0
@@ -1180,7 +1212,10 @@ func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions
ackHeaders.TCPOpts = opts[:]
}
- // Send ACK.
+ // Send ACK, delay if needed.
+ if delay > 0 {
+ time.Sleep(delay)
+ }
c.SendPacket(nil, ackHeaders)
c.RcvdWindowScale = uint8(rcvdSynOptions.WS)
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 82a3f2287..108580508 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -266,7 +266,7 @@ func (e *endpoint) Close() {
for mem := range e.multicastMemberships {
e.stack.LeaveGroup(e.NetProto, mem.nicID, mem.multicastAddr)
}
- e.multicastMemberships = make(map[multicastMembership]struct{})
+ e.multicastMemberships = nil
// Close the receive list and drain it.
e.rcvMu.Lock()
diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go
index cde1038ed..f46a00e42 100644
--- a/pkg/usermem/usermem.go
+++ b/pkg/usermem/usermem.go
@@ -429,7 +429,7 @@ type IOSequence struct {
// return 0, nil
// }
// if f.availableBytes == 0 {
-// return 0, syserror.ErrWouldBlock
+// return 0, linuxerr.ErrWouldBlock
// }
// return ioseq.CopyOutFrom(..., reader)
//
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index c9d2b3eff..0ded907f0 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -45,6 +45,7 @@ go_library(
"//pkg/sentry/arch",
"//pkg/sentry/arch:registers_go_proto",
"//pkg/sentry/control",
+ "//pkg/sentry/control:control_go_proto",
"//pkg/sentry/devices/memdev",
"//pkg/sentry/devices/ttydev",
"//pkg/sentry/devices/tundev",
@@ -96,6 +97,7 @@ go_library(
"//pkg/sentry/watchdog",
"//pkg/sync",
"//pkg/tcpip",
+ "//pkg/tcpip/link/ethernet",
"//pkg/tcpip/link/fdbased",
"//pkg/tcpip/link/loopback",
"//pkg/tcpip/link/packetsocket",
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 60b532798..76e1f596b 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -26,6 +26,7 @@ import (
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
+ controlpb "gvisor.dev/gvisor/pkg/sentry/control/control_go_proto"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket/netstack"
@@ -114,6 +115,18 @@ const (
FsCat = "Fs.Cat"
)
+// Usage related commands (see usage.go for more details).
+const (
+ UsageCollect = "Usage.Collect"
+ UsageUsageFD = "Usage.UsageFD"
+ UsageReduce = "Usage.Reduce"
+)
+
+// Events related commands (see events.go for more details).
+const (
+ EventsAttachDebugEmitter = "Events.AttachDebugEmitter"
+)
+
// ControlSocketAddr generates an abstract unix socket name for the given ID.
func ControlSocketAddr(id string) string {
return fmt.Sprintf("\x00runsc-sandbox.%s", id)
@@ -153,13 +166,31 @@ func newController(fd int, l *Loader) (*controller, error) {
ctrl.srv.Register(net)
}
- ctrl.srv.Register(&debug{})
- ctrl.srv.Register(&control.Logging{})
- ctrl.srv.Register(&control.Lifecycle{l.k})
- ctrl.srv.Register(&control.Fs{l.k})
-
- if l.root.conf.ProfileEnable {
- ctrl.srv.Register(control.NewProfile(l.k))
+ if l.root.conf.Controls.Controls != nil {
+ for _, c := range l.root.conf.Controls.Controls.AllowedControls {
+ switch c {
+ case controlpb.ControlConfig_EVENTS:
+ ctrl.srv.Register(&control.Events{})
+ case controlpb.ControlConfig_FS:
+ ctrl.srv.Register(&control.Fs{Kernel: l.k})
+ case controlpb.ControlConfig_LIFECYCLE:
+ ctrl.srv.Register(&control.Lifecycle{Kernel: l.k})
+ case controlpb.ControlConfig_LOGGING:
+ ctrl.srv.Register(&control.Logging{})
+ case controlpb.ControlConfig_PROFILE:
+ if l.root.conf.ProfileEnable {
+ ctrl.srv.Register(control.NewProfile(l.k))
+ }
+ case controlpb.ControlConfig_USAGE:
+ ctrl.srv.Register(&control.Usage{Kernel: l.k})
+ case controlpb.ControlConfig_PROC:
+ ctrl.srv.Register(&control.Proc{Kernel: l.k})
+ case controlpb.ControlConfig_STATE:
+ ctrl.srv.Register(&control.State{Kernel: l.k})
+ case controlpb.ControlConfig_DEBUG:
+ ctrl.srv.Register(&debug{})
+ }
+ }
}
return ctrl, nil
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index ec9188021..3f667cd74 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -58,6 +58,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/watchdog"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/ethernet"
"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
"gvisor.dev/gvisor/pkg/tcpip/network/arp"
@@ -1174,7 +1175,7 @@ func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) {
n := &Network{Stack: s.(*netstack.Stack).Stack}
nicID := tcpip.NICID(f.uniqueID.UniqueID())
link := DefaultLoopbackLink
- linkEP := loopback.New()
+ linkEP := ethernet.New(loopback.New())
if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
return nil, err
}
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 7e627e4c6..5c6879198 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -23,6 +23,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/ethernet"
"gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
"gvisor.dev/gvisor/pkg/tcpip/link/packetsocket"
@@ -169,7 +170,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
nicID++
nicids[link.Name] = nicID
- linkEP := loopback.New()
+ linkEP := ethernet.New(loopback.New())
log.Infof("Enabling loopback interface %q with id %d on addresses %+v", link.Name, nicID, link.Addresses)
if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go
index c21648a32..cf5be34cd 100644
--- a/runsc/boot/strace.go
+++ b/runsc/boot/strace.go
@@ -35,9 +35,14 @@ func enableStrace(conf *config.Config) error {
}
strace.LogMaximumSize = max
+ sink := strace.SinkTypeLog
+ if conf.StraceEvent {
+ sink = strace.SinkTypeEvent
+ }
+
if len(conf.StraceSyscalls) == 0 {
- strace.EnableAll(strace.SinkTypeLog)
+ strace.EnableAll(sink)
return nil
}
- return strace.Enable(strings.Split(conf.StraceSyscalls, ","), strace.SinkTypeLog)
+ return strace.Enable(strings.Split(conf.StraceSyscalls, ","), sink)
}
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index 031ddd57e..c5e32807d 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -36,6 +36,7 @@ go_library(
"statefile.go",
"symbolize.go",
"syscalls.go",
+ "usage.go",
"verity_prepare.go",
"wait.go",
],
diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go
index c1d029d7f..08246e543 100644
--- a/runsc/cmd/events.go
+++ b/runsc/cmd/events.go
@@ -33,6 +33,10 @@ type Events struct {
intervalSec int
// If true, events will print a single group of stats and exit.
stats bool
+ // If true, events will dump all filtered events to stdout.
+ stream bool
+ // filters for streamed events.
+ filters stringSlice
}
// Name implements subcommands.Command.Name.
@@ -62,6 +66,8 @@ OPTIONS:
func (evs *Events) SetFlags(f *flag.FlagSet) {
f.IntVar(&evs.intervalSec, "interval", 5, "set the stats collection interval, in seconds")
f.BoolVar(&evs.stats, "stats", false, "display the container's stats then exit")
+ f.BoolVar(&evs.stream, "stream", false, "dump all filtered events to stdout")
+ f.Var(&evs.filters, "filters", "only display matching events")
}
// Execute implements subcommands.Command.Execute.
@@ -79,6 +85,13 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa
Fatalf("loading sandbox: %v", err)
}
+ if evs.stream {
+ if err := c.Stream(evs.filters, os.Stdout); err != nil {
+ Fatalf("Stream failed: %v", err)
+ }
+ return subcommands.ExitSuccess
+ }
+
// Repeatedly get stats from the container.
for {
// Get the event and print it as JSON.
diff --git a/runsc/cmd/usage.go b/runsc/cmd/usage.go
new file mode 100644
index 000000000..d2aeafa28
--- /dev/null
+++ b/runsc/cmd/usage.go
@@ -0,0 +1,93 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "os"
+
+ "github.com/google/subcommands"
+ "gvisor.dev/gvisor/runsc/config"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
+)
+
+// Usage implements subcommands.Command for the "usage" command.
+type Usage struct {
+ full bool
+ fd bool
+}
+
+// Name implements subcommands.Command.Name.
+func (*Usage) Name() string {
+ return "usage"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Usage) Synopsis() string {
+ return "Usage shows application memory usage across various categories in bytes."
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Usage) Usage() string {
+ return `usage [flags] <container id> - print memory usages to standard output.`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (u *Usage) SetFlags(f *flag.FlagSet) {
+ f.BoolVar(&u.full, "full", false, "enumerate all usage by categories")
+ f.BoolVar(&u.fd, "fd", false, "retrieves a subset of usage through the established usage FD")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (u *Usage) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() < 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*config.Config)
+
+ cont, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{})
+ if err != nil {
+ Fatalf("loading container: %v", err)
+ }
+
+ if !u.fd {
+ m, err := cont.Usage(u.full)
+ if err != nil {
+ Fatalf("usage failed: %v", err)
+ }
+ if err := json.NewEncoder(os.Stdout).Encode(m); err != nil {
+ Fatalf("Encode MemoryUsage failed: %v", err)
+ }
+ } else {
+ m, err := cont.UsageFD()
+ if err != nil {
+ Fatalf("usagefd failed: %v", err)
+ }
+
+ mapped, unknown, total, err := m.Fetch()
+ if err != nil {
+ Fatalf("Fetch memory usage failed: %v", err)
+ }
+
+ fmt.Printf("Mapped %v, Unknown %v, Total %v\n", mapped, unknown, total)
+ }
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/verity_prepare.go b/runsc/cmd/verity_prepare.go
index 85d762a51..44c1d05db 100644
--- a/runsc/cmd/verity_prepare.go
+++ b/runsc/cmd/verity_prepare.go
@@ -82,7 +82,7 @@ func (c *VerityPrepare) Execute(_ context.Context, f *flag.FlagSet, args ...inte
},
Process: &specs.Process{
Cwd: absRoot,
- Args: []string{c.tool, "--path", "/verityroot"},
+ Args: []string{c.tool, "--path", "/verityroot", "--rawpath", "/rawroot"},
Env: os.Environ(),
Capabilities: specutils.AllCapabilities(),
},
@@ -94,6 +94,11 @@ func (c *VerityPrepare) Execute(_ context.Context, f *flag.FlagSet, args ...inte
Type: "bind",
Options: []string{"verity.roothash="},
},
+ {
+ Source: c.dir,
+ Destination: "/rawroot",
+ Type: "bind",
+ },
},
}
diff --git a/runsc/config/BUILD b/runsc/config/BUILD
index b1672bb9d..64295d283 100644
--- a/runsc/config/BUILD
+++ b/runsc/config/BUILD
@@ -11,6 +11,7 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/refs",
+ "//pkg/sentry/control:control_go_proto",
"//pkg/sentry/watchdog",
"//pkg/sync",
"//runsc/flag",
@@ -24,5 +25,8 @@ go_test(
"config_test.go",
],
library = ":config",
- deps = ["//runsc/flag"],
+ deps = [
+ "//pkg/sentry/control:control_go_proto",
+ "//runsc/flag",
+ ],
)
diff --git a/runsc/config/config.go b/runsc/config/config.go
index cc4650180..2f52863ff 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -19,8 +19,10 @@ package config
import (
"fmt"
+ "strings"
"gvisor.dev/gvisor/pkg/refs"
+ controlpb "gvisor.dev/gvisor/pkg/sentry/control/control_go_proto"
"gvisor.dev/gvisor/pkg/sentry/watchdog"
)
@@ -117,6 +119,10 @@ type Config struct {
// StraceLogSize is the max size of data blobs to display.
StraceLogSize uint `flag:"strace-log-size"`
+ // StraceEvent indicates sending strace to events if true. Strace is
+ // sent to log if false.
+ StraceEvent bool `flag:"strace-event"`
+
// DisableSeccomp indicates whether seccomp syscall filters should be
// disabled. Pardon the double negation, but default to enabled is important.
DisableSeccomp bool
@@ -131,6 +137,9 @@ type Config struct {
// ProfileEnable is set to prepare the sandbox to be profiled.
ProfileEnable bool `flag:"profile"`
+ // Controls defines the controls that may be enabled.
+ Controls controlConfig `flag:"controls"`
+
// RestoreFile is the path to the saved container image
RestoreFile string
@@ -347,6 +356,96 @@ func (q QueueingDiscipline) String() string {
panic(fmt.Sprintf("Invalid qdisc %d", q))
}
+// controlConfig represents control endpoints.
+type controlConfig struct {
+ Controls *controlpb.ControlConfig
+}
+
+// Set implements flag.Value.
+func (c *controlConfig) Set(v string) error {
+ controls := strings.Split(v, ",")
+ var controlList []controlpb.ControlConfig_Endpoint
+ for _, control := range controls {
+ switch control {
+ case "EVENTS":
+ controlList = append(controlList, controlpb.ControlConfig_EVENTS)
+ case "FS":
+ controlList = append(controlList, controlpb.ControlConfig_FS)
+ case "LIFECYCLE":
+ controlList = append(controlList, controlpb.ControlConfig_LIFECYCLE)
+ case "LOGGING":
+ controlList = append(controlList, controlpb.ControlConfig_LOGGING)
+ case "PROFILE":
+ controlList = append(controlList, controlpb.ControlConfig_PROFILE)
+ case "USAGE":
+ controlList = append(controlList, controlpb.ControlConfig_USAGE)
+ case "PROC":
+ controlList = append(controlList, controlpb.ControlConfig_PROC)
+ case "STATE":
+ controlList = append(controlList, controlpb.ControlConfig_STATE)
+ case "DEBUG":
+ controlList = append(controlList, controlpb.ControlConfig_DEBUG)
+ default:
+ return fmt.Errorf("invalid control %q", control)
+ }
+ }
+ c.Controls.AllowedControls = controlList
+ return nil
+}
+
+// Get implements flag.Value.
+func (c *controlConfig) Get() interface{} {
+ return *c
+}
+
+// String implements flag.Value.
+func (c *controlConfig) String() string {
+ v := ""
+ for _, control := range c.Controls.AllowedControls {
+ if len(v) > 0 {
+ v += ","
+ }
+ switch control {
+ case controlpb.ControlConfig_EVENTS:
+ v += "EVENTS"
+ case controlpb.ControlConfig_FS:
+ v += "FS"
+ case controlpb.ControlConfig_LIFECYCLE:
+ v += "LIFECYCLE"
+ case controlpb.ControlConfig_LOGGING:
+ v += "LOGGING"
+ case controlpb.ControlConfig_PROFILE:
+ v += "PROFILE"
+ case controlpb.ControlConfig_USAGE:
+ v += "USAGE"
+ case controlpb.ControlConfig_PROC:
+ v += "PROC"
+ case controlpb.ControlConfig_STATE:
+ v += "STATE"
+ case controlpb.ControlConfig_DEBUG:
+ v += "DEBUG"
+ default:
+ panic(fmt.Sprintf("Invalid control %d", control))
+ }
+ }
+ return v
+}
+
+func defaultControlConfig() *controlConfig {
+ c := controlConfig{}
+ c.Controls = &controlpb.ControlConfig{}
+ c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_EVENTS)
+ c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_FS)
+ c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_LIFECYCLE)
+ c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_LOGGING)
+ c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_PROFILE)
+ c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_USAGE)
+ c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_PROC)
+ c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_STATE)
+ c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_DEBUG)
+ return &c
+}
+
func leakModePtr(v refs.LeakMode) *refs.LeakMode {
return &v
}
diff --git a/runsc/config/config_test.go b/runsc/config/config_test.go
index 80ff2c0a6..57c241c86 100644
--- a/runsc/config/config_test.go
+++ b/runsc/config/config_test.go
@@ -18,6 +18,7 @@ import (
"strings"
"testing"
+ controlpb "gvisor.dev/gvisor/pkg/sentry/control/control_go_proto"
"gvisor.dev/gvisor/runsc/flag"
)
@@ -59,6 +60,9 @@ func TestFromFlags(t *testing.T) {
if err := flag.CommandLine.Lookup("network").Value.Set("none"); err != nil {
t.Errorf("Flag set: %v", err)
}
+ if err := flag.CommandLine.Lookup("controls").Value.Set("EVENTS,FS"); err != nil {
+ t.Errorf("Flag set: %v", err)
+ }
defer func() {
if err := setDefault("root"); err != nil {
t.Errorf("Flag set: %v", err)
@@ -72,6 +76,9 @@ func TestFromFlags(t *testing.T) {
if err := setDefault("network"); err != nil {
t.Errorf("Flag set: %v", err)
}
+ if err := setDefault("controls"); err != nil {
+ t.Errorf("Flag set: %v", err)
+ }
}()
c, err := NewFromFlags()
@@ -90,6 +97,12 @@ func TestFromFlags(t *testing.T) {
if want := NetworkNone; c.Network != want {
t.Errorf("Network=%v, want: %v", c.Network, want)
}
+ wants := []controlpb.ControlConfig_Endpoint{controlpb.ControlConfig_EVENTS, controlpb.ControlConfig_FS}
+ for i, want := range wants {
+ if c.Controls.Controls.AllowedControls[i] != want {
+ t.Errorf("Controls.Controls.AllowedControls[%d]=%v, want: %v", i, c.Controls.Controls.AllowedControls[i], want)
+ }
+ }
}
func TestToFlags(t *testing.T) {
@@ -101,10 +114,15 @@ func TestToFlags(t *testing.T) {
c.Debug = true
c.NumNetworkChannels = 123
c.Network = NetworkNone
+ c.Controls = controlConfig{
+ Controls: &controlpb.ControlConfig{
+ AllowedControls: []controlpb.ControlConfig_Endpoint{controlpb.ControlConfig_EVENTS, controlpb.ControlConfig_FS},
+ },
+ }
flags := c.ToFlags()
- if len(flags) != 4 {
- t.Errorf("wrong number of flags set, want: 4, got: %d: %s", len(flags), flags)
+ if len(flags) != 5 {
+ t.Errorf("wrong number of flags set, want: 5, got: %d: %s", len(flags), flags)
}
t.Logf("Flags: %s", flags)
fm := map[string]string{}
@@ -117,6 +135,7 @@ func TestToFlags(t *testing.T) {
"--debug": "true",
"--num-network-channels": "123",
"--network": "none",
+ "--controls": "EVENTS,FS",
} {
if got, ok := fm[name]; ok {
if got != want {
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index 6f1b5927a..85507902a 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -56,6 +56,7 @@ func RegisterFlags() {
flag.Bool("strace", false, "enable strace.")
flag.String("strace-syscalls", "", "comma-separated list of syscalls to trace. If --strace is true and this list is empty, then all syscalls will be traced.")
flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs.")
+ flag.Bool("strace-event", false, "send strace to event.")
// Flags that control sandbox runtime behavior.
flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm.")
@@ -66,6 +67,7 @@ func RegisterFlags() {
flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.")
flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
flag.Bool("oci-seccomp", false, "Enables loading OCI seccomp filters inside the sandbox.")
+ flag.Var(defaultControlConfig(), "controls", "Sentry control endpoints.")
// Flags that control sandbox runtime behavior: FS related.
flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem validation to use for the root mount: exclusive (default), shared.")
diff --git a/runsc/container/container.go b/runsc/container/container.go
index d1f979eb2..50b0dd5e7 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -652,6 +652,30 @@ func (c *Container) Cat(files []string, out *os.File) error {
return c.Sandbox.Cat(c.ID, files, out)
}
+// Usage displays memory used by the application.
+func (c *Container) Usage(full bool) (control.MemoryUsage, error) {
+ log.Debugf("Usage in container, cid: %s, full: %v", c.ID, full)
+ return c.Sandbox.Usage(c.ID, full)
+}
+
+// UsageFD shows application memory usage using two donated FDs.
+func (c *Container) UsageFD() (*control.MemoryUsageRecord, error) {
+ log.Debugf("UsageFD in container, cid: %s", c.ID)
+ return c.Sandbox.UsageFD(c.ID)
+}
+
+// Reduce requests that the sentry attempt to reduce its memory usage.
+func (c *Container) Reduce(wait bool) error {
+ log.Debugf("Reduce in container, cid: %s", c.ID)
+ return c.Sandbox.Reduce(c.ID, wait)
+}
+
+// Stream dumps all events to out.
+func (c *Container) Stream(filters []string, out *os.File) error {
+ log.Debugf("Stream in container, cid: %s", c.ID)
+ return c.Sandbox.Stream(c.ID, filters, out)
+}
+
// State returns the metadata of the container.
func (c *Container) State() specs.State {
return specs.State{
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 960c36946..681f5c1a9 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -2655,3 +2655,177 @@ func TestCat(t *testing.T) {
t.Errorf("out got %s, want include %s", buf, want)
}
}
+
+// TestUsage checks that usage generates the expected memory usage.
+func TestUsage(t *testing.T) {
+ spec, conf := sleepSpecConf(t)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
+
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("Creating container: %v", err)
+ }
+ defer cont.Destroy()
+
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("starting container: %v", err)
+ }
+
+ for _, full := range []bool{false, true} {
+ m, err := cont.Usage(full)
+ if err != nil {
+ t.Fatalf("error usage from container: %v", err)
+ }
+ if m.Mapped == 0 {
+ t.Errorf("Usage mapped got zero")
+ }
+ if m.Total == 0 {
+ t.Errorf("Usage total got zero")
+ }
+ if full {
+ if m.System == 0 {
+ t.Errorf("Usage system got zero")
+ }
+ if m.Anonymous == 0 {
+ t.Errorf("Usage anonymous got zero")
+ }
+ }
+ }
+}
+
+// TestUsageFD checks that usagefd generates the expected memory usage.
+func TestUsageFD(t *testing.T) {
+ spec, conf := sleepSpecConf(t)
+
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
+
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("Creating container: %v", err)
+ }
+ defer cont.Destroy()
+
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("starting container: %v", err)
+ }
+
+ m, err := cont.UsageFD()
+ if err != nil {
+ t.Fatalf("error usageFD from container: %v", err)
+ }
+
+ mapped, unknown, total, err := m.Fetch()
+ if err != nil {
+ t.Fatalf("error Fetch memory usage: %v", err)
+ }
+
+ if mapped == 0 {
+ t.Errorf("UsageFD Mapped got zero")
+ }
+ if unknown == 0 {
+ t.Errorf("UsageFD unknown got zero")
+ }
+ if total == 0 {
+ t.Errorf("UsageFD total got zero")
+ }
+}
+
+// TestReduce checks that reduce call succeeds.
+func TestReduce(t *testing.T) {
+ spec, conf := sleepSpecConf(t)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
+
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("Creating container: %v", err)
+ }
+ defer cont.Destroy()
+
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("starting container: %v", err)
+ }
+
+ if err := cont.Reduce(false); err != nil {
+ t.Fatalf("error reduce from container: %v", err)
+ }
+}
+
+// TestStream checks that Stream dumps expected events.
+func TestStream(t *testing.T) {
+ spec, conf := sleepSpecConf(t)
+ conf.Strace = true
+ conf.StraceEvent = true
+ conf.StraceSyscalls = ""
+
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
+
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("Creating container: %v", err)
+ }
+ defer cont.Destroy()
+
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("starting container: %v", err)
+ }
+
+ r, w, err := os.Pipe()
+ if err != nil {
+ t.Fatalf("os.Create(): %v", err)
+ }
+
+ // Spawn a new thread to Stream events as it blocks indefinitely.
+ go func() {
+ cont.Stream(nil, w)
+ }()
+
+ buf := make([]byte, 1024)
+ if _, err := r.Read(buf); err != nil {
+ t.Fatalf("Read out: %v", err)
+ }
+
+ // A syscall strace event includes "Strace".
+ if got, want := string(buf), "Strace"; !strings.Contains(got, want) {
+ t.Errorf("out got %s, want include %s", buf, want)
+ }
+}
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 07497e47b..600b21189 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -1242,13 +1242,14 @@ func (l *localFile) MultiGetAttr(names []string) ([]p9.FullStat, error) {
}
parent := l.file.FD()
- for _, name := range names {
- child, err := unix.Openat(parent, name, openFlags|unix.O_PATH, 0)
+ closeParent := func() {
if parent != l.file.FD() {
- // Parent is no longer needed.
_ = unix.Close(parent)
- parent = -1
}
+ }
+ defer closeParent()
+ for _, name := range names {
+ child, err := unix.Openat(parent, name, openFlags|unix.O_PATH, 0)
if err != nil {
if errors.Is(err, unix.ENOENT) {
// No pont in continuing any further.
@@ -1256,10 +1257,11 @@ func (l *localFile) MultiGetAttr(names []string) ([]p9.FullStat, error) {
}
return nil, err
}
+ closeParent()
+ parent = child
var stat unix.Stat_t
if err := unix.Fstat(child, &stat); err != nil {
- _ = unix.Close(child)
return nil, err
}
valid, attr := l.fillAttr(&stat)
@@ -1271,13 +1273,9 @@ func (l *localFile) MultiGetAttr(names []string) ([]p9.FullStat, error) {
if (stat.Mode & unix.S_IFMT) != unix.S_IFDIR {
// Doesn't need to continue if entry is not a dir. Including symlinks
// that cannot be followed.
- _ = unix.Close(child)
break
}
parent = child
}
- if parent != -1 && parent != l.file.FD() {
- _ = unix.Close(parent)
- }
return stats, nil
}
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index bc4a3fa32..d625230dd 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -17,12 +17,14 @@ go_library(
"//pkg/control/client",
"//pkg/control/server",
"//pkg/coverage",
+ "//pkg/eventchannel",
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/platform",
"//pkg/sync",
"//pkg/tcpip/header",
"//pkg/tcpip/stack",
+ "//pkg/unet",
"//pkg/urpc",
"//runsc/boot",
"//runsc/boot/platforms",
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index b15572a98..9fbce6bd6 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -35,10 +35,12 @@ import (
"gvisor.dev/gvisor/pkg/control/client"
"gvisor.dev/gvisor/pkg/control/server"
"gvisor.dev/gvisor/pkg/coverage"
+ "gvisor.dev/gvisor/pkg/eventchannel"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/unet"
"gvisor.dev/gvisor/pkg/urpc"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/boot/platforms"
@@ -1020,6 +1022,90 @@ func (s *Sandbox) Cat(cid string, files []string, out *os.File) error {
return nil
}
+// Usage sends the collect call for a container in the sandbox.
+func (s *Sandbox) Usage(cid string, Full bool) (control.MemoryUsage, error) {
+ log.Debugf("Usage sandbox %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return control.MemoryUsage{}, err
+ }
+ defer conn.Close()
+
+ var m control.MemoryUsage
+ err = conn.Call(boot.UsageCollect, &control.MemoryUsageOpts{
+ Full: Full,
+ }, &m)
+ return m, err
+}
+
+// UsageFD sends the usagefd call for a container in the sandbox.
+func (s *Sandbox) UsageFD(cid string) (*control.MemoryUsageRecord, error) {
+ log.Debugf("Usage sandbox %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return nil, err
+ }
+ defer conn.Close()
+
+ var m control.MemoryUsageFile
+ if err := conn.Call(boot.UsageUsageFD, &control.MemoryUsageFileOpts{
+ Version: 1,
+ }, &m); err != nil {
+ return nil, fmt.Errorf("UsageFD failed: %v", err)
+ }
+
+ if len(m.FilePayload.Files) != 2 {
+ return nil, fmt.Errorf("wants exactly two fds")
+ }
+
+ return control.NewMemoryUsageRecord(*m.FilePayload.Files[0], *m.FilePayload.Files[1])
+}
+
+// Reduce sends the reduce call for a container in the sandbox.
+func (s *Sandbox) Reduce(cid string, wait bool) error {
+ log.Debugf("Reduce sandbox %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ return conn.Call(boot.UsageReduce, &control.UsageReduceOpts{
+ Wait: wait,
+ }, nil)
+}
+
+// Stream sends the AttachDebugEmitter call for a container in the sandbox, and
+// dumps filtered events to out.
+func (s *Sandbox) Stream(cid string, filters []string, out *os.File) error {
+ log.Debugf("Stream sandbox %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ r, w, err := unet.SocketPair(false)
+ if err != nil {
+ return err
+ }
+
+ wfd, err := w.Release()
+ if err != nil {
+ return fmt.Errorf("failed to release write socket FD: %v", err)
+ }
+
+ if err := conn.Call(boot.EventsAttachDebugEmitter, &control.EventsOpts{
+ FilePayload: urpc.FilePayload{Files: []*os.File{
+ os.NewFile(uintptr(wfd), "event sink"),
+ }},
+ }, nil); err != nil {
+ return fmt.Errorf("AttachDebugEmitter failed: %v", err)
+ }
+
+ return eventchannel.ProcessAll(r, filters, out)
+}
+
// IsRunning returns true if the sandbox or gofer process is running.
func (s *Sandbox) IsRunning() bool {
if s.Pid != 0 {
diff --git a/test/benchmarks/base/startup_test.go b/test/benchmarks/base/startup_test.go
index 05a43ad17..197241622 100644
--- a/test/benchmarks/base/startup_test.go
+++ b/test/benchmarks/base/startup_test.go
@@ -34,15 +34,19 @@ func BenchmarkStartupEmpty(b *testing.B) {
defer machine.CleanUp()
ctx := context.Background()
+ b.StopTimer()
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
harness.DebugLog(b, "Running container: %d", i)
container := machine.GetContainer(ctx, b)
- defer container.CleanUp(ctx)
- if _, err := container.Run(ctx, dockerutil.RunOpts{
+ b.StartTimer()
+ if err := container.Spawn(ctx, dockerutil.RunOpts{
Image: "benchmarks/alpine",
- }, "true"); err != nil {
- b.Fatalf("failed to run container: %v", err)
+ }, "sleep", "100"); err != nil {
+ b.Fatalf("failed to start container: %v", err)
}
+ b.StopTimer()
+ container.CleanUp(ctx)
harness.DebugLog(b, "Ran container: %d", i)
}
}
diff --git a/test/benchmarks/tools/fio_test.go b/test/benchmarks/tools/fio_test.go
index a98277150..3b1f852ce 100644
--- a/test/benchmarks/tools/fio_test.go
+++ b/test/benchmarks/tools/fio_test.go
@@ -86,7 +86,7 @@ func TestFio(t *testing.T) {
fio := Fio{}
// WriteBandwidth.
got, err := fio.parseBandwidth(sampleData, false)
- var want float64 = 1753471.0 * 1024
+ want := 1753471.0 * 1024
if err != nil {
t.Fatalf("parse failed with err: %v", err)
} else if got != want {
diff --git a/test/perf/BUILD b/test/perf/BUILD
index 58fe333ee..59923da47 100644
--- a/test/perf/BUILD
+++ b/test/perf/BUILD
@@ -174,3 +174,10 @@ syscall_test(
test = "//test/perf/linux:verity_open_read_close_benchmark",
vfs1 = False,
)
+
+syscall_test(
+ size = "large",
+ debug = False,
+ test = "//test/perf/linux:verity_stat_benchmark",
+ vfs1 = False,
+)
diff --git a/test/perf/linux/BUILD b/test/perf/linux/BUILD
index 61ed98ff5..020a69ab5 100644
--- a/test/perf/linux/BUILD
+++ b/test/perf/linux/BUILD
@@ -462,3 +462,23 @@ cc_binary(
"//test/util:verity_util",
],
)
+
+cc_binary(
+ name = "verity_stat_benchmark",
+ testonly = 1,
+ srcs = [
+ "verity_stat_benchmark.cc",
+ ],
+ deps = [
+ gbenchmark,
+ gtest,
+ "//test/util:capability_util",
+ "//test/util:fs_util",
+ "//test/util:logging",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:verity_util",
+ "@com_google_absl//absl/strings",
+ ],
+)
diff --git a/test/perf/linux/randread_benchmark.cc b/test/perf/linux/randread_benchmark.cc
index b0eb8c24e..11b56a8cb 100644
--- a/test/perf/linux/randread_benchmark.cc
+++ b/test/perf/linux/randread_benchmark.cc
@@ -85,7 +85,7 @@ void BM_RandRead(benchmark::State& state) {
unsigned int seed = 1;
for (auto _ : state) {
TEST_CHECK(PreadFd(fd.get(), buf.data(), buf.size(),
- rand_r(&seed) % kFileSize) == size);
+ rand_r(&seed) % (kFileSize - buf.size())) == size);
}
state.SetBytesProcessed(static_cast<int64_t>(size) *
diff --git a/test/perf/linux/verity_stat_benchmark.cc b/test/perf/linux/verity_stat_benchmark.cc
new file mode 100644
index 000000000..b43a9e266
--- /dev/null
+++ b/test/perf/linux/verity_stat_benchmark.cc
@@ -0,0 +1,84 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "benchmark/benchmark.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/verity_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Creates a file in a nested directory hierarchy at least `depth` directories
+// deep, and stats that file multiple times.
+void BM_VerityStat(benchmark::State& state) {
+ // Create nested directories with given depth.
+ int depth = state.range(0);
+
+ // Mount a tmpfs file system to be wrapped by a verity fs.
+ TempPath top_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ TEST_CHECK(mount("", top_dir.path().c_str(), "tmpfs", 0, "") == 0);
+ std::string dir_path = top_dir.path();
+ std::string child_path = "";
+ std::vector<EnableTarget> targets;
+
+ while (depth-- > 0) {
+ // Don't use TempPath because it will make paths too long to use.
+ //
+ // The top_dir destructor will clean up this whole tree.
+ dir_path = JoinPath(dir_path, absl::StrCat(depth));
+ ASSERT_NO_ERRNO(Mkdir(dir_path, 0755));
+ child_path = JoinPath(child_path, Basename(dir_path));
+ targets.emplace_back(EnableTarget(child_path, O_RDONLY));
+ }
+
+ // Create the file that will be stat'd.
+ const TempPath file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir_path));
+
+ targets.emplace_back(
+ EnableTarget(JoinPath(child_path, Basename(file.path())), O_RDONLY));
+
+ // Reverse the targets because verity should be enabled from the lowest level.
+ std::reverse(targets.begin(), targets.end());
+
+ std::string verity_dir =
+ TEST_CHECK_NO_ERRNO_AND_VALUE(MountVerity(top_dir.path(), targets));
+
+ struct stat st;
+ for (auto _ : state) {
+ ASSERT_THAT(stat(JoinPath(verity_dir, targets[0].path).c_str(), &st),
+ SyscallSucceeds());
+ }
+}
+
+BENCHMARK(BM_VerityStat)->Range(1, 100)->UseRealTime();
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 960421466..01ee432cb 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -7,6 +7,8 @@ package(
exports_files(
[
+ "packet_socket.cc",
+ "packet_socket_raw.cc",
"raw_socket.cc",
"raw_socket_hdrincl.cc",
"raw_socket_icmp.cc",
@@ -1446,6 +1448,7 @@ cc_binary(
deps = [
":unix_domain_socket_test_util",
"//test/util:capability_util",
+ "//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:socket_util",
"@com_google_absl//absl/base:core_headers",
@@ -1464,6 +1467,7 @@ cc_binary(
deps = [
":unix_domain_socket_test_util",
"//test/util:capability_util",
+ "//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:socket_util",
"@com_google_absl//absl/base:core_headers",
diff --git a/test/syscalls/linux/cgroup.cc b/test/syscalls/linux/cgroup.cc
index f29891571..ca23dfeee 100644
--- a/test/syscalls/linux/cgroup.cc
+++ b/test/syscalls/linux/cgroup.cc
@@ -279,6 +279,23 @@ TEST(Cgroup, UnmountRepeated) {
EXPECT_THAT(umount(c.Path().c_str()), SyscallFailsWithErrno(EINVAL));
}
+TEST(Cgroup, Create) {
+ SKIP_IF(!CgroupsAvailable());
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(""));
+ ASSERT_NO_ERRNO(c.CreateChild("child1"));
+ EXPECT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(Exists(c.Path())));
+}
+
+TEST(Cgroup, SubcontainerInitiallyEmpty) {
+ SKIP_IF(!CgroupsAvailable());
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(""));
+ Cgroup child = ASSERT_NO_ERRNO_AND_VALUE(c.CreateChild("child1"));
+ auto procs = ASSERT_NO_ERRNO_AND_VALUE(child.Procs());
+ EXPECT_TRUE(procs.empty());
+}
+
TEST(MemoryCgroup, MemoryUsageInBytes) {
SKIP_IF(!CgroupsAvailable());
diff --git a/test/syscalls/linux/link.cc b/test/syscalls/linux/link.cc
index 4f9ca1a65..8b208f99a 100644
--- a/test/syscalls/linux/link.cc
+++ b/test/syscalls/linux/link.cc
@@ -142,7 +142,8 @@ TEST(LinkTest, OldnameIsEmpty) {
TEST(LinkTest, OldnameDoesNotExist) {
const std::string oldname = NewTempAbsPath();
const std::string newname = NewTempAbsPath();
- EXPECT_THAT(link("", newname.c_str()), SyscallFailsWithErrno(ENOENT));
+ EXPECT_THAT(link(oldname.c_str(), newname.c_str()),
+ SyscallFailsWithErrno(ENOENT));
}
TEST(LinkTest, NewnameCannotExist) {
diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc
index caad575cf..fda176261 100644
--- a/test/syscalls/linux/mmap.cc
+++ b/test/syscalls/linux/mmap.cc
@@ -795,7 +795,7 @@ class MMapFileTest : public MMapTest {
bool FSSupportsMap() const {
bool supported = true;
- void* ret = mmap(nullptr, 1, PROT_NONE, 0, fd_.get(), 0);
+ void* ret = mmap(nullptr, 1, PROT_NONE, MAP_PRIVATE, fd_.get(), 0);
if (ret == MAP_FAILED && errno != ENODEV) {
supported = false;
}
diff --git a/test/syscalls/linux/msgqueue.cc b/test/syscalls/linux/msgqueue.cc
index 8994b739a..c4761eba8 100644
--- a/test/syscalls/linux/msgqueue.cc
+++ b/test/syscalls/linux/msgqueue.cc
@@ -30,9 +30,15 @@ namespace gvisor {
namespace testing {
namespace {
-constexpr int msgMax = 8192; // Max size for message in bytes.
+// Source: include/uapi/linux/msg.h
+constexpr int msgMnb = 16384; // Maximum number of bytes in a queue.
constexpr int msgMni = 32000; // Max number of identifiers.
-constexpr int msgMnb = 16384; // Default max size of message queue in bytes.
+constexpr int msgPool =
+ (msgMni * msgMnb / 1024); // Size of buffer pool used to hold message data.
+constexpr int msgMap = msgMnb; // Maximum number of entries in message map.
+constexpr int msgMax = 8192; // Maximum number of bytes in a single message.
+constexpr int msgSsz = 16; // Message segment size.
+constexpr int msgTql = msgMnb; // Maximum number of messages on all queues.
constexpr int kInterruptSignal = SIGALRM;
@@ -40,6 +46,10 @@ constexpr int kInterruptSignal = SIGALRM;
class Queue {
public:
explicit Queue(int id) : id_(id) {}
+ Queue(const Queue&) = delete;
+ Queue& operator=(const Queue&) = delete;
+
+ Queue(Queue&& other) { id_ = other.release(); }
~Queue() {
if (id_ >= 0) {
@@ -59,6 +69,14 @@ class Queue {
int id_ = -1;
};
+PosixErrorOr<Queue> Msgget(key_t key, int flags) {
+ int id = msgget(key, flags);
+ if (id == -1) {
+ return PosixError(errno, absl::StrFormat("msgget(%d, %d)", key, flags));
+ }
+ return Queue(id);
+}
+
// Default size for messages.
constexpr size_t msgSize = 50;
@@ -90,8 +108,7 @@ TEST(MsgqueueTest, MsgGet) {
const key_t key = ftok(keyfile.path().c_str(), 1);
ASSERT_THAT(key, SyscallSucceeds());
- Queue queue(msgget(key, IPC_CREAT));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(key, IPC_CREAT));
EXPECT_THAT(msgget(key, 0), SyscallSucceedsWithValue(queue.get()));
}
@@ -103,27 +120,20 @@ TEST(MsgqueueTest, MsgGetFail) {
EXPECT_THAT(msgget(key, 0), SyscallFailsWithErrno(ENOENT));
- Queue queue(msgget(key, IPC_CREAT));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
-
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(key, IPC_CREAT));
EXPECT_THAT(msgget(key, IPC_CREAT | IPC_EXCL), SyscallFailsWithErrno(EEXIST));
}
// Test using msgget(2) with IPC_PRIVATE option.
TEST(MsgqueueTest, MsgGetIpcPrivate) {
- Queue queue1(msgget(IPC_PRIVATE, 0));
- ASSERT_THAT(queue1.get(), SyscallSucceeds());
-
- Queue queue2(msgget(IPC_PRIVATE, 0));
- ASSERT_THAT(queue2.get(), SyscallSucceeds());
-
+ Queue queue1 = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0));
+ Queue queue2 = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0));
EXPECT_NE(queue1.get(), queue2.get());
}
// Test simple msgsnd and msgrcv.
TEST(MsgqueueTest, MsgOpSimple) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf buf{1, "A message."};
msgbuf rcv;
@@ -138,8 +148,7 @@ TEST(MsgqueueTest, MsgOpSimple) {
// Test msgsnd and msgrcv of an empty message.
TEST(MsgqueueTest, MsgOpEmpty) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf buf{1, ""};
msgbuf rcv;
@@ -151,8 +160,7 @@ TEST(MsgqueueTest, MsgOpEmpty) {
// Test truncation of message with MSG_NOERROR flag.
TEST(MsgqueueTest, MsgOpTruncate) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf buf{1, ""};
msgbuf rcv;
@@ -166,8 +174,7 @@ TEST(MsgqueueTest, MsgOpTruncate) {
// Test msgsnd and msgrcv using invalid arguments.
TEST(MsgqueueTest, MsgOpInvalidArgs) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf buf{1, ""};
@@ -184,8 +191,7 @@ TEST(MsgqueueTest, MsgOpInvalidArgs) {
// Test non-blocking msgrcv with an empty queue.
TEST(MsgqueueTest, MsgOpNoMsg) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf rcv;
EXPECT_THAT(msgrcv(queue.get(), &rcv, sizeof(rcv.mtext) + 1, 0, IPC_NOWAIT),
@@ -195,8 +201,7 @@ TEST(MsgqueueTest, MsgOpNoMsg) {
// Test non-blocking msgrcv with a non-empty queue, but no messages of wanted
// type.
TEST(MsgqueueTest, MsgOpNoMsgType) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf buf{1, ""};
ASSERT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0),
@@ -208,8 +213,7 @@ TEST(MsgqueueTest, MsgOpNoMsgType) {
// Test msgrcv with a larger size message than wanted, and truncation disabled.
TEST(MsgqueueTest, MsgOpTooBig) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf buf{1, ""};
ASSERT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0),
@@ -221,8 +225,7 @@ TEST(MsgqueueTest, MsgOpTooBig) {
// Test receiving messages based on type.
TEST(MsgqueueTest, MsgRcvType) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
// Send messages in an order and receive them in reverse, based on type,
// which shouldn't block.
@@ -248,8 +251,7 @@ TEST(MsgqueueTest, MsgRcvType) {
// Test using MSG_EXCEPT to receive a different-type message.
TEST(MsgqueueTest, MsgExcept) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
std::map<int64_t, msgbuf> typeToBuf = {
{1, msgbuf{1, "Message 1."}},
@@ -274,8 +276,7 @@ TEST(MsgqueueTest, MsgExcept) {
// Test msgrcv with a negative type.
TEST(MsgqueueTest, MsgRcvTypeNegative) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
// When msgtyp is negative, msgrcv returns the first message with mtype less
// than or equal to the absolute value.
@@ -298,8 +299,7 @@ TEST(MsgqueueTest, MsgRcvTypeNegative) {
TEST(MsgqueueTest, MsgOpPermissions) {
AutoCapability cap(CAP_IPC_OWNER, false);
- Queue queue(msgget(IPC_PRIVATE, 0000));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0000));
msgbuf buf{1, ""};
@@ -311,8 +311,7 @@ TEST(MsgqueueTest, MsgOpPermissions) {
// Test limits for messages and queues.
TEST(MsgqueueTest, MsgOpLimits) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf buf{1, "A message."};
@@ -340,11 +339,14 @@ bool MsgCopySupported() {
// test if errno == ENOSYS. This means that the test will always run on
// gVisor, but may be skipped on native linux.
- Queue queue(msgget(IPC_PRIVATE, 0600));
-
+ auto maybe_id = Msgget(IPC_PRIVATE, 0600);
+ if (!maybe_id.ok()) {
+ return false;
+ }
+ Queue queue(std::move(maybe_id.ValueOrDie()));
msgbuf buf{1, "Test message."};
- msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0);
+ msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0);
return !(msgrcv(queue.get(), &buf, sizeof(buf.mtext) + 1, 0,
MSG_COPY | IPC_NOWAIT) == -1 &&
errno == ENOSYS);
@@ -354,9 +356,7 @@ bool MsgCopySupported() {
TEST(MsgqueueTest, MsgCopy) {
SKIP_IF(!MsgCopySupported());
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
-
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf bufs[5] = {
msgbuf{1, "Message 1."}, msgbuf{2, "Message 2."}, msgbuf{3, "Message 3."},
msgbuf{4, "Message 4."}, msgbuf{5, "Message 5."},
@@ -390,9 +390,7 @@ TEST(MsgqueueTest, MsgCopy) {
TEST(MsgqueueTest, MsgCopyInvalidArgs) {
SKIP_IF(!MsgCopySupported());
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
-
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf rcv;
EXPECT_THAT(msgrcv(queue.get(), &rcv, msgSize, 1, MSG_COPY),
SyscallFailsWithErrno(EINVAL));
@@ -406,9 +404,7 @@ TEST(MsgqueueTest, MsgCopyInvalidArgs) {
TEST(MsgqueueTest, MsgCopyInvalidIndex) {
SKIP_IF(!MsgCopySupported());
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
-
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf rcv;
EXPECT_THAT(msgrcv(queue.get(), &rcv, msgSize, -3, MSG_COPY | IPC_NOWAIT),
SyscallFailsWithErrno(ENOMSG));
@@ -419,9 +415,7 @@ TEST(MsgqueueTest, MsgCopyInvalidIndex) {
// Test msgrcv (most probably) blocking on an empty queue.
TEST(MsgqueueTest, MsgRcvBlocking) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
-
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf buf{1, "A message."};
ScopedThread t([&] {
@@ -441,9 +435,7 @@ TEST(MsgqueueTest, MsgRcvBlocking) {
// Test msgrcv (most probably) waiting for a specific-type message.
TEST(MsgqueueTest, MsgRcvTypeBlocking) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
-
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgbuf bufs[5] = {{1, "A message."},
{1, "A message."},
{1, "A message."},
@@ -471,9 +463,7 @@ TEST(MsgqueueTest, MsgRcvTypeBlocking) {
// Test msgsnd (most probably) blocking on a full queue.
TEST(MsgqueueTest, MsgSndBlocking) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
-
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgmax buf{1, ""}; // Has max amount of bytes.
const size_t msgCount = msgMnb / msgMax; // Number of messages that can be
@@ -512,8 +502,7 @@ TEST(MsgqueueTest, MsgSndBlocking) {
// Test removing a queue while a blocking msgsnd is executing.
TEST(MsgqueueTest, MsgSndRmWhileBlocking) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
// Number of messages that can be sent without blocking.
const size_t msgCount = msgMnb / msgMax;
@@ -549,8 +538,7 @@ TEST(MsgqueueTest, MsgSndRmWhileBlocking) {
// Test removing a queue while a blocking msgrcv is executing.
TEST(MsgqueueTest, MsgRcvRmWhileBlocking) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
ScopedThread t([&] {
// Because we're repeating on EINTR, msgsnd may race with msgctl(IPC_RMID)
@@ -568,8 +556,7 @@ TEST(MsgqueueTest, MsgRcvRmWhileBlocking) {
// Test a collection of msgsnd/msgrcv operations in different processes.
TEST(MsgqueueTest, MsgOpGeneral) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
- ASSERT_THAT(queue.get(), SyscallSucceeds());
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
// Create multiple sending/receiving threads that send messages back and
// forth. There's a matching recv for each send, so by the end of the test,
@@ -625,7 +612,7 @@ TEST(MsgqueueTest, MsgOpGeneral) {
void empty_sighandler(int sig, siginfo_t* info, void* context) {}
TEST(MsgqueueTest, InterruptRecv) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
char buf[64];
absl::Notification done, exit;
@@ -663,7 +650,7 @@ TEST(MsgqueueTest, InterruptRecv) {
}
TEST(MsgqueueTest, InterruptSend) {
- Queue queue(msgget(IPC_PRIVATE, 0600));
+ Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600));
msgmax buf{1, ""};
// Number of messages that can be sent without blocking.
const size_t msgCount = msgMnb / msgMax;
@@ -708,6 +695,178 @@ TEST(MsgqueueTest, InterruptSend) {
t.Join();
}
+// Test msgctl with IPC_STAT option.
+TEST(MsgqueueTest, MsgCtlIpcStat) {
+ auto start = absl::Now();
+
+ Queue queue(msgget(IPC_PRIVATE, 0600));
+ ASSERT_THAT(queue.get(), SyscallSucceeds());
+
+ const uid_t uid = getuid();
+ const gid_t gid = getgid();
+ const pid_t pid = getpid();
+
+ struct msqid_ds ds;
+ ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());
+
+ EXPECT_EQ(ds.msg_perm.__key, IPC_PRIVATE);
+ EXPECT_EQ(ds.msg_perm.uid, uid);
+ EXPECT_EQ(ds.msg_perm.gid, gid);
+ EXPECT_EQ(ds.msg_perm.cuid, uid);
+ EXPECT_EQ(ds.msg_perm.cgid, gid);
+ EXPECT_EQ(ds.msg_perm.mode, 0600);
+
+ EXPECT_EQ(ds.msg_stime, 0);
+ EXPECT_EQ(ds.msg_rtime, 0);
+ EXPECT_GE(ds.msg_ctime, absl::ToTimeT(start));
+
+ EXPECT_EQ(ds.msg_cbytes, 0);
+ EXPECT_EQ(ds.msg_qnum, 0);
+ EXPECT_EQ(ds.msg_qbytes, msgMnb);
+ EXPECT_EQ(ds.msg_lspid, 0);
+ EXPECT_EQ(ds.msg_lrpid, 0);
+
+ // The timestamps only have a resolution of seconds; slow down so we actually
+ // see the timestamps change.
+ absl::SleepFor(absl::Seconds(1));
+ auto pre_send = absl::Now();
+
+ msgbuf buf{1, "A message."};
+ ASSERT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0),
+ SyscallSucceeds());
+
+ ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());
+
+ EXPECT_GE(ds.msg_stime, absl::ToTimeT(pre_send));
+ EXPECT_EQ(ds.msg_rtime, 0);
+ EXPECT_GE(ds.msg_ctime, absl::ToTimeT(start));
+
+ EXPECT_EQ(ds.msg_cbytes, msgSize);
+ EXPECT_EQ(ds.msg_qnum, 1);
+ EXPECT_EQ(ds.msg_qbytes, msgMnb);
+ EXPECT_EQ(ds.msg_lspid, pid);
+ EXPECT_EQ(ds.msg_lrpid, 0);
+
+ absl::SleepFor(absl::Seconds(1));
+ auto pre_receive = absl::Now();
+
+ ASSERT_THAT(msgrcv(queue.get(), &buf, sizeof(buf.mtext), 0, 0),
+ SyscallSucceedsWithValue(msgSize));
+
+ ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());
+
+ EXPECT_GE(ds.msg_stime, absl::ToTimeT(pre_send));
+ EXPECT_GE(ds.msg_rtime, absl::ToTimeT(pre_receive));
+ EXPECT_GE(ds.msg_ctime, absl::ToTimeT(start));
+
+ EXPECT_EQ(ds.msg_cbytes, 0);
+ EXPECT_EQ(ds.msg_qnum, 0);
+ EXPECT_EQ(ds.msg_qbytes, msgMnb);
+ EXPECT_EQ(ds.msg_lspid, pid);
+ EXPECT_EQ(ds.msg_lrpid, pid);
+}
+
+// Test msgctl with IPC_STAT option on a write-only queue.
+TEST(MsgqueueTest, MsgCtlIpcStatWriteOnly) {
+ // Drop CAP_IPC_OWNER which allows us to bypass permissions.
+ AutoCapability cap(CAP_IPC_OWNER, false);
+
+ Queue queue(msgget(IPC_PRIVATE, 0200));
+ ASSERT_THAT(queue.get(), SyscallSucceeds());
+
+ struct msqid_ds ds;
+ ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// Test msgctl with IPC_SET option.
+TEST(MsgqueueTest, MsgCtlIpcSet) {
+ Queue queue(msgget(IPC_PRIVATE, 0600));
+ ASSERT_THAT(queue.get(), SyscallSucceeds());
+
+ struct msqid_ds ds;
+ ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());
+ EXPECT_EQ(ds.msg_perm.mode, 0600);
+
+ ds.msg_perm.mode = 0777;
+ ASSERT_THAT(msgctl(queue.get(), IPC_SET, &ds), SyscallSucceeds());
+
+ ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());
+ EXPECT_EQ(ds.msg_perm.mode, 0777);
+}
+
+// Test increasing msg_qbytes beyond limit with IPC_SET.
+TEST(MsgqueueTest, MsgCtlIpcSetMaxBytes) {
+ // Drop CAP_SYS_RESOURCE which allows us to increase msg_qbytes beyond the
+ // system parameter MSGMNB.
+ AutoCapability cap(CAP_SYS_RESOURCE, false);
+
+ Queue queue(msgget(IPC_PRIVATE, 0600));
+ ASSERT_THAT(queue.get(), SyscallSucceeds());
+
+ struct msqid_ds ds;
+ ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());
+ EXPECT_EQ(ds.msg_qbytes, msgMnb);
+
+ ds.msg_qbytes = msgMnb - 10;
+ ASSERT_THAT(msgctl(queue.get(), IPC_SET, &ds), SyscallSucceeds());
+
+ ASSERT_THAT(msgctl(queue.get(), IPC_STAT, &ds), SyscallSucceeds());
+ EXPECT_EQ(ds.msg_qbytes, msgMnb - 10);
+
+ ds.msg_qbytes = msgMnb + 10;
+ EXPECT_THAT(msgctl(queue.get(), IPC_SET, &ds), SyscallFailsWithErrno(EPERM));
+}
+
+// Test msgctl with IPC_INFO option.
+TEST(MsgqueueTest, MsgCtlIpcInfo) {
+ struct msginfo info;
+ ASSERT_THAT(msgctl(0, IPC_INFO, reinterpret_cast<struct msqid_ds*>(&info)),
+ SyscallSucceeds());
+
+ EXPECT_GT(info.msgmax, 0);
+ EXPECT_GT(info.msgmni, 0);
+ EXPECT_GT(info.msgmnb, 0);
+ EXPECT_EQ(info.msgpool, msgPool);
+ EXPECT_EQ(info.msgmap, msgMap);
+ EXPECT_EQ(info.msgssz, msgSsz);
+ EXPECT_EQ(info.msgtql, msgTql);
+}
+
+// Test msgctl with MSG_INFO option.
+TEST(MsgqueueTest, MsgCtlMsgInfo) {
+ struct msginfo info;
+ ASSERT_THAT(msgctl(0, MSG_INFO, reinterpret_cast<struct msqid_ds*>(&info)),
+ SyscallSucceeds());
+
+ EXPECT_GT(info.msgmax, 0);
+ EXPECT_GT(info.msgmni, 0);
+ EXPECT_GT(info.msgmnb, 0);
+ EXPECT_EQ(info.msgpool, 0); // Number of queues in the system.
+ EXPECT_EQ(info.msgmap, 0); // Total number of messages in all queues.
+ EXPECT_EQ(info.msgtql, 0); // Total number of bytes in all messages.
+ EXPECT_EQ(info.msgssz, msgSsz);
+
+ // Add a queue and a message.
+ Queue queue(msgget(IPC_PRIVATE, 0600));
+ ASSERT_THAT(queue.get(), SyscallSucceeds());
+
+ msgbuf buf{1, "A message."};
+ ASSERT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0),
+ SyscallSucceeds());
+
+ ASSERT_THAT(msgctl(0, MSG_INFO, reinterpret_cast<struct msqid_ds*>(&info)),
+ SyscallSucceeds());
+
+ EXPECT_GT(info.msgmax, 0);
+ EXPECT_GT(info.msgmni, 0);
+ EXPECT_GT(info.msgmnb, 0);
+ EXPECT_EQ(info.msgpool, 1); // Number of queues in the system.
+ EXPECT_EQ(info.msgmap, 1); // Total number of messages in all queues.
+ EXPECT_EQ(info.msgtql, msgSize); // Total number of bytes in all messages.
+ EXPECT_EQ(info.msgssz, msgSsz);
+}
+
} // namespace
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc
index 98339277b..ca4ab0aad 100644
--- a/test/syscalls/linux/packet_socket.cc
+++ b/test/syscalls/linux/packet_socket.cc
@@ -14,13 +14,13 @@
#include <arpa/inet.h>
#include <ifaddrs.h>
-#include <linux/capability.h>
-#include <linux/if_arp.h>
-#include <linux/if_packet.h>
#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_arp.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
+#include <netpacket/packet.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -31,6 +31,7 @@
#include "absl/base/internal/endian.h"
#include "test/syscalls/linux/unix_domain_socket_test_util.h"
#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
#include "test/util/file_descriptor.h"
#include "test/util/socket_util.h"
#include "test/util/test_util.h"
@@ -85,7 +86,7 @@ void SendUDPMessage(int sock) {
// Send an IP packet and make sure ETH_P_<something else> doesn't pick it up.
TEST(BasicCookedPacketTest, WrongType) {
- if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) {
ASSERT_THAT(socket(AF_PACKET, SOCK_DGRAM, ETH_P_PUP),
SyscallFailsWithErrno(EPERM));
GTEST_SKIP();
@@ -123,7 +124,7 @@ class CookedPacketTest : public ::testing::TestWithParam<int> {
};
void CookedPacketTest::SetUp() {
- if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) {
ASSERT_THAT(socket(AF_PACKET, SOCK_DGRAM, htons(GetParam())),
SyscallFailsWithErrno(EPERM));
GTEST_SKIP();
@@ -149,7 +150,7 @@ void CookedPacketTest::SetUp() {
void CookedPacketTest::TearDown() {
// TearDown will be run even if we skip the test.
- if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) {
EXPECT_THAT(close(socket_), SyscallSucceeds());
}
}
diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc
index 07beb8ba0..61714d1da 100644
--- a/test/syscalls/linux/packet_socket_raw.cc
+++ b/test/syscalls/linux/packet_socket_raw.cc
@@ -13,14 +13,13 @@
// limitations under the License.
#include <arpa/inet.h>
-#include <linux/capability.h>
-#include <linux/filter.h>
-#include <linux/if_arp.h>
-#include <linux/if_packet.h>
#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_arp.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
+#include <netpacket/packet.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -32,6 +31,7 @@
#include "absl/base/internal/endian.h"
#include "test/syscalls/linux/unix_domain_socket_test_util.h"
#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
#include "test/util/file_descriptor.h"
#include "test/util/socket_util.h"
#include "test/util/test_util.h"
@@ -100,7 +100,7 @@ class RawPacketTest : public ::testing::TestWithParam<int> {
};
void RawPacketTest::SetUp() {
- if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) {
ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, htons(GetParam())),
SyscallFailsWithErrno(EPERM));
GTEST_SKIP();
@@ -150,7 +150,7 @@ void RawPacketTest::SetUp() {
void RawPacketTest::TearDown() {
// TearDown will be run even if we skip the test.
- if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) {
EXPECT_THAT(close(s_), SyscallSucceeds());
}
}
@@ -340,7 +340,7 @@ TEST_P(RawPacketTest, Send) {
// Check that setting SO_RCVBUF below min is clamped to the minimum
// receive buffer size.
TEST_P(RawPacketTest, SetSocketRecvBufBelowMin) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
// Discover minimum receive buf size by trying to set it to zero.
// See:
@@ -373,7 +373,7 @@ TEST_P(RawPacketTest, SetSocketRecvBufBelowMin) {
// Check that setting SO_RCVBUF above max is clamped to the maximum
// receive buffer size.
TEST_P(RawPacketTest, SetSocketRecvBufAboveMax) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
// Discover max buf size by trying to set the largest possible buffer size.
constexpr int kRcvBufSz = 0xffffffff;
@@ -400,7 +400,7 @@ TEST_P(RawPacketTest, SetSocketRecvBufAboveMax) {
// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored.
TEST_P(RawPacketTest, SetSocketRecvBuf) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
int max = 0;
int min = 0;
@@ -449,7 +449,7 @@ TEST_P(RawPacketTest, SetSocketRecvBuf) {
// Check that setting SO_SNDBUF below min is clamped to the minimum
// receive buffer size.
TEST_P(RawPacketTest, SetSocketSendBufBelowMin) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
// Discover minimum buffer size by trying to set it to zero.
constexpr int kSndBufSz = 0;
@@ -480,7 +480,7 @@ TEST_P(RawPacketTest, SetSocketSendBufBelowMin) {
// Check that setting SO_SNDBUF above max is clamped to the maximum
// send buffer size.
TEST_P(RawPacketTest, SetSocketSendBufAboveMax) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
// Discover maximum buffer size by trying to set it to a large value.
constexpr int kSndBufSz = 0xffffffff;
@@ -507,7 +507,7 @@ TEST_P(RawPacketTest, SetSocketSendBufAboveMax) {
// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
TEST_P(RawPacketTest, SetSocketSendBuf) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
int max = 0;
int min = 0;
@@ -551,7 +551,7 @@ TEST_P(RawPacketTest, SetSocketSendBuf) {
}
TEST_P(RawPacketTest, GetSocketError) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
int val = 0;
socklen_t val_len = sizeof(val);
@@ -561,7 +561,7 @@ TEST_P(RawPacketTest, GetSocketError) {
}
TEST_P(RawPacketTest, GetSocketErrorBind) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
{
// Bind to the loopback device.
@@ -627,7 +627,7 @@ TEST_P(RawPacketTest, SetSocketDetachFilterNoInstalledFilter) {
}
TEST_P(RawPacketTest, GetSocketDetachFilter) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
int val = 0;
socklen_t val_len = sizeof(val);
@@ -636,7 +636,7 @@ TEST_P(RawPacketTest, GetSocketDetachFilter) {
}
TEST_P(RawPacketTest, SetAndGetSocketLinger) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
int level = SOL_SOCKET;
int type = SO_LINGER;
@@ -657,7 +657,7 @@ TEST_P(RawPacketTest, SetAndGetSocketLinger) {
}
TEST_P(RawPacketTest, GetSocketAcceptConn) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
int got = -1;
socklen_t length = sizeof(got);
@@ -673,7 +673,7 @@ INSTANTIATE_TEST_SUITE_P(AllInetTests, RawPacketTest,
class RawPacketMsgSizeTest : public ::testing::TestWithParam<TestAddress> {};
TEST_P(RawPacketMsgSizeTest, SendTooLong) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
TestAddress addr = GetParam().WithPort(kPort);
@@ -690,8 +690,11 @@ TEST_P(RawPacketMsgSizeTest, SendTooLong) {
SyscallFailsWithErrno(EMSGSIZE));
}
+// TODO(https://fxbug.dev/76957): Run this test on Fuchsia once splice is
+// available.
+#ifndef __Fuchsia__
TEST_P(RawPacketMsgSizeTest, SpliceTooLong) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability()));
const char buf[65536] = {};
int fds[2];
@@ -718,6 +721,7 @@ TEST_P(RawPacketMsgSizeTest, SpliceTooLong) {
EXPECT_THAT(n, SyscallSucceedsWithValue(sizeof(buf)));
}
}
+#endif // __Fuchsia__
INSTANTIATE_TEST_SUITE_P(AllRawPacketMsgSizeTest, RawPacketMsgSizeTest,
::testing::Values(V4Loopback(), V6Loopback()));
diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc
index 0bba86846..209801e36 100644
--- a/test/syscalls/linux/pipe.cc
+++ b/test/syscalls/linux/pipe.cc
@@ -13,11 +13,13 @@
// limitations under the License.
#include <fcntl.h> /* Obtain O_* constant definitions */
+#include <linux/futex.h>
#include <linux/magic.h>
#include <signal.h>
#include <sys/ioctl.h>
#include <sys/statfs.h>
#include <sys/uio.h>
+#include <syscall.h>
#include <unistd.h>
#include <vector>
@@ -50,6 +52,9 @@ std::atomic<int> global_num_signals_received = 0;
void SigRecordingHandler(int signum, siginfo_t* siginfo,
void* unused_ucontext) {
global_num_signals_received++;
+ ASSERT_THAT(syscall(SYS_futex, &global_num_signals_received,
+ FUTEX_WAKE | FUTEX_PRIVATE_FLAG, INT_MAX, 0, 0, 0),
+ SyscallSucceeds());
}
PosixErrorOr<Cleanup> RegisterSignalHandler(int signum) {
@@ -61,11 +66,14 @@ PosixErrorOr<Cleanup> RegisterSignalHandler(int signum) {
return ScopedSigaction(signum, handler);
}
-void WaitForSignalDelivery(absl::Duration timeout, int max_expected) {
- absl::Time wait_start = absl::Now();
- while (global_num_signals_received < max_expected &&
- absl::Now() - wait_start < timeout) {
- absl::SleepFor(absl::Milliseconds(10));
+void WaitForSignalDelivery(int expected) {
+ while (1) {
+ int v = global_num_signals_received;
+ if (v >= expected) {
+ break;
+ }
+ RetryEINTR(syscall)(SYS_futex, &global_num_signals_received,
+ FUTEX_WAIT | FUTEX_PRIVATE_FLAG, v, 0, 0, 0);
}
}
@@ -371,7 +379,7 @@ TEST_P(PipeTest, ReaderSideCloses) {
EXPECT_THAT(write(wfd_.get(), &buf, sizeof(buf)),
SyscallFailsWithErrno(EPIPE));
- WaitForSignalDelivery(absl::Seconds(1), 1);
+ WaitForSignalDelivery(1);
ASSERT_EQ(global_num_signals_received, 1);
}
@@ -411,7 +419,7 @@ TEST_P(PipeTest, BlockWriteClosed) {
notify.WaitForNotification();
ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
- WaitForSignalDelivery(absl::Seconds(1), 1);
+ WaitForSignalDelivery(1);
ASSERT_EQ(global_num_signals_received, 1);
t.Join();
@@ -443,7 +451,7 @@ TEST_P(PipeTest, BlockPartialWriteClosed) {
// Unblock the above.
ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
- WaitForSignalDelivery(absl::Seconds(1), 2);
+ WaitForSignalDelivery(2);
ASSERT_EQ(global_num_signals_received, 2);
t.Join();
diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc
index 4cbe30fc1..672f2dd42 100644
--- a/test/syscalls/linux/proc_net.cc
+++ b/test/syscalls/linux/proc_net.cc
@@ -152,6 +152,37 @@ TEST(ProcNetDev, Format) {
EXPECT_GT(entries.size(), 0);
}
+// GetMibsAllocationSysctl retuns a value of the net.core.mibs_allocation
+// sysctl./proc/sys/net/core/mibs_allocation
+//
+// When mibs_allocation is unset, a netns creation inherits MIB from init
+// network namespace. Otherwise, MIBS is allocated for each namespace.
+int GetMibsAllocationSysctl() {
+ auto ret = GetContents("/proc/sys/net/core/mibs_allocation");
+ if (!ret.ok()) {
+ // The current kernel doesn't support mibs_allocation.
+ return 1;
+ }
+ int32_t val;
+ EXPECT_TRUE(absl::SimpleAtoi(ret.ValueOrDie(), &val));
+ return val;
+}
+
+// GetSNMPMetricFromProc retrieves the metric named `item` of `type` from the
+// `snmp` string which represents the file contents of /proc/net/snmp.
+//
+// Note to test writers: If you are writing tests to check the change in value
+// of SNMP metrics, note that if GetMibsAllocationSysctl() == 0
+// (net.core.mibs_allocation isn't set), MIB is from the init network namespace
+// and hence these metrics can have system-wide noise.
+//
+// A feasible way of testing is the following:
+// * Consult RFC 1213 to learn the "SYNTAX" of the metric.
+// * If net.core.mibs_allocation is set:
+// - Can test any metric while checking for hard equality.
+// * If net.core.mibs_allocation is NOT set (system-wide noise is present):
+// - Only test "SYNTAX Counter" metrics.
+// - Counter metrics are increasing in nature, so use LE/GE equality checks.
PosixErrorOr<uint64_t> GetSNMPMetricFromProc(const std::string snmp,
const std::string& type,
const std::string& item) {
@@ -226,12 +257,25 @@ TEST(ProcNetSnmp, TcpReset) {
newAttemptFails = ASSERT_NO_ERRNO_AND_VALUE(
GetSNMPMetricFromProc(snmp, "Tcp", "AttemptFails"));
- EXPECT_EQ(oldActiveOpens, newActiveOpens - 1);
- EXPECT_EQ(oldOutRsts, newOutRsts - 1);
- EXPECT_EQ(oldAttemptFails, newAttemptFails - 1);
+ if (GetMibsAllocationSysctl()) {
+ EXPECT_EQ(oldActiveOpens + 1, newActiveOpens);
+ EXPECT_EQ(oldOutRsts + 1, newOutRsts);
+ EXPECT_EQ(oldAttemptFails + 1, newAttemptFails);
+ } else {
+ // System-wide statistics can have some noise. These metrics should have
+ // increased by at least 1.
+ EXPECT_LE(oldActiveOpens + 1, newActiveOpens);
+ EXPECT_LE(oldOutRsts + 1, newOutRsts);
+ EXPECT_LE(oldAttemptFails + 1, newAttemptFails);
+ }
}
TEST(ProcNetSnmp, TcpEstab) {
+ // This test aims to test the tcpCurrEstab metric which has "SYNTAX Gauge" as
+ // per RFC 1213. Hence, it becomes infeasible to test this when system-wide
+ // statistics have noise.
+ SKIP_IF(GetMibsAllocationSysctl() == 0);
+
// TODO(gvisor.dev/issue/866): epsocket metrics are not savable.
DisableSave ds;
@@ -286,9 +330,9 @@ TEST(ProcNetSnmp, TcpEstab) {
newCurrEstab = ASSERT_NO_ERRNO_AND_VALUE(
GetSNMPMetricFromProc(snmp, "Tcp", "CurrEstab"));
- EXPECT_EQ(oldActiveOpens, newActiveOpens - 1);
- EXPECT_EQ(oldPassiveOpens, newPassiveOpens - 1);
- EXPECT_EQ(oldCurrEstab, newCurrEstab - 2);
+ EXPECT_EQ(oldActiveOpens + 1, newActiveOpens);
+ EXPECT_EQ(oldPassiveOpens + 1, newPassiveOpens);
+ EXPECT_EQ(oldCurrEstab + 2, newCurrEstab);
// Send 1 byte from client to server.
ASSERT_THAT(send(s_connect.get(), "a", 1, 0), SyscallSucceedsWithValue(1));
@@ -355,8 +399,15 @@ TEST(ProcNetSnmp, UdpNoPorts) {
newNoPorts =
ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Udp", "NoPorts"));
- EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1);
- EXPECT_EQ(oldNoPorts, newNoPorts - 1);
+ if (GetMibsAllocationSysctl()) {
+ EXPECT_EQ(oldOutDatagrams + 1, newOutDatagrams);
+ EXPECT_EQ(oldNoPorts + 1, newNoPorts);
+ } else {
+ // System-wide statistics can have some noise. These metrics should have
+ // increased by at least 1.
+ EXPECT_LE(oldOutDatagrams + 1, newOutDatagrams);
+ EXPECT_LE(oldNoPorts + 1, newNoPorts);
+ }
}
TEST(ProcNetSnmp, UdpIn) {
@@ -405,8 +456,15 @@ TEST(ProcNetSnmp, UdpIn) {
newInDatagrams = ASSERT_NO_ERRNO_AND_VALUE(
GetSNMPMetricFromProc(snmp, "Udp", "InDatagrams"));
- EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1);
- EXPECT_EQ(oldInDatagrams, newInDatagrams - 1);
+ if (GetMibsAllocationSysctl()) {
+ EXPECT_EQ(oldOutDatagrams + 1, newOutDatagrams);
+ EXPECT_EQ(oldInDatagrams + 1, newInDatagrams);
+ } else {
+ // System-wide statistics can have some noise. These metrics should have
+ // increased by at least 1.
+ EXPECT_LE(oldOutDatagrams + 1, newOutDatagrams);
+ EXPECT_LE(oldInDatagrams + 1, newInDatagrams);
+ }
}
TEST(ProcNetSnmp, CheckNetStat) {
diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc
index 4e69e389b..66f0e6ca4 100644
--- a/test/syscalls/linux/raw_socket.cc
+++ b/test/syscalls/linux/raw_socket.cc
@@ -97,7 +97,7 @@ class RawSocketTest : public ::testing::TestWithParam<std::tuple<int, int>> {
};
void RawSocketTest::SetUp() {
- if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) {
ASSERT_THAT(socket(Family(), SOCK_RAW, Protocol()),
SyscallFailsWithErrno(EPERM));
GTEST_SKIP();
@@ -121,7 +121,7 @@ void RawSocketTest::SetUp() {
void RawSocketTest::TearDown() {
// TearDown will be run even if we skip the test.
- if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) {
EXPECT_THAT(close(s_), SyscallSucceeds());
}
}
@@ -130,7 +130,7 @@ void RawSocketTest::TearDown() {
// BasicRawSocket::Setup creates the first one, so we only have to create one
// more here.
TEST_P(RawSocketTest, MultipleCreation) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
int s2;
ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
@@ -140,7 +140,7 @@ TEST_P(RawSocketTest, MultipleCreation) {
// Test that shutting down an unconnected socket fails.
TEST_P(RawSocketTest, FailShutdownWithoutConnect) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
@@ -148,7 +148,7 @@ TEST_P(RawSocketTest, FailShutdownWithoutConnect) {
// Shutdown is a no-op for raw sockets (and datagram sockets in general).
TEST_P(RawSocketTest, ShutdownWriteNoop) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(
connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
@@ -163,7 +163,7 @@ TEST_P(RawSocketTest, ShutdownWriteNoop) {
// Shutdown is a no-op for raw sockets (and datagram sockets in general).
TEST_P(RawSocketTest, ShutdownReadNoop) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(
connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
@@ -180,14 +180,14 @@ TEST_P(RawSocketTest, ShutdownReadNoop) {
// Test that listen() fails.
TEST_P(RawSocketTest, FailListen) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP));
}
// Test that accept() fails.
TEST_P(RawSocketTest, FailAccept) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
struct sockaddr saddr;
socklen_t addrlen;
@@ -195,7 +195,7 @@ TEST_P(RawSocketTest, FailAccept) {
}
TEST_P(RawSocketTest, BindThenGetSockName) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_);
ASSERT_THAT(bind(s_, addr, AddrLen()), SyscallSucceeds());
@@ -219,7 +219,7 @@ TEST_P(RawSocketTest, BindThenGetSockName) {
}
TEST_P(RawSocketTest, ConnectThenGetSockName) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_);
ASSERT_THAT(connect(s_, addr, AddrLen()), SyscallSucceeds());
@@ -244,7 +244,7 @@ TEST_P(RawSocketTest, ConnectThenGetSockName) {
// Test that getpeername() returns nothing before connect().
TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
struct sockaddr saddr;
socklen_t addrlen = sizeof(saddr);
@@ -254,7 +254,7 @@ TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) {
// Test that getpeername() returns something after connect().
TEST_P(RawSocketTest, GetPeerName) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(
connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
@@ -268,7 +268,7 @@ TEST_P(RawSocketTest, GetPeerName) {
// Test that the socket is writable immediately.
TEST_P(RawSocketTest, PollWritableImmediately) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
struct pollfd pfd = {};
pfd.fd = s_;
@@ -278,7 +278,7 @@ TEST_P(RawSocketTest, PollWritableImmediately) {
// Test that the socket isn't readable before receiving anything.
TEST_P(RawSocketTest, PollNotReadableInitially) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Try to receive data with MSG_DONTWAIT, which returns immediately if there's
// nothing to be read.
@@ -289,7 +289,7 @@ TEST_P(RawSocketTest, PollNotReadableInitially) {
// Test that the socket becomes readable once something is written to it.
TEST_P(RawSocketTest, PollTriggeredOnWrite) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Write something so that there's data to be read.
// Arbitrary.
@@ -304,7 +304,7 @@ TEST_P(RawSocketTest, PollTriggeredOnWrite) {
// Test that we can connect() to a valid IP (loopback).
TEST_P(RawSocketTest, ConnectToLoopback) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(
connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
@@ -313,7 +313,7 @@ TEST_P(RawSocketTest, ConnectToLoopback) {
// Test that calling send() without connect() fails.
TEST_P(RawSocketTest, SendWithoutConnectFails) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Arbitrary.
constexpr char kBuf[] = "Endgame was good";
@@ -323,7 +323,7 @@ TEST_P(RawSocketTest, SendWithoutConnectFails) {
// Wildcard Bind.
TEST_P(RawSocketTest, BindToWildcard) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
struct sockaddr_storage addr;
addr = {};
@@ -344,16 +344,15 @@ TEST_P(RawSocketTest, BindToWildcard) {
// Bind to localhost.
TEST_P(RawSocketTest, BindToLocalhost) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
- ASSERT_THAT(
- bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
- SyscallSucceeds());
+ ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+ SyscallSucceeds());
}
// Bind to a different address.
TEST_P(RawSocketTest, BindToInvalid) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
struct sockaddr_storage bind_addr = addr_;
if (Family() == AF_INET) {
@@ -365,13 +364,14 @@ TEST_P(RawSocketTest, BindToInvalid) {
memset(&sin6->sin6_addr.s6_addr, 0, sizeof(sin6->sin6_addr.s6_addr));
sin6->sin6_addr.s6_addr[0] = 1; // 1: - An address that we can't bind to.
}
- ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr),
- AddrLen()), SyscallFailsWithErrno(EADDRNOTAVAIL));
+ ASSERT_THAT(
+ bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr), AddrLen()),
+ SyscallFailsWithErrno(EADDRNOTAVAIL));
}
// Send and receive an packet.
TEST_P(RawSocketTest, SendAndReceive) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Arbitrary.
constexpr char kBuf[] = "TB12";
@@ -386,7 +386,7 @@ TEST_P(RawSocketTest, SendAndReceive) {
// We should be able to create multiple raw sockets for the same protocol and
// receive the same packet on both.
TEST_P(RawSocketTest, MultipleSocketReceive) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
int s2;
ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
@@ -401,11 +401,11 @@ TEST_P(RawSocketTest, MultipleSocketReceive) {
// Receive it on socket 2.
std::vector<char> recv_buf2(sizeof(kBuf) + HdrLen());
- ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s2, recv_buf2.data(),
- recv_buf2.size()));
+ ASSERT_NO_FATAL_FAILURE(
+ ReceiveBufFrom(s2, recv_buf2.data(), recv_buf2.size()));
- EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(),
- recv_buf2.data() + HdrLen(), sizeof(kBuf)),
+ EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(), recv_buf2.data() + HdrLen(),
+ sizeof(kBuf)),
0);
ASSERT_THAT(close(s2), SyscallSucceeds());
@@ -413,7 +413,7 @@ TEST_P(RawSocketTest, MultipleSocketReceive) {
// Test that connect sends packets to the right place.
TEST_P(RawSocketTest, SendAndReceiveViaConnect) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(
connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
@@ -432,11 +432,10 @@ TEST_P(RawSocketTest, SendAndReceiveViaConnect) {
// Bind to localhost, then send and receive packets.
TEST_P(RawSocketTest, BindSendAndReceive) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
- ASSERT_THAT(
- bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
- SyscallSucceeds());
+ ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+ SyscallSucceeds());
// Arbitrary.
constexpr char kBuf[] = "DR16";
@@ -450,11 +449,10 @@ TEST_P(RawSocketTest, BindSendAndReceive) {
// Bind and connect to localhost and send/receive packets.
TEST_P(RawSocketTest, BindConnectSendAndReceive) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
- ASSERT_THAT(
- bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
- SyscallSucceeds());
+ ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+ SyscallSucceeds());
ASSERT_THAT(
connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
SyscallSucceeds());
@@ -472,7 +470,7 @@ TEST_P(RawSocketTest, BindConnectSendAndReceive) {
// Check that setting SO_RCVBUF below min is clamped to the minimum
// receive buffer size.
TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Discover minimum receive buf size by trying to set it to zero.
// See:
@@ -505,7 +503,7 @@ TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) {
// Check that setting SO_RCVBUF above max is clamped to the maximum
// receive buffer size.
TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Discover max buf size by trying to set the largest possible buffer size.
constexpr int kRcvBufSz = 0xffffffff;
@@ -532,7 +530,7 @@ TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) {
// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored.
TEST_P(RawSocketTest, SetSocketRecvBuf) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
int max = 0;
int min = 0;
@@ -582,7 +580,7 @@ TEST_P(RawSocketTest, SetSocketRecvBuf) {
// Check that setting SO_SNDBUF below min is clamped to the minimum
// receive buffer size.
TEST_P(RawSocketTest, SetSocketSendBufBelowMin) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Discover minimum buffer size by trying to set it to zero.
constexpr int kSndBufSz = 0;
@@ -613,7 +611,7 @@ TEST_P(RawSocketTest, SetSocketSendBufBelowMin) {
// Check that setting SO_SNDBUF above max is clamped to the maximum
// send buffer size.
TEST_P(RawSocketTest, SetSocketSendBufAboveMax) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Discover maximum buffer size by trying to set it to a large value.
constexpr int kSndBufSz = 0xffffffff;
@@ -640,7 +638,7 @@ TEST_P(RawSocketTest, SetSocketSendBufAboveMax) {
// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
TEST_P(RawSocketTest, SetSocketSendBuf) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
int max = 0;
int min = 0;
@@ -686,11 +684,10 @@ TEST_P(RawSocketTest, SetSocketSendBuf) {
// Test that receive buffer limits are not enforced when the recv buffer is
// empty.
TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
- ASSERT_THAT(
- bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
- SyscallSucceeds());
+ ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+ SyscallSucceeds());
ASSERT_THAT(
connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
SyscallSucceeds());
@@ -717,9 +714,7 @@ TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) {
// Receive the packet and make sure it's identical.
std::vector<char> recv_buf(buf.size() + HdrLen());
ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
- EXPECT_EQ(
- memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()),
- 0);
+ EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0);
}
{
@@ -732,9 +727,7 @@ TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) {
// Receive the packet and make sure it's identical.
std::vector<char> recv_buf(buf.size() + HdrLen());
ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
- EXPECT_EQ(
- memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()),
- 0);
+ EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0);
}
}
@@ -748,11 +741,10 @@ TEST_P(RawSocketTest, RecvBufLimits) {
if (Protocol() == IPPROTO_TCP) {
return;
}
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
- ASSERT_THAT(
- bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
- SyscallSucceeds());
+ ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+ SyscallSucceeds());
ASSERT_THAT(
connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
SyscallSucceeds());
@@ -812,9 +804,7 @@ TEST_P(RawSocketTest, RecvBufLimits) {
// Receive the packet and make sure it's identical.
std::vector<char> recv_buf(buf.size() + HdrLen());
ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
- EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(),
- buf.size()),
- 0);
+ EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0);
}
// Assert that the last packet is dropped because the receive buffer should
@@ -883,7 +873,7 @@ TEST_P(RawSocketTest, GetSocketDetachFilter) {
// AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to.
TEST(RawSocketTest, IPv6ProtoRaw) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
int sock;
ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW),
@@ -900,7 +890,7 @@ TEST(RawSocketTest, IPv6ProtoRaw) {
}
TEST(RawSocketTest, IPv6SendMsg) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
int sock;
ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP),
@@ -928,7 +918,7 @@ TEST(RawSocketTest, IPv6SendMsg) {
}
TEST_P(RawSocketTest, ConnectOnIPv6Socket) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
int sock;
ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP),
diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc
index e54d781c9..d45bd07bc 100644
--- a/test/syscalls/linux/raw_socket_hdrincl.cc
+++ b/test/syscalls/linux/raw_socket_hdrincl.cc
@@ -62,7 +62,7 @@ class RawHDRINCL : public ::testing::Test {
};
void RawHDRINCL::SetUp() {
- if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) {
ASSERT_THAT(socket(AF_INET, SOCK_RAW, IPPROTO_RAW),
SyscallFailsWithErrno(EPERM));
GTEST_SKIP();
@@ -80,7 +80,7 @@ void RawHDRINCL::SetUp() {
void RawHDRINCL::TearDown() {
// TearDown will be run even if we skip the test.
- if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) {
EXPECT_THAT(close(socket_), SyscallSucceeds());
}
}
diff --git a/test/syscalls/linux/raw_socket_icmp.cc b/test/syscalls/linux/raw_socket_icmp.cc
index 80a524273..3f9717284 100644
--- a/test/syscalls/linux/raw_socket_icmp.cc
+++ b/test/syscalls/linux/raw_socket_icmp.cc
@@ -76,7 +76,7 @@ class RawSocketICMPTest : public ::testing::Test {
};
void RawSocketICMPTest::SetUp() {
- if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) {
ASSERT_THAT(socket(AF_INET, SOCK_RAW, IPPROTO_ICMP),
SyscallFailsWithErrno(EPERM));
GTEST_SKIP();
@@ -94,7 +94,7 @@ void RawSocketICMPTest::SetUp() {
void RawSocketICMPTest::TearDown() {
// TearDown will be run even if we skip the test.
- if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+ if (ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) {
EXPECT_THAT(close(s_), SyscallSucceeds());
}
}
@@ -102,7 +102,7 @@ void RawSocketICMPTest::TearDown() {
// We'll only read an echo in this case, as the kernel won't respond to the
// malformed ICMP checksum.
TEST_F(RawSocketICMPTest, SendAndReceiveBadChecksum) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Prepare and send an ICMP packet. Use arbitrary junk for checksum, sequence,
// and ID. None of that should matter for raw sockets - the kernel should
@@ -131,7 +131,7 @@ TEST_F(RawSocketICMPTest, SendAndReceiveBadChecksum) {
// Send and receive an ICMP packet.
TEST_F(RawSocketICMPTest, SendAndReceive) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
// Prepare and send an ICMP packet. Use arbitrary junk for sequence and ID.
// None of that should matter for raw sockets - the kernel should still give
@@ -151,7 +151,7 @@ TEST_F(RawSocketICMPTest, SendAndReceive) {
// We should be able to create multiple raw sockets for the same protocol and
// receive the same packet on both.
TEST_F(RawSocketICMPTest, MultipleSocketReceive) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
FileDescriptor s2 =
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_ICMP));
@@ -214,7 +214,7 @@ TEST_F(RawSocketICMPTest, MultipleSocketReceive) {
// A raw ICMP socket and ping socket should both receive the ICMP packets
// intended for the ping socket.
TEST_F(RawSocketICMPTest, RawAndPingSockets) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
FileDescriptor ping_sock =
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP));
@@ -264,7 +264,7 @@ TEST_F(RawSocketICMPTest, RawAndPingSockets) {
// while a ping socket should not. Neither should be able to receieve a short
// malformed packet.
TEST_F(RawSocketICMPTest, ShortEchoRawAndPingSockets) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
FileDescriptor ping_sock =
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP));
@@ -305,7 +305,7 @@ TEST_F(RawSocketICMPTest, ShortEchoRawAndPingSockets) {
// while ping socket should not.
// Neither should be able to receieve a short malformed packet.
TEST_F(RawSocketICMPTest, ShortEchoReplyRawAndPingSockets) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
FileDescriptor ping_sock =
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP));
@@ -344,7 +344,7 @@ TEST_F(RawSocketICMPTest, ShortEchoReplyRawAndPingSockets) {
// Test that connect() sends packets to the right place.
TEST_F(RawSocketICMPTest, SendAndReceiveViaConnect) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(
connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
@@ -368,7 +368,7 @@ TEST_F(RawSocketICMPTest, SendAndReceiveViaConnect) {
// Bind to localhost, then send and receive packets.
TEST_F(RawSocketICMPTest, BindSendAndReceive) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(
bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
@@ -391,7 +391,7 @@ TEST_F(RawSocketICMPTest, BindSendAndReceive) {
// Bind and connect to localhost and send/receive packets.
TEST_F(RawSocketICMPTest, BindConnectSendAndReceive) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
ASSERT_THAT(
bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
@@ -417,7 +417,7 @@ TEST_F(RawSocketICMPTest, BindConnectSendAndReceive) {
// Set and get SO_LINGER.
TEST_F(RawSocketICMPTest, SetAndGetSocketLinger) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
int level = SOL_SOCKET;
int type = SO_LINGER;
@@ -439,7 +439,7 @@ TEST_F(RawSocketICMPTest, SetAndGetSocketLinger) {
// Test getsockopt for SO_ACCEPTCONN.
TEST_F(RawSocketICMPTest, GetSocketAcceptConn) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
int got = -1;
socklen_t length = sizeof(got);
diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc
index cb77986c2..3fbbf1423 100644
--- a/test/syscalls/linux/tcp_socket.cc
+++ b/test/syscalls/linux/tcp_socket.cc
@@ -2088,6 +2088,66 @@ TEST_P(SimpleTcpSocketTest, ConnectUnspecifiedAddress) {
}
}
+// Tests that send will return EWOULDBLOCK initially with large buffer and will
+// succeed after the send buffer size is increased.
+TEST_P(TcpSocketTest, SendUnblocksOnSendBufferIncrease) {
+ // Set the FD to O_NONBLOCK.
+ int opts;
+ ASSERT_THAT(opts = fcntl(first_fd, F_GETFL), SyscallSucceeds());
+ opts |= O_NONBLOCK;
+ ASSERT_THAT(fcntl(first_fd, F_SETFL, opts), SyscallSucceeds());
+
+ // Get maximum buffer size by trying to set it to a large value.
+ constexpr int kSndBufSz = 0xffffffff;
+ ASSERT_THAT(setsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+ sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ int max_buffer_sz = 0;
+ socklen_t max_len = sizeof(max_buffer_sz);
+ ASSERT_THAT(
+ getsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &max_buffer_sz, &max_len),
+ SyscallSucceeds());
+
+ int buffer_sz = max_buffer_sz >> 2;
+ EXPECT_THAT(setsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &buffer_sz,
+ sizeof(buffer_sz)),
+ SyscallSucceedsWithValue(0));
+
+ // Create a large buffer that will be used for sending.
+ std::vector<char> buffer(max_buffer_sz);
+
+ // Write until we receive an error.
+ while (RetryEINTR(send)(first_fd, buffer.data(), buffer.size(), 0) != -1) {
+ // Sleep to give linux a chance to move data from the send buffer to the
+ // receive buffer.
+ usleep(10000); // 10ms.
+ }
+
+ // The last error should have been EWOULDBLOCK.
+ ASSERT_EQ(errno, EWOULDBLOCK);
+
+ ScopedThread send_thread([this]() {
+ int flags = 0;
+ ASSERT_THAT(flags = fcntl(first_fd, F_GETFL), SyscallSucceeds());
+ EXPECT_THAT(fcntl(first_fd, F_SETFL, flags & ~O_NONBLOCK),
+ SyscallSucceeds());
+
+ // Expect the send() to succeed.
+ char buffer;
+ ASSERT_THAT(RetryEINTR(send)(first_fd, &buffer, sizeof(buffer), 0),
+ SyscallSucceeds());
+ });
+
+ // Set SO_SNDBUF to maximum buffer size allowed.
+ buffer_sz = max_buffer_sz >> 1;
+ EXPECT_THAT(setsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &buffer_sz,
+ sizeof(buffer_sz)),
+ SyscallSucceedsWithValue(0));
+
+ send_thread.Join();
+}
+
INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest,
::testing::Values(AF_INET, AF_INET6));
diff --git a/test/util/BUILD b/test/util/BUILD
index 3211546f5..b92af1c27 100644
--- a/test/util/BUILD
+++ b/test/util/BUILD
@@ -14,7 +14,6 @@ cc_library(
],
hdrs = [
"capability_util.h",
- "fuchsia_capability_util.h",
"linux_capability_util.h",
],
deps = [
diff --git a/test/util/capability_util.h b/test/util/capability_util.h
index f5d622e2d..318a43feb 100644
--- a/test/util/capability_util.h
+++ b/test/util/capability_util.h
@@ -18,11 +18,29 @@
#define GVISOR_TEST_UTIL_CAPABILITY_UTIL_H_
#if defined(__Fuchsia__)
-#include "test/util/fuchsia_capability_util.h"
+// Nothing to include.
#elif defined(__linux__)
#include "test/util/linux_capability_util.h"
#else
#error "Unhandled platform"
#endif
+namespace gvisor {
+namespace testing {
+
+// HaveRawIPSocketCapability returns whether or not the process has access to
+// raw IP sockets.
+//
+// Returns an error when raw IP socket access cannot be determined.
+PosixErrorOr<bool> HaveRawIPSocketCapability();
+
+// HavePacketSocketCapability returns whether or not the process has access to
+// packet sockets.
+//
+// Returns an error when packet socket access cannot be determined.
+PosixErrorOr<bool> HavePacketSocketCapability();
+
+} // namespace testing
+} // namespace gvisor
+
#endif // GVISOR_TEST_UTIL_CAPABILITY_UTIL_H_
diff --git a/test/util/cgroup_util.cc b/test/util/cgroup_util.cc
index 977993f41..df3c57b87 100644
--- a/test/util/cgroup_util.cc
+++ b/test/util/cgroup_util.cc
@@ -25,12 +25,26 @@
namespace gvisor {
namespace testing {
-Cgroup::Cgroup(std::string_view path) : cgroup_path_(path) {
+Cgroup::Cgroup(absl::string_view path) : cgroup_path_(path) {
id_ = ++Cgroup::next_id_;
std::cerr << absl::StreamFormat("[cg#%d] <= %s", id_, cgroup_path_)
<< std::endl;
}
+PosixErrorOr<Cgroup> Cgroup::RecursivelyCreate(absl::string_view path) {
+ RETURN_IF_ERRNO(RecursivelyCreateDir(path));
+ return Cgroup(path);
+}
+
+PosixErrorOr<Cgroup> Cgroup::Create(absl::string_view path) {
+ RETURN_IF_ERRNO(Mkdir(path));
+ return Cgroup(path);
+}
+
+PosixErrorOr<Cgroup> Cgroup::CreateChild(absl::string_view name) const {
+ return Cgroup::Create(JoinPath(Path(), name));
+}
+
PosixErrorOr<std::string> Cgroup::ReadControlFile(
absl::string_view name) const {
std::string buf;
@@ -93,7 +107,7 @@ PosixErrorOr<absl::flat_hash_set<pid_t>> Cgroup::ParsePIDList(
absl::string_view data) const {
absl::flat_hash_set<pid_t> res;
std::vector<absl::string_view> lines = absl::StrSplit(data, '\n');
- for (const std::string_view& line : lines) {
+ for (const absl::string_view& line : lines) {
if (line.empty()) {
continue;
}
diff --git a/test/util/cgroup_util.h b/test/util/cgroup_util.h
index e3f696a89..ccc7219e3 100644
--- a/test/util/cgroup_util.h
+++ b/test/util/cgroup_util.h
@@ -34,8 +34,20 @@ class Cgroup {
uint64_t id() const { return id_; }
+ // RecursivelyCreate creates cgroup specified by path, including all
+ // components leading up to path. Path should end inside a cgroupfs mount. If
+ // path already exists, RecursivelyCreate does nothing and silently succeeds.
+ static PosixErrorOr<Cgroup> RecursivelyCreate(std::string_view path);
+
+ // Creates a new cgroup at path. The parent directory must exist and be a
+ // cgroupfs directory.
+ static PosixErrorOr<Cgroup> Create(std::string_view path);
+
const std::string& Path() const { return cgroup_path_; }
+ // Creates a child cgroup under this cgroup with the given name.
+ PosixErrorOr<Cgroup> CreateChild(std::string_view name) const;
+
std::string Relpath(absl::string_view leaf) const {
return JoinPath(cgroup_path_, leaf);
}
diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc
index 483ae848d..253411858 100644
--- a/test/util/fs_util.cc
+++ b/test/util/fs_util.cc
@@ -201,7 +201,8 @@ PosixError UnlinkAt(const FileDescriptor& dfd, absl::string_view path,
PosixError Mkdir(absl::string_view path, int mode) {
int res = mkdir(std::string(path).c_str(), mode);
if (res < 0) {
- return PosixError(errno, absl::StrCat("mkdir ", path, " mode ", mode));
+ return PosixError(errno,
+ absl::StrFormat("mkdir \"%s\" mode %#o", path, mode));
}
return NoError();
diff --git a/test/util/fuchsia_capability_util.cc b/test/util/fuchsia_capability_util.cc
index 43f60f20f..bbe8643e7 100644
--- a/test/util/fuchsia_capability_util.cc
+++ b/test/util/fuchsia_capability_util.cc
@@ -14,8 +14,7 @@
#ifdef __Fuchsia__
-#include "test/util/fuchsia_capability_util.h"
-
+#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <sys/socket.h>
@@ -24,19 +23,48 @@
namespace gvisor {
namespace testing {
-PosixErrorOr<bool> HaveCapability(int cap) {
- if (cap == CAP_NET_RAW) {
- auto s = Socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
- if (s.ok()) {
- return true;
- }
- if (s.error().errno_value() == EPERM) {
- return false;
- }
- return s.error();
+// On Linux, access to raw IP and packet socket is controlled by a single
+// capability (CAP_NET_RAW). However on Fuchsia, access to raw IP and packet
+// sockets are controlled by separate capabilities/protocols.
+
+namespace {
+
+PosixErrorOr<bool> HaveSocketCapability(int domain, int type, int protocol) {
+ // Fuchsia does not have a platform supported way to check the protocols made
+ // available to a sandbox. As a workaround, simply try to create the specified
+ // socket and assume no access if we get a no permissions error.
+ auto s = Socket(domain, type, protocol);
+ if (s.ok()) {
+ return true;
}
+ if (s.error().errno_value() == EPERM) {
+ return false;
+ }
+ return s.error();
+}
+
+} // namespace
+
+PosixErrorOr<bool> HaveRawIPSocketCapability() {
+ static PosixErrorOr<bool> result(false);
+ static std::once_flag once;
+
+ std::call_once(once, [&]() {
+ result = HaveSocketCapability(AF_INET, SOCK_RAW, IPPROTO_UDP);
+ });
+
+ return result;
+}
+
+PosixErrorOr<bool> HavePacketSocketCapability() {
+ static PosixErrorOr<bool> result(false);
+ static std::once_flag once;
+
+ std::call_once(once, [&]() {
+ result = HaveSocketCapability(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+ });
- return false;
+ return result;
}
} // namespace testing
diff --git a/test/util/fuchsia_capability_util.h b/test/util/fuchsia_capability_util.h
deleted file mode 100644
index 87657d7e8..000000000
--- a/test/util/fuchsia_capability_util.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2021 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Utilities for testing capabilities on Fuchsia.
-
-#ifndef GVISOR_TEST_UTIL_FUCHSIA_CAPABILITY_UTIL_H_
-#define GVISOR_TEST_UTIL_FUCHSIA_CAPABILITY_UTIL_H_
-
-#ifdef __Fuchsia__
-
-#include "test/util/posix_error.h"
-
-#ifdef CAP_NET_RAW
-#error "Fuchsia should not define CAP_NET_RAW"
-#endif // CAP_NET_RAW
-#define CAP_NET_RAW 0
-
-namespace gvisor {
-namespace testing {
-
-// HaveCapability returns true if the process has the specified EFFECTIVE
-// capability.
-PosixErrorOr<bool> HaveCapability(int cap);
-
-} // namespace testing
-} // namespace gvisor
-
-#endif // __Fuchsia__
-
-#endif // GVISOR_TEST_UTIL_FUCHSIA_CAPABILITY_UTIL_H_
diff --git a/test/util/linux_capability_util.cc b/test/util/linux_capability_util.cc
index 958eb96f9..7218aa4ac 100644
--- a/test/util/linux_capability_util.cc
+++ b/test/util/linux_capability_util.cc
@@ -32,6 +32,14 @@
namespace gvisor {
namespace testing {
+PosixErrorOr<bool> HaveRawIPSocketCapability() {
+ return HaveCapability(CAP_NET_RAW);
+}
+
+PosixErrorOr<bool> HavePacketSocketCapability() {
+ return HaveCapability(CAP_NET_RAW);
+}
+
PosixErrorOr<bool> CanCreateUserNamespace() {
// The most reliable way to determine if userns creation is possible is by
// trying to create one; see below.
diff --git a/tools/bazel.mk b/tools/bazel.mk
index 60b50cfb0..4f979bbeb 100644
--- a/tools/bazel.mk
+++ b/tools/bazel.mk
@@ -84,7 +84,7 @@ DOCKER_RUN_OPTIONS += -v "$(shell readlink -m $(GCLOUD_CONFIG)):$(GCLOUD_CONFIG)
DOCKER_RUN_OPTIONS += -v "/tmp:/tmp"
DOCKER_EXEC_OPTIONS := --user $(UID):$(GID)
DOCKER_EXEC_OPTIONS += --interactive
-ifeq (true,$(shell test -t 0 && echo true))
+ifeq (true,$(shell test -t 1 && echo true))
DOCKER_EXEC_OPTIONS += --tty
endif
@@ -181,21 +181,11 @@ endif
# build_paths extracts the built binary from the bazel stderr output.
#
-# This could be alternately done by parsing the bazel build event stream, but
-# this is a complex schema, and begs the question: what will build the thing
-# that parses the output? Bazel? Do we need a separate bootstrapping build
-# command here? Yikes, let's just stick with the ugly shell pipeline.
-#
# The last line is used to prevent terminal shenanigans.
build_paths = \
(set -euo pipefail; \
- $(call wrapper,$(BAZEL) build $(BASE_OPTIONS) $(BAZEL_OPTIONS) $(1)) 2>&1 \
- | tee /dev/fd/2 \
- | sed -n -e '/^Target/,$$p' \
- | sed -n -e '/^ \($(subst /,\/,$(subst $(SPACE),\|,$(BUILD_ROOTS)))\)/p' \
- | sed -e 's/ /\n/g' \
- | awk '{$$1=$$1};1' \
- | strings \
+ $(call wrapper,$(BAZEL) build $(BASE_OPTIONS) $(BAZEL_OPTIONS) $(1)) && \
+ $(call wrapper,$(BAZEL) cquery $(BASE_OPTIONS) $(BAZEL_OPTIONS) $(1) --output=starlark --starlark:file=tools/show_paths.bzl) \
| xargs -r -n 1 -I {} readlink -f "{}" \
| xargs -r -n 1 -I {} bash -c 'set -xeuo pipefail; $(2)')
diff --git a/tools/go_generics/rules_tests/template_test.go b/tools/go_generics/rules_tests/template_test.go
index b2a3446ef..6f4d140da 100644
--- a/tools/go_generics/rules_tests/template_test.go
+++ b/tools/go_generics/rules_tests/template_test.go
@@ -20,14 +20,16 @@ import (
)
func TestMax(t *testing.T) {
- var a int = max(10, 20)
+ var a int
+ a = max(10, 20)
if a != 20 {
t.Errorf("Bad result of max, got %v, want %v", a, 20)
}
}
func TestIntConst(t *testing.T) {
- var a int = add(10)
+ var a int
+ a = add(10)
if a != 30 {
t.Errorf("Bad result of add, got %v, want %v", a, 30)
}
diff --git a/tools/go_stateify/main.go b/tools/go_stateify/main.go
index 7216388a0..3cf00b5dd 100644
--- a/tools/go_stateify/main.go
+++ b/tools/go_stateify/main.go
@@ -362,7 +362,12 @@ func main() {
fmt.Fprintf(outputFile, " stateSourceObject.LoadWait(%d, &%s.%s)\n", fields[name], recv, name)
}
emitSaveValue := func(name, typName string) {
- fmt.Fprintf(outputFile, " var %sValue %s = %s.save%s()\n", name, typName, recv, camelCased(name))
+ // Emit typName to be more robust against code generation bugs,
+ // but instead of one line make two lines to silence ST1023
+ // finding (i.e. avoid nogo finding: "should omit type $typName
+ // from declaration; it will be inferred from the right-hand side")
+ fmt.Fprintf(outputFile, " var %sValue %s\n", name, typName)
+ fmt.Fprintf(outputFile, " %sValue = %s.save%s()\n", name, recv, camelCased(name))
fmt.Fprintf(outputFile, " stateSinkObject.SaveValue(%d, %sValue)\n", fields[name], name)
}
emitSave := func(name string) {
diff --git a/tools/nogo/analyzers.go b/tools/nogo/analyzers.go
index 6705fc905..db8bbdb8a 100644
--- a/tools/nogo/analyzers.go
+++ b/tools/nogo/analyzers.go
@@ -117,11 +117,11 @@ func register(all []*analysis.Analyzer) {
func init() {
// Add all staticcheck analyzers.
for _, a := range staticcheck.Analyzers {
- AllAnalyzers = append(AllAnalyzers, a)
+ AllAnalyzers = append(AllAnalyzers, a.Analyzer)
}
// Add all stylecheck analyzers.
for _, a := range stylecheck.Analyzers {
- AllAnalyzers = append(AllAnalyzers, a)
+ AllAnalyzers = append(AllAnalyzers, a.Analyzer)
}
// Register lists.
diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl
index 80182ff6c..dc9a8b24e 100644
--- a/tools/nogo/defs.bzl
+++ b/tools/nogo/defs.bzl
@@ -160,6 +160,11 @@ def _nogo_stdlib_impl(ctx):
return [NogoStdlibInfo(
facts = facts,
raw_findings = raw_findings,
+ ), DefaultInfo(
+ # Declare the facts and findings as default outputs. This is not
+ # strictly required, but ensures that the target still perform analysis
+ # when built directly rather than just indirectly via a nogo_test.
+ files = depset([facts, raw_findings]),
)]
nogo_stdlib = go_rule(
diff --git a/tools/show_paths.bzl b/tools/show_paths.bzl
new file mode 100644
index 000000000..ba78d3494
--- /dev/null
+++ b/tools/show_paths.bzl
@@ -0,0 +1,25 @@
+"""Formatter to extract the output files from a target."""
+
+def format(target):
+ provider_map = providers(target)
+ outputs = dict()
+
+ # Try to resolve in order.
+ files_to_run = provider_map.get("FilesToRunProvider", None)
+ default_info = provider_map.get("DefaultInfo", None)
+ output_group_info = provider_map.get("OutputGroupInfo", None)
+ if files_to_run and files_to_run.executable:
+ outputs[files_to_run.executable.path] = True
+ elif default_info:
+ for x in default_info.files:
+ outputs[x.path] = True
+ elif output_group_info:
+ for entry in dir(output_group_info):
+ # Filter out all built-ins and anything that is not a depset.
+ if entry.startswith("_") or not hasattr(getattr(output_group_info, entry), "to_list"):
+ continue
+ for x in getattr(output_group_info, entry).to_list():
+ outputs[x.path] = True
+
+ # Return all found files.
+ return "\n".join(outputs.keys())
diff --git a/tools/verity/measure_tool.go b/tools/verity/measure_tool.go
index 0d314ae70..4a0bc497a 100644
--- a/tools/verity/measure_tool.go
+++ b/tools/verity/measure_tool.go
@@ -21,12 +21,14 @@ import (
"io/ioutil"
"log"
"os"
+ "strings"
"syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
)
var path = flag.String("path", "", "path to the verity file system.")
+var rawpath = flag.String("rawpath", "", "path to the raw file system.")
const maxDigestSize = 64
@@ -40,6 +42,14 @@ func main() {
if *path == "" {
log.Fatalf("no path provided")
}
+ if *rawpath == "" {
+ log.Fatalf("no rawpath provided")
+ }
+ // TODO(b/182315468): Optimize the Merkle tree generate process to
+ // allow only updating certain files/directories.
+ if err := clearMerkle(*rawpath); err != nil {
+ log.Fatalf("Failed to clear merkle files in %s: %v", *rawpath, err)
+ }
if err := enableDir(*path); err != nil {
log.Fatalf("Failed to enable file system %s: %v", *path, err)
}
@@ -49,6 +59,26 @@ func main() {
}
}
+func clearMerkle(path string) error {
+ files, err := ioutil.ReadDir(path)
+ if err != nil {
+ return err
+ }
+
+ for _, file := range files {
+ if file.IsDir() {
+ if err := clearMerkle(path + "/" + file.Name()); err != nil {
+ return err
+ }
+ } else if strings.HasPrefix(file.Name(), ".merkle.verity") {
+ if err := os.Remove(path + "/" + file.Name()); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
// enableDir enables verity features on all the files and sub-directories within
// path.
func enableDir(path string) error {