diff options
Diffstat (limited to 'pkg/sentry/kernel')
79 files changed, 9528 insertions, 3034 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD deleted file mode 100644 index 26614b029..000000000 --- a/pkg/sentry/kernel/BUILD +++ /dev/null @@ -1,317 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test", "proto_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "pending_signals_list", - out = "pending_signals_list.go", - package = "kernel", - prefix = "pendingSignal", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*pendingSignal", - "Linker": "*pendingSignal", - }, -) - -go_template_instance( - name = "process_group_list", - out = "process_group_list.go", - package = "kernel", - prefix = "processGroup", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*ProcessGroup", - "Linker": "*ProcessGroup", - }, -) - -go_template_instance( - name = "seqatomic_taskgoroutineschedinfo", - out = "seqatomic_taskgoroutineschedinfo_unsafe.go", - package = "kernel", - suffix = "TaskGoroutineSchedInfo", - template = "//pkg/sync/seqatomic:generic_seqatomic", - types = { - "Value": "TaskGoroutineSchedInfo", - }, -) - -go_template_instance( - name = "session_list", - out = "session_list.go", - package = "kernel", - prefix = "session", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Session", - "Linker": "*Session", - }, -) - -go_template_instance( - name = "task_list", - out = "task_list.go", - package = "kernel", - prefix = "task", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Task", - "Linker": "*Task", - }, -) - -go_template_instance( - name = "socket_list", - out = "socket_list.go", - package = "kernel", - prefix = "socket", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*SocketRecordVFS1", - "Linker": "*SocketRecordVFS1", - }, -) - -go_template_instance( - name = "fd_table_refs", - out = "fd_table_refs.go", - package = "kernel", - prefix = "FDTable", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "FDTable", - }, -) - -go_template_instance( - name = "fs_context_refs", - out = "fs_context_refs.go", - package = "kernel", - prefix = "FSContext", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "FSContext", - }, -) - -go_template_instance( - name = "ipc_namespace_refs", - out = "ipc_namespace_refs.go", - package = "kernel", - prefix = "IPCNamespace", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "IPCNamespace", - }, -) - -go_template_instance( - name = "process_group_refs", - out = "process_group_refs.go", - package = "kernel", - prefix = "ProcessGroup", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "ProcessGroup", - }, -) - -go_template_instance( - name = "session_refs", - out = "session_refs.go", - package = "kernel", - prefix = "Session", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "Session", - }, -) - -proto_library( - name = "uncaught_signal", - srcs = ["uncaught_signal.proto"], - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_proto"], -) - -go_library( - name = "kernel", - srcs = [ - "abstract_socket_namespace.go", - "aio.go", - "cgroup.go", - "context.go", - "fd_table.go", - "fd_table_refs.go", - "fd_table_unsafe.go", - "fs_context.go", - "fs_context_refs.go", - "ipc_namespace.go", - "ipc_namespace_refs.go", - "kcov.go", - "kcov_unsafe.go", - "kernel.go", - "kernel_opts.go", - "kernel_state.go", - "pending_signals.go", - "pending_signals_list.go", - "pending_signals_state.go", - "posixtimer.go", - "process_group_list.go", - "process_group_refs.go", - "ptrace.go", - "ptrace_amd64.go", - "ptrace_arm64.go", - "rseq.go", - "seccomp.go", - "seqatomic_taskgoroutineschedinfo_unsafe.go", - "session_list.go", - "session_refs.go", - "sessions.go", - "signal.go", - "signal_handlers.go", - "socket_list.go", - "syscalls.go", - "syscalls_state.go", - "syslog.go", - "task.go", - "task_acct.go", - "task_block.go", - "task_cgroup.go", - "task_clone.go", - "task_context.go", - "task_exec.go", - "task_exit.go", - "task_futex.go", - "task_identity.go", - "task_image.go", - "task_list.go", - "task_log.go", - "task_net.go", - "task_run.go", - "task_sched.go", - "task_signals.go", - "task_start.go", - "task_stop.go", - "task_syscall.go", - "task_usermem.go", - "task_work.go", - "thread_group.go", - "threads.go", - "timekeeper.go", - "timekeeper_state.go", - "tty.go", - "uts_namespace.go", - "vdso.go", - "version.go", - ], - imports = [ - "gvisor.dev/gvisor/pkg/bpf", - "gvisor.dev/gvisor/pkg/sentry/device", - "gvisor.dev/gvisor/pkg/tcpip", - ], - marshal = True, - visibility = ["//:sandbox"], - deps = [ - ":uncaught_signal_go_proto", - "//pkg/abi", - "//pkg/abi/linux", - "//pkg/abi/linux/errno", - "//pkg/amutex", - "//pkg/bits", - "//pkg/bpf", - "//pkg/cleanup", - "//pkg/context", - "//pkg/coverage", - "//pkg/cpuid", - "//pkg/errors", - "//pkg/errors/linuxerr", - "//pkg/eventchannel", - "//pkg/fspath", - "//pkg/goid", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/metric", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/secio", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fs/timerfd", - "//pkg/sentry/fsbridge", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/fsimpl/pipefs", - "//pkg/sentry/fsimpl/sockfs", - "//pkg/sentry/fsimpl/timerfd", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/hostcpu", - "//pkg/sentry/inet", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/epoll", - "//pkg/sentry/kernel/futex", - "//pkg/sentry/kernel/sched", - "//pkg/sentry/kernel/semaphore", - "//pkg/sentry/kernel/shm", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/loader", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/socket/netlink/port", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/time", - "//pkg/sentry/unimpl", - "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/state", - "//pkg/state/statefile", - "//pkg/state/wire", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/tcpip/stack", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "kernel_test", - size = "small", - srcs = [ - "fd_table_test.go", - "table_test.go", - "task_test.go", - "timekeeper_test.go", - ], - library = ":kernel", - deps = [ - "//pkg/abi", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sentry/arch", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/filetest", - "//pkg/sentry/kernel/sched", - "//pkg/sentry/limits", - "//pkg/sentry/pgalloc", - "//pkg/sentry/time", - "//pkg/sentry/usage", - "//pkg/sync", - ], -) diff --git a/pkg/sentry/kernel/README.md b/pkg/sentry/kernel/README.md deleted file mode 100644 index 427311be8..000000000 --- a/pkg/sentry/kernel/README.md +++ /dev/null @@ -1,108 +0,0 @@ -This package contains: - -- A (partial) emulation of the "core Linux kernel", which governs task - execution and scheduling, system call dispatch, and signal handling. See - below for details. - -- The top-level interface for the sentry's Linux kernel emulation in general, - used by the `main` function of all versions of the sentry. This interface - revolves around the `Env` type (defined in `kernel.go`). - -# Background - -In Linux, each schedulable context is referred to interchangeably as a "task" or -"thread". Tasks can be divided into userspace and kernel tasks. In the sentry, -scheduling is managed by the Go runtime, so each schedulable context is a -goroutine; only "userspace" (application) contexts are referred to as tasks, and -represented by Task objects. (From this point forward, "task" refers to the -sentry's notion of a task unless otherwise specified.) - -At a high level, Linux application threads can be thought of as repeating a "run -loop": - -- Some amount of application code is executed in userspace. - -- A trap (explicit syscall invocation, hardware interrupt or exception, etc.) - causes control flow to switch to the kernel. - -- Some amount of kernel code is executed in kernelspace, e.g. to handle the - cause of the trap. - -- The kernel "returns from the trap" into application code. - -Analogously, each task in the sentry is associated with a *task goroutine* that -executes that task's run loop (`Task.run` in `task_run.go`). However, the -sentry's task run loop differs in structure in order to support saving execution -state to, and resuming execution from, checkpoints. - -While in kernelspace, a Linux thread can be descheduled (cease execution) in a -variety of ways: - -- It can yield or be preempted, becoming temporarily descheduled but still - runnable. At present, the sentry delegates scheduling of runnable threads to - the Go runtime. - -- It can exit, becoming permanently descheduled. The sentry's equivalent is - returning from `Task.run`, terminating the task goroutine. - -- It can enter interruptible sleep, a state in which it can be woken by a - caller-defined wakeup or the receipt of a signal. In the sentry, - interruptible sleep (which is ambiguously referred to as *blocking*) is - implemented by making all events that can end blocking (including signal - notifications) communicated via Go channels and using `select` to multiplex - wakeup sources; see `task_block.go`. - -- It can enter uninterruptible sleep, a state in which it can only be woken by - a caller-defined wakeup. Killable sleep is a closely related variant in - which the task can also be woken by SIGKILL. (These definitions also include - Linux's "group-stopped" (`TASK_STOPPED`) and "ptrace-stopped" - (`TASK_TRACED`) states.) - -To maximize compatibility with Linux, sentry checkpointing appears as a spurious -signal-delivery interrupt on all tasks; interrupted system calls return `EINTR` -or are automatically restarted as usual. However, these semantics require that -uninterruptible and killable sleeps do not appear to be interrupted. In other -words, the state of the task, including its progress through the interrupted -operation, must be preserved by checkpointing. For many such sleeps, the wakeup -condition is application-controlled, making it infeasible to wait for the sleep -to end before checkpointing. Instead, we must support checkpointing progress -through sleeping operations. - -# Implementation - -We break the task's control flow graph into *states*, delimited by: - -1. Points where uninterruptible and killable sleeps may occur. For example, - there exists a state boundary between signal dequeueing and signal delivery - because there may be an intervening ptrace signal-delivery-stop. - -2. Points where sleep-induced branches may "rejoin" normal execution. For - example, the syscall exit state exists because it can be reached immediately - following a synchronous syscall, or after a task that is sleeping in - `execve()` or `vfork()` resumes execution. - -3. Points containing large branches. This is strictly for organizational - purposes. For example, the state that processes interrupt-signaled - conditions is kept separate from the main "app" state to reduce the size of - the latter. - -4. `SyscallReinvoke`, which does not correspond to anything in Linux, and - exists solely to serve the autosave feature. - -![dot -Tpng -Goverlap=false -orun_states.png run_states.dot](g3doc/run_states.png "Task control flow graph") - -States before which a stop may occur are represented as implementations of the -`taskRunState` interface named `run(state)`, allowing them to be saved and -restored. States that cannot be immediately preceded by a stop are simply `Task` -methods named `do(state)`. - -Conditions that can require task goroutines to cease execution for unknown -lengths of time are called *stops*. Stops are divided into *internal stops*, -which are stops whose start and end conditions are implemented within the -sentry, and *external stops*, which are stops whose start and end conditions are -not known to the sentry. Hence all uninterruptible and killable sleeps are -internal stops, and the existence of a pending checkpoint operation is an -external stop. Internal stops are reified into instances of the `TaskStop` type, -while external stops are merely counted. The task run loop alternates between -checking for stops and advancing the task's state. This allows checkpointing to -hold tasks in a stopped state while waiting for all tasks in the system to stop. diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD deleted file mode 100644 index 7a1a36454..000000000 --- a/pkg/sentry/kernel/auth/BUILD +++ /dev/null @@ -1,71 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "atomicptr_credentials", - out = "atomicptr_credentials_unsafe.go", - package = "auth", - suffix = "Credentials", - template = "//pkg/sync/atomicptr:generic_atomicptr", - types = { - "Value": "Credentials", - }, -) - -go_template_instance( - name = "id_map_range", - out = "id_map_range.go", - package = "auth", - prefix = "idMap", - template = "//pkg/segment:generic_range", - types = { - "T": "uint32", - }, -) - -go_template_instance( - name = "id_map_set", - out = "id_map_set.go", - consts = { - "minDegree": "3", - }, - package = "auth", - prefix = "idMap", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint32", - "Range": "idMapRange", - "Value": "uint32", - "Functions": "idMapFunctions", - }, -) - -go_library( - name = "auth", - srcs = [ - "atomicptr_credentials_unsafe.go", - "auth.go", - "capability_set.go", - "context.go", - "credentials.go", - "id.go", - "id_map.go", - "id_map_functions.go", - "id_map_range.go", - "id_map_set.go", - "user_namespace.go", - ], - marshal = True, - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/bits", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/log", - "//pkg/sync", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go b/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go new file mode 100644 index 000000000..4535c958f --- /dev/null +++ b/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go @@ -0,0 +1,37 @@ +package auth + +import ( + "sync/atomic" + "unsafe" +) + +// An AtomicPtr is a pointer to a value of type Value that can be atomically +// loaded and stored. The zero value of an AtomicPtr represents nil. +// +// Note that copying AtomicPtr by value performs a non-atomic read of the +// stored pointer, which is unsafe if Store() can be called concurrently; in +// this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable +type AtomicPtrCredentials struct { + ptr unsafe.Pointer `state:".(*Credentials)"` +} + +func (p *AtomicPtrCredentials) savePtr() *Credentials { + return p.Load() +} + +func (p *AtomicPtrCredentials) loadPtr(v *Credentials) { + p.Store(v) +} + +// Load returns the value set by the most recent Store. It returns nil if there +// has been no previous call to Store. +func (p *AtomicPtrCredentials) Load() *Credentials { + return (*Credentials)(atomic.LoadPointer(&p.ptr)) +} + +// Store sets the value returned by Load to x. +func (p *AtomicPtrCredentials) Store(x *Credentials) { + atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) +} diff --git a/pkg/sentry/kernel/auth/auth_abi_autogen_unsafe.go b/pkg/sentry/kernel/auth/auth_abi_autogen_unsafe.go new file mode 100644 index 000000000..8ec71bc50 --- /dev/null +++ b/pkg/sentry/kernel/auth/auth_abi_autogen_unsafe.go @@ -0,0 +1,280 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build tag aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The build tags here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build tags, see +// tools/defs.bzl:calculate_sets(). + +package auth + +import ( + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*GID)(nil) +var _ marshal.Marshallable = (*UID)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (gid *GID) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (gid *GID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(*gid)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (gid *GID) UnmarshalBytes(src []byte) { + *gid = GID(uint32(hostarch.ByteOrder.Uint32(src[:4]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (gid *GID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (gid *GID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(gid), uintptr(gid.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (gid *GID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(gid), unsafe.Pointer(&src[0]), uintptr(gid.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (gid *GID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid))) + hdr.Len = gid.SizeBytes() + hdr.Cap = gid.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that gid + // must live until the use above. + runtime.KeepAlive(gid) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (gid *GID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return gid.CopyOutN(cc, addr, gid.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (gid *GID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid))) + hdr.Len = gid.SizeBytes() + hdr.Cap = gid.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that gid + // must live until the use above. + runtime.KeepAlive(gid) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (gid *GID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid))) + hdr.Len = gid.SizeBytes() + hdr.Cap = gid.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that gid + // must live until the use above. + runtime.KeepAlive(gid) // escapes: replaced by intrinsic. + return int64(length), err +} + +// CopyGIDSliceIn copies in a slice of GID objects from the task's memory. +//go:nosplit +func CopyGIDSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []GID) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*GID)(nil).SizeBytes() + + ptr := unsafe.Pointer(&dst) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that dst + // must live until the use above. + runtime.KeepAlive(dst) // escapes: replaced by intrinsic. + return length, err +} + +// CopyGIDSliceOut copies a slice of GID objects to the task's memory. +//go:nosplit +func CopyGIDSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []GID) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*GID)(nil).SizeBytes() + + ptr := unsafe.Pointer(&src) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyOutBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that src + // must live until the use above. + runtime.KeepAlive(src) // escapes: replaced by intrinsic. + return length, err +} + +// MarshalUnsafeGIDSlice is like GID.MarshalUnsafe, but for a []GID. +func MarshalUnsafeGIDSlice(src []GID, dst []byte) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*GID)(nil).SizeBytes() + + dst = dst[:size*count] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst))) + return size*count, nil +} + +// UnmarshalUnsafeGIDSlice is like GID.UnmarshalUnsafe, but for a []GID. +func UnmarshalUnsafeGIDSlice(dst []GID, src []byte) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*GID)(nil).SizeBytes() + + src = src[:(size*count)] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src))) + return size*count, nil +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (uid *UID) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (uid *UID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(*uid)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (uid *UID) UnmarshalBytes(src []byte) { + *uid = UID(uint32(hostarch.ByteOrder.Uint32(src[:4]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (uid *UID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (uid *UID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(uid), uintptr(uid.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (uid *UID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(uid), unsafe.Pointer(&src[0]), uintptr(uid.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (uid *UID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid))) + hdr.Len = uid.SizeBytes() + hdr.Cap = uid.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that uid + // must live until the use above. + runtime.KeepAlive(uid) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (uid *UID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return uid.CopyOutN(cc, addr, uid.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (uid *UID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid))) + hdr.Len = uid.SizeBytes() + hdr.Cap = uid.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that uid + // must live until the use above. + runtime.KeepAlive(uid) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (uid *UID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid))) + hdr.Len = uid.SizeBytes() + hdr.Cap = uid.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that uid + // must live until the use above. + runtime.KeepAlive(uid) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/kernel/auth/auth_state_autogen.go b/pkg/sentry/kernel/auth/auth_state_autogen.go new file mode 100644 index 000000000..5a7f55933 --- /dev/null +++ b/pkg/sentry/kernel/auth/auth_state_autogen.go @@ -0,0 +1,280 @@ +// automatically generated by stateify. + +package auth + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (c *Credentials) StateTypeName() string { + return "pkg/sentry/kernel/auth.Credentials" +} + +func (c *Credentials) StateFields() []string { + return []string{ + "RealKUID", + "EffectiveKUID", + "SavedKUID", + "RealKGID", + "EffectiveKGID", + "SavedKGID", + "ExtraKGIDs", + "PermittedCaps", + "InheritableCaps", + "EffectiveCaps", + "BoundingCaps", + "KeepCaps", + "UserNamespace", + } +} + +func (c *Credentials) beforeSave() {} + +// +checklocksignore +func (c *Credentials) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.RealKUID) + stateSinkObject.Save(1, &c.EffectiveKUID) + stateSinkObject.Save(2, &c.SavedKUID) + stateSinkObject.Save(3, &c.RealKGID) + stateSinkObject.Save(4, &c.EffectiveKGID) + stateSinkObject.Save(5, &c.SavedKGID) + stateSinkObject.Save(6, &c.ExtraKGIDs) + stateSinkObject.Save(7, &c.PermittedCaps) + stateSinkObject.Save(8, &c.InheritableCaps) + stateSinkObject.Save(9, &c.EffectiveCaps) + stateSinkObject.Save(10, &c.BoundingCaps) + stateSinkObject.Save(11, &c.KeepCaps) + stateSinkObject.Save(12, &c.UserNamespace) +} + +func (c *Credentials) afterLoad() {} + +// +checklocksignore +func (c *Credentials) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.RealKUID) + stateSourceObject.Load(1, &c.EffectiveKUID) + stateSourceObject.Load(2, &c.SavedKUID) + stateSourceObject.Load(3, &c.RealKGID) + stateSourceObject.Load(4, &c.EffectiveKGID) + stateSourceObject.Load(5, &c.SavedKGID) + stateSourceObject.Load(6, &c.ExtraKGIDs) + stateSourceObject.Load(7, &c.PermittedCaps) + stateSourceObject.Load(8, &c.InheritableCaps) + stateSourceObject.Load(9, &c.EffectiveCaps) + stateSourceObject.Load(10, &c.BoundingCaps) + stateSourceObject.Load(11, &c.KeepCaps) + stateSourceObject.Load(12, &c.UserNamespace) +} + +func (i *IDMapEntry) StateTypeName() string { + return "pkg/sentry/kernel/auth.IDMapEntry" +} + +func (i *IDMapEntry) StateFields() []string { + return []string{ + "FirstID", + "FirstParentID", + "Length", + } +} + +func (i *IDMapEntry) beforeSave() {} + +// +checklocksignore +func (i *IDMapEntry) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.FirstID) + stateSinkObject.Save(1, &i.FirstParentID) + stateSinkObject.Save(2, &i.Length) +} + +func (i *IDMapEntry) afterLoad() {} + +// +checklocksignore +func (i *IDMapEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.FirstID) + stateSourceObject.Load(1, &i.FirstParentID) + stateSourceObject.Load(2, &i.Length) +} + +func (r *idMapRange) StateTypeName() string { + return "pkg/sentry/kernel/auth.idMapRange" +} + +func (r *idMapRange) StateFields() []string { + return []string{ + "Start", + "End", + } +} + +func (r *idMapRange) beforeSave() {} + +// +checklocksignore +func (r *idMapRange) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.Start) + stateSinkObject.Save(1, &r.End) +} + +func (r *idMapRange) afterLoad() {} + +// +checklocksignore +func (r *idMapRange) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.Start) + stateSourceObject.Load(1, &r.End) +} + +func (s *idMapSet) StateTypeName() string { + return "pkg/sentry/kernel/auth.idMapSet" +} + +func (s *idMapSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *idMapSet) beforeSave() {} + +// +checklocksignore +func (s *idMapSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *idMapSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *idMapSet) afterLoad() {} + +// +checklocksignore +func (s *idMapSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*idMapSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*idMapSegmentDataSlices)) }) +} + +func (n *idMapnode) StateTypeName() string { + return "pkg/sentry/kernel/auth.idMapnode" +} + +func (n *idMapnode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *idMapnode) beforeSave() {} + +// +checklocksignore +func (n *idMapnode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *idMapnode) afterLoad() {} + +// +checklocksignore +func (n *idMapnode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (i *idMapSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/kernel/auth.idMapSegmentDataSlices" +} + +func (i *idMapSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (i *idMapSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (i *idMapSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.Start) + stateSinkObject.Save(1, &i.End) + stateSinkObject.Save(2, &i.Values) +} + +func (i *idMapSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (i *idMapSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.Start) + stateSourceObject.Load(1, &i.End) + stateSourceObject.Load(2, &i.Values) +} + +func (ns *UserNamespace) StateTypeName() string { + return "pkg/sentry/kernel/auth.UserNamespace" +} + +func (ns *UserNamespace) StateFields() []string { + return []string{ + "parent", + "owner", + "uidMapFromParent", + "uidMapToParent", + "gidMapFromParent", + "gidMapToParent", + } +} + +func (ns *UserNamespace) beforeSave() {} + +// +checklocksignore +func (ns *UserNamespace) StateSave(stateSinkObject state.Sink) { + ns.beforeSave() + stateSinkObject.Save(0, &ns.parent) + stateSinkObject.Save(1, &ns.owner) + stateSinkObject.Save(2, &ns.uidMapFromParent) + stateSinkObject.Save(3, &ns.uidMapToParent) + stateSinkObject.Save(4, &ns.gidMapFromParent) + stateSinkObject.Save(5, &ns.gidMapToParent) +} + +func (ns *UserNamespace) afterLoad() {} + +// +checklocksignore +func (ns *UserNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ns.parent) + stateSourceObject.Load(1, &ns.owner) + stateSourceObject.Load(2, &ns.uidMapFromParent) + stateSourceObject.Load(3, &ns.uidMapToParent) + stateSourceObject.Load(4, &ns.gidMapFromParent) + stateSourceObject.Load(5, &ns.gidMapToParent) +} + +func init() { + state.Register((*Credentials)(nil)) + state.Register((*IDMapEntry)(nil)) + state.Register((*idMapRange)(nil)) + state.Register((*idMapSet)(nil)) + state.Register((*idMapnode)(nil)) + state.Register((*idMapSegmentDataSlices)(nil)) + state.Register((*UserNamespace)(nil)) +} diff --git a/pkg/sentry/kernel/auth/auth_unsafe_abi_autogen_unsafe.go b/pkg/sentry/kernel/auth/auth_unsafe_abi_autogen_unsafe.go new file mode 100644 index 000000000..7f4cbed94 --- /dev/null +++ b/pkg/sentry/kernel/auth/auth_unsafe_abi_autogen_unsafe.go @@ -0,0 +1,13 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build tag aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The build tags here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build tags, see +// tools/defs.bzl:calculate_sets(). + +package auth + +import ( +) + diff --git a/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go b/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go new file mode 100644 index 000000000..61f7e4ce9 --- /dev/null +++ b/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go @@ -0,0 +1,37 @@ +// automatically generated by stateify. + +package auth + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (p *AtomicPtrCredentials) StateTypeName() string { + return "pkg/sentry/kernel/auth.AtomicPtrCredentials" +} + +func (p *AtomicPtrCredentials) StateFields() []string { + return []string{ + "ptr", + } +} + +func (p *AtomicPtrCredentials) beforeSave() {} + +// +checklocksignore +func (p *AtomicPtrCredentials) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + var ptrValue *Credentials = p.savePtr() + stateSinkObject.SaveValue(0, ptrValue) +} + +func (p *AtomicPtrCredentials) afterLoad() {} + +// +checklocksignore +func (p *AtomicPtrCredentials) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*Credentials), func(y interface{}) { p.loadPtr(y.(*Credentials)) }) +} + +func init() { + state.Register((*AtomicPtrCredentials)(nil)) +} diff --git a/pkg/sentry/kernel/auth/id_map_range.go b/pkg/sentry/kernel/auth/id_map_range.go new file mode 100644 index 000000000..19d542716 --- /dev/null +++ b/pkg/sentry/kernel/auth/id_map_range.go @@ -0,0 +1,76 @@ +package auth + +// A Range represents a contiguous range of T. +// +// +stateify savable +type idMapRange struct { + // Start is the inclusive start of the range. + Start uint32 + + // End is the exclusive end of the range. + End uint32 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +// +//go:nosplit +func (r idMapRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +// +//go:nosplit +func (r idMapRange) Length() uint32 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +// +//go:nosplit +func (r idMapRange) Contains(x uint32) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +// +//go:nosplit +func (r idMapRange) Overlaps(r2 idMapRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +// +//go:nosplit +func (r idMapRange) IsSupersetOf(r2 idMapRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +// +//go:nosplit +func (r idMapRange) Intersect(r2 idMapRange) idMapRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +// +//go:nosplit +func (r idMapRange) CanSplitAt(x uint32) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/kernel/auth/id_map_set.go b/pkg/sentry/kernel/auth/id_map_set.go new file mode 100644 index 000000000..479753981 --- /dev/null +++ b/pkg/sentry/kernel/auth/id_map_set.go @@ -0,0 +1,1643 @@ +package auth + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const idMaptrackGaps = 0 + +var _ = uint8(idMaptrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type idMapdynamicGap [idMaptrackGaps]uint32 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *idMapdynamicGap) Get() uint32 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *idMapdynamicGap) Set(v uint32) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + idMapminDegree = 3 + + idMapmaxDegree = 2 * idMapminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type idMapSet struct { + root idMapnode `state:".(*idMapSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *idMapSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *idMapSet) IsEmptyRange(r idMapRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *idMapSet) Span() uint32 { + var sz uint32 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *idMapSet) SpanRange(r idMapRange) uint32 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint32 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *idMapSet) FirstSegment() idMapIterator { + if s.root.nrSegments == 0 { + return idMapIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *idMapSet) LastSegment() idMapIterator { + if s.root.nrSegments == 0 { + return idMapIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *idMapSet) FirstGap() idMapGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return idMapGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *idMapSet) LastGap() idMapGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return idMapGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *idMapSet) Find(key uint32) (idMapIterator, idMapGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return idMapIterator{n, i}, idMapGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return idMapIterator{}, idMapGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *idMapSet) FindSegment(key uint32) idMapIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *idMapSet) LowerBoundSegment(min uint32) idMapIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *idMapSet) UpperBoundSegment(max uint32) idMapIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *idMapSet) FindGap(key uint32) idMapGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *idMapSet) LowerBoundGap(min uint32) idMapGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *idMapSet) UpperBoundGap(max uint32) idMapGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *idMapSet) Add(r idMapRange, val uint32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *idMapSet) AddWithoutMerging(r idMapRange, val uint32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *idMapSet) Insert(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (idMapFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := idMaptrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (idMapFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (idMapFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := idMaptrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *idMapSet) InsertWithoutMerging(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *idMapSet) InsertWithoutMergingUnchecked(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := idMaptrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return idMapIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *idMapSet) Remove(seg idMapIterator) idMapGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if idMaptrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + idMapFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if idMaptrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(idMapGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *idMapSet) RemoveAll() { + s.root = idMapnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *idMapSet) RemoveRange(r idMapRange) idMapGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *idMapSet) Merge(first, second idMapIterator) idMapIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *idMapSet) MergeUnchecked(first, second idMapIterator) idMapIterator { + if first.End() == second.Start() { + if mval, ok := (idMapFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return idMapIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *idMapSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *idMapSet) MergeRange(r idMapRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *idMapSet) MergeAdjacent(r idMapRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *idMapSet) Split(seg idMapIterator, split uint32) (idMapIterator, idMapIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *idMapSet) SplitUnchecked(seg idMapIterator, split uint32) (idMapIterator, idMapIterator) { + val1, val2 := (idMapFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), idMapRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *idMapSet) SplitAt(split uint32) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *idMapSet) Isolate(seg idMapIterator, r idMapRange) idMapIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *idMapSet) ApplyContiguous(r idMapRange, fn func(seg idMapIterator)) idMapGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return idMapGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return idMapGapIterator{} + } + } +} + +// +stateify savable +type idMapnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *idMapnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap idMapdynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [idMapmaxDegree - 1]idMapRange + values [idMapmaxDegree - 1]uint32 + children [idMapmaxDegree]*idMapnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *idMapnode) firstSegment() idMapIterator { + for n.hasChildren { + n = n.children[0] + } + return idMapIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *idMapnode) lastSegment() idMapIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return idMapIterator{n, n.nrSegments - 1} +} + +func (n *idMapnode) prevSibling() *idMapnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *idMapnode) nextSibling() *idMapnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *idMapnode) rebalanceBeforeInsert(gap idMapGapIterator) idMapGapIterator { + if n.nrSegments < idMapmaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &idMapnode{ + nrSegments: idMapminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &idMapnode{ + nrSegments: idMapminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:idMapminDegree-1], n.keys[:idMapminDegree-1]) + copy(left.values[:idMapminDegree-1], n.values[:idMapminDegree-1]) + copy(right.keys[:idMapminDegree-1], n.keys[idMapminDegree:]) + copy(right.values[:idMapminDegree-1], n.values[idMapminDegree:]) + n.keys[0], n.values[0] = n.keys[idMapminDegree-1], n.values[idMapminDegree-1] + idMapzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:idMapminDegree], n.children[:idMapminDegree]) + copy(right.children[:idMapminDegree], n.children[idMapminDegree:]) + idMapzeroNodeSlice(n.children[2:]) + for i := 0; i < idMapminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if idMaptrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < idMapminDegree { + return idMapGapIterator{left, gap.index} + } + return idMapGapIterator{right, gap.index - idMapminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[idMapminDegree-1], n.values[idMapminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &idMapnode{ + nrSegments: idMapminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:idMapminDegree-1], n.keys[idMapminDegree:]) + copy(sibling.values[:idMapminDegree-1], n.values[idMapminDegree:]) + idMapzeroValueSlice(n.values[idMapminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:idMapminDegree], n.children[idMapminDegree:]) + idMapzeroNodeSlice(n.children[idMapminDegree:]) + for i := 0; i < idMapminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = idMapminDegree - 1 + + if idMaptrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < idMapminDegree { + return gap + } + return idMapGapIterator{sibling, gap.index - idMapminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *idMapnode) rebalanceAfterRemove(gap idMapGapIterator) idMapGapIterator { + for { + if n.nrSegments >= idMapminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= idMapminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + idMapFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if idMaptrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return idMapGapIterator{n, 0} + } + if gap.node == n { + return idMapGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= idMapminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + idMapFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if idMaptrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return idMapGapIterator{n, n.nrSegments} + } + return idMapGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return idMapGapIterator{p, gap.index} + } + if gap.node == right { + return idMapGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *idMapnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = idMapGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + idMapFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if idMaptrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *idMapnode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *idMapnode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *idMapnode) calculateMaxGapLeaf() uint32 { + max := idMapGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (idMapGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *idMapnode) calculateMaxGapInternal() uint32 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *idMapnode) searchFirstLargeEnoughGap(minSize uint32) idMapGapIterator { + if n.maxGap.Get() < minSize { + return idMapGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := idMapGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *idMapnode) searchLastLargeEnoughGap(minSize uint32) idMapGapIterator { + if n.maxGap.Get() < minSize { + return idMapGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := idMapGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type idMapIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *idMapnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg idMapIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg idMapIterator) Range() idMapRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg idMapIterator) Start() uint32 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg idMapIterator) End() uint32 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg idMapIterator) SetRangeUnchecked(r idMapRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg idMapIterator) SetRange(r idMapRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg idMapIterator) SetStartUnchecked(start uint32) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg idMapIterator) SetStart(start uint32) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg idMapIterator) SetEndUnchecked(end uint32) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg idMapIterator) SetEnd(end uint32) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg idMapIterator) Value() uint32 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg idMapIterator) ValuePtr() *uint32 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg idMapIterator) SetValue(val uint32) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg idMapIterator) PrevSegment() idMapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return idMapIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return idMapIterator{} + } + return idMapsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg idMapIterator) NextSegment() idMapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return idMapIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return idMapIterator{} + } + return idMapsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg idMapIterator) PrevGap() idMapGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return idMapGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg idMapIterator) NextGap() idMapGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return idMapGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg idMapIterator) PrevNonEmpty() (idMapIterator, idMapGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return idMapIterator{}, gap + } + return gap.PrevSegment(), idMapGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg idMapIterator) NextNonEmpty() (idMapIterator, idMapGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return idMapIterator{}, gap + } + return gap.NextSegment(), idMapGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type idMapGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *idMapnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap idMapGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap idMapGapIterator) Range() idMapRange { + return idMapRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap idMapGapIterator) Start() uint32 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return idMapFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap idMapGapIterator) End() uint32 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return idMapFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap idMapGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap idMapGapIterator) PrevSegment() idMapIterator { + return idMapsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap idMapGapIterator) NextSegment() idMapIterator { + return idMapsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap idMapGapIterator) PrevGap() idMapGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return idMapGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap idMapGapIterator) NextGap() idMapGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return idMapGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap idMapGapIterator) NextLargeEnoughGap(minSize uint32) idMapGapIterator { + if idMaptrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap idMapGapIterator) nextLargeEnoughGapHelper(minSize uint32) idMapGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return idMapGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap idMapGapIterator) PrevLargeEnoughGap(minSize uint32) idMapGapIterator { + if idMaptrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap idMapGapIterator) prevLargeEnoughGapHelper(minSize uint32) idMapGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return idMapGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func idMapsegmentBeforePosition(n *idMapnode, i int) idMapIterator { + for i == 0 { + if n.parent == nil { + return idMapIterator{} + } + n, i = n.parent, n.parentIndex + } + return idMapIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func idMapsegmentAfterPosition(n *idMapnode, i int) idMapIterator { + for i == n.nrSegments { + if n.parent == nil { + return idMapIterator{} + } + n, i = n.parent, n.parentIndex + } + return idMapIterator{n, i} +} + +func idMapzeroValueSlice(slice []uint32) { + + for i := range slice { + idMapFunctions{}.ClearValue(&slice[i]) + } +} + +func idMapzeroNodeSlice(slice []*idMapnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *idMapSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *idMapnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *idMapnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if idMaptrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type idMapSegmentDataSlices struct { + Start []uint32 + End []uint32 + Values []uint32 +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *idMapSet) ExportSortedSlices() *idMapSegmentDataSlices { + var sds idMapSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *idMapSet) ImportSortedSlices(sds *idMapSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := idMapRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *idMapSet) segmentTestCheck(expectedSegments int, segFunc func(int, idMapRange, uint32) error) error { + havePrev := false + prev := uint32(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *idMapSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *idMapSet) saveRoot() *idMapSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *idMapSet) loadRoot(sds *idMapSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/kernel/contexttest/BUILD b/pkg/sentry/kernel/contexttest/BUILD deleted file mode 100644 index 9d26392c0..000000000 --- a/pkg/sentry/kernel/contexttest/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "contexttest", - testonly = 1, - srcs = ["contexttest.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/sentry/contexttest", - "//pkg/sentry/kernel", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - ], -) diff --git a/pkg/sentry/kernel/contexttest/contexttest.go b/pkg/sentry/kernel/contexttest/contexttest.go deleted file mode 100644 index 22c340e56..000000000 --- a/pkg/sentry/kernel/contexttest/contexttest.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package contexttest provides a test context.Context which includes -// a dummy kernel pointing to a valid platform. -package contexttest - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" -) - -// Context returns a Context that may be used in tests. Uses ptrace as the -// platform.Platform, and provides a stub kernel that only serves to point to -// the platform. -func Context(tb testing.TB) context.Context { - ctx := contexttest.Context(tb) - k := &kernel.Kernel{ - Platform: platform.FromContext(ctx), - } - k.SetMemoryFile(pgalloc.MemoryFileFromContext(ctx)) - ctx.(*contexttest.TestContext).RegisterValue(kernel.CtxKernel, k) - return ctx -} diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD deleted file mode 100644 index 723a85f64..000000000 --- a/pkg/sentry/kernel/epoll/BUILD +++ /dev/null @@ -1,52 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "epoll_list", - out = "epoll_list.go", - package = "epoll", - prefix = "pollEntry", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*pollEntry", - "Linker": "*pollEntry", - }, -) - -go_library( - name = "epoll", - srcs = [ - "epoll.go", - "epoll_list.go", - "epoll_state.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/refs", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sync", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "epoll_test", - size = "small", - srcs = [ - "epoll_test.go", - ], - library = ":epoll", - deps = [ - "//pkg/sentry/contexttest", - "//pkg/sentry/fs/filetest", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/epoll/epoll_list.go b/pkg/sentry/kernel/epoll/epoll_list.go new file mode 100644 index 000000000..b6abe2de9 --- /dev/null +++ b/pkg/sentry/kernel/epoll/epoll_list.go @@ -0,0 +1,221 @@ +package epoll + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type pollEntryElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (pollEntryElementMapper) linkerFor(elem *pollEntry) *pollEntry { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type pollEntryList struct { + head *pollEntry + tail *pollEntry +} + +// Reset resets list l to the empty state. +func (l *pollEntryList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *pollEntryList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *pollEntryList) Front() *pollEntry { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *pollEntryList) Back() *pollEntry { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *pollEntryList) Len() (count int) { + for e := l.Front(); e != nil; e = (pollEntryElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *pollEntryList) PushFront(e *pollEntry) { + linker := pollEntryElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + pollEntryElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *pollEntryList) PushBack(e *pollEntry) { + linker := pollEntryElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + pollEntryElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *pollEntryList) PushBackList(m *pollEntryList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + pollEntryElementMapper{}.linkerFor(l.tail).SetNext(m.head) + pollEntryElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *pollEntryList) InsertAfter(b, e *pollEntry) { + bLinker := pollEntryElementMapper{}.linkerFor(b) + eLinker := pollEntryElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + pollEntryElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *pollEntryList) InsertBefore(a, e *pollEntry) { + aLinker := pollEntryElementMapper{}.linkerFor(a) + eLinker := pollEntryElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + pollEntryElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *pollEntryList) Remove(e *pollEntry) { + linker := pollEntryElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + pollEntryElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + pollEntryElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type pollEntryEntry struct { + next *pollEntry + prev *pollEntry +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *pollEntryEntry) Next() *pollEntry { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *pollEntryEntry) Prev() *pollEntry { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *pollEntryEntry) SetNext(elem *pollEntry) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *pollEntryEntry) SetPrev(elem *pollEntry) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/epoll/epoll_state_autogen.go b/pkg/sentry/kernel/epoll/epoll_state_autogen.go new file mode 100644 index 000000000..44bd520ac --- /dev/null +++ b/pkg/sentry/kernel/epoll/epoll_state_autogen.go @@ -0,0 +1,192 @@ +// automatically generated by stateify. + +package epoll + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (f *FileIdentifier) StateTypeName() string { + return "pkg/sentry/kernel/epoll.FileIdentifier" +} + +func (f *FileIdentifier) StateFields() []string { + return []string{ + "File", + "Fd", + } +} + +func (f *FileIdentifier) beforeSave() {} + +// +checklocksignore +func (f *FileIdentifier) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.File) + stateSinkObject.Save(1, &f.Fd) +} + +func (f *FileIdentifier) afterLoad() {} + +// +checklocksignore +func (f *FileIdentifier) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &f.File) + stateSourceObject.Load(1, &f.Fd) +} + +func (p *pollEntry) StateTypeName() string { + return "pkg/sentry/kernel/epoll.pollEntry" +} + +func (p *pollEntry) StateFields() []string { + return []string{ + "pollEntryEntry", + "id", + "userData", + "mask", + "flags", + "epoll", + } +} + +func (p *pollEntry) beforeSave() {} + +// +checklocksignore +func (p *pollEntry) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.pollEntryEntry) + stateSinkObject.Save(1, &p.id) + stateSinkObject.Save(2, &p.userData) + stateSinkObject.Save(3, &p.mask) + stateSinkObject.Save(4, &p.flags) + stateSinkObject.Save(5, &p.epoll) +} + +// +checklocksignore +func (p *pollEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.pollEntryEntry) + stateSourceObject.LoadWait(1, &p.id) + stateSourceObject.Load(2, &p.userData) + stateSourceObject.Load(3, &p.mask) + stateSourceObject.Load(4, &p.flags) + stateSourceObject.Load(5, &p.epoll) + stateSourceObject.AfterLoad(p.afterLoad) +} + +func (e *EventPoll) StateTypeName() string { + return "pkg/sentry/kernel/epoll.EventPoll" +} + +func (e *EventPoll) StateFields() []string { + return []string{ + "files", + "readyList", + "waitingList", + "disabledList", + } +} + +func (e *EventPoll) beforeSave() {} + +// +checklocksignore +func (e *EventPoll) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + if !state.IsZeroValue(&e.FilePipeSeek) { + state.Failf("FilePipeSeek is %#v, expected zero", &e.FilePipeSeek) + } + if !state.IsZeroValue(&e.FileNotDirReaddir) { + state.Failf("FileNotDirReaddir is %#v, expected zero", &e.FileNotDirReaddir) + } + if !state.IsZeroValue(&e.FileNoFsync) { + state.Failf("FileNoFsync is %#v, expected zero", &e.FileNoFsync) + } + if !state.IsZeroValue(&e.FileNoopFlush) { + state.Failf("FileNoopFlush is %#v, expected zero", &e.FileNoopFlush) + } + if !state.IsZeroValue(&e.FileNoIoctl) { + state.Failf("FileNoIoctl is %#v, expected zero", &e.FileNoIoctl) + } + if !state.IsZeroValue(&e.FileNoMMap) { + state.Failf("FileNoMMap is %#v, expected zero", &e.FileNoMMap) + } + if !state.IsZeroValue(&e.Queue) { + state.Failf("Queue is %#v, expected zero", &e.Queue) + } + stateSinkObject.Save(0, &e.files) + stateSinkObject.Save(1, &e.readyList) + stateSinkObject.Save(2, &e.waitingList) + stateSinkObject.Save(3, &e.disabledList) +} + +// +checklocksignore +func (e *EventPoll) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.files) + stateSourceObject.Load(1, &e.readyList) + stateSourceObject.Load(2, &e.waitingList) + stateSourceObject.Load(3, &e.disabledList) + stateSourceObject.AfterLoad(e.afterLoad) +} + +func (l *pollEntryList) StateTypeName() string { + return "pkg/sentry/kernel/epoll.pollEntryList" +} + +func (l *pollEntryList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *pollEntryList) beforeSave() {} + +// +checklocksignore +func (l *pollEntryList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *pollEntryList) afterLoad() {} + +// +checklocksignore +func (l *pollEntryList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *pollEntryEntry) StateTypeName() string { + return "pkg/sentry/kernel/epoll.pollEntryEntry" +} + +func (e *pollEntryEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *pollEntryEntry) beforeSave() {} + +// +checklocksignore +func (e *pollEntryEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *pollEntryEntry) afterLoad() {} + +// +checklocksignore +func (e *pollEntryEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func init() { + state.Register((*FileIdentifier)(nil)) + state.Register((*pollEntry)(nil)) + state.Register((*EventPoll)(nil)) + state.Register((*pollEntryList)(nil)) + state.Register((*pollEntryEntry)(nil)) +} diff --git a/pkg/sentry/kernel/epoll/epoll_test.go b/pkg/sentry/kernel/epoll/epoll_test.go deleted file mode 100644 index 8ef6cb3e7..000000000 --- a/pkg/sentry/kernel/epoll/epoll_test.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package epoll - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs/filetest" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestFileDestroyed(t *testing.T) { - f := filetest.NewTestFile(t) - id := FileIdentifier{f, 12} - - ctx := contexttest.Context(t) - efile := NewEventPoll(ctx) - e := efile.FileOperations.(*EventPoll) - if err := e.AddEntry(id, 0, waiter.ReadableEvents, [2]int32{}); err != nil { - t.Fatalf("addEntry failed: %v", err) - } - - // Check that we get an event reported twice in a row. - evt := e.ReadEvents(1) - if len(evt) != 1 { - t.Fatalf("Unexpected number of ready events: want %v, got %v", 1, len(evt)) - } - - evt = e.ReadEvents(1) - if len(evt) != 1 { - t.Fatalf("Unexpected number of ready events: want %v, got %v", 1, len(evt)) - } - - // Destroy the file. Check that we get no more events. - f.DecRef(ctx) - - evt = e.ReadEvents(1) - if len(evt) != 0 { - t.Fatalf("Unexpected number of ready events: want %v, got %v", 0, len(evt)) - } - -} diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD deleted file mode 100644 index 564c3d42e..000000000 --- a/pkg/sentry/kernel/eventfd/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "eventfd", - srcs = ["eventfd.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fdnotifier", - "//pkg/hostarch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "eventfd_test", - size = "small", - srcs = ["eventfd_test.go"], - library = ":eventfd", - deps = [ - "//pkg/sentry/contexttest", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go b/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go new file mode 100644 index 000000000..596920c42 --- /dev/null +++ b/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go @@ -0,0 +1,45 @@ +// automatically generated by stateify. + +package eventfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (e *EventOperations) StateTypeName() string { + return "pkg/sentry/kernel/eventfd.EventOperations" +} + +func (e *EventOperations) StateFields() []string { + return []string{ + "val", + "semMode", + "hostfd", + } +} + +func (e *EventOperations) beforeSave() {} + +// +checklocksignore +func (e *EventOperations) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + if !state.IsZeroValue(&e.wq) { + state.Failf("wq is %#v, expected zero", &e.wq) + } + stateSinkObject.Save(0, &e.val) + stateSinkObject.Save(1, &e.semMode) + stateSinkObject.Save(2, &e.hostfd) +} + +func (e *EventOperations) afterLoad() {} + +// +checklocksignore +func (e *EventOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.val) + stateSourceObject.Load(1, &e.semMode) + stateSourceObject.Load(2, &e.hostfd) +} + +func init() { + state.Register((*EventOperations)(nil)) +} diff --git a/pkg/sentry/kernel/eventfd/eventfd_test.go b/pkg/sentry/kernel/eventfd/eventfd_test.go deleted file mode 100644 index 1b9e60b3a..000000000 --- a/pkg/sentry/kernel/eventfd/eventfd_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package eventfd - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestEventfd(t *testing.T) { - initVals := []uint64{ - 0, - // Using a non-zero initial value verifies that writing to an - // eventfd signals when the eventfd's counter was already - // non-zero. - 343, - } - - for _, initVal := range initVals { - ctx := contexttest.Context(t) - - // Make a new event that is writable. - event := New(ctx, initVal, false) - - // Register a callback for a write event. - w, ch := waiter.NewChannelEntry(nil) - event.EventRegister(&w, waiter.ReadableEvents) - defer event.EventUnregister(&w) - - data := []byte("00000124") - // Create and submit a write request. - n, err := event.Writev(ctx, usermem.BytesIOSequence(data)) - if err != nil { - t.Fatal(err) - } - if n != 8 { - t.Errorf("eventfd.write wrote %d bytes, not full int64", n) - } - - // Check if the callback fired due to the write event. - select { - case <-ch: - default: - t.Errorf("Didn't get notified of EventIn after write") - } - } -} - -func TestEventfdStat(t *testing.T) { - ctx := contexttest.Context(t) - - // Make a new event that is writable. - event := New(ctx, 0, false) - - // Create and submit an stat request. - uattr, err := event.Dirent.Inode.UnstableAttr(ctx) - if err != nil { - t.Fatalf("eventfd stat request failed: %v", err) - } - if uattr.Size != 0 { - t.Fatal("EventFD size should be 0") - } -} diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD deleted file mode 100644 index 6b2dd09da..000000000 --- a/pkg/sentry/kernel/fasync/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "fasync", - srcs = ["fasync.go"], - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/errors/linuxerr", - "//pkg/sentry/fs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/fasync/fasync_state_autogen.go b/pkg/sentry/kernel/fasync/fasync_state_autogen.go new file mode 100644 index 000000000..8ec069b92 --- /dev/null +++ b/pkg/sentry/kernel/fasync/fasync_state_autogen.go @@ -0,0 +1,57 @@ +// automatically generated by stateify. + +package fasync + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (a *FileAsync) StateTypeName() string { + return "pkg/sentry/kernel/fasync.FileAsync" +} + +func (a *FileAsync) StateFields() []string { + return []string{ + "e", + "fd", + "requester", + "registered", + "signal", + "recipientPG", + "recipientTG", + "recipientT", + } +} + +func (a *FileAsync) beforeSave() {} + +// +checklocksignore +func (a *FileAsync) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.e) + stateSinkObject.Save(1, &a.fd) + stateSinkObject.Save(2, &a.requester) + stateSinkObject.Save(3, &a.registered) + stateSinkObject.Save(4, &a.signal) + stateSinkObject.Save(5, &a.recipientPG) + stateSinkObject.Save(6, &a.recipientTG) + stateSinkObject.Save(7, &a.recipientT) +} + +func (a *FileAsync) afterLoad() {} + +// +checklocksignore +func (a *FileAsync) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.e) + stateSourceObject.Load(1, &a.fd) + stateSourceObject.Load(2, &a.requester) + stateSourceObject.Load(3, &a.registered) + stateSourceObject.Load(4, &a.signal) + stateSourceObject.Load(5, &a.recipientPG) + stateSourceObject.Load(6, &a.recipientTG) + stateSourceObject.Load(7, &a.recipientT) +} + +func init() { + state.Register((*FileAsync)(nil)) +} diff --git a/pkg/sentry/kernel/fd_table_refs.go b/pkg/sentry/kernel/fd_table_refs.go new file mode 100644 index 000000000..9b0b1628e --- /dev/null +++ b/pkg/sentry/kernel/fd_table_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const FDTableenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var FDTableobj *FDTable + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type FDTableRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *FDTableRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *FDTableRefs) RefType() string { + return fmt.Sprintf("%T", FDTableobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *FDTableRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *FDTableRefs) LogRefs() bool { + return FDTableenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *FDTableRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *FDTableRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if FDTableenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.RefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *FDTableRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if FDTableenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *FDTableRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if FDTableenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *FDTableRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go deleted file mode 100644 index bf5460083..000000000 --- a/pkg/sentry/kernel/fd_table_test.go +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "runtime" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/filetest" - "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sync" -) - -const ( - // maxFD is the maximum FD to try to create in the map. - // - // This number of open files has been seen in the wild. - maxFD = 2 * 1024 -) - -func runTest(t testing.TB, fn func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet)) { - t.Helper() // Don't show in stacks. - - // Create the limits and context. - limitSet := limits.NewLimitSet() - limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true) - ctx := contexttest.WithLimitSet(contexttest.Context(t), limitSet) - - // Create a test file.; - file := filetest.NewTestFile(t) - - // Create the table. - fdTable := new(FDTable) - fdTable.init() - - // Run the test. - fn(ctx, fdTable, file, limitSet) -} - -// TestFDTableMany allocates maxFD FDs, i.e. maxes out the FDTable, until there -// is no room, then makes sure that NewFDAt works and also that if we remove -// one and add one that works too. -func TestFDTableMany(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - for i := 0; i < maxFD; i++ { - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Allocated %v FDs but wanted to allocate %v", i, maxFD) - } - } - - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil { - t.Fatalf("fdTable.NewFDs(0, r) in full map: got nil, wanted error") - } - - if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil { - t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } - - i := int32(2) - fdTable.Remove(ctx, i) - if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil || fds[0] != i { - t.Fatalf("Allocated %v FDs but wanted to allocate %v: %v", i, maxFD, err) - } - }) -} - -func TestFDTableOverLimit(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - if _, err := fdTable.NewFDs(ctx, maxFD, []*fs.File{file}, FDFlags{}); err == nil { - t.Fatalf("fdTable.NewFDs(maxFD, f): got nil, wanted error") - } - - if _, err := fdTable.NewFDs(ctx, maxFD-2, []*fs.File{file, file, file}, FDFlags{}); err == nil { - t.Fatalf("fdTable.NewFDs(maxFD-2, {f,f,f}): got nil, wanted error") - } - - if fds, err := fdTable.NewFDs(ctx, maxFD-3, []*fs.File{file, file, file}, FDFlags{}); err != nil { - t.Fatalf("fdTable.NewFDs(maxFD-3, {f,f,f}): got %v, wanted nil", err) - } else { - for _, fd := range fds { - fdTable.Remove(ctx, fd) - } - } - - if fds, err := fdTable.NewFDs(ctx, maxFD-1, []*fs.File{file}, FDFlags{}); err != nil || fds[0] != maxFD-1 { - t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } - - if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Adding an FD to a resized map: got %v, want nil", err) - } else if len(fds) != 1 || fds[0] != 0 { - t.Fatalf("Added an FD to a resized map: got %v, want {1}", fds) - } - }) -} - -// TestFDTable does a set of simple tests to make sure simple adds, removes, -// GetRefs, and DecRefs work. The ordering is just weird enough that a -// table-driven approach seemed clumsy. -func TestFDTable(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet) { - // Cap the limit at one. - limitSet.Set(limits.NumberOfFiles, limits.Limit{1, maxFD}, true) - - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Adding an FD to an empty 1-size map: got %v, want nil", err) - } - - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil { - t.Fatalf("Adding an FD to a filled 1-size map: got nil, wanted an error") - } - - // Remove the previous limit. - limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true) - - if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Adding an FD to a resized map: got %v, want nil", err) - } else if len(fds) != 1 || fds[0] != 1 { - t.Fatalf("Added an FD to a resized map: got %v, want {1}", fds) - } - - if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil { - t.Fatalf("Replacing FD 1 via fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } - - if err := fdTable.NewFDAt(ctx, maxFD+1, file, FDFlags{}); err == nil { - t.Fatalf("Using an FD that was too large via fdTable.NewFDAt(%v, r, FDFlags{}): got nil, wanted an error", maxFD+1) - } - - if ref, _ := fdTable.Get(1); ref == nil { - t.Fatalf("fdTable.Get(1): got nil, wanted %v", file) - } - - if ref, _ := fdTable.Get(2); ref != nil { - t.Fatalf("fdTable.Get(2): got a %v, wanted nil", ref) - } - - ref, _ := fdTable.Remove(ctx, 1) - if ref == nil { - t.Fatalf("fdTable.Remove(1) for an existing FD: failed, want success") - } - ref.DecRef(ctx) - - if ref, _ := fdTable.Remove(ctx, 1); ref != nil { - t.Fatalf("r.Remove(1) for a removed FD: got success, want failure") - } - }) -} - -func TestDescriptorFlags(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - if err := fdTable.NewFDAt(ctx, 2, file, FDFlags{CloseOnExec: true}); err != nil { - t.Fatalf("fdTable.NewFDAt(2, r, FDFlags{}): got %v, wanted nil", err) - } - - newFile, flags := fdTable.Get(2) - if newFile == nil { - t.Fatalf("fdTable.Get(2): got a %v, wanted nil", newFile) - } - - if !flags.CloseOnExec { - t.Fatalf("new File flags %v don't match original %d\n", flags, 0) - } - }) -} - -func BenchmarkFDLookupAndDecRef(b *testing.B) { - b.StopTimer() // Setup. - - runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{}) - if err != nil { - b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err) - } - - b.StartTimer() // Benchmark. - for i := 0; i < b.N; i++ { - tf, _ := fdTable.Get(fds[i%len(fds)]) - tf.DecRef(ctx) - } - }) -} - -func BenchmarkFDLookupAndDecRefConcurrent(b *testing.B) { - b.StopTimer() // Setup. - - runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{}) - if err != nil { - b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err) - } - - concurrency := runtime.GOMAXPROCS(0) - if concurrency < 4 { - concurrency = 4 - } - each := b.N / concurrency - - b.StartTimer() // Benchmark. - var wg sync.WaitGroup - for i := 0; i < concurrency; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for i := 0; i < each; i++ { - tf, _ := fdTable.Get(fds[i%len(fds)]) - tf.DecRef(ctx) - } - }() - } - wg.Wait() - }) -} diff --git a/pkg/sentry/kernel/fs_context_refs.go b/pkg/sentry/kernel/fs_context_refs.go new file mode 100644 index 000000000..484c20683 --- /dev/null +++ b/pkg/sentry/kernel/fs_context_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const FSContextenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var FSContextobj *FSContext + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type FSContextRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *FSContextRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *FSContextRefs) RefType() string { + return fmt.Sprintf("%T", FSContextobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *FSContextRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *FSContextRefs) LogRefs() bool { + return FSContextenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *FSContextRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *FSContextRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if FSContextenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.RefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *FSContextRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if FSContextenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *FSContextRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if FSContextenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *FSContextRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD deleted file mode 100644 index cfdea5cf7..000000000 --- a/pkg/sentry/kernel/futex/BUILD +++ /dev/null @@ -1,61 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "atomicptr_bucket", - out = "atomicptr_bucket_unsafe.go", - package = "futex", - suffix = "Bucket", - template = "//pkg/sync/atomicptr:generic_atomicptr", - types = { - "Value": "bucket", - }, -) - -go_template_instance( - name = "waiter_list", - out = "waiter_list.go", - package = "futex", - prefix = "waiter", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Waiter", - "Linker": "*Waiter", - }, -) - -go_library( - name = "futex", - srcs = [ - "atomicptr_bucket_unsafe.go", - "futex.go", - "waiter_list.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/sentry/memmap", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "futex_test", - size = "small", - srcs = ["futex_test.go"], - library = ":futex", - deps = [ - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sync", - ], -) diff --git a/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go b/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go new file mode 100644 index 000000000..d3fdf09b0 --- /dev/null +++ b/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go @@ -0,0 +1,37 @@ +package futex + +import ( + "sync/atomic" + "unsafe" +) + +// An AtomicPtr is a pointer to a value of type Value that can be atomically +// loaded and stored. The zero value of an AtomicPtr represents nil. +// +// Note that copying AtomicPtr by value performs a non-atomic read of the +// stored pointer, which is unsafe if Store() can be called concurrently; in +// this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable +type AtomicPtrBucket struct { + ptr unsafe.Pointer `state:".(*bucket)"` +} + +func (p *AtomicPtrBucket) savePtr() *bucket { + return p.Load() +} + +func (p *AtomicPtrBucket) loadPtr(v *bucket) { + p.Store(v) +} + +// Load returns the value set by the most recent Store. It returns nil if there +// has been no previous call to Store. +func (p *AtomicPtrBucket) Load() *bucket { + return (*bucket)(atomic.LoadPointer(&p.ptr)) +} + +// Store sets the value returned by Load to x. +func (p *AtomicPtrBucket) Store(x *bucket) { + atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) +} diff --git a/pkg/sentry/kernel/futex/futex_state_autogen.go b/pkg/sentry/kernel/futex/futex_state_autogen.go new file mode 100644 index 000000000..6e22b313a --- /dev/null +++ b/pkg/sentry/kernel/futex/futex_state_autogen.go @@ -0,0 +1,122 @@ +// automatically generated by stateify. + +package futex + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (b *bucket) StateTypeName() string { + return "pkg/sentry/kernel/futex.bucket" +} + +func (b *bucket) StateFields() []string { + return []string{} +} + +func (b *bucket) beforeSave() {} + +// +checklocksignore +func (b *bucket) StateSave(stateSinkObject state.Sink) { + b.beforeSave() + if !state.IsZeroValue(&b.waiters) { + state.Failf("waiters is %#v, expected zero", &b.waiters) + } +} + +func (b *bucket) afterLoad() {} + +// +checklocksignore +func (b *bucket) StateLoad(stateSourceObject state.Source) { +} + +func (m *Manager) StateTypeName() string { + return "pkg/sentry/kernel/futex.Manager" +} + +func (m *Manager) StateFields() []string { + return []string{ + "sharedBucket", + } +} + +func (m *Manager) beforeSave() {} + +// +checklocksignore +func (m *Manager) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + if !state.IsZeroValue(&m.privateBuckets) { + state.Failf("privateBuckets is %#v, expected zero", &m.privateBuckets) + } + stateSinkObject.Save(0, &m.sharedBucket) +} + +func (m *Manager) afterLoad() {} + +// +checklocksignore +func (m *Manager) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.sharedBucket) +} + +func (l *waiterList) StateTypeName() string { + return "pkg/sentry/kernel/futex.waiterList" +} + +func (l *waiterList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *waiterList) beforeSave() {} + +// +checklocksignore +func (l *waiterList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *waiterList) afterLoad() {} + +// +checklocksignore +func (l *waiterList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *waiterEntry) StateTypeName() string { + return "pkg/sentry/kernel/futex.waiterEntry" +} + +func (e *waiterEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *waiterEntry) beforeSave() {} + +// +checklocksignore +func (e *waiterEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *waiterEntry) afterLoad() {} + +// +checklocksignore +func (e *waiterEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func init() { + state.Register((*bucket)(nil)) + state.Register((*Manager)(nil)) + state.Register((*waiterList)(nil)) + state.Register((*waiterEntry)(nil)) +} diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go deleted file mode 100644 index 04c136f87..000000000 --- a/pkg/sentry/kernel/futex/futex_test.go +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package futex - -import ( - "math" - "runtime" - "sync/atomic" - "testing" - "unsafe" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/sync" -) - -// testData implements the Target interface, and allows us to -// treat the address passed for futex operations as an index in -// a byte slice for testing simplicity. -type testData struct { - context.Context - data []byte -} - -const sizeofInt32 = 4 - -func newTestData(size uint) testData { - return testData{ - data: make([]byte, size), - } -} - -func (t testData) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) { - val := atomic.SwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), new) - return val, nil -} - -func (t testData) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) { - if atomic.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), old, new) { - return old, nil - } - return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil -} - -func (t testData) LoadUint32(addr hostarch.Addr) (uint32, error) { - return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil -} - -func (t testData) GetSharedKey(addr hostarch.Addr) (Key, error) { - return Key{ - Kind: KindSharedMappable, - Offset: uint64(addr), - }, nil -} - -func futexKind(private bool) string { - if private { - return "private" - } - return "shared" -} - -func newPreparedTestWaiter(t *testing.T, m *Manager, ta Target, addr hostarch.Addr, private bool, val uint32, bitmask uint32) *Waiter { - w := NewWaiter() - if err := m.WaitPrepare(w, ta, addr, private, val, bitmask); err != nil { - t.Fatalf("WaitPrepare failed: %v", err) - } - return w -} - -func TestFutexWake(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(sizeofInt32) - - // Start waiting for wakeup. - w := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w, d) - - // Perform a wakeup. - if n, err := m.Wake(d, 0, private, ^uint32(0), 1); err != nil || n != 1 { - t.Errorf("Wake: got (%d, %v), wanted (1, nil)", n, err) - } - - // Expect the waiter to have been woken. - if !w.woken() { - t.Error("waiter not woken") - } - }) - } -} - -func TestFutexWakeBitmask(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(sizeofInt32) - - // Start waiting for wakeup. - w := newPreparedTestWaiter(t, m, d, 0, private, 0, 0x0000ffff) - defer m.WaitComplete(w, d) - - // Perform a wakeup using the wrong bitmask. - if n, err := m.Wake(d, 0, private, 0xffff0000, 1); err != nil || n != 0 { - t.Errorf("Wake with non-matching bitmask: got (%d, %v), wanted (0, nil)", n, err) - } - - // Expect the waiter to still be waiting. - if w.woken() { - t.Error("waiter woken unexpectedly") - } - - // Perform a wakeup using the right bitmask. - if n, err := m.Wake(d, 0, private, 0x00000001, 1); err != nil || n != 1 { - t.Errorf("Wake with matching bitmask: got (%d, %v), wanted (1, nil)", n, err) - } - - // Expect that the waiter was woken. - if !w.woken() { - t.Error("waiter not woken") - } - }) - } -} - -func TestFutexWakeTwo(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(sizeofInt32) - - // Start three waiters waiting for wakeup. - var ws [3]*Waiter - for i := range ws { - ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i], d) - } - - // Perform two wakeups. - const wakeups = 2 - if n, err := m.Wake(d, 0, private, ^uint32(0), 2); err != nil || n != wakeups { - t.Errorf("Wake: got (%d, %v), wanted (%d, nil)", n, err, wakeups) - } - - // Expect that exactly two waiters were woken. - // We don't get guarantees about exactly which two, - // (although we expect them to be w1 and w2). - awake := 0 - for i := range ws { - if ws[i].woken() { - awake++ - } - } - if awake != wakeups { - t.Errorf("got %d woken waiters, wanted %d", awake, wakeups) - } - }) - } -} - -func TestFutexWakeUnrelated(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(2 * sizeofInt32) - - // Start two waiters waiting for wakeup on different addresses. - w1 := newPreparedTestWaiter(t, m, d, 0*sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, 1*sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Perform two wakeups on the second address. - if n, err := m.Wake(d, 1*sizeofInt32, private, ^uint32(0), 2); err != nil || n != 1 { - t.Errorf("Wake: got (%d, %v), wanted (1, nil)", n, err) - } - - // Expect that only the second waiter was woken. - if w1.woken() { - t.Error("w1 woken unexpectedly") - } - if !w2.woken() { - t.Error("w2 not woken") - } - }) - } -} - -func TestWakeOpEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(2 * sizeofInt32) - - // Perform wakeups with no waiters. - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 10, 0); err != nil || n != 0 { - t.Fatalf("WakeOp: got (%d, %v), wanted (0, nil)", n, err) - } - }) - } -} - -func TestWakeOpFirstNonEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address 0. - w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Perform 10 wakeups on address 0. - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 0, 0); err != nil || n != 2 { - t.Errorf("WakeOp: got (%d, %v), wanted (2, nil)", n, err) - } - - // Expect that both waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - }) - } -} - -func TestWakeOpSecondNonEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address sizeofInt32. - w1 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Perform 10 wakeups on address sizeofInt32 (contingent on - // d.Op(0), which should succeed). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 0, 10, 0); err != nil || n != 2 { - t.Errorf("WakeOp: got (%d, %v), wanted (2, nil)", n, err) - } - - // Expect that both waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - }) - } -} - -func TestWakeOpSecondNonEmptyFailingOp(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address sizeofInt32. - w1 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Perform 10 wakeups on address sizeofInt32 (contingent on - // d.Op(1), which should fail). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 0, 10, 1); err != nil || n != 0 { - t.Errorf("WakeOp: got (%d, %v), wanted (0, nil)", n, err) - } - - // Expect that neither waiter was woken. - if w1.woken() { - t.Error("w1 woken unexpectedly") - } - if w2.woken() { - t.Error("w2 woken unexpectedly") - } - }) - } -} - -func TestWakeOpAllNonEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address 0. - w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Add two waiters on address sizeofInt32. - w3 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w3, d) - w4 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w4, d) - - // Perform 10 wakeups on address 0 (unconditionally), and 10 - // wakeups on address sizeofInt32 (contingent on d.Op(0), which - // should succeed). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 10, 0); err != nil || n != 4 { - t.Errorf("WakeOp: got (%d, %v), wanted (4, nil)", n, err) - } - - // Expect that all waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - if !w3.woken() { - t.Error("w3 not woken") - } - if !w4.woken() { - t.Error("w4 not woken") - } - }) - } -} - -func TestWakeOpAllNonEmptyFailingOp(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address 0. - w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Add two waiters on address sizeofInt32. - w3 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w3, d) - w4 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w4, d) - - // Perform 10 wakeups on address 0 (unconditionally), and 10 - // wakeups on address sizeofInt32 (contingent on d.Op(1), which - // should fail). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 10, 1); err != nil || n != 2 { - t.Errorf("WakeOp: got (%d, %v), wanted (2, nil)", n, err) - } - - // Expect that only the first two waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - if w3.woken() { - t.Error("w3 woken unexpectedly") - } - if w4.woken() { - t.Error("w4 woken unexpectedly") - } - }) - } -} - -func TestWakeOpSameAddress(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add four waiters on address 0. - var ws [4]*Waiter - for i := range ws { - ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i], d) - } - - // Perform 1 wakeup on address 0 (unconditionally), and 1 wakeup - // on address 0 (contingent on d.Op(0), which should succeed). - const wakeups = 2 - if n, err := m.WakeOp(d, 0, 0, private, 1, 1, 0); err != nil || n != wakeups { - t.Errorf("WakeOp: got (%d, %v), wanted (%d, nil)", n, err, wakeups) - } - - // Expect that exactly two waiters were woken. - awake := 0 - for i := range ws { - if ws[i].woken() { - awake++ - } - } - if awake != wakeups { - t.Errorf("got %d woken waiters, wanted %d", awake, wakeups) - } - }) - } -} - -func TestWakeOpSameAddressFailingOp(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add four waiters on address 0. - var ws [4]*Waiter - for i := range ws { - ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i], d) - } - - // Perform 1 wakeup on address 0 (unconditionally), and 1 wakeup - // on address 0 (contingent on d.Op(1), which should fail). - const wakeups = 1 - if n, err := m.WakeOp(d, 0, 0, private, 1, 1, 1); err != nil || n != wakeups { - t.Errorf("WakeOp: got (%d, %v), wanted (%d, nil)", n, err, wakeups) - } - - // Expect that exactly one waiter was woken. - awake := 0 - for i := range ws { - if ws[i].woken() { - awake++ - } - } - if awake != wakeups { - t.Errorf("got %d woken waiters, wanted %d", awake, wakeups) - } - }) - } -} - -const ( - testMutexSize = sizeofInt32 - testMutexLocked uint32 = 1 - testMutexUnlocked uint32 = 0 -) - -// testMutex ties together a testData slice, an address, and a -// futex manager in order to implement the sync.Locker interface. -// Beyond being used as a Locker, this is a simple mechanism for -// changing the underlying values for simpler tests. -type testMutex struct { - a hostarch.Addr - d testData - m *Manager -} - -func newTestMutex(addr hostarch.Addr, d testData, m *Manager) *testMutex { - return &testMutex{a: addr, d: d, m: m} -} - -// Lock acquires the testMutex. -// This may wait for it to be available via the futex manager. -func (t *testMutex) Lock() { - for { - // Attempt to grab the lock. - if atomic.CompareAndSwapUint32( - (*uint32)(unsafe.Pointer(&t.d.data[t.a])), - testMutexUnlocked, - testMutexLocked) { - // Lock held. - return - } - - // Wait for it to be "not locked". - w := NewWaiter() - err := t.m.WaitPrepare(w, t.d, t.a, true, testMutexLocked, ^uint32(0)) - if linuxerr.Equals(linuxerr.EAGAIN, err) { - continue - } - if err != nil { - // Should never happen. - panic("WaitPrepare returned unexpected error: " + err.Error()) - } - <-w.C - t.m.WaitComplete(w, t.d) - } -} - -// Unlock releases the testMutex. -// This will notify any waiters via the futex manager. -func (t *testMutex) Unlock() { - // Unlock. - atomic.StoreUint32((*uint32)(unsafe.Pointer(&t.d.data[t.a])), testMutexUnlocked) - - // Notify all waiters. - t.m.Wake(t.d, t.a, true, ^uint32(0), math.MaxInt32) -} - -// This function was shamelessly stolen from mutex_test.go. -func HammerMutex(l sync.Locker, loops int, cdone chan bool) { - for i := 0; i < loops; i++ { - l.Lock() - runtime.Gosched() - l.Unlock() - } - cdone <- true -} - -func TestMutexStress(t *testing.T) { - m := NewManager() - d := newTestData(testMutexSize) - tm := newTestMutex(0*testMutexSize, d, m) - c := make(chan bool) - - for i := 0; i < 10; i++ { - go HammerMutex(tm, 1000, c) - } - - for i := 0; i < 10; i++ { - <-c - } -} diff --git a/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go b/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go new file mode 100644 index 000000000..84bb180fc --- /dev/null +++ b/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go @@ -0,0 +1,37 @@ +// automatically generated by stateify. + +package futex + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (p *AtomicPtrBucket) StateTypeName() string { + return "pkg/sentry/kernel/futex.AtomicPtrBucket" +} + +func (p *AtomicPtrBucket) StateFields() []string { + return []string{ + "ptr", + } +} + +func (p *AtomicPtrBucket) beforeSave() {} + +// +checklocksignore +func (p *AtomicPtrBucket) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + var ptrValue *bucket = p.savePtr() + stateSinkObject.SaveValue(0, ptrValue) +} + +func (p *AtomicPtrBucket) afterLoad() {} + +// +checklocksignore +func (p *AtomicPtrBucket) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*bucket), func(y interface{}) { p.loadPtr(y.(*bucket)) }) +} + +func init() { + state.Register((*AtomicPtrBucket)(nil)) +} diff --git a/pkg/sentry/kernel/futex/waiter_list.go b/pkg/sentry/kernel/futex/waiter_list.go new file mode 100644 index 000000000..24968ce4b --- /dev/null +++ b/pkg/sentry/kernel/futex/waiter_list.go @@ -0,0 +1,221 @@ +package futex + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type waiterElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (waiterElementMapper) linkerFor(elem *Waiter) *Waiter { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type waiterList struct { + head *Waiter + tail *Waiter +} + +// Reset resets list l to the empty state. +func (l *waiterList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *waiterList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *waiterList) Front() *Waiter { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *waiterList) Back() *Waiter { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *waiterList) Len() (count int) { + for e := l.Front(); e != nil; e = (waiterElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *waiterList) PushFront(e *Waiter) { + linker := waiterElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + waiterElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *waiterList) PushBack(e *Waiter) { + linker := waiterElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *waiterList) PushBackList(m *waiterList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(m.head) + waiterElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *waiterList) InsertAfter(b, e *Waiter) { + bLinker := waiterElementMapper{}.linkerFor(b) + eLinker := waiterElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + waiterElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *waiterList) InsertBefore(a, e *Waiter) { + aLinker := waiterElementMapper{}.linkerFor(a) + eLinker := waiterElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + waiterElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *waiterList) Remove(e *Waiter) { + linker := waiterElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + waiterElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + waiterElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type waiterEntry struct { + next *Waiter + prev *Waiter +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *waiterEntry) Next() *Waiter { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *waiterEntry) Prev() *Waiter { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *waiterEntry) SetNext(elem *Waiter) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *waiterEntry) SetPrev(elem *Waiter) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/g3doc/run_states.dot b/pkg/sentry/kernel/g3doc/run_states.dot deleted file mode 100644 index 7861fe1f5..000000000 --- a/pkg/sentry/kernel/g3doc/run_states.dot +++ /dev/null @@ -1,99 +0,0 @@ -digraph { - subgraph { - App; - } - subgraph { - Interrupt; - InterruptAfterSignalDeliveryStop; - } - subgraph { - Syscall; - SyscallAfterPtraceEventSeccomp; - SyscallEnter; - SyscallAfterSyscallEnterStop; - SyscallAfterSysemuStop; - SyscallInvoke; - SyscallAfterPtraceEventClone; - SyscallAfterExecStop; - SyscallAfterVforkStop; - SyscallReinvoke; - SyscallExit; - } - subgraph { - Vsyscall; - VsyscallAfterPtraceEventSeccomp; - VsyscallInvoke; - } - subgraph { - Exit; - ExitMain; // leave thread group, release resources, reparent children, kill PID namespace and wait if TGID 1 - ExitNotify; // signal parent/tracer, become waitable - ExitDone; // represented by t.runState == nil - } - - // Task exit - Exit -> ExitMain; - ExitMain -> ExitNotify; - ExitNotify -> ExitDone; - - // Execution of untrusted application code - App -> App; - - // Interrupts (usually signal delivery) - App -> Interrupt; - Interrupt -> Interrupt; // if other interrupt conditions may still apply - Interrupt -> Exit; // if killed - - // Syscalls - App -> Syscall; - Syscall -> SyscallEnter; - SyscallEnter -> SyscallInvoke; - SyscallInvoke -> SyscallExit; - SyscallExit -> App; - - // exit, exit_group - SyscallInvoke -> Exit; - - // execve - SyscallInvoke -> SyscallAfterExecStop; - SyscallAfterExecStop -> SyscallExit; - SyscallAfterExecStop -> App; // fatal signal pending - - // vfork - SyscallInvoke -> SyscallAfterVforkStop; - SyscallAfterVforkStop -> SyscallExit; - - // Vsyscalls - App -> Vsyscall; - Vsyscall -> VsyscallInvoke; - Vsyscall -> App; // fault while reading return address from stack - VsyscallInvoke -> App; - - // ptrace-specific branches - Interrupt -> InterruptAfterSignalDeliveryStop; - InterruptAfterSignalDeliveryStop -> Interrupt; - SyscallEnter -> SyscallAfterSyscallEnterStop; - SyscallAfterSyscallEnterStop -> SyscallInvoke; - SyscallAfterSyscallEnterStop -> SyscallExit; // skipped by tracer - SyscallAfterSyscallEnterStop -> App; // fatal signal pending - SyscallEnter -> SyscallAfterSysemuStop; - SyscallAfterSysemuStop -> SyscallExit; - SyscallAfterSysemuStop -> App; // fatal signal pending - SyscallInvoke -> SyscallAfterPtraceEventClone; - SyscallAfterPtraceEventClone -> SyscallExit; - SyscallAfterPtraceEventClone -> SyscallAfterVforkStop; - - // seccomp - Syscall -> App; // SECCOMP_RET_TRAP, SECCOMP_RET_ERRNO, SECCOMP_RET_KILL, SECCOMP_RET_TRACE without tracer - Syscall -> SyscallAfterPtraceEventSeccomp; // SECCOMP_RET_TRACE - SyscallAfterPtraceEventSeccomp -> SyscallEnter; - SyscallAfterPtraceEventSeccomp -> SyscallExit; // skipped by tracer - SyscallAfterPtraceEventSeccomp -> App; // fatal signal pending - Vsyscall -> VsyscallAfterPtraceEventSeccomp; - VsyscallAfterPtraceEventSeccomp -> VsyscallInvoke; - VsyscallAfterPtraceEventSeccomp -> App; - - // Autosave - SyscallInvoke -> SyscallReinvoke; - SyscallReinvoke -> SyscallInvoke; -} diff --git a/pkg/sentry/kernel/g3doc/run_states.png b/pkg/sentry/kernel/g3doc/run_states.png Binary files differdeleted file mode 100644 index b63b60f02..000000000 --- a/pkg/sentry/kernel/g3doc/run_states.png +++ /dev/null diff --git a/pkg/sentry/kernel/ipc_namespace_refs.go b/pkg/sentry/kernel/ipc_namespace_refs.go new file mode 100644 index 000000000..dd399aebd --- /dev/null +++ b/pkg/sentry/kernel/ipc_namespace_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const IPCNamespaceenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var IPCNamespaceobj *IPCNamespace + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type IPCNamespaceRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *IPCNamespaceRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *IPCNamespaceRefs) RefType() string { + return fmt.Sprintf("%T", IPCNamespaceobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *IPCNamespaceRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *IPCNamespaceRefs) LogRefs() bool { + return IPCNamespaceenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *IPCNamespaceRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *IPCNamespaceRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if IPCNamespaceenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.RefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *IPCNamespaceRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if IPCNamespaceenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *IPCNamespaceRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if IPCNamespaceenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *IPCNamespaceRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go new file mode 100644 index 000000000..f951fab78 --- /dev/null +++ b/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go @@ -0,0 +1,230 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build tag aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The build tags here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build tags, see +// tools/defs.bzl:calculate_sets(). + +package kernel + +import ( + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*ThreadID)(nil) +var _ marshal.Marshallable = (*vdsoParams)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (tid *ThreadID) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (tid *ThreadID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(*tid)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (tid *ThreadID) UnmarshalBytes(src []byte) { + *tid = ThreadID(int32(hostarch.ByteOrder.Uint32(src[:4]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (tid *ThreadID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (tid *ThreadID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(tid), uintptr(tid.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (tid *ThreadID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(tid), unsafe.Pointer(&src[0]), uintptr(tid.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (tid *ThreadID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(tid))) + hdr.Len = tid.SizeBytes() + hdr.Cap = tid.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that tid + // must live until the use above. + runtime.KeepAlive(tid) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (tid *ThreadID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return tid.CopyOutN(cc, addr, tid.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (tid *ThreadID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(tid))) + hdr.Len = tid.SizeBytes() + hdr.Cap = tid.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that tid + // must live until the use above. + runtime.KeepAlive(tid) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (tid *ThreadID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(tid))) + hdr.Len = tid.SizeBytes() + hdr.Cap = tid.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that tid + // must live until the use above. + runtime.KeepAlive(tid) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (v *vdsoParams) SizeBytes() int { + return 64 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (v *vdsoParams) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicReady)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicBaseCycles)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicBaseRef)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicFrequency)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeReady)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeBaseCycles)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeBaseRef)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeFrequency)) + dst = dst[8:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (v *vdsoParams) UnmarshalBytes(src []byte) { + v.monotonicReady = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.monotonicBaseCycles = int64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.monotonicBaseRef = int64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.monotonicFrequency = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeReady = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeBaseCycles = int64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeBaseRef = int64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeFrequency = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (v *vdsoParams) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (v *vdsoParams) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(v), uintptr(v.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (v *vdsoParams) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(v), unsafe.Pointer(&src[0]), uintptr(v.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (v *vdsoParams) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v))) + hdr.Len = v.SizeBytes() + hdr.Cap = v.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that v + // must live until the use above. + runtime.KeepAlive(v) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (v *vdsoParams) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return v.CopyOutN(cc, addr, v.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (v *vdsoParams) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v))) + hdr.Len = v.SizeBytes() + hdr.Cap = v.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that v + // must live until the use above. + runtime.KeepAlive(v) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (v *vdsoParams) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v))) + hdr.Len = v.SizeBytes() + hdr.Cap = v.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that v + // must live until the use above. + runtime.KeepAlive(v) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/kernel/kernel_amd64_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_amd64_abi_autogen_unsafe.go new file mode 100644 index 000000000..49bde618c --- /dev/null +++ b/pkg/sentry/kernel/kernel_amd64_abi_autogen_unsafe.go @@ -0,0 +1,15 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build tag aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The build tags here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build tags, see +// tools/defs.bzl:calculate_sets(). + +// +build amd64 + +package kernel + +import ( +) + diff --git a/pkg/sentry/kernel/kernel_amd64_state_autogen.go b/pkg/sentry/kernel/kernel_amd64_state_autogen.go new file mode 100644 index 000000000..12de47ad0 --- /dev/null +++ b/pkg/sentry/kernel/kernel_amd64_state_autogen.go @@ -0,0 +1,5 @@ +// automatically generated by stateify. + +// +build amd64 + +package kernel diff --git a/pkg/sentry/kernel/kernel_arm64_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_arm64_abi_autogen_unsafe.go new file mode 100644 index 000000000..ed40b65c6 --- /dev/null +++ b/pkg/sentry/kernel/kernel_arm64_abi_autogen_unsafe.go @@ -0,0 +1,15 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build tag aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The build tags here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build tags, see +// tools/defs.bzl:calculate_sets(). + +// +build arm64 + +package kernel + +import ( +) + diff --git a/pkg/sentry/kernel/kernel_arm64_state_autogen.go b/pkg/sentry/kernel/kernel_arm64_state_autogen.go new file mode 100644 index 000000000..3c040d283 --- /dev/null +++ b/pkg/sentry/kernel/kernel_arm64_state_autogen.go @@ -0,0 +1,5 @@ +// automatically generated by stateify. + +// +build arm64 + +package kernel diff --git a/pkg/sentry/kernel/kernel_opts_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_opts_abi_autogen_unsafe.go new file mode 100644 index 000000000..9c39d630a --- /dev/null +++ b/pkg/sentry/kernel/kernel_opts_abi_autogen_unsafe.go @@ -0,0 +1,13 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build tag aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The build tags here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build tags, see +// tools/defs.bzl:calculate_sets(). + +package kernel + +import ( +) + diff --git a/pkg/sentry/kernel/kernel_opts_state_autogen.go b/pkg/sentry/kernel/kernel_opts_state_autogen.go new file mode 100644 index 000000000..73334db4c --- /dev/null +++ b/pkg/sentry/kernel/kernel_opts_state_autogen.go @@ -0,0 +1,32 @@ +// automatically generated by stateify. + +package kernel + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *SpecialOpts) StateTypeName() string { + return "pkg/sentry/kernel.SpecialOpts" +} + +func (s *SpecialOpts) StateFields() []string { + return []string{} +} + +func (s *SpecialOpts) beforeSave() {} + +// +checklocksignore +func (s *SpecialOpts) StateSave(stateSinkObject state.Sink) { + s.beforeSave() +} + +func (s *SpecialOpts) afterLoad() {} + +// +checklocksignore +func (s *SpecialOpts) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*SpecialOpts)(nil)) +} diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go new file mode 100644 index 000000000..860599e73 --- /dev/null +++ b/pkg/sentry/kernel/kernel_state_autogen.go @@ -0,0 +1,2599 @@ +// automatically generated by stateify. + +package kernel + +import ( + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/tcpip" +) + +func (a *abstractEndpoint) StateTypeName() string { + return "pkg/sentry/kernel.abstractEndpoint" +} + +func (a *abstractEndpoint) StateFields() []string { + return []string{ + "ep", + "socket", + "name", + "ns", + } +} + +func (a *abstractEndpoint) beforeSave() {} + +// +checklocksignore +func (a *abstractEndpoint) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.ep) + stateSinkObject.Save(1, &a.socket) + stateSinkObject.Save(2, &a.name) + stateSinkObject.Save(3, &a.ns) +} + +func (a *abstractEndpoint) afterLoad() {} + +// +checklocksignore +func (a *abstractEndpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.ep) + stateSourceObject.Load(1, &a.socket) + stateSourceObject.Load(2, &a.name) + stateSourceObject.Load(3, &a.ns) +} + +func (a *AbstractSocketNamespace) StateTypeName() string { + return "pkg/sentry/kernel.AbstractSocketNamespace" +} + +func (a *AbstractSocketNamespace) StateFields() []string { + return []string{ + "endpoints", + } +} + +func (a *AbstractSocketNamespace) beforeSave() {} + +// +checklocksignore +func (a *AbstractSocketNamespace) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.endpoints) +} + +func (a *AbstractSocketNamespace) afterLoad() {} + +// +checklocksignore +func (a *AbstractSocketNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.endpoints) +} + +func (c *Cgroup) StateTypeName() string { + return "pkg/sentry/kernel.Cgroup" +} + +func (c *Cgroup) StateFields() []string { + return []string{ + "Dentry", + "CgroupImpl", + } +} + +func (c *Cgroup) beforeSave() {} + +// +checklocksignore +func (c *Cgroup) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.Dentry) + stateSinkObject.Save(1, &c.CgroupImpl) +} + +func (c *Cgroup) afterLoad() {} + +// +checklocksignore +func (c *Cgroup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.Dentry) + stateSourceObject.Load(1, &c.CgroupImpl) +} + +func (h *hierarchy) StateTypeName() string { + return "pkg/sentry/kernel.hierarchy" +} + +func (h *hierarchy) StateFields() []string { + return []string{ + "id", + "controllers", + "fs", + } +} + +func (h *hierarchy) beforeSave() {} + +// +checklocksignore +func (h *hierarchy) StateSave(stateSinkObject state.Sink) { + h.beforeSave() + stateSinkObject.Save(0, &h.id) + stateSinkObject.Save(1, &h.controllers) + stateSinkObject.Save(2, &h.fs) +} + +func (h *hierarchy) afterLoad() {} + +// +checklocksignore +func (h *hierarchy) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &h.id) + stateSourceObject.Load(1, &h.controllers) + stateSourceObject.Load(2, &h.fs) +} + +func (r *CgroupRegistry) StateTypeName() string { + return "pkg/sentry/kernel.CgroupRegistry" +} + +func (r *CgroupRegistry) StateFields() []string { + return []string{ + "lastHierarchyID", + "controllers", + "hierarchies", + } +} + +func (r *CgroupRegistry) beforeSave() {} + +// +checklocksignore +func (r *CgroupRegistry) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.lastHierarchyID) + stateSinkObject.Save(1, &r.controllers) + stateSinkObject.Save(2, &r.hierarchies) +} + +func (r *CgroupRegistry) afterLoad() {} + +// +checklocksignore +func (r *CgroupRegistry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.lastHierarchyID) + stateSourceObject.Load(1, &r.controllers) + stateSourceObject.Load(2, &r.hierarchies) +} + +func (f *FDFlags) StateTypeName() string { + return "pkg/sentry/kernel.FDFlags" +} + +func (f *FDFlags) StateFields() []string { + return []string{ + "CloseOnExec", + } +} + +func (f *FDFlags) beforeSave() {} + +// +checklocksignore +func (f *FDFlags) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.CloseOnExec) +} + +func (f *FDFlags) afterLoad() {} + +// +checklocksignore +func (f *FDFlags) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.CloseOnExec) +} + +func (d *descriptor) StateTypeName() string { + return "pkg/sentry/kernel.descriptor" +} + +func (d *descriptor) StateFields() []string { + return []string{ + "file", + "fileVFS2", + "flags", + } +} + +func (d *descriptor) beforeSave() {} + +// +checklocksignore +func (d *descriptor) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.file) + stateSinkObject.Save(1, &d.fileVFS2) + stateSinkObject.Save(2, &d.flags) +} + +func (d *descriptor) afterLoad() {} + +// +checklocksignore +func (d *descriptor) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.file) + stateSourceObject.Load(1, &d.fileVFS2) + stateSourceObject.Load(2, &d.flags) +} + +func (f *FDTable) StateTypeName() string { + return "pkg/sentry/kernel.FDTable" +} + +func (f *FDTable) StateFields() []string { + return []string{ + "FDTableRefs", + "k", + "next", + "used", + "descriptorTable", + } +} + +func (f *FDTable) beforeSave() {} + +// +checklocksignore +func (f *FDTable) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + var descriptorTableValue map[int32]descriptor = f.saveDescriptorTable() + stateSinkObject.SaveValue(4, descriptorTableValue) + stateSinkObject.Save(0, &f.FDTableRefs) + stateSinkObject.Save(1, &f.k) + stateSinkObject.Save(2, &f.next) + stateSinkObject.Save(3, &f.used) +} + +func (f *FDTable) afterLoad() {} + +// +checklocksignore +func (f *FDTable) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.FDTableRefs) + stateSourceObject.Load(1, &f.k) + stateSourceObject.Load(2, &f.next) + stateSourceObject.Load(3, &f.used) + stateSourceObject.LoadValue(4, new(map[int32]descriptor), func(y interface{}) { f.loadDescriptorTable(y.(map[int32]descriptor)) }) +} + +func (r *FDTableRefs) StateTypeName() string { + return "pkg/sentry/kernel.FDTableRefs" +} + +func (r *FDTableRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *FDTableRefs) beforeSave() {} + +// +checklocksignore +func (r *FDTableRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *FDTableRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (f *FSContext) StateTypeName() string { + return "pkg/sentry/kernel.FSContext" +} + +func (f *FSContext) StateFields() []string { + return []string{ + "FSContextRefs", + "root", + "rootVFS2", + "cwd", + "cwdVFS2", + "umask", + } +} + +func (f *FSContext) beforeSave() {} + +// +checklocksignore +func (f *FSContext) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.FSContextRefs) + stateSinkObject.Save(1, &f.root) + stateSinkObject.Save(2, &f.rootVFS2) + stateSinkObject.Save(3, &f.cwd) + stateSinkObject.Save(4, &f.cwdVFS2) + stateSinkObject.Save(5, &f.umask) +} + +func (f *FSContext) afterLoad() {} + +// +checklocksignore +func (f *FSContext) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.FSContextRefs) + stateSourceObject.Load(1, &f.root) + stateSourceObject.Load(2, &f.rootVFS2) + stateSourceObject.Load(3, &f.cwd) + stateSourceObject.Load(4, &f.cwdVFS2) + stateSourceObject.Load(5, &f.umask) +} + +func (r *FSContextRefs) StateTypeName() string { + return "pkg/sentry/kernel.FSContextRefs" +} + +func (r *FSContextRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *FSContextRefs) beforeSave() {} + +// +checklocksignore +func (r *FSContextRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *FSContextRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (i *IPCNamespace) StateTypeName() string { + return "pkg/sentry/kernel.IPCNamespace" +} + +func (i *IPCNamespace) StateFields() []string { + return []string{ + "IPCNamespaceRefs", + "userNS", + "semaphores", + "shms", + } +} + +func (i *IPCNamespace) beforeSave() {} + +// +checklocksignore +func (i *IPCNamespace) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.IPCNamespaceRefs) + stateSinkObject.Save(1, &i.userNS) + stateSinkObject.Save(2, &i.semaphores) + stateSinkObject.Save(3, &i.shms) +} + +func (i *IPCNamespace) afterLoad() {} + +// +checklocksignore +func (i *IPCNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.IPCNamespaceRefs) + stateSourceObject.Load(1, &i.userNS) + stateSourceObject.Load(2, &i.semaphores) + stateSourceObject.Load(3, &i.shms) +} + +func (r *IPCNamespaceRefs) StateTypeName() string { + return "pkg/sentry/kernel.IPCNamespaceRefs" +} + +func (r *IPCNamespaceRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *IPCNamespaceRefs) beforeSave() {} + +// +checklocksignore +func (r *IPCNamespaceRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *IPCNamespaceRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (k *Kernel) StateTypeName() string { + return "pkg/sentry/kernel.Kernel" +} + +func (k *Kernel) StateFields() []string { + return []string{ + "featureSet", + "timekeeper", + "tasks", + "rootUserNamespace", + "rootNetworkNamespace", + "applicationCores", + "useHostCores", + "extraAuxv", + "vdso", + "rootUTSNamespace", + "rootIPCNamespace", + "rootAbstractSocketNamespace", + "futexes", + "globalInit", + "syslog", + "runningTasks", + "cpuClock", + "cpuClockTickerDisabled", + "cpuClockTickerSetting", + "uniqueID", + "nextInotifyCookie", + "netlinkPorts", + "danglingEndpoints", + "sockets", + "socketsVFS2", + "nextSocketRecord", + "deviceRegistry", + "DirentCacheLimiter", + "SpecialOpts", + "vfs", + "hostMount", + "pipeMount", + "shmMount", + "socketMount", + "SleepForAddressSpaceActivation", + "ptraceExceptions", + "YAMAPtraceScope", + "cgroupRegistry", + } +} + +func (k *Kernel) beforeSave() {} + +// +checklocksignore +func (k *Kernel) StateSave(stateSinkObject state.Sink) { + k.beforeSave() + var danglingEndpointsValue []tcpip.Endpoint = k.saveDanglingEndpoints() + stateSinkObject.SaveValue(22, danglingEndpointsValue) + var deviceRegistryValue *device.Registry = k.saveDeviceRegistry() + stateSinkObject.SaveValue(26, deviceRegistryValue) + stateSinkObject.Save(0, &k.featureSet) + stateSinkObject.Save(1, &k.timekeeper) + stateSinkObject.Save(2, &k.tasks) + stateSinkObject.Save(3, &k.rootUserNamespace) + stateSinkObject.Save(4, &k.rootNetworkNamespace) + stateSinkObject.Save(5, &k.applicationCores) + stateSinkObject.Save(6, &k.useHostCores) + stateSinkObject.Save(7, &k.extraAuxv) + stateSinkObject.Save(8, &k.vdso) + stateSinkObject.Save(9, &k.rootUTSNamespace) + stateSinkObject.Save(10, &k.rootIPCNamespace) + stateSinkObject.Save(11, &k.rootAbstractSocketNamespace) + stateSinkObject.Save(12, &k.futexes) + stateSinkObject.Save(13, &k.globalInit) + stateSinkObject.Save(14, &k.syslog) + stateSinkObject.Save(15, &k.runningTasks) + stateSinkObject.Save(16, &k.cpuClock) + stateSinkObject.Save(17, &k.cpuClockTickerDisabled) + stateSinkObject.Save(18, &k.cpuClockTickerSetting) + stateSinkObject.Save(19, &k.uniqueID) + stateSinkObject.Save(20, &k.nextInotifyCookie) + stateSinkObject.Save(21, &k.netlinkPorts) + stateSinkObject.Save(23, &k.sockets) + stateSinkObject.Save(24, &k.socketsVFS2) + stateSinkObject.Save(25, &k.nextSocketRecord) + stateSinkObject.Save(27, &k.DirentCacheLimiter) + stateSinkObject.Save(28, &k.SpecialOpts) + stateSinkObject.Save(29, &k.vfs) + stateSinkObject.Save(30, &k.hostMount) + stateSinkObject.Save(31, &k.pipeMount) + stateSinkObject.Save(32, &k.shmMount) + stateSinkObject.Save(33, &k.socketMount) + stateSinkObject.Save(34, &k.SleepForAddressSpaceActivation) + stateSinkObject.Save(35, &k.ptraceExceptions) + stateSinkObject.Save(36, &k.YAMAPtraceScope) + stateSinkObject.Save(37, &k.cgroupRegistry) +} + +func (k *Kernel) afterLoad() {} + +// +checklocksignore +func (k *Kernel) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &k.featureSet) + stateSourceObject.Load(1, &k.timekeeper) + stateSourceObject.Load(2, &k.tasks) + stateSourceObject.Load(3, &k.rootUserNamespace) + stateSourceObject.Load(4, &k.rootNetworkNamespace) + stateSourceObject.Load(5, &k.applicationCores) + stateSourceObject.Load(6, &k.useHostCores) + stateSourceObject.Load(7, &k.extraAuxv) + stateSourceObject.Load(8, &k.vdso) + stateSourceObject.Load(9, &k.rootUTSNamespace) + stateSourceObject.Load(10, &k.rootIPCNamespace) + stateSourceObject.Load(11, &k.rootAbstractSocketNamespace) + stateSourceObject.Load(12, &k.futexes) + stateSourceObject.Load(13, &k.globalInit) + stateSourceObject.Load(14, &k.syslog) + stateSourceObject.Load(15, &k.runningTasks) + stateSourceObject.Load(16, &k.cpuClock) + stateSourceObject.Load(17, &k.cpuClockTickerDisabled) + stateSourceObject.Load(18, &k.cpuClockTickerSetting) + stateSourceObject.Load(19, &k.uniqueID) + stateSourceObject.Load(20, &k.nextInotifyCookie) + stateSourceObject.Load(21, &k.netlinkPorts) + stateSourceObject.Load(23, &k.sockets) + stateSourceObject.Load(24, &k.socketsVFS2) + stateSourceObject.Load(25, &k.nextSocketRecord) + stateSourceObject.Load(27, &k.DirentCacheLimiter) + stateSourceObject.Load(28, &k.SpecialOpts) + stateSourceObject.Load(29, &k.vfs) + stateSourceObject.Load(30, &k.hostMount) + stateSourceObject.Load(31, &k.pipeMount) + stateSourceObject.Load(32, &k.shmMount) + stateSourceObject.Load(33, &k.socketMount) + stateSourceObject.Load(34, &k.SleepForAddressSpaceActivation) + stateSourceObject.Load(35, &k.ptraceExceptions) + stateSourceObject.Load(36, &k.YAMAPtraceScope) + stateSourceObject.Load(37, &k.cgroupRegistry) + stateSourceObject.LoadValue(22, new([]tcpip.Endpoint), func(y interface{}) { k.loadDanglingEndpoints(y.([]tcpip.Endpoint)) }) + stateSourceObject.LoadValue(26, new(*device.Registry), func(y interface{}) { k.loadDeviceRegistry(y.(*device.Registry)) }) +} + +func (s *SocketRecord) StateTypeName() string { + return "pkg/sentry/kernel.SocketRecord" +} + +func (s *SocketRecord) StateFields() []string { + return []string{ + "k", + "Sock", + "SockVFS2", + "ID", + } +} + +func (s *SocketRecord) beforeSave() {} + +// +checklocksignore +func (s *SocketRecord) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.k) + stateSinkObject.Save(1, &s.Sock) + stateSinkObject.Save(2, &s.SockVFS2) + stateSinkObject.Save(3, &s.ID) +} + +func (s *SocketRecord) afterLoad() {} + +// +checklocksignore +func (s *SocketRecord) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.k) + stateSourceObject.Load(1, &s.Sock) + stateSourceObject.Load(2, &s.SockVFS2) + stateSourceObject.Load(3, &s.ID) +} + +func (s *SocketRecordVFS1) StateTypeName() string { + return "pkg/sentry/kernel.SocketRecordVFS1" +} + +func (s *SocketRecordVFS1) StateFields() []string { + return []string{ + "socketEntry", + "SocketRecord", + } +} + +func (s *SocketRecordVFS1) beforeSave() {} + +// +checklocksignore +func (s *SocketRecordVFS1) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.socketEntry) + stateSinkObject.Save(1, &s.SocketRecord) +} + +func (s *SocketRecordVFS1) afterLoad() {} + +// +checklocksignore +func (s *SocketRecordVFS1) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.socketEntry) + stateSourceObject.Load(1, &s.SocketRecord) +} + +func (p *pendingSignals) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignals" +} + +func (p *pendingSignals) StateFields() []string { + return []string{ + "signals", + } +} + +func (p *pendingSignals) beforeSave() {} + +// +checklocksignore +func (p *pendingSignals) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + var signalsValue []savedPendingSignal = p.saveSignals() + stateSinkObject.SaveValue(0, signalsValue) +} + +func (p *pendingSignals) afterLoad() {} + +// +checklocksignore +func (p *pendingSignals) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new([]savedPendingSignal), func(y interface{}) { p.loadSignals(y.([]savedPendingSignal)) }) +} + +func (p *pendingSignalQueue) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignalQueue" +} + +func (p *pendingSignalQueue) StateFields() []string { + return []string{ + "pendingSignalList", + "length", + } +} + +func (p *pendingSignalQueue) beforeSave() {} + +// +checklocksignore +func (p *pendingSignalQueue) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.pendingSignalList) + stateSinkObject.Save(1, &p.length) +} + +func (p *pendingSignalQueue) afterLoad() {} + +// +checklocksignore +func (p *pendingSignalQueue) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.pendingSignalList) + stateSourceObject.Load(1, &p.length) +} + +func (p *pendingSignal) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignal" +} + +func (p *pendingSignal) StateFields() []string { + return []string{ + "pendingSignalEntry", + "SignalInfo", + "timer", + } +} + +func (p *pendingSignal) beforeSave() {} + +// +checklocksignore +func (p *pendingSignal) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.pendingSignalEntry) + stateSinkObject.Save(1, &p.SignalInfo) + stateSinkObject.Save(2, &p.timer) +} + +func (p *pendingSignal) afterLoad() {} + +// +checklocksignore +func (p *pendingSignal) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.pendingSignalEntry) + stateSourceObject.Load(1, &p.SignalInfo) + stateSourceObject.Load(2, &p.timer) +} + +func (l *pendingSignalList) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignalList" +} + +func (l *pendingSignalList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *pendingSignalList) beforeSave() {} + +// +checklocksignore +func (l *pendingSignalList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *pendingSignalList) afterLoad() {} + +// +checklocksignore +func (l *pendingSignalList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *pendingSignalEntry) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignalEntry" +} + +func (e *pendingSignalEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *pendingSignalEntry) beforeSave() {} + +// +checklocksignore +func (e *pendingSignalEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *pendingSignalEntry) afterLoad() {} + +// +checklocksignore +func (e *pendingSignalEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (s *savedPendingSignal) StateTypeName() string { + return "pkg/sentry/kernel.savedPendingSignal" +} + +func (s *savedPendingSignal) StateFields() []string { + return []string{ + "si", + "timer", + } +} + +func (s *savedPendingSignal) beforeSave() {} + +// +checklocksignore +func (s *savedPendingSignal) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.si) + stateSinkObject.Save(1, &s.timer) +} + +func (s *savedPendingSignal) afterLoad() {} + +// +checklocksignore +func (s *savedPendingSignal) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.si) + stateSourceObject.Load(1, &s.timer) +} + +func (it *IntervalTimer) StateTypeName() string { + return "pkg/sentry/kernel.IntervalTimer" +} + +func (it *IntervalTimer) StateFields() []string { + return []string{ + "timer", + "target", + "signo", + "id", + "sigval", + "group", + "sigpending", + "sigorphan", + "overrunCur", + "overrunLast", + } +} + +func (it *IntervalTimer) beforeSave() {} + +// +checklocksignore +func (it *IntervalTimer) StateSave(stateSinkObject state.Sink) { + it.beforeSave() + stateSinkObject.Save(0, &it.timer) + stateSinkObject.Save(1, &it.target) + stateSinkObject.Save(2, &it.signo) + stateSinkObject.Save(3, &it.id) + stateSinkObject.Save(4, &it.sigval) + stateSinkObject.Save(5, &it.group) + stateSinkObject.Save(6, &it.sigpending) + stateSinkObject.Save(7, &it.sigorphan) + stateSinkObject.Save(8, &it.overrunCur) + stateSinkObject.Save(9, &it.overrunLast) +} + +func (it *IntervalTimer) afterLoad() {} + +// +checklocksignore +func (it *IntervalTimer) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &it.timer) + stateSourceObject.Load(1, &it.target) + stateSourceObject.Load(2, &it.signo) + stateSourceObject.Load(3, &it.id) + stateSourceObject.Load(4, &it.sigval) + stateSourceObject.Load(5, &it.group) + stateSourceObject.Load(6, &it.sigpending) + stateSourceObject.Load(7, &it.sigorphan) + stateSourceObject.Load(8, &it.overrunCur) + stateSourceObject.Load(9, &it.overrunLast) +} + +func (l *processGroupList) StateTypeName() string { + return "pkg/sentry/kernel.processGroupList" +} + +func (l *processGroupList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *processGroupList) beforeSave() {} + +// +checklocksignore +func (l *processGroupList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *processGroupList) afterLoad() {} + +// +checklocksignore +func (l *processGroupList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *processGroupEntry) StateTypeName() string { + return "pkg/sentry/kernel.processGroupEntry" +} + +func (e *processGroupEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *processGroupEntry) beforeSave() {} + +// +checklocksignore +func (e *processGroupEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *processGroupEntry) afterLoad() {} + +// +checklocksignore +func (e *processGroupEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (r *ProcessGroupRefs) StateTypeName() string { + return "pkg/sentry/kernel.ProcessGroupRefs" +} + +func (r *ProcessGroupRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *ProcessGroupRefs) beforeSave() {} + +// +checklocksignore +func (r *ProcessGroupRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *ProcessGroupRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (p *ptraceOptions) StateTypeName() string { + return "pkg/sentry/kernel.ptraceOptions" +} + +func (p *ptraceOptions) StateFields() []string { + return []string{ + "ExitKill", + "SysGood", + "TraceClone", + "TraceExec", + "TraceExit", + "TraceFork", + "TraceSeccomp", + "TraceVfork", + "TraceVforkDone", + } +} + +func (p *ptraceOptions) beforeSave() {} + +// +checklocksignore +func (p *ptraceOptions) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.ExitKill) + stateSinkObject.Save(1, &p.SysGood) + stateSinkObject.Save(2, &p.TraceClone) + stateSinkObject.Save(3, &p.TraceExec) + stateSinkObject.Save(4, &p.TraceExit) + stateSinkObject.Save(5, &p.TraceFork) + stateSinkObject.Save(6, &p.TraceSeccomp) + stateSinkObject.Save(7, &p.TraceVfork) + stateSinkObject.Save(8, &p.TraceVforkDone) +} + +func (p *ptraceOptions) afterLoad() {} + +// +checklocksignore +func (p *ptraceOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.ExitKill) + stateSourceObject.Load(1, &p.SysGood) + stateSourceObject.Load(2, &p.TraceClone) + stateSourceObject.Load(3, &p.TraceExec) + stateSourceObject.Load(4, &p.TraceExit) + stateSourceObject.Load(5, &p.TraceFork) + stateSourceObject.Load(6, &p.TraceSeccomp) + stateSourceObject.Load(7, &p.TraceVfork) + stateSourceObject.Load(8, &p.TraceVforkDone) +} + +func (s *ptraceStop) StateTypeName() string { + return "pkg/sentry/kernel.ptraceStop" +} + +func (s *ptraceStop) StateFields() []string { + return []string{ + "frozen", + "listen", + } +} + +func (s *ptraceStop) beforeSave() {} + +// +checklocksignore +func (s *ptraceStop) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.frozen) + stateSinkObject.Save(1, &s.listen) +} + +func (s *ptraceStop) afterLoad() {} + +// +checklocksignore +func (s *ptraceStop) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.frozen) + stateSourceObject.Load(1, &s.listen) +} + +func (o *OldRSeqCriticalRegion) StateTypeName() string { + return "pkg/sentry/kernel.OldRSeqCriticalRegion" +} + +func (o *OldRSeqCriticalRegion) StateFields() []string { + return []string{ + "CriticalSection", + "Restart", + } +} + +func (o *OldRSeqCriticalRegion) beforeSave() {} + +// +checklocksignore +func (o *OldRSeqCriticalRegion) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.CriticalSection) + stateSinkObject.Save(1, &o.Restart) +} + +func (o *OldRSeqCriticalRegion) afterLoad() {} + +// +checklocksignore +func (o *OldRSeqCriticalRegion) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.CriticalSection) + stateSourceObject.Load(1, &o.Restart) +} + +func (l *sessionList) StateTypeName() string { + return "pkg/sentry/kernel.sessionList" +} + +func (l *sessionList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *sessionList) beforeSave() {} + +// +checklocksignore +func (l *sessionList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *sessionList) afterLoad() {} + +// +checklocksignore +func (l *sessionList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *sessionEntry) StateTypeName() string { + return "pkg/sentry/kernel.sessionEntry" +} + +func (e *sessionEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *sessionEntry) beforeSave() {} + +// +checklocksignore +func (e *sessionEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *sessionEntry) afterLoad() {} + +// +checklocksignore +func (e *sessionEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (r *SessionRefs) StateTypeName() string { + return "pkg/sentry/kernel.SessionRefs" +} + +func (r *SessionRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *SessionRefs) beforeSave() {} + +// +checklocksignore +func (r *SessionRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *SessionRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (s *Session) StateTypeName() string { + return "pkg/sentry/kernel.Session" +} + +func (s *Session) StateFields() []string { + return []string{ + "SessionRefs", + "leader", + "id", + "foreground", + "processGroups", + "sessionEntry", + } +} + +func (s *Session) beforeSave() {} + +// +checklocksignore +func (s *Session) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.SessionRefs) + stateSinkObject.Save(1, &s.leader) + stateSinkObject.Save(2, &s.id) + stateSinkObject.Save(3, &s.foreground) + stateSinkObject.Save(4, &s.processGroups) + stateSinkObject.Save(5, &s.sessionEntry) +} + +func (s *Session) afterLoad() {} + +// +checklocksignore +func (s *Session) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.SessionRefs) + stateSourceObject.Load(1, &s.leader) + stateSourceObject.Load(2, &s.id) + stateSourceObject.Load(3, &s.foreground) + stateSourceObject.Load(4, &s.processGroups) + stateSourceObject.Load(5, &s.sessionEntry) +} + +func (pg *ProcessGroup) StateTypeName() string { + return "pkg/sentry/kernel.ProcessGroup" +} + +func (pg *ProcessGroup) StateFields() []string { + return []string{ + "refs", + "originator", + "id", + "session", + "ancestors", + "processGroupEntry", + } +} + +func (pg *ProcessGroup) beforeSave() {} + +// +checklocksignore +func (pg *ProcessGroup) StateSave(stateSinkObject state.Sink) { + pg.beforeSave() + stateSinkObject.Save(0, &pg.refs) + stateSinkObject.Save(1, &pg.originator) + stateSinkObject.Save(2, &pg.id) + stateSinkObject.Save(3, &pg.session) + stateSinkObject.Save(4, &pg.ancestors) + stateSinkObject.Save(5, &pg.processGroupEntry) +} + +func (pg *ProcessGroup) afterLoad() {} + +// +checklocksignore +func (pg *ProcessGroup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &pg.refs) + stateSourceObject.Load(1, &pg.originator) + stateSourceObject.Load(2, &pg.id) + stateSourceObject.Load(3, &pg.session) + stateSourceObject.Load(4, &pg.ancestors) + stateSourceObject.Load(5, &pg.processGroupEntry) +} + +func (sh *SignalHandlers) StateTypeName() string { + return "pkg/sentry/kernel.SignalHandlers" +} + +func (sh *SignalHandlers) StateFields() []string { + return []string{ + "actions", + } +} + +func (sh *SignalHandlers) beforeSave() {} + +// +checklocksignore +func (sh *SignalHandlers) StateSave(stateSinkObject state.Sink) { + sh.beforeSave() + stateSinkObject.Save(0, &sh.actions) +} + +func (sh *SignalHandlers) afterLoad() {} + +// +checklocksignore +func (sh *SignalHandlers) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sh.actions) +} + +func (l *socketList) StateTypeName() string { + return "pkg/sentry/kernel.socketList" +} + +func (l *socketList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *socketList) beforeSave() {} + +// +checklocksignore +func (l *socketList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *socketList) afterLoad() {} + +// +checklocksignore +func (l *socketList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *socketEntry) StateTypeName() string { + return "pkg/sentry/kernel.socketEntry" +} + +func (e *socketEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *socketEntry) beforeSave() {} + +// +checklocksignore +func (e *socketEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *socketEntry) afterLoad() {} + +// +checklocksignore +func (e *socketEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (s *syscallTableInfo) StateTypeName() string { + return "pkg/sentry/kernel.syscallTableInfo" +} + +func (s *syscallTableInfo) StateFields() []string { + return []string{ + "OS", + "Arch", + } +} + +func (s *syscallTableInfo) beforeSave() {} + +// +checklocksignore +func (s *syscallTableInfo) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.OS) + stateSinkObject.Save(1, &s.Arch) +} + +func (s *syscallTableInfo) afterLoad() {} + +// +checklocksignore +func (s *syscallTableInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.OS) + stateSourceObject.Load(1, &s.Arch) +} + +func (s *syslog) StateTypeName() string { + return "pkg/sentry/kernel.syslog" +} + +func (s *syslog) StateFields() []string { + return []string{ + "msg", + } +} + +func (s *syslog) beforeSave() {} + +// +checklocksignore +func (s *syslog) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.msg) +} + +func (s *syslog) afterLoad() {} + +// +checklocksignore +func (s *syslog) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.msg) +} + +func (t *Task) StateTypeName() string { + return "pkg/sentry/kernel.Task" +} + +func (t *Task) StateFields() []string { + return []string{ + "taskNode", + "runState", + "taskWorkCount", + "taskWork", + "haveSyscallReturn", + "gosched", + "yieldCount", + "pendingSignals", + "signalMask", + "realSignalMask", + "haveSavedSignalMask", + "savedSignalMask", + "signalStack", + "groupStopPending", + "groupStopAcknowledged", + "trapStopPending", + "trapNotifyPending", + "stop", + "exitStatus", + "syscallRestartBlock", + "k", + "containerID", + "image", + "fsContext", + "fdTable", + "vforkParent", + "exitState", + "exitTracerNotified", + "exitTracerAcked", + "exitParentNotified", + "exitParentAcked", + "ptraceTracer", + "ptraceTracees", + "ptraceSeized", + "ptraceOpts", + "ptraceSyscallMode", + "ptraceSinglestep", + "ptraceCode", + "ptraceSiginfo", + "ptraceEventMsg", + "ptraceYAMAExceptionAdded", + "ioUsage", + "creds", + "utsns", + "ipcns", + "abstractSockets", + "mountNamespaceVFS2", + "parentDeathSignal", + "syscallFilters", + "cleartid", + "allowedCPUMask", + "cpu", + "niceness", + "numaPolicy", + "numaNodeMask", + "netns", + "rseqCPU", + "oldRSeqCPUAddr", + "rseqAddr", + "rseqSignature", + "robustList", + "startTime", + "kcov", + "cgroups", + } +} + +func (t *Task) beforeSave() {} + +// +checklocksignore +func (t *Task) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + if !state.IsZeroValue(&t.signalQueue) { + state.Failf("signalQueue is %#v, expected zero", &t.signalQueue) + } + var ptraceTracerValue *Task = t.savePtraceTracer() + stateSinkObject.SaveValue(31, ptraceTracerValue) + var syscallFiltersValue []bpf.Program = t.saveSyscallFilters() + stateSinkObject.SaveValue(48, syscallFiltersValue) + stateSinkObject.Save(0, &t.taskNode) + stateSinkObject.Save(1, &t.runState) + stateSinkObject.Save(2, &t.taskWorkCount) + stateSinkObject.Save(3, &t.taskWork) + stateSinkObject.Save(4, &t.haveSyscallReturn) + stateSinkObject.Save(5, &t.gosched) + stateSinkObject.Save(6, &t.yieldCount) + stateSinkObject.Save(7, &t.pendingSignals) + stateSinkObject.Save(8, &t.signalMask) + stateSinkObject.Save(9, &t.realSignalMask) + stateSinkObject.Save(10, &t.haveSavedSignalMask) + stateSinkObject.Save(11, &t.savedSignalMask) + stateSinkObject.Save(12, &t.signalStack) + stateSinkObject.Save(13, &t.groupStopPending) + stateSinkObject.Save(14, &t.groupStopAcknowledged) + stateSinkObject.Save(15, &t.trapStopPending) + stateSinkObject.Save(16, &t.trapNotifyPending) + stateSinkObject.Save(17, &t.stop) + stateSinkObject.Save(18, &t.exitStatus) + stateSinkObject.Save(19, &t.syscallRestartBlock) + stateSinkObject.Save(20, &t.k) + stateSinkObject.Save(21, &t.containerID) + stateSinkObject.Save(22, &t.image) + stateSinkObject.Save(23, &t.fsContext) + stateSinkObject.Save(24, &t.fdTable) + stateSinkObject.Save(25, &t.vforkParent) + stateSinkObject.Save(26, &t.exitState) + stateSinkObject.Save(27, &t.exitTracerNotified) + stateSinkObject.Save(28, &t.exitTracerAcked) + stateSinkObject.Save(29, &t.exitParentNotified) + stateSinkObject.Save(30, &t.exitParentAcked) + stateSinkObject.Save(32, &t.ptraceTracees) + stateSinkObject.Save(33, &t.ptraceSeized) + stateSinkObject.Save(34, &t.ptraceOpts) + stateSinkObject.Save(35, &t.ptraceSyscallMode) + stateSinkObject.Save(36, &t.ptraceSinglestep) + stateSinkObject.Save(37, &t.ptraceCode) + stateSinkObject.Save(38, &t.ptraceSiginfo) + stateSinkObject.Save(39, &t.ptraceEventMsg) + stateSinkObject.Save(40, &t.ptraceYAMAExceptionAdded) + stateSinkObject.Save(41, &t.ioUsage) + stateSinkObject.Save(42, &t.creds) + stateSinkObject.Save(43, &t.utsns) + stateSinkObject.Save(44, &t.ipcns) + stateSinkObject.Save(45, &t.abstractSockets) + stateSinkObject.Save(46, &t.mountNamespaceVFS2) + stateSinkObject.Save(47, &t.parentDeathSignal) + stateSinkObject.Save(49, &t.cleartid) + stateSinkObject.Save(50, &t.allowedCPUMask) + stateSinkObject.Save(51, &t.cpu) + stateSinkObject.Save(52, &t.niceness) + stateSinkObject.Save(53, &t.numaPolicy) + stateSinkObject.Save(54, &t.numaNodeMask) + stateSinkObject.Save(55, &t.netns) + stateSinkObject.Save(56, &t.rseqCPU) + stateSinkObject.Save(57, &t.oldRSeqCPUAddr) + stateSinkObject.Save(58, &t.rseqAddr) + stateSinkObject.Save(59, &t.rseqSignature) + stateSinkObject.Save(60, &t.robustList) + stateSinkObject.Save(61, &t.startTime) + stateSinkObject.Save(62, &t.kcov) + stateSinkObject.Save(63, &t.cgroups) +} + +// +checklocksignore +func (t *Task) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.taskNode) + stateSourceObject.Load(1, &t.runState) + stateSourceObject.Load(2, &t.taskWorkCount) + stateSourceObject.Load(3, &t.taskWork) + stateSourceObject.Load(4, &t.haveSyscallReturn) + stateSourceObject.Load(5, &t.gosched) + stateSourceObject.Load(6, &t.yieldCount) + stateSourceObject.Load(7, &t.pendingSignals) + stateSourceObject.Load(8, &t.signalMask) + stateSourceObject.Load(9, &t.realSignalMask) + stateSourceObject.Load(10, &t.haveSavedSignalMask) + stateSourceObject.Load(11, &t.savedSignalMask) + stateSourceObject.Load(12, &t.signalStack) + stateSourceObject.Load(13, &t.groupStopPending) + stateSourceObject.Load(14, &t.groupStopAcknowledged) + stateSourceObject.Load(15, &t.trapStopPending) + stateSourceObject.Load(16, &t.trapNotifyPending) + stateSourceObject.Load(17, &t.stop) + stateSourceObject.Load(18, &t.exitStatus) + stateSourceObject.Load(19, &t.syscallRestartBlock) + stateSourceObject.Load(20, &t.k) + stateSourceObject.Load(21, &t.containerID) + stateSourceObject.Load(22, &t.image) + stateSourceObject.Load(23, &t.fsContext) + stateSourceObject.Load(24, &t.fdTable) + stateSourceObject.Load(25, &t.vforkParent) + stateSourceObject.Load(26, &t.exitState) + stateSourceObject.Load(27, &t.exitTracerNotified) + stateSourceObject.Load(28, &t.exitTracerAcked) + stateSourceObject.Load(29, &t.exitParentNotified) + stateSourceObject.Load(30, &t.exitParentAcked) + stateSourceObject.Load(32, &t.ptraceTracees) + stateSourceObject.Load(33, &t.ptraceSeized) + stateSourceObject.Load(34, &t.ptraceOpts) + stateSourceObject.Load(35, &t.ptraceSyscallMode) + stateSourceObject.Load(36, &t.ptraceSinglestep) + stateSourceObject.Load(37, &t.ptraceCode) + stateSourceObject.Load(38, &t.ptraceSiginfo) + stateSourceObject.Load(39, &t.ptraceEventMsg) + stateSourceObject.Load(40, &t.ptraceYAMAExceptionAdded) + stateSourceObject.Load(41, &t.ioUsage) + stateSourceObject.Load(42, &t.creds) + stateSourceObject.Load(43, &t.utsns) + stateSourceObject.Load(44, &t.ipcns) + stateSourceObject.Load(45, &t.abstractSockets) + stateSourceObject.Load(46, &t.mountNamespaceVFS2) + stateSourceObject.Load(47, &t.parentDeathSignal) + stateSourceObject.Load(49, &t.cleartid) + stateSourceObject.Load(50, &t.allowedCPUMask) + stateSourceObject.Load(51, &t.cpu) + stateSourceObject.Load(52, &t.niceness) + stateSourceObject.Load(53, &t.numaPolicy) + stateSourceObject.Load(54, &t.numaNodeMask) + stateSourceObject.Load(55, &t.netns) + stateSourceObject.Load(56, &t.rseqCPU) + stateSourceObject.Load(57, &t.oldRSeqCPUAddr) + stateSourceObject.Load(58, &t.rseqAddr) + stateSourceObject.Load(59, &t.rseqSignature) + stateSourceObject.Load(60, &t.robustList) + stateSourceObject.Load(61, &t.startTime) + stateSourceObject.Load(62, &t.kcov) + stateSourceObject.Load(63, &t.cgroups) + stateSourceObject.LoadValue(31, new(*Task), func(y interface{}) { t.loadPtraceTracer(y.(*Task)) }) + stateSourceObject.LoadValue(48, new([]bpf.Program), func(y interface{}) { t.loadSyscallFilters(y.([]bpf.Program)) }) + stateSourceObject.AfterLoad(t.afterLoad) +} + +func (r *runSyscallAfterPtraceEventClone) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterPtraceEventClone" +} + +func (r *runSyscallAfterPtraceEventClone) StateFields() []string { + return []string{ + "vforkChild", + "vforkChildTID", + } +} + +func (r *runSyscallAfterPtraceEventClone) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterPtraceEventClone) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.vforkChild) + stateSinkObject.Save(1, &r.vforkChildTID) +} + +func (r *runSyscallAfterPtraceEventClone) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterPtraceEventClone) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.vforkChild) + stateSourceObject.Load(1, &r.vforkChildTID) +} + +func (r *runSyscallAfterVforkStop) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterVforkStop" +} + +func (r *runSyscallAfterVforkStop) StateFields() []string { + return []string{ + "childTID", + } +} + +func (r *runSyscallAfterVforkStop) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterVforkStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.childTID) +} + +func (r *runSyscallAfterVforkStop) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterVforkStop) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.childTID) +} + +func (v *vforkStop) StateTypeName() string { + return "pkg/sentry/kernel.vforkStop" +} + +func (v *vforkStop) StateFields() []string { + return []string{} +} + +func (v *vforkStop) beforeSave() {} + +// +checklocksignore +func (v *vforkStop) StateSave(stateSinkObject state.Sink) { + v.beforeSave() +} + +func (v *vforkStop) afterLoad() {} + +// +checklocksignore +func (v *vforkStop) StateLoad(stateSourceObject state.Source) { +} + +func (e *execStop) StateTypeName() string { + return "pkg/sentry/kernel.execStop" +} + +func (e *execStop) StateFields() []string { + return []string{} +} + +func (e *execStop) beforeSave() {} + +// +checklocksignore +func (e *execStop) StateSave(stateSinkObject state.Sink) { + e.beforeSave() +} + +func (e *execStop) afterLoad() {} + +// +checklocksignore +func (e *execStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallAfterExecStop) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterExecStop" +} + +func (r *runSyscallAfterExecStop) StateFields() []string { + return []string{ + "image", + } +} + +func (r *runSyscallAfterExecStop) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterExecStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.image) +} + +func (r *runSyscallAfterExecStop) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterExecStop) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.image) +} + +func (r *runExit) StateTypeName() string { + return "pkg/sentry/kernel.runExit" +} + +func (r *runExit) StateFields() []string { + return []string{} +} + +func (r *runExit) beforeSave() {} + +// +checklocksignore +func (r *runExit) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runExit) afterLoad() {} + +// +checklocksignore +func (r *runExit) StateLoad(stateSourceObject state.Source) { +} + +func (r *runExitMain) StateTypeName() string { + return "pkg/sentry/kernel.runExitMain" +} + +func (r *runExitMain) StateFields() []string { + return []string{} +} + +func (r *runExitMain) beforeSave() {} + +// +checklocksignore +func (r *runExitMain) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runExitMain) afterLoad() {} + +// +checklocksignore +func (r *runExitMain) StateLoad(stateSourceObject state.Source) { +} + +func (r *runExitNotify) StateTypeName() string { + return "pkg/sentry/kernel.runExitNotify" +} + +func (r *runExitNotify) StateFields() []string { + return []string{} +} + +func (r *runExitNotify) beforeSave() {} + +// +checklocksignore +func (r *runExitNotify) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runExitNotify) afterLoad() {} + +// +checklocksignore +func (r *runExitNotify) StateLoad(stateSourceObject state.Source) { +} + +func (image *TaskImage) StateTypeName() string { + return "pkg/sentry/kernel.TaskImage" +} + +func (image *TaskImage) StateFields() []string { + return []string{ + "Name", + "Arch", + "MemoryManager", + "fu", + "st", + } +} + +func (image *TaskImage) beforeSave() {} + +// +checklocksignore +func (image *TaskImage) StateSave(stateSinkObject state.Sink) { + image.beforeSave() + var stValue syscallTableInfo = image.saveSt() + stateSinkObject.SaveValue(4, stValue) + stateSinkObject.Save(0, &image.Name) + stateSinkObject.Save(1, &image.Arch) + stateSinkObject.Save(2, &image.MemoryManager) + stateSinkObject.Save(3, &image.fu) +} + +func (image *TaskImage) afterLoad() {} + +// +checklocksignore +func (image *TaskImage) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &image.Name) + stateSourceObject.Load(1, &image.Arch) + stateSourceObject.Load(2, &image.MemoryManager) + stateSourceObject.Load(3, &image.fu) + stateSourceObject.LoadValue(4, new(syscallTableInfo), func(y interface{}) { image.loadSt(y.(syscallTableInfo)) }) +} + +func (l *taskList) StateTypeName() string { + return "pkg/sentry/kernel.taskList" +} + +func (l *taskList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *taskList) beforeSave() {} + +// +checklocksignore +func (l *taskList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *taskList) afterLoad() {} + +// +checklocksignore +func (l *taskList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *taskEntry) StateTypeName() string { + return "pkg/sentry/kernel.taskEntry" +} + +func (e *taskEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *taskEntry) beforeSave() {} + +// +checklocksignore +func (e *taskEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *taskEntry) afterLoad() {} + +// +checklocksignore +func (e *taskEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (app *runApp) StateTypeName() string { + return "pkg/sentry/kernel.runApp" +} + +func (app *runApp) StateFields() []string { + return []string{} +} + +func (app *runApp) beforeSave() {} + +// +checklocksignore +func (app *runApp) StateSave(stateSinkObject state.Sink) { + app.beforeSave() +} + +func (app *runApp) afterLoad() {} + +// +checklocksignore +func (app *runApp) StateLoad(stateSourceObject state.Source) { +} + +func (ts *TaskGoroutineSchedInfo) StateTypeName() string { + return "pkg/sentry/kernel.TaskGoroutineSchedInfo" +} + +func (ts *TaskGoroutineSchedInfo) StateFields() []string { + return []string{ + "Timestamp", + "State", + "UserTicks", + "SysTicks", + } +} + +func (ts *TaskGoroutineSchedInfo) beforeSave() {} + +// +checklocksignore +func (ts *TaskGoroutineSchedInfo) StateSave(stateSinkObject state.Sink) { + ts.beforeSave() + stateSinkObject.Save(0, &ts.Timestamp) + stateSinkObject.Save(1, &ts.State) + stateSinkObject.Save(2, &ts.UserTicks) + stateSinkObject.Save(3, &ts.SysTicks) +} + +func (ts *TaskGoroutineSchedInfo) afterLoad() {} + +// +checklocksignore +func (ts *TaskGoroutineSchedInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ts.Timestamp) + stateSourceObject.Load(1, &ts.State) + stateSourceObject.Load(2, &ts.UserTicks) + stateSourceObject.Load(3, &ts.SysTicks) +} + +func (tc *taskClock) StateTypeName() string { + return "pkg/sentry/kernel.taskClock" +} + +func (tc *taskClock) StateFields() []string { + return []string{ + "t", + "includeSys", + } +} + +func (tc *taskClock) beforeSave() {} + +// +checklocksignore +func (tc *taskClock) StateSave(stateSinkObject state.Sink) { + tc.beforeSave() + stateSinkObject.Save(0, &tc.t) + stateSinkObject.Save(1, &tc.includeSys) +} + +func (tc *taskClock) afterLoad() {} + +// +checklocksignore +func (tc *taskClock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tc.t) + stateSourceObject.Load(1, &tc.includeSys) +} + +func (tgc *tgClock) StateTypeName() string { + return "pkg/sentry/kernel.tgClock" +} + +func (tgc *tgClock) StateFields() []string { + return []string{ + "tg", + "includeSys", + } +} + +func (tgc *tgClock) beforeSave() {} + +// +checklocksignore +func (tgc *tgClock) StateSave(stateSinkObject state.Sink) { + tgc.beforeSave() + stateSinkObject.Save(0, &tgc.tg) + stateSinkObject.Save(1, &tgc.includeSys) +} + +func (tgc *tgClock) afterLoad() {} + +// +checklocksignore +func (tgc *tgClock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tgc.tg) + stateSourceObject.Load(1, &tgc.includeSys) +} + +func (g *groupStop) StateTypeName() string { + return "pkg/sentry/kernel.groupStop" +} + +func (g *groupStop) StateFields() []string { + return []string{} +} + +func (g *groupStop) beforeSave() {} + +// +checklocksignore +func (g *groupStop) StateSave(stateSinkObject state.Sink) { + g.beforeSave() +} + +func (g *groupStop) afterLoad() {} + +// +checklocksignore +func (g *groupStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runInterrupt) StateTypeName() string { + return "pkg/sentry/kernel.runInterrupt" +} + +func (r *runInterrupt) StateFields() []string { + return []string{} +} + +func (r *runInterrupt) beforeSave() {} + +// +checklocksignore +func (r *runInterrupt) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runInterrupt) afterLoad() {} + +// +checklocksignore +func (r *runInterrupt) StateLoad(stateSourceObject state.Source) { +} + +func (r *runInterruptAfterSignalDeliveryStop) StateTypeName() string { + return "pkg/sentry/kernel.runInterruptAfterSignalDeliveryStop" +} + +func (r *runInterruptAfterSignalDeliveryStop) StateFields() []string { + return []string{} +} + +func (r *runInterruptAfterSignalDeliveryStop) beforeSave() {} + +// +checklocksignore +func (r *runInterruptAfterSignalDeliveryStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runInterruptAfterSignalDeliveryStop) afterLoad() {} + +// +checklocksignore +func (r *runInterruptAfterSignalDeliveryStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallAfterSyscallEnterStop) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterSyscallEnterStop" +} + +func (r *runSyscallAfterSyscallEnterStop) StateFields() []string { + return []string{} +} + +func (r *runSyscallAfterSyscallEnterStop) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterSyscallEnterStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runSyscallAfterSyscallEnterStop) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterSyscallEnterStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallAfterSysemuStop) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterSysemuStop" +} + +func (r *runSyscallAfterSysemuStop) StateFields() []string { + return []string{} +} + +func (r *runSyscallAfterSysemuStop) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterSysemuStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runSyscallAfterSysemuStop) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterSysemuStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallReinvoke) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallReinvoke" +} + +func (r *runSyscallReinvoke) StateFields() []string { + return []string{} +} + +func (r *runSyscallReinvoke) beforeSave() {} + +// +checklocksignore +func (r *runSyscallReinvoke) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runSyscallReinvoke) afterLoad() {} + +// +checklocksignore +func (r *runSyscallReinvoke) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallExit) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallExit" +} + +func (r *runSyscallExit) StateFields() []string { + return []string{} +} + +func (r *runSyscallExit) beforeSave() {} + +// +checklocksignore +func (r *runSyscallExit) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runSyscallExit) afterLoad() {} + +// +checklocksignore +func (r *runSyscallExit) StateLoad(stateSourceObject state.Source) { +} + +func (tg *ThreadGroup) StateTypeName() string { + return "pkg/sentry/kernel.ThreadGroup" +} + +func (tg *ThreadGroup) StateFields() []string { + return []string{ + "threadGroupNode", + "signalHandlers", + "pendingSignals", + "groupStopDequeued", + "groupStopSignal", + "groupStopPendingCount", + "groupStopComplete", + "groupStopWaitable", + "groupContNotify", + "groupContInterrupted", + "groupContWaitable", + "exiting", + "exitStatus", + "terminationSignal", + "itimerRealTimer", + "itimerVirtSetting", + "itimerProfSetting", + "rlimitCPUSoftSetting", + "cpuTimersEnabled", + "timers", + "nextTimerID", + "exitedCPUStats", + "childCPUStats", + "ioUsage", + "maxRSS", + "childMaxRSS", + "limits", + "processGroup", + "execed", + "oldRSeqCritical", + "mounts", + "tty", + "oomScoreAdj", + } +} + +func (tg *ThreadGroup) beforeSave() {} + +// +checklocksignore +func (tg *ThreadGroup) StateSave(stateSinkObject state.Sink) { + tg.beforeSave() + var oldRSeqCriticalValue *OldRSeqCriticalRegion = tg.saveOldRSeqCritical() + stateSinkObject.SaveValue(29, oldRSeqCriticalValue) + stateSinkObject.Save(0, &tg.threadGroupNode) + stateSinkObject.Save(1, &tg.signalHandlers) + stateSinkObject.Save(2, &tg.pendingSignals) + stateSinkObject.Save(3, &tg.groupStopDequeued) + stateSinkObject.Save(4, &tg.groupStopSignal) + stateSinkObject.Save(5, &tg.groupStopPendingCount) + stateSinkObject.Save(6, &tg.groupStopComplete) + stateSinkObject.Save(7, &tg.groupStopWaitable) + stateSinkObject.Save(8, &tg.groupContNotify) + stateSinkObject.Save(9, &tg.groupContInterrupted) + stateSinkObject.Save(10, &tg.groupContWaitable) + stateSinkObject.Save(11, &tg.exiting) + stateSinkObject.Save(12, &tg.exitStatus) + stateSinkObject.Save(13, &tg.terminationSignal) + stateSinkObject.Save(14, &tg.itimerRealTimer) + stateSinkObject.Save(15, &tg.itimerVirtSetting) + stateSinkObject.Save(16, &tg.itimerProfSetting) + stateSinkObject.Save(17, &tg.rlimitCPUSoftSetting) + stateSinkObject.Save(18, &tg.cpuTimersEnabled) + stateSinkObject.Save(19, &tg.timers) + stateSinkObject.Save(20, &tg.nextTimerID) + stateSinkObject.Save(21, &tg.exitedCPUStats) + stateSinkObject.Save(22, &tg.childCPUStats) + stateSinkObject.Save(23, &tg.ioUsage) + stateSinkObject.Save(24, &tg.maxRSS) + stateSinkObject.Save(25, &tg.childMaxRSS) + stateSinkObject.Save(26, &tg.limits) + stateSinkObject.Save(27, &tg.processGroup) + stateSinkObject.Save(28, &tg.execed) + stateSinkObject.Save(30, &tg.mounts) + stateSinkObject.Save(31, &tg.tty) + stateSinkObject.Save(32, &tg.oomScoreAdj) +} + +func (tg *ThreadGroup) afterLoad() {} + +// +checklocksignore +func (tg *ThreadGroup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tg.threadGroupNode) + stateSourceObject.Load(1, &tg.signalHandlers) + stateSourceObject.Load(2, &tg.pendingSignals) + stateSourceObject.Load(3, &tg.groupStopDequeued) + stateSourceObject.Load(4, &tg.groupStopSignal) + stateSourceObject.Load(5, &tg.groupStopPendingCount) + stateSourceObject.Load(6, &tg.groupStopComplete) + stateSourceObject.Load(7, &tg.groupStopWaitable) + stateSourceObject.Load(8, &tg.groupContNotify) + stateSourceObject.Load(9, &tg.groupContInterrupted) + stateSourceObject.Load(10, &tg.groupContWaitable) + stateSourceObject.Load(11, &tg.exiting) + stateSourceObject.Load(12, &tg.exitStatus) + stateSourceObject.Load(13, &tg.terminationSignal) + stateSourceObject.Load(14, &tg.itimerRealTimer) + stateSourceObject.Load(15, &tg.itimerVirtSetting) + stateSourceObject.Load(16, &tg.itimerProfSetting) + stateSourceObject.Load(17, &tg.rlimitCPUSoftSetting) + stateSourceObject.Load(18, &tg.cpuTimersEnabled) + stateSourceObject.Load(19, &tg.timers) + stateSourceObject.Load(20, &tg.nextTimerID) + stateSourceObject.Load(21, &tg.exitedCPUStats) + stateSourceObject.Load(22, &tg.childCPUStats) + stateSourceObject.Load(23, &tg.ioUsage) + stateSourceObject.Load(24, &tg.maxRSS) + stateSourceObject.Load(25, &tg.childMaxRSS) + stateSourceObject.Load(26, &tg.limits) + stateSourceObject.Load(27, &tg.processGroup) + stateSourceObject.Load(28, &tg.execed) + stateSourceObject.Load(30, &tg.mounts) + stateSourceObject.Load(31, &tg.tty) + stateSourceObject.Load(32, &tg.oomScoreAdj) + stateSourceObject.LoadValue(29, new(*OldRSeqCriticalRegion), func(y interface{}) { tg.loadOldRSeqCritical(y.(*OldRSeqCriticalRegion)) }) +} + +func (l *itimerRealListener) StateTypeName() string { + return "pkg/sentry/kernel.itimerRealListener" +} + +func (l *itimerRealListener) StateFields() []string { + return []string{ + "tg", + } +} + +func (l *itimerRealListener) beforeSave() {} + +// +checklocksignore +func (l *itimerRealListener) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.tg) +} + +func (l *itimerRealListener) afterLoad() {} + +// +checklocksignore +func (l *itimerRealListener) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.tg) +} + +func (ts *TaskSet) StateTypeName() string { + return "pkg/sentry/kernel.TaskSet" +} + +func (ts *TaskSet) StateFields() []string { + return []string{ + "Root", + "sessions", + } +} + +func (ts *TaskSet) beforeSave() {} + +// +checklocksignore +func (ts *TaskSet) StateSave(stateSinkObject state.Sink) { + ts.beforeSave() + stateSinkObject.Save(0, &ts.Root) + stateSinkObject.Save(1, &ts.sessions) +} + +func (ts *TaskSet) afterLoad() {} + +// +checklocksignore +func (ts *TaskSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ts.Root) + stateSourceObject.Load(1, &ts.sessions) +} + +func (ns *PIDNamespace) StateTypeName() string { + return "pkg/sentry/kernel.PIDNamespace" +} + +func (ns *PIDNamespace) StateFields() []string { + return []string{ + "owner", + "parent", + "userns", + "last", + "tasks", + "tids", + "tgids", + "sessions", + "sids", + "processGroups", + "pgids", + "exiting", + } +} + +func (ns *PIDNamespace) beforeSave() {} + +// +checklocksignore +func (ns *PIDNamespace) StateSave(stateSinkObject state.Sink) { + ns.beforeSave() + stateSinkObject.Save(0, &ns.owner) + stateSinkObject.Save(1, &ns.parent) + stateSinkObject.Save(2, &ns.userns) + stateSinkObject.Save(3, &ns.last) + stateSinkObject.Save(4, &ns.tasks) + stateSinkObject.Save(5, &ns.tids) + stateSinkObject.Save(6, &ns.tgids) + stateSinkObject.Save(7, &ns.sessions) + stateSinkObject.Save(8, &ns.sids) + stateSinkObject.Save(9, &ns.processGroups) + stateSinkObject.Save(10, &ns.pgids) + stateSinkObject.Save(11, &ns.exiting) +} + +func (ns *PIDNamespace) afterLoad() {} + +// +checklocksignore +func (ns *PIDNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ns.owner) + stateSourceObject.Load(1, &ns.parent) + stateSourceObject.Load(2, &ns.userns) + stateSourceObject.Load(3, &ns.last) + stateSourceObject.Load(4, &ns.tasks) + stateSourceObject.Load(5, &ns.tids) + stateSourceObject.Load(6, &ns.tgids) + stateSourceObject.Load(7, &ns.sessions) + stateSourceObject.Load(8, &ns.sids) + stateSourceObject.Load(9, &ns.processGroups) + stateSourceObject.Load(10, &ns.pgids) + stateSourceObject.Load(11, &ns.exiting) +} + +func (t *threadGroupNode) StateTypeName() string { + return "pkg/sentry/kernel.threadGroupNode" +} + +func (t *threadGroupNode) StateFields() []string { + return []string{ + "pidns", + "leader", + "execing", + "tasks", + "tasksCount", + "liveTasks", + "activeTasks", + } +} + +func (t *threadGroupNode) beforeSave() {} + +// +checklocksignore +func (t *threadGroupNode) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.pidns) + stateSinkObject.Save(1, &t.leader) + stateSinkObject.Save(2, &t.execing) + stateSinkObject.Save(3, &t.tasks) + stateSinkObject.Save(4, &t.tasksCount) + stateSinkObject.Save(5, &t.liveTasks) + stateSinkObject.Save(6, &t.activeTasks) +} + +func (t *threadGroupNode) afterLoad() {} + +// +checklocksignore +func (t *threadGroupNode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.pidns) + stateSourceObject.Load(1, &t.leader) + stateSourceObject.Load(2, &t.execing) + stateSourceObject.Load(3, &t.tasks) + stateSourceObject.Load(4, &t.tasksCount) + stateSourceObject.Load(5, &t.liveTasks) + stateSourceObject.Load(6, &t.activeTasks) +} + +func (t *taskNode) StateTypeName() string { + return "pkg/sentry/kernel.taskNode" +} + +func (t *taskNode) StateFields() []string { + return []string{ + "tg", + "taskEntry", + "parent", + "children", + "childPIDNamespace", + } +} + +func (t *taskNode) beforeSave() {} + +// +checklocksignore +func (t *taskNode) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.tg) + stateSinkObject.Save(1, &t.taskEntry) + stateSinkObject.Save(2, &t.parent) + stateSinkObject.Save(3, &t.children) + stateSinkObject.Save(4, &t.childPIDNamespace) +} + +func (t *taskNode) afterLoad() {} + +// +checklocksignore +func (t *taskNode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &t.tg) + stateSourceObject.Load(1, &t.taskEntry) + stateSourceObject.Load(2, &t.parent) + stateSourceObject.Load(3, &t.children) + stateSourceObject.Load(4, &t.childPIDNamespace) +} + +func (t *Timekeeper) StateTypeName() string { + return "pkg/sentry/kernel.Timekeeper" +} + +func (t *Timekeeper) StateFields() []string { + return []string{ + "realtimeClock", + "monotonicClock", + "bootTime", + "saveMonotonic", + "saveRealtime", + "params", + } +} + +// +checklocksignore +func (t *Timekeeper) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.realtimeClock) + stateSinkObject.Save(1, &t.monotonicClock) + stateSinkObject.Save(2, &t.bootTime) + stateSinkObject.Save(3, &t.saveMonotonic) + stateSinkObject.Save(4, &t.saveRealtime) + stateSinkObject.Save(5, &t.params) +} + +// +checklocksignore +func (t *Timekeeper) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.realtimeClock) + stateSourceObject.Load(1, &t.monotonicClock) + stateSourceObject.Load(2, &t.bootTime) + stateSourceObject.Load(3, &t.saveMonotonic) + stateSourceObject.Load(4, &t.saveRealtime) + stateSourceObject.Load(5, &t.params) + stateSourceObject.AfterLoad(t.afterLoad) +} + +func (tc *timekeeperClock) StateTypeName() string { + return "pkg/sentry/kernel.timekeeperClock" +} + +func (tc *timekeeperClock) StateFields() []string { + return []string{ + "tk", + "c", + } +} + +func (tc *timekeeperClock) beforeSave() {} + +// +checklocksignore +func (tc *timekeeperClock) StateSave(stateSinkObject state.Sink) { + tc.beforeSave() + stateSinkObject.Save(0, &tc.tk) + stateSinkObject.Save(1, &tc.c) +} + +func (tc *timekeeperClock) afterLoad() {} + +// +checklocksignore +func (tc *timekeeperClock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tc.tk) + stateSourceObject.Load(1, &tc.c) +} + +func (t *TTY) StateTypeName() string { + return "pkg/sentry/kernel.TTY" +} + +func (t *TTY) StateFields() []string { + return []string{ + "Index", + "tg", + } +} + +func (t *TTY) beforeSave() {} + +// +checklocksignore +func (t *TTY) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.Index) + stateSinkObject.Save(1, &t.tg) +} + +func (t *TTY) afterLoad() {} + +// +checklocksignore +func (t *TTY) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.Index) + stateSourceObject.Load(1, &t.tg) +} + +func (u *UTSNamespace) StateTypeName() string { + return "pkg/sentry/kernel.UTSNamespace" +} + +func (u *UTSNamespace) StateFields() []string { + return []string{ + "hostName", + "domainName", + "userns", + } +} + +func (u *UTSNamespace) beforeSave() {} + +// +checklocksignore +func (u *UTSNamespace) StateSave(stateSinkObject state.Sink) { + u.beforeSave() + stateSinkObject.Save(0, &u.hostName) + stateSinkObject.Save(1, &u.domainName) + stateSinkObject.Save(2, &u.userns) +} + +func (u *UTSNamespace) afterLoad() {} + +// +checklocksignore +func (u *UTSNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &u.hostName) + stateSourceObject.Load(1, &u.domainName) + stateSourceObject.Load(2, &u.userns) +} + +func (v *VDSOParamPage) StateTypeName() string { + return "pkg/sentry/kernel.VDSOParamPage" +} + +func (v *VDSOParamPage) StateFields() []string { + return []string{ + "mfp", + "fr", + "seq", + "copyScratchBuffer", + } +} + +func (v *VDSOParamPage) beforeSave() {} + +// +checklocksignore +func (v *VDSOParamPage) StateSave(stateSinkObject state.Sink) { + v.beforeSave() + stateSinkObject.Save(0, &v.mfp) + stateSinkObject.Save(1, &v.fr) + stateSinkObject.Save(2, &v.seq) + stateSinkObject.Save(3, &v.copyScratchBuffer) +} + +func (v *VDSOParamPage) afterLoad() {} + +// +checklocksignore +func (v *VDSOParamPage) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &v.mfp) + stateSourceObject.Load(1, &v.fr) + stateSourceObject.Load(2, &v.seq) + stateSourceObject.Load(3, &v.copyScratchBuffer) +} + +func init() { + state.Register((*abstractEndpoint)(nil)) + state.Register((*AbstractSocketNamespace)(nil)) + state.Register((*Cgroup)(nil)) + state.Register((*hierarchy)(nil)) + state.Register((*CgroupRegistry)(nil)) + state.Register((*FDFlags)(nil)) + state.Register((*descriptor)(nil)) + state.Register((*FDTable)(nil)) + state.Register((*FDTableRefs)(nil)) + state.Register((*FSContext)(nil)) + state.Register((*FSContextRefs)(nil)) + state.Register((*IPCNamespace)(nil)) + state.Register((*IPCNamespaceRefs)(nil)) + state.Register((*Kernel)(nil)) + state.Register((*SocketRecord)(nil)) + state.Register((*SocketRecordVFS1)(nil)) + state.Register((*pendingSignals)(nil)) + state.Register((*pendingSignalQueue)(nil)) + state.Register((*pendingSignal)(nil)) + state.Register((*pendingSignalList)(nil)) + state.Register((*pendingSignalEntry)(nil)) + state.Register((*savedPendingSignal)(nil)) + state.Register((*IntervalTimer)(nil)) + state.Register((*processGroupList)(nil)) + state.Register((*processGroupEntry)(nil)) + state.Register((*ProcessGroupRefs)(nil)) + state.Register((*ptraceOptions)(nil)) + state.Register((*ptraceStop)(nil)) + state.Register((*OldRSeqCriticalRegion)(nil)) + state.Register((*sessionList)(nil)) + state.Register((*sessionEntry)(nil)) + state.Register((*SessionRefs)(nil)) + state.Register((*Session)(nil)) + state.Register((*ProcessGroup)(nil)) + state.Register((*SignalHandlers)(nil)) + state.Register((*socketList)(nil)) + state.Register((*socketEntry)(nil)) + state.Register((*syscallTableInfo)(nil)) + state.Register((*syslog)(nil)) + state.Register((*Task)(nil)) + state.Register((*runSyscallAfterPtraceEventClone)(nil)) + state.Register((*runSyscallAfterVforkStop)(nil)) + state.Register((*vforkStop)(nil)) + state.Register((*execStop)(nil)) + state.Register((*runSyscallAfterExecStop)(nil)) + state.Register((*runExit)(nil)) + state.Register((*runExitMain)(nil)) + state.Register((*runExitNotify)(nil)) + state.Register((*TaskImage)(nil)) + state.Register((*taskList)(nil)) + state.Register((*taskEntry)(nil)) + state.Register((*runApp)(nil)) + state.Register((*TaskGoroutineSchedInfo)(nil)) + state.Register((*taskClock)(nil)) + state.Register((*tgClock)(nil)) + state.Register((*groupStop)(nil)) + state.Register((*runInterrupt)(nil)) + state.Register((*runInterruptAfterSignalDeliveryStop)(nil)) + state.Register((*runSyscallAfterSyscallEnterStop)(nil)) + state.Register((*runSyscallAfterSysemuStop)(nil)) + state.Register((*runSyscallReinvoke)(nil)) + state.Register((*runSyscallExit)(nil)) + state.Register((*ThreadGroup)(nil)) + state.Register((*itimerRealListener)(nil)) + state.Register((*TaskSet)(nil)) + state.Register((*PIDNamespace)(nil)) + state.Register((*threadGroupNode)(nil)) + state.Register((*taskNode)(nil)) + state.Register((*Timekeeper)(nil)) + state.Register((*timekeeperClock)(nil)) + state.Register((*TTY)(nil)) + state.Register((*UTSNamespace)(nil)) + state.Register((*VDSOParamPage)(nil)) +} diff --git a/pkg/sentry/kernel/kernel_unsafe_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_unsafe_abi_autogen_unsafe.go new file mode 100644 index 000000000..9c39d630a --- /dev/null +++ b/pkg/sentry/kernel/kernel_unsafe_abi_autogen_unsafe.go @@ -0,0 +1,13 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build tag aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The build tags here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build tags, see +// tools/defs.bzl:calculate_sets(). + +package kernel + +import ( +) + diff --git a/pkg/sentry/kernel/kernel_unsafe_state_autogen.go b/pkg/sentry/kernel/kernel_unsafe_state_autogen.go new file mode 100644 index 000000000..12130bf74 --- /dev/null +++ b/pkg/sentry/kernel/kernel_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package kernel diff --git a/pkg/sentry/kernel/memevent/BUILD b/pkg/sentry/kernel/memevent/BUILD deleted file mode 100644 index 4486848d2..000000000 --- a/pkg/sentry/kernel/memevent/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -load("//tools:defs.bzl", "go_library", "proto_library") - -package(licenses = ["notice"]) - -go_library( - name = "memevent", - srcs = ["memory_events.go"], - visibility = ["//:sandbox"], - deps = [ - ":memory_events_go_proto", - "//pkg/eventchannel", - "//pkg/log", - "//pkg/metric", - "//pkg/sentry/kernel", - "//pkg/sentry/usage", - "//pkg/sync", - ], -) - -proto_library( - name = "memory_events", - srcs = ["memory_events.proto"], - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/kernel/memevent/memevent_state_autogen.go b/pkg/sentry/kernel/memevent/memevent_state_autogen.go new file mode 100644 index 000000000..4a1679fa9 --- /dev/null +++ b/pkg/sentry/kernel/memevent/memevent_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package memevent diff --git a/pkg/sentry/kernel/memevent/memory_events.proto b/pkg/sentry/kernel/memevent/memory_events.proto deleted file mode 100644 index bf8029ff5..000000000 --- a/pkg/sentry/kernel/memevent/memory_events.proto +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -// MemoryUsageEvent describes the memory usage of the sandbox at a single -// instant in time. These messages are emitted periodically on the eventchannel. -message MemoryUsageEvent { - // The total memory usage of the sandboxed application in bytes, calculated - // using the 'fast' method. - uint64 total = 1; - - // Memory used to back memory-mapped regions for files in the application, in - // bytes. This corresponds to the usage.MemoryKind.Mapped memory type. - uint64 mapped = 2; -} diff --git a/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go b/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go new file mode 100644 index 000000000..ed1b8a8ca --- /dev/null +++ b/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go @@ -0,0 +1,158 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.25.0 +// protoc v3.13.0 +// source: pkg/sentry/kernel/memevent/memory_events.proto + +package gvisor + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. +const _ = proto.ProtoPackageIsVersion4 + +type MemoryUsageEvent struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Total uint64 `protobuf:"varint,1,opt,name=total,proto3" json:"total,omitempty"` + Mapped uint64 `protobuf:"varint,2,opt,name=mapped,proto3" json:"mapped,omitempty"` +} + +func (x *MemoryUsageEvent) Reset() { + *x = MemoryUsageEvent{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *MemoryUsageEvent) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MemoryUsageEvent) ProtoMessage() {} + +func (x *MemoryUsageEvent) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MemoryUsageEvent.ProtoReflect.Descriptor instead. +func (*MemoryUsageEvent) Descriptor() ([]byte, []int) { + return file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescGZIP(), []int{0} +} + +func (x *MemoryUsageEvent) GetTotal() uint64 { + if x != nil { + return x.Total + } + return 0 +} + +func (x *MemoryUsageEvent) GetMapped() uint64 { + if x != nil { + return x.Mapped + } + return 0 +} + +var File_pkg_sentry_kernel_memevent_memory_events_proto protoreflect.FileDescriptor + +var file_pkg_sentry_kernel_memevent_memory_events_proto_rawDesc = []byte{ + 0x0a, 0x2e, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x2f, 0x6b, 0x65, 0x72, + 0x6e, 0x65, 0x6c, 0x2f, 0x6d, 0x65, 0x6d, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x2f, 0x6d, 0x65, 0x6d, + 0x6f, 0x72, 0x79, 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x06, 0x67, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x22, 0x40, 0x0a, 0x10, 0x4d, 0x65, 0x6d, 0x6f, + 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, 0x14, 0x0a, 0x05, + 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, + 0x61, 0x6c, 0x12, 0x16, 0x0a, 0x06, 0x6d, 0x61, 0x70, 0x70, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x06, 0x6d, 0x61, 0x70, 0x70, 0x65, 0x64, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, +} + +var ( + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescOnce sync.Once + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescData = file_pkg_sentry_kernel_memevent_memory_events_proto_rawDesc +) + +func file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescGZIP() []byte { + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescOnce.Do(func() { + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescData) + }) + return file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescData +} + +var file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_pkg_sentry_kernel_memevent_memory_events_proto_goTypes = []interface{}{ + (*MemoryUsageEvent)(nil), // 0: gvisor.MemoryUsageEvent +} +var file_pkg_sentry_kernel_memevent_memory_events_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_pkg_sentry_kernel_memevent_memory_events_proto_init() } +func file_pkg_sentry_kernel_memevent_memory_events_proto_init() { + if File_pkg_sentry_kernel_memevent_memory_events_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*MemoryUsageEvent); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_sentry_kernel_memevent_memory_events_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_pkg_sentry_kernel_memevent_memory_events_proto_goTypes, + DependencyIndexes: file_pkg_sentry_kernel_memevent_memory_events_proto_depIdxs, + MessageInfos: file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes, + }.Build() + File_pkg_sentry_kernel_memevent_memory_events_proto = out.File + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDesc = nil + file_pkg_sentry_kernel_memevent_memory_events_proto_goTypes = nil + file_pkg_sentry_kernel_memevent_memory_events_proto_depIdxs = nil +} diff --git a/pkg/sentry/kernel/pending_signals_list.go b/pkg/sentry/kernel/pending_signals_list.go new file mode 100644 index 000000000..d5b483b3e --- /dev/null +++ b/pkg/sentry/kernel/pending_signals_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type pendingSignalElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (pendingSignalElementMapper) linkerFor(elem *pendingSignal) *pendingSignal { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type pendingSignalList struct { + head *pendingSignal + tail *pendingSignal +} + +// Reset resets list l to the empty state. +func (l *pendingSignalList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *pendingSignalList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *pendingSignalList) Front() *pendingSignal { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *pendingSignalList) Back() *pendingSignal { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *pendingSignalList) Len() (count int) { + for e := l.Front(); e != nil; e = (pendingSignalElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *pendingSignalList) PushFront(e *pendingSignal) { + linker := pendingSignalElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + pendingSignalElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *pendingSignalList) PushBack(e *pendingSignal) { + linker := pendingSignalElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + pendingSignalElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *pendingSignalList) PushBackList(m *pendingSignalList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + pendingSignalElementMapper{}.linkerFor(l.tail).SetNext(m.head) + pendingSignalElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *pendingSignalList) InsertAfter(b, e *pendingSignal) { + bLinker := pendingSignalElementMapper{}.linkerFor(b) + eLinker := pendingSignalElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + pendingSignalElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *pendingSignalList) InsertBefore(a, e *pendingSignal) { + aLinker := pendingSignalElementMapper{}.linkerFor(a) + eLinker := pendingSignalElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + pendingSignalElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *pendingSignalList) Remove(e *pendingSignal) { + linker := pendingSignalElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + pendingSignalElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + pendingSignalElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type pendingSignalEntry struct { + next *pendingSignal + prev *pendingSignal +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *pendingSignalEntry) Next() *pendingSignal { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *pendingSignalEntry) Prev() *pendingSignal { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *pendingSignalEntry) SetNext(elem *pendingSignal) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *pendingSignalEntry) SetPrev(elem *pendingSignal) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD deleted file mode 100644 index 94ebac7c5..000000000 --- a/pkg/sentry/kernel/pipe/BUILD +++ /dev/null @@ -1,58 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "pipe", - srcs = [ - "device.go", - "node.go", - "pipe.go", - "pipe_unsafe.go", - "pipe_util.go", - "reader.go", - "reader_writer.go", - "save_restore.go", - "vfs.go", - "writer.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/marshal/primitive", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "pipe_test", - size = "small", - srcs = [ - "node_test.go", - "pipe_test.go", - ], - library = ":pipe", - deps = [ - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go deleted file mode 100644 index d25cf658e..000000000 --- a/pkg/sentry/kernel/pipe/node_test.go +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pipe - -import ( - "testing" - "time" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" -) - -type sleeper struct { - context.Context - ch chan struct{} -} - -func newSleeperContext(t *testing.T) context.Context { - return &sleeper{ - Context: contexttest.Context(t), - ch: make(chan struct{}), - } -} - -func (s *sleeper) SleepStart() <-chan struct{} { - return s.ch -} - -func (s *sleeper) SleepFinish(bool) { -} - -func (s *sleeper) Cancel() { - s.ch <- struct{}{} -} - -func (s *sleeper) Interrupted() bool { - return len(s.ch) != 0 -} - -type openResult struct { - *fs.File - error -} - -var perms fs.FilePermissions = fs.FilePermissions{ - User: fs.PermMask{Read: true, Write: true}, -} - -func testOpenOrDie(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, doneChan chan<- struct{}) (*fs.File, error) { - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - d := fs.NewDirent(ctx, inode, "pipe") - file, err := n.GetFile(ctx, d, flags) - if err != nil { - t.Errorf("open with flags %+v failed: %v", flags, err) - return nil, err - } - if doneChan != nil { - doneChan <- struct{}{} - } - return file, err -} - -func testOpen(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, resChan chan<- openResult) (*fs.File, error) { - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - d := fs.NewDirent(ctx, inode, "pipe") - file, err := n.GetFile(ctx, d, flags) - if resChan != nil { - resChan <- openResult{file, err} - } - return file, err -} - -func newNamedPipe(t *testing.T) *Pipe { - return NewPipe(true, DefaultPipeSize) -} - -func newAnonPipe(t *testing.T) *Pipe { - return NewPipe(false, DefaultPipeSize) -} - -// assertRecvBlocks ensures that a recv attempt on c blocks for at least -// blockDuration. This is useful for checking that a goroutine that is supposed -// to be executing a blocking operation is actually blocking. -func assertRecvBlocks(t *testing.T, c <-chan struct{}, blockDuration time.Duration, failMsg string) { - select { - case <-c: - t.Fatalf(failMsg) - case <-time.After(blockDuration): - // Ok, blocked for the required duration. - } -} - -func TestReadOpenBlocksForWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - // Verify that the open for read is blocking. - assertRecvBlocks(t, rDone, time.Millisecond*100, - "open for read not blocking with no writers") - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - <-wDone - <-rDone -} - -func TestWriteOpenBlocksForReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - // Verify that the open for write is blocking - assertRecvBlocks(t, wDone, time.Millisecond*100, - "open for write not blocking with no readers") - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - <-rDone - <-wDone -} - -func TestMultipleWriteOpenDoesntCountAsReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone1 := make(chan struct{}) - rDone2 := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone1) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone2) - - assertRecvBlocks(t, rDone1, time.Millisecond*100, - "open for read didn't block with no writers") - assertRecvBlocks(t, rDone2, time.Millisecond*100, - "open for read didn't block with no writers") - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - <-wDone - <-rDone2 - <-rDone1 -} - -func TestClosedReaderBlocksWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rFile, _ := testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil) - rFile.DecRef(ctx) - - wDone := make(chan struct{}) - // This open for write should block because the reader is now gone. - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - assertRecvBlocks(t, wDone, time.Millisecond*100, - "open for write didn't block with no concurrent readers") - - // Open for read again. This should unblock the open for write. - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - <-rDone - <-wDone -} - -func TestReadWriteOpenNeverBlocks(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rwDone := make(chan struct{}) - // Open for read-write never wait for a reader or writer, even if the - // nonblocking flag is not set. - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, Write: true, NonBlocking: false}, rwDone) - <-rwDone -} - -func TestReadWriteOpenUnblocksReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - rwDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, Write: true}, rwDone) - - <-rwDone - <-rDone -} - -func TestReadWriteOpenUnblocksWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - rwDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, Write: true}, rwDone) - - <-rwDone - <-wDone -} - -func TestBlockedOpenIsCancellable(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - done := make(chan openResult) - go testOpen(ctx, t, f, fs.FileFlags{Read: true}, done) - select { - case <-done: - t.Fatalf("open for read didn't block with no writers") - case <-time.After(time.Millisecond * 100): - // Ok. - } - - ctx.(*sleeper).Cancel() - // If the cancel on the sleeper didn't work, the open for read would never - // return. - res := <-done - if res.error != syserror.ErrInterrupted { - t.Fatalf("Cancellation didn't cause GetFile to return fs.ErrInterrupted, got %v.", - res.error) - } -} - -func TestNonblockingReadOpenFileNoWriters(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil); err != nil { - t.Fatalf("Nonblocking open for read failed with error %v.", err) - } -} - -func TestNonblockingWriteOpenFileNoReaders(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); !linuxerr.Equals(linuxerr.ENXIO, err) { - t.Fatalf("Nonblocking open for write failed unexpected error %v.", err) - } -} - -func TestNonBlockingReadOpenWithWriter(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - // Open for write blocks since there are no readers yet. - assertRecvBlocks(t, wDone, time.Millisecond*100, - "Open for write didn't block with no reader.") - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil); err != nil { - t.Fatalf("Nonblocking open for read failed with error %v.", err) - } - - // Open for write should now be unblocked. - <-wDone -} - -func TestNonBlockingWriteOpenWithReader(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - // Open for write blocked, since no reader yet. - assertRecvBlocks(t, rDone, time.Millisecond*100, - "Open for reader didn't block with no writer.") - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); err != nil { - t.Fatalf("Nonblocking open for write failed with error %v.", err) - } - - // Open for write should now be unblocked. - <-rDone -} - -func TestAnonReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newAnonPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Read: true}, nil); err != nil { - t.Fatalf("open anon pipe for read failed: %v", err) - } -} - -func TestAnonWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newAnonPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true}, nil); err != nil { - t.Fatalf("open anon pipe for write failed: %v", err) - } -} diff --git a/pkg/sentry/kernel/pipe/pipe_state_autogen.go b/pkg/sentry/kernel/pipe/pipe_state_autogen.go new file mode 100644 index 000000000..57451e951 --- /dev/null +++ b/pkg/sentry/kernel/pipe/pipe_state_autogen.go @@ -0,0 +1,227 @@ +// automatically generated by stateify. + +package pipe + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (i *inodeOperations) StateTypeName() string { + return "pkg/sentry/kernel/pipe.inodeOperations" +} + +func (i *inodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "p", + } +} + +func (i *inodeOperations) beforeSave() {} + +// +checklocksignore +func (i *inodeOperations) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeSimpleAttributes) + stateSinkObject.Save(1, &i.p) +} + +func (i *inodeOperations) afterLoad() {} + +// +checklocksignore +func (i *inodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeSimpleAttributes) + stateSourceObject.Load(1, &i.p) +} + +func (p *Pipe) StateTypeName() string { + return "pkg/sentry/kernel/pipe.Pipe" +} + +func (p *Pipe) StateFields() []string { + return []string{ + "isNamed", + "readers", + "writers", + "buf", + "off", + "size", + "max", + "hadWriter", + } +} + +func (p *Pipe) beforeSave() {} + +// +checklocksignore +func (p *Pipe) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.isNamed) + stateSinkObject.Save(1, &p.readers) + stateSinkObject.Save(2, &p.writers) + stateSinkObject.Save(3, &p.buf) + stateSinkObject.Save(4, &p.off) + stateSinkObject.Save(5, &p.size) + stateSinkObject.Save(6, &p.max) + stateSinkObject.Save(7, &p.hadWriter) +} + +// +checklocksignore +func (p *Pipe) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.isNamed) + stateSourceObject.Load(1, &p.readers) + stateSourceObject.Load(2, &p.writers) + stateSourceObject.Load(3, &p.buf) + stateSourceObject.Load(4, &p.off) + stateSourceObject.Load(5, &p.size) + stateSourceObject.Load(6, &p.max) + stateSourceObject.Load(7, &p.hadWriter) + stateSourceObject.AfterLoad(p.afterLoad) +} + +func (r *Reader) StateTypeName() string { + return "pkg/sentry/kernel/pipe.Reader" +} + +func (r *Reader) StateFields() []string { + return []string{ + "ReaderWriter", + } +} + +func (r *Reader) beforeSave() {} + +// +checklocksignore +func (r *Reader) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.ReaderWriter) +} + +func (r *Reader) afterLoad() {} + +// +checklocksignore +func (r *Reader) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.ReaderWriter) +} + +func (rw *ReaderWriter) StateTypeName() string { + return "pkg/sentry/kernel/pipe.ReaderWriter" +} + +func (rw *ReaderWriter) StateFields() []string { + return []string{ + "Pipe", + } +} + +func (rw *ReaderWriter) beforeSave() {} + +// +checklocksignore +func (rw *ReaderWriter) StateSave(stateSinkObject state.Sink) { + rw.beforeSave() + stateSinkObject.Save(0, &rw.Pipe) +} + +func (rw *ReaderWriter) afterLoad() {} + +// +checklocksignore +func (rw *ReaderWriter) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &rw.Pipe) +} + +func (vp *VFSPipe) StateTypeName() string { + return "pkg/sentry/kernel/pipe.VFSPipe" +} + +func (vp *VFSPipe) StateFields() []string { + return []string{ + "pipe", + } +} + +func (vp *VFSPipe) beforeSave() {} + +// +checklocksignore +func (vp *VFSPipe) StateSave(stateSinkObject state.Sink) { + vp.beforeSave() + stateSinkObject.Save(0, &vp.pipe) +} + +func (vp *VFSPipe) afterLoad() {} + +// +checklocksignore +func (vp *VFSPipe) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &vp.pipe) +} + +func (fd *VFSPipeFD) StateTypeName() string { + return "pkg/sentry/kernel/pipe.VFSPipeFD" +} + +func (fd *VFSPipeFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "LockFD", + "pipe", + } +} + +func (fd *VFSPipeFD) beforeSave() {} + +// +checklocksignore +func (fd *VFSPipeFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &fd.LockFD) + stateSinkObject.Save(4, &fd.pipe) +} + +func (fd *VFSPipeFD) afterLoad() {} + +// +checklocksignore +func (fd *VFSPipeFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &fd.LockFD) + stateSourceObject.Load(4, &fd.pipe) +} + +func (w *Writer) StateTypeName() string { + return "pkg/sentry/kernel/pipe.Writer" +} + +func (w *Writer) StateFields() []string { + return []string{ + "ReaderWriter", + } +} + +func (w *Writer) beforeSave() {} + +// +checklocksignore +func (w *Writer) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.ReaderWriter) +} + +func (w *Writer) afterLoad() {} + +// +checklocksignore +func (w *Writer) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.ReaderWriter) +} + +func init() { + state.Register((*inodeOperations)(nil)) + state.Register((*Pipe)(nil)) + state.Register((*Reader)(nil)) + state.Register((*ReaderWriter)(nil)) + state.Register((*VFSPipe)(nil)) + state.Register((*VFSPipeFD)(nil)) + state.Register((*Writer)(nil)) +} diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go deleted file mode 100644 index 867f4a76b..000000000 --- a/pkg/sentry/kernel/pipe/pipe_test.go +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pipe - -import ( - "bytes" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestPipeRW(t *testing.T) { - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, 65536) - defer r.DecRef(ctx) - defer w.DecRef(ctx) - - msg := []byte("here's some bytes") - wantN := int64(len(msg)) - n, err := w.Writev(ctx, usermem.BytesIOSequence(msg)) - if n != wantN || err != nil { - t.Fatalf("Writev: got (%d, %v), wanted (%d, nil)", n, err, wantN) - } - - buf := make([]byte, len(msg)) - n, err = r.Readv(ctx, usermem.BytesIOSequence(buf)) - if n != wantN || err != nil || !bytes.Equal(buf, msg) { - t.Fatalf("Readv: got (%d, %v) %q, wanted (%d, nil) %q", n, err, buf, wantN, msg) - } -} - -func TestPipeReadBlock(t *testing.T) { - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, 65536) - defer r.DecRef(ctx) - defer w.DecRef(ctx) - - n, err := r.Readv(ctx, usermem.BytesIOSequence(make([]byte, 1))) - if n != 0 || err != syserror.ErrWouldBlock { - t.Fatalf("Readv: got (%d, %v), wanted (0, %v)", n, err, syserror.ErrWouldBlock) - } -} - -func TestPipeWriteBlock(t *testing.T) { - const atomicIOBytes = 2 - const capacity = MinimumPipeSize - - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, capacity) - defer r.DecRef(ctx) - defer w.DecRef(ctx) - - msg := make([]byte, capacity+1) - n, err := w.Writev(ctx, usermem.BytesIOSequence(msg)) - if wantN, wantErr := int64(capacity), syserror.ErrWouldBlock; n != wantN || err != wantErr { - t.Fatalf("Writev: got (%d, %v), wanted (%d, %v)", n, err, wantN, wantErr) - } -} - -func TestPipeWriteUntilEnd(t *testing.T) { - const atomicIOBytes = 2 - - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, atomicIOBytes) - defer r.DecRef(ctx) - defer w.DecRef(ctx) - - msg := []byte("here's some bytes") - - wDone := make(chan struct{}, 0) - rDone := make(chan struct{}, 0) - defer func() { - // Signal the reader to stop and wait until it does so. - close(wDone) - <-rDone - }() - - go func() { - defer close(rDone) - // Read from r until done is closed. - ctx := contexttest.Context(t) - buf := make([]byte, len(msg)+1) - dst := usermem.BytesIOSequence(buf) - e, ch := waiter.NewChannelEntry(nil) - r.EventRegister(&e, waiter.ReadableEvents) - defer r.EventUnregister(&e) - for { - n, err := r.Readv(ctx, dst) - dst = dst.DropFirst64(n) - if err == syserror.ErrWouldBlock { - select { - case <-ch: - continue - case <-wDone: - // We expect to have 1 byte left in dst since len(buf) == - // len(msg)+1. - if dst.NumBytes() != 1 || !bytes.Equal(buf[:len(msg)], msg) { - t.Errorf("Reader: got %q (%d bytes remaining), wanted %q", buf, dst.NumBytes(), msg) - } - return - } - } - if err != nil { - t.Errorf("Readv: got unexpected error %v", err) - return - } - } - }() - - src := usermem.BytesIOSequence(msg) - e, ch := waiter.NewChannelEntry(nil) - w.EventRegister(&e, waiter.WritableEvents) - defer w.EventUnregister(&e) - for src.NumBytes() != 0 { - n, err := w.Writev(ctx, src) - src = src.DropFirst64(n) - if err == syserror.ErrWouldBlock { - <-ch - continue - } - if err != nil { - t.Fatalf("Writev: got (%d, %v)", n, err) - } - } -} diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe_state_autogen.go b/pkg/sentry/kernel/pipe/pipe_unsafe_state_autogen.go new file mode 100644 index 000000000..d3b40feb4 --- /dev/null +++ b/pkg/sentry/kernel/pipe/pipe_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package pipe diff --git a/pkg/sentry/kernel/process_group_list.go b/pkg/sentry/kernel/process_group_list.go new file mode 100644 index 000000000..9c493504d --- /dev/null +++ b/pkg/sentry/kernel/process_group_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type processGroupElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (processGroupElementMapper) linkerFor(elem *ProcessGroup) *ProcessGroup { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type processGroupList struct { + head *ProcessGroup + tail *ProcessGroup +} + +// Reset resets list l to the empty state. +func (l *processGroupList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *processGroupList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *processGroupList) Front() *ProcessGroup { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *processGroupList) Back() *ProcessGroup { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *processGroupList) Len() (count int) { + for e := l.Front(); e != nil; e = (processGroupElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *processGroupList) PushFront(e *ProcessGroup) { + linker := processGroupElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + processGroupElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *processGroupList) PushBack(e *ProcessGroup) { + linker := processGroupElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + processGroupElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *processGroupList) PushBackList(m *processGroupList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + processGroupElementMapper{}.linkerFor(l.tail).SetNext(m.head) + processGroupElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *processGroupList) InsertAfter(b, e *ProcessGroup) { + bLinker := processGroupElementMapper{}.linkerFor(b) + eLinker := processGroupElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + processGroupElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *processGroupList) InsertBefore(a, e *ProcessGroup) { + aLinker := processGroupElementMapper{}.linkerFor(a) + eLinker := processGroupElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + processGroupElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *processGroupList) Remove(e *ProcessGroup) { + linker := processGroupElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + processGroupElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + processGroupElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type processGroupEntry struct { + next *ProcessGroup + prev *ProcessGroup +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *processGroupEntry) Next() *ProcessGroup { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *processGroupEntry) Prev() *ProcessGroup { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *processGroupEntry) SetNext(elem *ProcessGroup) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *processGroupEntry) SetPrev(elem *ProcessGroup) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/process_group_refs.go b/pkg/sentry/kernel/process_group_refs.go new file mode 100644 index 000000000..e29b2ea3a --- /dev/null +++ b/pkg/sentry/kernel/process_group_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const ProcessGroupenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var ProcessGroupobj *ProcessGroup + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type ProcessGroupRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *ProcessGroupRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *ProcessGroupRefs) RefType() string { + return fmt.Sprintf("%T", ProcessGroupobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *ProcessGroupRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *ProcessGroupRefs) LogRefs() bool { + return ProcessGroupenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *ProcessGroupRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *ProcessGroupRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if ProcessGroupenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.RefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *ProcessGroupRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if ProcessGroupenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *ProcessGroupRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if ProcessGroupenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *ProcessGroupRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/sched/BUILD b/pkg/sentry/kernel/sched/BUILD deleted file mode 100644 index 1b82e087b..000000000 --- a/pkg/sentry/kernel/sched/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "sched", - srcs = [ - "cpuset.go", - "sched.go", - ], - visibility = ["//pkg/sentry:internal"], -) - -go_test( - name = "sched_test", - size = "small", - srcs = ["cpuset_test.go"], - library = ":sched", -) diff --git a/pkg/sentry/kernel/sched/cpuset_test.go b/pkg/sentry/kernel/sched/cpuset_test.go deleted file mode 100644 index 3af9f1197..000000000 --- a/pkg/sentry/kernel/sched/cpuset_test.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package sched - -import ( - "testing" -) - -func TestNumCPUs(t *testing.T) { - for i := uint(0); i < 1024; i++ { - c := NewCPUSet(i) - for j := uint(0); j < i; j++ { - c.Set(j) - } - n := c.NumCPUs() - if n != i { - t.Errorf("got wrong number of cpus %d, want %d", n, i) - } - } -} - -func TestClearAbove(t *testing.T) { - const n = 1024 - c := NewFullCPUSet(n) - for i := uint(0); i < n; i++ { - cpu := n - i - c.ClearAbove(cpu) - if got := c.NumCPUs(); got != cpu { - t.Errorf("iteration %d: got %d cpus, wanted %d", i, got, cpu) - } - } -} diff --git a/pkg/sentry/kernel/sched/sched_state_autogen.go b/pkg/sentry/kernel/sched/sched_state_autogen.go new file mode 100644 index 000000000..9705ca79d --- /dev/null +++ b/pkg/sentry/kernel/sched/sched_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package sched diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD deleted file mode 100644 index a787c00a8..000000000 --- a/pkg/sentry/kernel/semaphore/BUILD +++ /dev/null @@ -1,50 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "waiter_list", - out = "waiter_list.go", - package = "semaphore", - prefix = "waiter", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*waiter", - "Linker": "*waiter", - }, -) - -go_library( - name = "semaphore", - srcs = [ - "semaphore.go", - "waiter_list.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/log", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sync", - "//pkg/syserror", - ], -) - -go_test( - name = "semaphore_test", - size = "small", - srcs = ["semaphore_test.go"], - library = ":semaphore", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/contexttest", - "//pkg/sentry/kernel/auth", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go b/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go new file mode 100644 index 000000000..f90fbff34 --- /dev/null +++ b/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go @@ -0,0 +1,220 @@ +// automatically generated by stateify. + +package semaphore + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *Registry) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.Registry" +} + +func (r *Registry) StateFields() []string { + return []string{ + "userNS", + "semaphores", + "lastIDUsed", + "indexes", + } +} + +func (r *Registry) beforeSave() {} + +// +checklocksignore +func (r *Registry) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.userNS) + stateSinkObject.Save(1, &r.semaphores) + stateSinkObject.Save(2, &r.lastIDUsed) + stateSinkObject.Save(3, &r.indexes) +} + +func (r *Registry) afterLoad() {} + +// +checklocksignore +func (r *Registry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.userNS) + stateSourceObject.Load(1, &r.semaphores) + stateSourceObject.Load(2, &r.lastIDUsed) + stateSourceObject.Load(3, &r.indexes) +} + +func (s *Set) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.Set" +} + +func (s *Set) StateFields() []string { + return []string{ + "registry", + "ID", + "key", + "creator", + "owner", + "perms", + "opTime", + "changeTime", + "sems", + "dead", + } +} + +func (s *Set) beforeSave() {} + +// +checklocksignore +func (s *Set) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.registry) + stateSinkObject.Save(1, &s.ID) + stateSinkObject.Save(2, &s.key) + stateSinkObject.Save(3, &s.creator) + stateSinkObject.Save(4, &s.owner) + stateSinkObject.Save(5, &s.perms) + stateSinkObject.Save(6, &s.opTime) + stateSinkObject.Save(7, &s.changeTime) + stateSinkObject.Save(8, &s.sems) + stateSinkObject.Save(9, &s.dead) +} + +func (s *Set) afterLoad() {} + +// +checklocksignore +func (s *Set) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.registry) + stateSourceObject.Load(1, &s.ID) + stateSourceObject.Load(2, &s.key) + stateSourceObject.Load(3, &s.creator) + stateSourceObject.Load(4, &s.owner) + stateSourceObject.Load(5, &s.perms) + stateSourceObject.Load(6, &s.opTime) + stateSourceObject.Load(7, &s.changeTime) + stateSourceObject.Load(8, &s.sems) + stateSourceObject.Load(9, &s.dead) +} + +func (s *sem) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.sem" +} + +func (s *sem) StateFields() []string { + return []string{ + "value", + "pid", + } +} + +func (s *sem) beforeSave() {} + +// +checklocksignore +func (s *sem) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + if !state.IsZeroValue(&s.waiters) { + state.Failf("waiters is %#v, expected zero", &s.waiters) + } + stateSinkObject.Save(0, &s.value) + stateSinkObject.Save(1, &s.pid) +} + +func (s *sem) afterLoad() {} + +// +checklocksignore +func (s *sem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.value) + stateSourceObject.Load(1, &s.pid) +} + +func (w *waiter) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.waiter" +} + +func (w *waiter) StateFields() []string { + return []string{ + "waiterEntry", + "value", + "ch", + } +} + +func (w *waiter) beforeSave() {} + +// +checklocksignore +func (w *waiter) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.waiterEntry) + stateSinkObject.Save(1, &w.value) + stateSinkObject.Save(2, &w.ch) +} + +func (w *waiter) afterLoad() {} + +// +checklocksignore +func (w *waiter) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.waiterEntry) + stateSourceObject.Load(1, &w.value) + stateSourceObject.Load(2, &w.ch) +} + +func (l *waiterList) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.waiterList" +} + +func (l *waiterList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *waiterList) beforeSave() {} + +// +checklocksignore +func (l *waiterList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *waiterList) afterLoad() {} + +// +checklocksignore +func (l *waiterList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *waiterEntry) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.waiterEntry" +} + +func (e *waiterEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *waiterEntry) beforeSave() {} + +// +checklocksignore +func (e *waiterEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *waiterEntry) afterLoad() {} + +// +checklocksignore +func (e *waiterEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func init() { + state.Register((*Registry)(nil)) + state.Register((*Set)(nil)) + state.Register((*sem)(nil)) + state.Register((*waiter)(nil)) + state.Register((*waiterList)(nil)) + state.Register((*waiterEntry)(nil)) +} diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go deleted file mode 100644 index e47acefdf..000000000 --- a/pkg/sentry/kernel/semaphore/semaphore_test.go +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package semaphore - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" -) - -func executeOps(ctx context.Context, t *testing.T, set *Set, ops []linux.Sembuf, block bool) chan struct{} { - ch, _, err := set.executeOps(ctx, ops, 123) - if err != nil { - t.Fatalf("ExecuteOps(ops) failed, err: %v, ops: %+v", err, ops) - } - if block { - if ch == nil { - t.Fatalf("ExecuteOps(ops) got: nil, expected: !nil, ops: %+v", ops) - } - if signalled(ch) { - t.Fatalf("ExecuteOps(ops) channel should not have been signalled, ops: %+v", ops) - } - } else { - if ch != nil { - t.Fatalf("ExecuteOps(ops) got: %v, expected: nil, ops: %+v", ch, ops) - } - } - return ch -} - -func signalled(ch chan struct{}) bool { - select { - case <-ch: - return true - default: - return false - } -} - -func TestBasic(t *testing.T) { - ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} - ops := []linux.Sembuf{ - {SemOp: 1}, - } - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -1 - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -1 - ch1 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 1 - executeOps(ctx, t, set, ops, false) - if !signalled(ch1) { - t.Fatalf("ExecuteOps(ops) channel should not have been signalled, ops: %+v", ops) - } -} - -func TestWaitForZero(t *testing.T) { - ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} - ops := []linux.Sembuf{ - {SemOp: 0}, - } - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -2 - ch1 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 0 - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = 1 - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = 0 - chZero1 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 0 - chZero2 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 1 - executeOps(ctx, t, set, ops, false) - if !signalled(ch1) { - t.Fatalf("ExecuteOps(ops) channel should have been signalled, ops: %+v, set: %+v", ops, set) - } - - ops[0].SemOp = -2 - executeOps(ctx, t, set, ops, false) - if !signalled(chZero1) { - t.Fatalf("ExecuteOps(ops) channel zero 1 should have been signalled, ops: %+v, set: %+v", ops, set) - } - if !signalled(chZero2) { - t.Fatalf("ExecuteOps(ops) channel zero 2 should have been signalled, ops: %+v, set: %+v", ops, set) - } -} - -func TestNoWait(t *testing.T) { - ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} - ops := []linux.Sembuf{ - {SemOp: 1}, - } - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -2 - ops[0].SemFlg = linux.IPC_NOWAIT - if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock { - t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock) - } - - ops[0].SemOp = 0 - ops[0].SemFlg = linux.IPC_NOWAIT - if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock { - t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock) - } -} - -func TestUnregister(t *testing.T) { - ctx := contexttest.Context(t) - r := NewRegistry(auth.NewRootUserNamespace()) - set, err := r.FindOrCreate(ctx, 123, 2, linux.FileMode(0x600), true, true, true) - if err != nil { - t.Fatalf("FindOrCreate() failed, err: %v", err) - } - if got := r.FindByID(set.ID); got.ID != set.ID { - t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.ID, got, set) - } - - ops := []linux.Sembuf{ - {SemOp: -1}, - } - chs := make([]chan struct{}, 0, 5) - for i := 0; i < 5; i++ { - ch := executeOps(ctx, t, set, ops, true) - chs = append(chs, ch) - } - - creds := auth.CredentialsFromContext(ctx) - if err := r.RemoveID(set.ID, creds); err != nil { - t.Fatalf("RemoveID(%d) failed, err: %v", set.ID, err) - } - if !set.dead { - t.Fatalf("set is not dead: %+v", set) - } - if got := r.FindByID(set.ID); got != nil { - t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.ID, got) - } - for i, ch := range chs { - if !signalled(ch) { - t.Fatalf("channel %d should have been signalled", i) - } - } -} diff --git a/pkg/sentry/kernel/semaphore/waiter_list.go b/pkg/sentry/kernel/semaphore/waiter_list.go new file mode 100644 index 000000000..51586d43f --- /dev/null +++ b/pkg/sentry/kernel/semaphore/waiter_list.go @@ -0,0 +1,221 @@ +package semaphore + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type waiterElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (waiterElementMapper) linkerFor(elem *waiter) *waiter { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type waiterList struct { + head *waiter + tail *waiter +} + +// Reset resets list l to the empty state. +func (l *waiterList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *waiterList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *waiterList) Front() *waiter { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *waiterList) Back() *waiter { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *waiterList) Len() (count int) { + for e := l.Front(); e != nil; e = (waiterElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *waiterList) PushFront(e *waiter) { + linker := waiterElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + waiterElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *waiterList) PushBack(e *waiter) { + linker := waiterElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *waiterList) PushBackList(m *waiterList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(m.head) + waiterElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *waiterList) InsertAfter(b, e *waiter) { + bLinker := waiterElementMapper{}.linkerFor(b) + eLinker := waiterElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + waiterElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *waiterList) InsertBefore(a, e *waiter) { + aLinker := waiterElementMapper{}.linkerFor(a) + eLinker := waiterElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + waiterElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *waiterList) Remove(e *waiter) { + linker := waiterElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + waiterElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + waiterElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type waiterEntry struct { + next *waiter + prev *waiter +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *waiterEntry) Next() *waiter { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *waiterEntry) Prev() *waiter { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *waiterEntry) SetNext(elem *waiter) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *waiterEntry) SetPrev(elem *waiter) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go new file mode 100644 index 000000000..d45cc0a0f --- /dev/null +++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go @@ -0,0 +1,38 @@ +package kernel + +import ( + "unsafe" + + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/sync" +) + +// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race +// with any writer critical sections in seq. +// +//go:nosplit +func SeqAtomicLoadTaskGoroutineSchedInfo(seq *sync.SeqCount, ptr *TaskGoroutineSchedInfo) TaskGoroutineSchedInfo { + for { + if val, ok := SeqAtomicTryLoadTaskGoroutineSchedInfo(seq, seq.BeginRead(), ptr); ok { + return val + } + } +} + +// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section +// in seq initiated by a call to seq.BeginRead() that returned epoch. If the +// read would race with a writer critical section, SeqAtomicTryLoad returns +// (unspecified, false). +// +//go:nosplit +func SeqAtomicTryLoadTaskGoroutineSchedInfo(seq *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *TaskGoroutineSchedInfo) (val TaskGoroutineSchedInfo, ok bool) { + if sync.RaceEnabled { + + gohacks.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + + val = *ptr + } + ok = seq.ReadOk(epoch) + return +} diff --git a/pkg/sentry/kernel/session_list.go b/pkg/sentry/kernel/session_list.go new file mode 100644 index 000000000..f53dea401 --- /dev/null +++ b/pkg/sentry/kernel/session_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type sessionElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (sessionElementMapper) linkerFor(elem *Session) *Session { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type sessionList struct { + head *Session + tail *Session +} + +// Reset resets list l to the empty state. +func (l *sessionList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *sessionList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *sessionList) Front() *Session { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *sessionList) Back() *Session { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *sessionList) Len() (count int) { + for e := l.Front(); e != nil; e = (sessionElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *sessionList) PushFront(e *Session) { + linker := sessionElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + sessionElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *sessionList) PushBack(e *Session) { + linker := sessionElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + sessionElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *sessionList) PushBackList(m *sessionList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + sessionElementMapper{}.linkerFor(l.tail).SetNext(m.head) + sessionElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *sessionList) InsertAfter(b, e *Session) { + bLinker := sessionElementMapper{}.linkerFor(b) + eLinker := sessionElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + sessionElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *sessionList) InsertBefore(a, e *Session) { + aLinker := sessionElementMapper{}.linkerFor(a) + eLinker := sessionElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + sessionElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *sessionList) Remove(e *Session) { + linker := sessionElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + sessionElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + sessionElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type sessionEntry struct { + next *Session + prev *Session +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *sessionEntry) Next() *Session { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *sessionEntry) Prev() *Session { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *sessionEntry) SetNext(elem *Session) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *sessionEntry) SetPrev(elem *Session) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/session_refs.go b/pkg/sentry/kernel/session_refs.go new file mode 100644 index 000000000..0186a0e6d --- /dev/null +++ b/pkg/sentry/kernel/session_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const SessionenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var Sessionobj *Session + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type SessionRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *SessionRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *SessionRefs) RefType() string { + return fmt.Sprintf("%T", Sessionobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *SessionRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *SessionRefs) LogRefs() bool { + return SessionenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *SessionRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *SessionRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if SessionenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.RefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *SessionRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if SessionenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *SessionRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if SessionenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *SessionRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD deleted file mode 100644 index 5b69333fe..000000000 --- a/pkg/sentry/kernel/shm/BUILD +++ /dev/null @@ -1,47 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "shm_refs", - out = "shm_refs.go", - consts = { - "enableLogging": "true", - }, - package = "shm", - prefix = "Shm", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "Shm", - }, -) - -go_library( - name = "shm", - srcs = [ - "device.go", - "shm.go", - "shm_refs.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/usage", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/kernel/shm/shm_refs.go b/pkg/sentry/kernel/shm/shm_refs.go new file mode 100644 index 000000000..f832c1a99 --- /dev/null +++ b/pkg/sentry/kernel/shm/shm_refs.go @@ -0,0 +1,140 @@ +package shm + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const ShmenableLogging = true + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var Shmobj *Shm + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type ShmRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *ShmRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *ShmRefs) RefType() string { + return fmt.Sprintf("%T", Shmobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *ShmRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *ShmRefs) LogRefs() bool { + return ShmenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *ShmRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *ShmRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if ShmenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.RefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *ShmRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if ShmenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *ShmRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if ShmenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *ShmRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/shm/shm_state_autogen.go b/pkg/sentry/kernel/shm/shm_state_autogen.go new file mode 100644 index 000000000..30ceeaa03 --- /dev/null +++ b/pkg/sentry/kernel/shm/shm_state_autogen.go @@ -0,0 +1,147 @@ +// automatically generated by stateify. + +package shm + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *Registry) StateTypeName() string { + return "pkg/sentry/kernel/shm.Registry" +} + +func (r *Registry) StateFields() []string { + return []string{ + "userNS", + "shms", + "keysToShms", + "totalPages", + "lastIDUsed", + } +} + +func (r *Registry) beforeSave() {} + +// +checklocksignore +func (r *Registry) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.userNS) + stateSinkObject.Save(1, &r.shms) + stateSinkObject.Save(2, &r.keysToShms) + stateSinkObject.Save(3, &r.totalPages) + stateSinkObject.Save(4, &r.lastIDUsed) +} + +func (r *Registry) afterLoad() {} + +// +checklocksignore +func (r *Registry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.userNS) + stateSourceObject.Load(1, &r.shms) + stateSourceObject.Load(2, &r.keysToShms) + stateSourceObject.Load(3, &r.totalPages) + stateSourceObject.Load(4, &r.lastIDUsed) +} + +func (s *Shm) StateTypeName() string { + return "pkg/sentry/kernel/shm.Shm" +} + +func (s *Shm) StateFields() []string { + return []string{ + "ShmRefs", + "mfp", + "registry", + "ID", + "creator", + "size", + "effectiveSize", + "fr", + "key", + "perms", + "owner", + "attachTime", + "detachTime", + "changeTime", + "creatorPID", + "lastAttachDetachPID", + "pendingDestruction", + } +} + +func (s *Shm) beforeSave() {} + +// +checklocksignore +func (s *Shm) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.ShmRefs) + stateSinkObject.Save(1, &s.mfp) + stateSinkObject.Save(2, &s.registry) + stateSinkObject.Save(3, &s.ID) + stateSinkObject.Save(4, &s.creator) + stateSinkObject.Save(5, &s.size) + stateSinkObject.Save(6, &s.effectiveSize) + stateSinkObject.Save(7, &s.fr) + stateSinkObject.Save(8, &s.key) + stateSinkObject.Save(9, &s.perms) + stateSinkObject.Save(10, &s.owner) + stateSinkObject.Save(11, &s.attachTime) + stateSinkObject.Save(12, &s.detachTime) + stateSinkObject.Save(13, &s.changeTime) + stateSinkObject.Save(14, &s.creatorPID) + stateSinkObject.Save(15, &s.lastAttachDetachPID) + stateSinkObject.Save(16, &s.pendingDestruction) +} + +func (s *Shm) afterLoad() {} + +// +checklocksignore +func (s *Shm) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.ShmRefs) + stateSourceObject.Load(1, &s.mfp) + stateSourceObject.Load(2, &s.registry) + stateSourceObject.Load(3, &s.ID) + stateSourceObject.Load(4, &s.creator) + stateSourceObject.Load(5, &s.size) + stateSourceObject.Load(6, &s.effectiveSize) + stateSourceObject.Load(7, &s.fr) + stateSourceObject.Load(8, &s.key) + stateSourceObject.Load(9, &s.perms) + stateSourceObject.Load(10, &s.owner) + stateSourceObject.Load(11, &s.attachTime) + stateSourceObject.Load(12, &s.detachTime) + stateSourceObject.Load(13, &s.changeTime) + stateSourceObject.Load(14, &s.creatorPID) + stateSourceObject.Load(15, &s.lastAttachDetachPID) + stateSourceObject.Load(16, &s.pendingDestruction) +} + +func (r *ShmRefs) StateTypeName() string { + return "pkg/sentry/kernel/shm.ShmRefs" +} + +func (r *ShmRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *ShmRefs) beforeSave() {} + +// +checklocksignore +func (r *ShmRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *ShmRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func init() { + state.Register((*Registry)(nil)) + state.Register((*Shm)(nil)) + state.Register((*ShmRefs)(nil)) +} diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD deleted file mode 100644 index 1110ecca5..000000000 --- a/pkg/sentry/kernel/signalfd/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "signalfd", - srcs = ["signalfd.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go b/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go new file mode 100644 index 000000000..ad2622f27 --- /dev/null +++ b/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go @@ -0,0 +1,39 @@ +// automatically generated by stateify. + +package signalfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *SignalOperations) StateTypeName() string { + return "pkg/sentry/kernel/signalfd.SignalOperations" +} + +func (s *SignalOperations) StateFields() []string { + return []string{ + "target", + "mask", + } +} + +func (s *SignalOperations) beforeSave() {} + +// +checklocksignore +func (s *SignalOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.target) + stateSinkObject.Save(1, &s.mask) +} + +func (s *SignalOperations) afterLoad() {} + +// +checklocksignore +func (s *SignalOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.target) + stateSourceObject.Load(1, &s.mask) +} + +func init() { + state.Register((*SignalOperations)(nil)) +} diff --git a/pkg/sentry/kernel/socket_list.go b/pkg/sentry/kernel/socket_list.go new file mode 100644 index 000000000..2b2f5055e --- /dev/null +++ b/pkg/sentry/kernel/socket_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type socketElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (socketElementMapper) linkerFor(elem *SocketRecordVFS1) *SocketRecordVFS1 { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type socketList struct { + head *SocketRecordVFS1 + tail *SocketRecordVFS1 +} + +// Reset resets list l to the empty state. +func (l *socketList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *socketList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *socketList) Front() *SocketRecordVFS1 { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *socketList) Back() *SocketRecordVFS1 { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *socketList) Len() (count int) { + for e := l.Front(); e != nil; e = (socketElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *socketList) PushFront(e *SocketRecordVFS1) { + linker := socketElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + socketElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *socketList) PushBack(e *SocketRecordVFS1) { + linker := socketElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + socketElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *socketList) PushBackList(m *socketList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + socketElementMapper{}.linkerFor(l.tail).SetNext(m.head) + socketElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *socketList) InsertAfter(b, e *SocketRecordVFS1) { + bLinker := socketElementMapper{}.linkerFor(b) + eLinker := socketElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + socketElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *socketList) InsertBefore(a, e *SocketRecordVFS1) { + aLinker := socketElementMapper{}.linkerFor(a) + eLinker := socketElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + socketElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *socketList) Remove(e *SocketRecordVFS1) { + linker := socketElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + socketElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + socketElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type socketEntry struct { + next *SocketRecordVFS1 + prev *SocketRecordVFS1 +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *socketEntry) Next() *SocketRecordVFS1 { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *socketEntry) Prev() *SocketRecordVFS1 { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *socketEntry) SetNext(elem *SocketRecordVFS1) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *socketEntry) SetPrev(elem *SocketRecordVFS1) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/table_test.go b/pkg/sentry/kernel/table_test.go deleted file mode 100644 index 32cf47e05..000000000 --- a/pkg/sentry/kernel/table_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi" - "gvisor.dev/gvisor/pkg/sentry/arch" -) - -const ( - maxTestSyscall = 1000 -) - -func createSyscallTable() *SyscallTable { - m := make(map[uintptr]Syscall) - for i := uintptr(0); i <= maxTestSyscall; i++ { - j := i - m[i] = Syscall{ - Fn: func(*Task, arch.SyscallArguments) (uintptr, *SyscallControl, error) { - return j, nil, nil - }, - } - } - - s := &SyscallTable{ - OS: abi.Linux, - Arch: arch.AMD64, - Table: m, - } - - RegisterSyscallTable(s) - return s -} - -func TestTable(t *testing.T) { - table := createSyscallTable() - defer func() { - // Cleanup registered tables to keep tests separate. - allSyscallTables = []*SyscallTable{} - }() - - // Go through all functions and check that they return the right value. - for i := uintptr(0); i < maxTestSyscall; i++ { - fn := table.Lookup(i) - if fn == nil { - t.Errorf("Syscall %v is set to nil", i) - continue - } - - v, _, _ := fn(nil, arch.SyscallArguments{}) - if v != i { - t.Errorf("Wrong return value for syscall %v: expected %v, got %v", i, i, v) - } - } - - // Check that values outside the range return nil. - for i := uintptr(maxTestSyscall + 1); i < maxTestSyscall+100; i++ { - fn := table.Lookup(i) - if fn != nil { - t.Errorf("Syscall %v is not nil: %v", i, fn) - continue - } - } -} - -func BenchmarkTableLookup(b *testing.B) { - table := createSyscallTable() - - b.ResetTimer() - - j := uintptr(0) - for i := 0; i < b.N; i++ { - table.Lookup(j) - j = (j + 1) % 310 - } - - b.StopTimer() - // Cleanup registered tables to keep tests separate. - allSyscallTables = []*SyscallTable{} -} - -func BenchmarkTableMapLookup(b *testing.B) { - table := createSyscallTable() - - b.ResetTimer() - - j := uintptr(0) - for i := 0; i < b.N; i++ { - table.mapLookup(j) - j = (j + 1) % 310 - } - - b.StopTimer() - // Cleanup registered tables to keep tests separate. - allSyscallTables = []*SyscallTable{} -} diff --git a/pkg/sentry/kernel/task_list.go b/pkg/sentry/kernel/task_list.go new file mode 100644 index 000000000..66df9ac45 --- /dev/null +++ b/pkg/sentry/kernel/task_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type taskElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (taskElementMapper) linkerFor(elem *Task) *Task { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type taskList struct { + head *Task + tail *Task +} + +// Reset resets list l to the empty state. +func (l *taskList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *taskList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *taskList) Front() *Task { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *taskList) Back() *Task { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *taskList) Len() (count int) { + for e := l.Front(); e != nil; e = (taskElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *taskList) PushFront(e *Task) { + linker := taskElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + taskElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *taskList) PushBack(e *Task) { + linker := taskElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + taskElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *taskList) PushBackList(m *taskList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + taskElementMapper{}.linkerFor(l.tail).SetNext(m.head) + taskElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *taskList) InsertAfter(b, e *Task) { + bLinker := taskElementMapper{}.linkerFor(b) + eLinker := taskElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + taskElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *taskList) InsertBefore(a, e *Task) { + aLinker := taskElementMapper{}.linkerFor(a) + eLinker := taskElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + taskElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *taskList) Remove(e *Task) { + linker := taskElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + taskElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + taskElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type taskEntry struct { + next *Task + prev *Task +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *taskEntry) Next() *Task { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *taskEntry) Prev() *Task { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *taskEntry) SetNext(elem *Task) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *taskEntry) SetPrev(elem *Task) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/task_test.go b/pkg/sentry/kernel/task_test.go deleted file mode 100644 index cfcde9a7a..000000000 --- a/pkg/sentry/kernel/task_test.go +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/kernel/sched" -) - -func TestTaskCPU(t *testing.T) { - for _, test := range []struct { - mask sched.CPUSet - tid ThreadID - cpu int32 - }{ - { - mask: []byte{0xff}, - tid: 1, - cpu: 0, - }, - { - mask: []byte{0xff}, - tid: 10, - cpu: 1, - }, - { - // more than 8 cpus. - mask: []byte{0xff, 0xff}, - tid: 10, - cpu: 9, - }, - { - // missing the first cpu. - mask: []byte{0xfe}, - tid: 1, - cpu: 1, - }, - { - mask: []byte{0xfe}, - tid: 10, - cpu: 3, - }, - { - // missing the fifth cpu. - mask: []byte{0xef}, - tid: 10, - cpu: 2, - }, - } { - assigned := assignCPU(test.mask, test.tid) - if test.cpu != assigned { - t.Errorf("assignCPU(%v, %v) got %v, want %v", test.mask, test.tid, assigned, test.cpu) - } - } - -} diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD deleted file mode 100644 index e293d9a0f..000000000 --- a/pkg/sentry/kernel/time/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "time", - srcs = [ - "context.go", - "tcpip.go", - "time.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sync", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/time/time_state_autogen.go b/pkg/sentry/kernel/time/time_state_autogen.go new file mode 100644 index 000000000..855751953 --- /dev/null +++ b/pkg/sentry/kernel/time/time_state_autogen.go @@ -0,0 +1,103 @@ +// automatically generated by stateify. + +package time + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (t *Time) StateTypeName() string { + return "pkg/sentry/kernel/time.Time" +} + +func (t *Time) StateFields() []string { + return []string{ + "ns", + } +} + +func (t *Time) beforeSave() {} + +// +checklocksignore +func (t *Time) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.ns) +} + +func (t *Time) afterLoad() {} + +// +checklocksignore +func (t *Time) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.ns) +} + +func (s *Setting) StateTypeName() string { + return "pkg/sentry/kernel/time.Setting" +} + +func (s *Setting) StateFields() []string { + return []string{ + "Enabled", + "Next", + "Period", + } +} + +func (s *Setting) beforeSave() {} + +// +checklocksignore +func (s *Setting) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Enabled) + stateSinkObject.Save(1, &s.Next) + stateSinkObject.Save(2, &s.Period) +} + +func (s *Setting) afterLoad() {} + +// +checklocksignore +func (s *Setting) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Enabled) + stateSourceObject.Load(1, &s.Next) + stateSourceObject.Load(2, &s.Period) +} + +func (t *Timer) StateTypeName() string { + return "pkg/sentry/kernel/time.Timer" +} + +func (t *Timer) StateFields() []string { + return []string{ + "clock", + "listener", + "setting", + "paused", + } +} + +func (t *Timer) beforeSave() {} + +// +checklocksignore +func (t *Timer) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.clock) + stateSinkObject.Save(1, &t.listener) + stateSinkObject.Save(2, &t.setting) + stateSinkObject.Save(3, &t.paused) +} + +func (t *Timer) afterLoad() {} + +// +checklocksignore +func (t *Timer) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.clock) + stateSourceObject.Load(1, &t.listener) + stateSourceObject.Load(2, &t.setting) + stateSourceObject.Load(3, &t.paused) +} + +func init() { + state.Register((*Time)(nil)) + state.Register((*Setting)(nil)) + state.Register((*Timer)(nil)) +} diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go deleted file mode 100644 index b6039505a..000000000 --- a/pkg/sentry/kernel/timekeeper_test.go +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - sentrytime "gvisor.dev/gvisor/pkg/sentry/time" - "gvisor.dev/gvisor/pkg/sentry/usage" -) - -// mockClocks is a sentrytime.Clocks that simply returns the times in the -// struct. -type mockClocks struct { - monotonic int64 - realtime int64 -} - -// Update implements sentrytime.Clocks.Update. It does nothing. -func (*mockClocks) Update() (monotonicParams sentrytime.Parameters, monotonicOk bool, realtimeParam sentrytime.Parameters, realtimeOk bool) { - return -} - -// Update implements sentrytime.Clocks.GetTime. -func (c *mockClocks) GetTime(id sentrytime.ClockID) (int64, error) { - switch id { - case sentrytime.Monotonic: - return c.monotonic, nil - case sentrytime.Realtime: - return c.realtime, nil - default: - return 0, linuxerr.EINVAL - } -} - -// stateTestClocklessTimekeeper returns a test Timekeeper which has not had -// SetClocks called. -func stateTestClocklessTimekeeper(tb testing.TB) *Timekeeper { - ctx := contexttest.Context(tb) - mfp := pgalloc.MemoryFileProviderFromContext(ctx) - fr, err := mfp.MemoryFile().Allocate(hostarch.PageSize, usage.Anonymous) - if err != nil { - tb.Fatalf("failed to allocate memory: %v", err) - } - return &Timekeeper{ - params: NewVDSOParamPage(mfp, fr), - } -} - -func stateTestTimekeeper(tb testing.TB) *Timekeeper { - t := stateTestClocklessTimekeeper(tb) - t.SetClocks(sentrytime.NewCalibratedClocks()) - return t -} - -// TestTimekeeperMonotonicZero tests that monotonic time starts at zero. -func TestTimekeeperMonotonicZero(t *testing.T) { - c := &mockClocks{ - monotonic: 100000, - } - - tk := stateTestClocklessTimekeeper(t) - tk.SetClocks(c) - defer tk.Destroy() - - now, err := tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 0 { - t.Errorf("GetTime got %d want 0", now) - } - - c.monotonic += 10 - - now, err = tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 10 { - t.Errorf("GetTime got %d want 10", now) - } -} - -// TestTimekeeperMonotonicJumpForward tests that monotonic time jumps forward -// after restore. -func TestTimekeeperMonotonicForward(t *testing.T) { - c := &mockClocks{ - monotonic: 900000, - realtime: 600000, - } - - tk := stateTestClocklessTimekeeper(t) - tk.restored = make(chan struct{}) - tk.saveMonotonic = 100000 - tk.saveRealtime = 400000 - tk.SetClocks(c) - defer tk.Destroy() - - // The monotonic clock should jump ahead by 200000 to 300000. - // - // The new system monotonic time (900000) is irrelevant to what the app - // sees. - now, err := tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 300000 { - t.Errorf("GetTime got %d want 300000", now) - } -} - -// TestTimekeeperMonotonicJumpBackwards tests that monotonic time does not jump -// backwards when realtime goes backwards. -func TestTimekeeperMonotonicJumpBackwards(t *testing.T) { - c := &mockClocks{ - monotonic: 900000, - realtime: 400000, - } - - tk := stateTestClocklessTimekeeper(t) - tk.restored = make(chan struct{}) - tk.saveMonotonic = 100000 - tk.saveRealtime = 600000 - tk.SetClocks(c) - defer tk.Destroy() - - // The monotonic clock should remain at 100000. - // - // The new system monotonic time (900000) is irrelevant to what the app - // sees and we don't want to jump the monotonic clock backwards like - // realtime did. - now, err := tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 100000 { - t.Errorf("GetTime got %d want 100000", now) - } -} diff --git a/pkg/sentry/kernel/uncaught_signal.proto b/pkg/sentry/kernel/uncaught_signal.proto deleted file mode 100644 index 0bdb062cb..000000000 --- a/pkg/sentry/kernel/uncaught_signal.proto +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -import "pkg/sentry/arch/registers.proto"; - -message UncaughtSignal { - // Thread ID. - int32 tid = 1; - - // Process ID. - int32 pid = 2; - - // Registers at the time of the fault or signal. - Registers registers = 3; - - // Signal number. - int32 signal_number = 4; - - // The memory location which caused the fault (set if applicable, 0 - // otherwise). This will be set for SIGILL, SIGFPE, SIGSEGV, and SIGBUS. - uint64 fault_addr = 5; -} diff --git a/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go b/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go new file mode 100644 index 000000000..54955f061 --- /dev/null +++ b/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go @@ -0,0 +1,193 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.25.0 +// protoc v3.13.0 +// source: pkg/sentry/kernel/uncaught_signal.proto + +package gvisor + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + registers_go_proto "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. +const _ = proto.ProtoPackageIsVersion4 + +type UncaughtSignal struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Tid int32 `protobuf:"varint,1,opt,name=tid,proto3" json:"tid,omitempty"` + Pid int32 `protobuf:"varint,2,opt,name=pid,proto3" json:"pid,omitempty"` + Registers *registers_go_proto.Registers `protobuf:"bytes,3,opt,name=registers,proto3" json:"registers,omitempty"` + SignalNumber int32 `protobuf:"varint,4,opt,name=signal_number,json=signalNumber,proto3" json:"signal_number,omitempty"` + FaultAddr uint64 `protobuf:"varint,5,opt,name=fault_addr,json=faultAddr,proto3" json:"fault_addr,omitempty"` +} + +func (x *UncaughtSignal) Reset() { + *x = UncaughtSignal{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *UncaughtSignal) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UncaughtSignal) ProtoMessage() {} + +func (x *UncaughtSignal) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UncaughtSignal.ProtoReflect.Descriptor instead. +func (*UncaughtSignal) Descriptor() ([]byte, []int) { + return file_pkg_sentry_kernel_uncaught_signal_proto_rawDescGZIP(), []int{0} +} + +func (x *UncaughtSignal) GetTid() int32 { + if x != nil { + return x.Tid + } + return 0 +} + +func (x *UncaughtSignal) GetPid() int32 { + if x != nil { + return x.Pid + } + return 0 +} + +func (x *UncaughtSignal) GetRegisters() *registers_go_proto.Registers { + if x != nil { + return x.Registers + } + return nil +} + +func (x *UncaughtSignal) GetSignalNumber() int32 { + if x != nil { + return x.SignalNumber + } + return 0 +} + +func (x *UncaughtSignal) GetFaultAddr() uint64 { + if x != nil { + return x.FaultAddr + } + return 0 +} + +var File_pkg_sentry_kernel_uncaught_signal_proto protoreflect.FileDescriptor + +var file_pkg_sentry_kernel_uncaught_signal_proto_rawDesc = []byte{ + 0x0a, 0x27, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x2f, 0x6b, 0x65, 0x72, + 0x6e, 0x65, 0x6c, 0x2f, 0x75, 0x6e, 0x63, 0x61, 0x75, 0x67, 0x68, 0x74, 0x5f, 0x73, 0x69, 0x67, + 0x6e, 0x61, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x06, 0x67, 0x76, 0x69, 0x73, 0x6f, + 0x72, 0x1a, 0x1f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x2f, 0x61, 0x72, + 0x63, 0x68, 0x2f, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x22, 0xa9, 0x01, 0x0a, 0x0e, 0x55, 0x6e, 0x63, 0x61, 0x75, 0x67, 0x68, 0x74, 0x53, + 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x74, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x03, 0x74, 0x69, 0x64, 0x12, 0x10, 0x0a, 0x03, 0x70, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x70, 0x69, 0x64, 0x12, 0x2f, 0x0a, 0x09, 0x72, 0x65, 0x67, + 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x67, + 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x52, + 0x09, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x73, 0x69, + 0x67, 0x6e, 0x61, 0x6c, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x0c, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, + 0x1d, 0x0a, 0x0a, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x09, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x41, 0x64, 0x64, 0x72, 0x62, 0x06, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_pkg_sentry_kernel_uncaught_signal_proto_rawDescOnce sync.Once + file_pkg_sentry_kernel_uncaught_signal_proto_rawDescData = file_pkg_sentry_kernel_uncaught_signal_proto_rawDesc +) + +func file_pkg_sentry_kernel_uncaught_signal_proto_rawDescGZIP() []byte { + file_pkg_sentry_kernel_uncaught_signal_proto_rawDescOnce.Do(func() { + file_pkg_sentry_kernel_uncaught_signal_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_sentry_kernel_uncaught_signal_proto_rawDescData) + }) + return file_pkg_sentry_kernel_uncaught_signal_proto_rawDescData +} + +var file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_pkg_sentry_kernel_uncaught_signal_proto_goTypes = []interface{}{ + (*UncaughtSignal)(nil), // 0: gvisor.UncaughtSignal + (*registers_go_proto.Registers)(nil), // 1: gvisor.Registers +} +var file_pkg_sentry_kernel_uncaught_signal_proto_depIdxs = []int32{ + 1, // 0: gvisor.UncaughtSignal.registers:type_name -> gvisor.Registers + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_pkg_sentry_kernel_uncaught_signal_proto_init() } +func file_pkg_sentry_kernel_uncaught_signal_proto_init() { + if File_pkg_sentry_kernel_uncaught_signal_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*UncaughtSignal); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_sentry_kernel_uncaught_signal_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_pkg_sentry_kernel_uncaught_signal_proto_goTypes, + DependencyIndexes: file_pkg_sentry_kernel_uncaught_signal_proto_depIdxs, + MessageInfos: file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes, + }.Build() + File_pkg_sentry_kernel_uncaught_signal_proto = out.File + file_pkg_sentry_kernel_uncaught_signal_proto_rawDesc = nil + file_pkg_sentry_kernel_uncaught_signal_proto_goTypes = nil + file_pkg_sentry_kernel_uncaught_signal_proto_depIdxs = nil +} |