summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r--pkg/sentry/kernel/BUILD19
-rw-r--r--pkg/sentry/kernel/abstract_socket_namespace.go4
-rw-r--r--pkg/sentry/kernel/auth/BUILD14
-rw-r--r--pkg/sentry/kernel/auth/capability_set.go6
-rw-r--r--pkg/sentry/kernel/auth/context.go2
-rw-r--r--pkg/sentry/kernel/auth/credentials.go4
-rw-r--r--pkg/sentry/kernel/auth/id_map.go6
-rw-r--r--pkg/sentry/kernel/auth/user_namespace.go2
-rw-r--r--pkg/sentry/kernel/context.go4
-rw-r--r--pkg/sentry/kernel/contexttest/BUILD2
-rw-r--r--pkg/sentry/kernel/contexttest/contexttest.go10
-rw-r--r--pkg/sentry/kernel/epoll/BUILD3
-rw-r--r--pkg/sentry/kernel/epoll/epoll.go19
-rw-r--r--pkg/sentry/kernel/epoll/epoll_state.go4
-rw-r--r--pkg/sentry/kernel/epoll/epoll_test.go6
-rw-r--r--pkg/sentry/kernel/eventfd/BUILD2
-rw-r--r--pkg/sentry/kernel/eventfd/eventfd.go20
-rw-r--r--pkg/sentry/kernel/eventfd/eventfd_test.go6
-rw-r--r--pkg/sentry/kernel/fasync/BUILD2
-rw-r--r--pkg/sentry/kernel/fasync/fasync.go55
-rw-r--r--pkg/sentry/kernel/fd_map.go364
-rw-r--r--pkg/sentry/kernel/fd_map_test.go136
-rw-r--r--pkg/sentry/kernel/fd_table.go380
-rw-r--r--pkg/sentry/kernel/fd_table_test.go192
-rw-r--r--pkg/sentry/kernel/fd_table_unsafe.go103
-rw-r--r--pkg/sentry/kernel/fs_context.go12
-rw-r--r--pkg/sentry/kernel/futex/BUILD2
-rw-r--r--pkg/sentry/kernel/futex/futex.go33
-rw-r--r--pkg/sentry/kernel/futex/futex_test.go2
-rw-r--r--pkg/sentry/kernel/ipc_namespace.go8
-rw-r--r--pkg/sentry/kernel/kdefs/BUILD10
-rw-r--r--pkg/sentry/kernel/kdefs/kdefs.go20
-rw-r--r--pkg/sentry/kernel/kernel.go183
-rw-r--r--pkg/sentry/kernel/kernel_state.go4
-rw-r--r--pkg/sentry/kernel/memevent/BUILD4
-rw-r--r--pkg/sentry/kernel/memevent/memory_events.go12
-rw-r--r--pkg/sentry/kernel/pending_signals.go6
-rw-r--r--pkg/sentry/kernel/pending_signals_state.go2
-rw-r--r--pkg/sentry/kernel/pipe/BUILD2
-rw-r--r--pkg/sentry/kernel/pipe/buffer.go2
-rw-r--r--pkg/sentry/kernel/pipe/buffer_test.go2
-rw-r--r--pkg/sentry/kernel/pipe/device.go2
-rw-r--r--pkg/sentry/kernel/pipe/node.go12
-rw-r--r--pkg/sentry/kernel/pipe/node_test.go14
-rw-r--r--pkg/sentry/kernel/pipe/pipe.go38
-rw-r--r--pkg/sentry/kernel/pipe/pipe_test.go8
-rw-r--r--pkg/sentry/kernel/pipe/reader.go2
-rw-r--r--pkg/sentry/kernel/pipe/reader_writer.go16
-rw-r--r--pkg/sentry/kernel/pipe/writer.go2
-rw-r--r--pkg/sentry/kernel/posixtimer.go8
-rw-r--r--pkg/sentry/kernel/ptrace.go10
-rw-r--r--pkg/sentry/kernel/ptrace_amd64.go6
-rw-r--r--pkg/sentry/kernel/ptrace_arm64.go6
-rw-r--r--pkg/sentry/kernel/rseq.go6
-rw-r--r--pkg/sentry/kernel/sched/BUILD2
-rw-r--r--pkg/sentry/kernel/seccomp.go12
-rw-r--r--pkg/sentry/kernel/semaphore/BUILD2
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore.go16
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore_test.go10
-rw-r--r--pkg/sentry/kernel/sessions.go24
-rw-r--r--pkg/sentry/kernel/shm/BUILD2
-rw-r--r--pkg/sentry/kernel/shm/device.go2
-rw-r--r--pkg/sentry/kernel/shm/shm.go27
-rw-r--r--pkg/sentry/kernel/signal.go8
-rw-r--r--pkg/sentry/kernel/signal_handlers.go4
-rw-r--r--pkg/sentry/kernel/syscalls.go8
-rw-r--r--pkg/sentry/kernel/syslog.go1
-rw-r--r--pkg/sentry/kernel/table_test.go4
-rw-r--r--pkg/sentry/kernel/task.go118
-rw-r--r--pkg/sentry/kernel/task_acct.go10
-rw-r--r--pkg/sentry/kernel/task_block.go4
-rw-r--r--pkg/sentry/kernel/task_clone.go60
-rw-r--r--pkg/sentry/kernel/task_context.go20
-rw-r--r--pkg/sentry/kernel/task_exec.go12
-rw-r--r--pkg/sentry/kernel/task_exit.go14
-rw-r--r--pkg/sentry/kernel/task_futex.go8
-rw-r--r--pkg/sentry/kernel/task_identity.go228
-rw-r--r--pkg/sentry/kernel/task_log.go6
-rw-r--r--pkg/sentry/kernel/task_net.go2
-rw-r--r--pkg/sentry/kernel/task_run.go14
-rw-r--r--pkg/sentry/kernel/task_sched.go14
-rw-r--r--pkg/sentry/kernel/task_signals.go14
-rw-r--r--pkg/sentry/kernel/task_start.go29
-rw-r--r--pkg/sentry/kernel/task_stop.go2
-rw-r--r--pkg/sentry/kernel/task_syscall.go14
-rw-r--r--pkg/sentry/kernel/task_test.go2
-rw-r--r--pkg/sentry/kernel/task_usermem.go6
-rw-r--r--pkg/sentry/kernel/thread_group.go23
-rw-r--r--pkg/sentry/kernel/threads.go4
-rw-r--r--pkg/sentry/kernel/time/BUILD2
-rw-r--r--pkg/sentry/kernel/time/context.go2
-rw-r--r--pkg/sentry/kernel/time/time.go11
-rw-r--r--pkg/sentry/kernel/timekeeper.go12
-rw-r--r--pkg/sentry/kernel/timekeeper_state.go2
-rw-r--r--pkg/sentry/kernel/timekeeper_test.go12
-rw-r--r--pkg/sentry/kernel/uts_namespace.go2
-rw-r--r--pkg/sentry/kernel/vdso.go10
97 files changed, 1437 insertions, 1141 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 04e375910..7b92f1b8d 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -85,7 +85,7 @@ proto_library(
go_proto_library(
name = "uncaught_signal_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto",
proto = ":uncaught_signal_proto",
visibility = ["//visibility:public"],
deps = ["//pkg/sentry/arch:registers_go_proto"],
@@ -96,7 +96,8 @@ go_library(
srcs = [
"abstract_socket_namespace.go",
"context.go",
- "fd_map.go",
+ "fd_table.go",
+ "fd_table_unsafe.go",
"fs_context.go",
"ipc_namespace.go",
"kernel.go",
@@ -147,11 +148,11 @@ go_library(
"vdso.go",
"version.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel",
imports = [
- "gvisor.googlesource.com/gvisor/pkg/bpf",
- "gvisor.googlesource.com/gvisor/pkg/sentry/device",
- "gvisor.googlesource.com/gvisor/pkg/tcpip",
+ "gvisor.dev/gvisor/pkg/bpf",
+ "gvisor.dev/gvisor/pkg/sentry/device",
+ "gvisor.dev/gvisor/pkg/tcpip",
],
visibility = ["//:sandbox"],
deps = [
@@ -179,7 +180,6 @@ go_library(
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/epoll",
"//pkg/sentry/kernel/futex",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/sched",
"//pkg/sentry/kernel/semaphore",
"//pkg/sentry/kernel/shm",
@@ -214,7 +214,7 @@ go_test(
name = "kernel_test",
size = "small",
srcs = [
- "fd_map_test.go",
+ "fd_table_test.go",
"table_test.go",
"task_test.go",
"timekeeper_test.go",
@@ -223,9 +223,10 @@ go_test(
deps = [
"//pkg/abi",
"//pkg/sentry/arch",
+ "//pkg/sentry/context",
"//pkg/sentry/context/contexttest",
+ "//pkg/sentry/fs",
"//pkg/sentry/fs/filetest",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/sched",
"//pkg/sentry/limits",
"//pkg/sentry/pgalloc",
diff --git a/pkg/sentry/kernel/abstract_socket_namespace.go b/pkg/sentry/kernel/abstract_socket_namespace.go
index 5ce52e66c..244655b5c 100644
--- a/pkg/sentry/kernel/abstract_socket_namespace.go
+++ b/pkg/sentry/kernel/abstract_socket_namespace.go
@@ -18,8 +18,8 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
)
// +stateify savable
diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD
index abd4f2dae..42779baa9 100644
--- a/pkg/sentry/kernel/auth/BUILD
+++ b/pkg/sentry/kernel/auth/BUILD
@@ -4,6 +4,17 @@ load("//tools/go_generics:defs.bzl", "go_template_instance")
load("//tools/go_stateify:defs.bzl", "go_library")
go_template_instance(
+ name = "atomicptr_credentials",
+ out = "atomicptr_credentials.go",
+ package = "auth",
+ suffix = "Credentials",
+ template = "//third_party/gvsync:generic_atomicptr",
+ types = {
+ "Value": "Credentials",
+ },
+)
+
+go_template_instance(
name = "id_map_range",
out = "id_map_range.go",
package = "auth",
@@ -34,6 +45,7 @@ go_template_instance(
go_library(
name = "auth",
srcs = [
+ "atomicptr_credentials.go",
"auth.go",
"capability_set.go",
"context.go",
@@ -45,7 +57,7 @@ go_library(
"id_map_set.go",
"user_namespace.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/auth",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/auth/capability_set.go b/pkg/sentry/kernel/auth/capability_set.go
index 7a0c967cd..fc8c6745c 100644
--- a/pkg/sentry/kernel/auth/capability_set.go
+++ b/pkg/sentry/kernel/auth/capability_set.go
@@ -15,8 +15,8 @@
package auth
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bits"
)
// A CapabilitySet is a set of capabilities implemented as a bitset. The zero
@@ -24,7 +24,7 @@ import (
type CapabilitySet uint64
// AllCapabilities is a CapabilitySet containing all valid capabilities.
-var AllCapabilities = CapabilitySetOf(linux.MaxCapability+1) - 1
+var AllCapabilities = CapabilitySetOf(linux.CAP_LAST_CAP+1) - 1
// CapabilitySetOf returns a CapabilitySet containing only the given
// capability.
diff --git a/pkg/sentry/kernel/auth/context.go b/pkg/sentry/kernel/auth/context.go
index 16d110610..5c0e7d6b6 100644
--- a/pkg/sentry/kernel/auth/context.go
+++ b/pkg/sentry/kernel/auth/context.go
@@ -15,7 +15,7 @@
package auth
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the auth package's type for context.Context.Value keys.
diff --git a/pkg/sentry/kernel/auth/credentials.go b/pkg/sentry/kernel/auth/credentials.go
index 1511a0324..e057d2c6d 100644
--- a/pkg/sentry/kernel/auth/credentials.go
+++ b/pkg/sentry/kernel/auth/credentials.go
@@ -15,8 +15,8 @@
package auth
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Credentials contains information required to authorize privileged operations
diff --git a/pkg/sentry/kernel/auth/id_map.go b/pkg/sentry/kernel/auth/id_map.go
index e5d6028d6..3d74bc610 100644
--- a/pkg/sentry/kernel/auth/id_map.go
+++ b/pkg/sentry/kernel/auth/id_map.go
@@ -15,9 +15,9 @@
package auth
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// MapFromKUID translates kuid, a UID in the root namespace, to a UID in ns.
diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go
index a40dd668f..af28ccc65 100644
--- a/pkg/sentry/kernel/auth/user_namespace.go
+++ b/pkg/sentry/kernel/auth/user_namespace.go
@@ -18,7 +18,7 @@ import (
"math"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// A UserNamespace represents a user namespace. See user_namespaces(7) for
diff --git a/pkg/sentry/kernel/context.go b/pkg/sentry/kernel/context.go
index a1a084eab..e3f5b0d83 100644
--- a/pkg/sentry/kernel/context.go
+++ b/pkg/sentry/kernel/context.go
@@ -15,8 +15,8 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the kernel package's type for context.Context.Value keys.
diff --git a/pkg/sentry/kernel/contexttest/BUILD b/pkg/sentry/kernel/contexttest/BUILD
index bfb2a0b73..bec13a3d9 100644
--- a/pkg/sentry/kernel/contexttest/BUILD
+++ b/pkg/sentry/kernel/contexttest/BUILD
@@ -6,7 +6,7 @@ go_library(
name = "contexttest",
testonly = 1,
srcs = ["contexttest.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/sentry/context",
diff --git a/pkg/sentry/kernel/contexttest/contexttest.go b/pkg/sentry/kernel/contexttest/contexttest.go
index ae67e2a25..82f9d8922 100644
--- a/pkg/sentry/kernel/contexttest/contexttest.go
+++ b/pkg/sentry/kernel/contexttest/contexttest.go
@@ -19,11 +19,11 @@ package contexttest
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
)
// Context returns a Context that may be used in tests. Uses ptrace as the
diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD
index 3ac59e13e..f46c43128 100644
--- a/pkg/sentry/kernel/epoll/BUILD
+++ b/pkg/sentry/kernel/epoll/BUILD
@@ -22,7 +22,7 @@ go_library(
"epoll_list.go",
"epoll_state.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/epoll",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/refs",
@@ -30,7 +30,6 @@ go_library(
"//pkg/sentry/fs",
"//pkg/sentry/fs/anon",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go
index 43ae22a5d..9c0a4e1b4 100644
--- a/pkg/sentry/kernel/epoll/epoll.go
+++ b/pkg/sentry/kernel/epoll/epoll.go
@@ -21,14 +21,13 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/anon"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Event describes the event mask that was observed and the user data to be
@@ -61,7 +60,7 @@ const (
// +stateify savable
type FileIdentifier struct {
File *fs.File `state:"wait"`
- Fd kdefs.FD
+ Fd int32
}
// pollEntry holds all the state associated with an event poll entry, that is,
@@ -155,7 +154,7 @@ var cycleMu sync.Mutex
// NewEventPoll allocates and initializes a new event poll object.
func NewEventPoll(ctx context.Context) *fs.File {
// name matches fs/eventpoll.c:epoll_create1.
- dirent := fs.NewDirent(anon.NewInode(ctx), fmt.Sprintf("anon_inode:[eventpoll]"))
+ dirent := fs.NewDirent(ctx, anon.NewInode(ctx), fmt.Sprintf("anon_inode:[eventpoll]"))
// Release the initial dirent reference after NewFile takes a reference.
defer dirent.DecRef()
return fs.NewFile(ctx, dirent, fs.FileFlags{}, &EventPoll{
diff --git a/pkg/sentry/kernel/epoll/epoll_state.go b/pkg/sentry/kernel/epoll/epoll_state.go
index 4c3c38f9e..a0d35d350 100644
--- a/pkg/sentry/kernel/epoll/epoll_state.go
+++ b/pkg/sentry/kernel/epoll/epoll_state.go
@@ -15,8 +15,8 @@
package epoll
import (
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// afterLoad is invoked by stateify.
diff --git a/pkg/sentry/kernel/epoll/epoll_test.go b/pkg/sentry/kernel/epoll/epoll_test.go
index 49b781b69..4a20d4c82 100644
--- a/pkg/sentry/kernel/epoll/epoll_test.go
+++ b/pkg/sentry/kernel/epoll/epoll_test.go
@@ -17,9 +17,9 @@ package epoll
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/filetest"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs/filetest"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestFileDestroyed(t *testing.T) {
diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD
index f2f1a1223..1c5f979d4 100644
--- a/pkg/sentry/kernel/eventfd/BUILD
+++ b/pkg/sentry/kernel/eventfd/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
go_library(
name = "eventfd",
srcs = ["eventfd.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go
index fe474cbf0..12f0d429b 100644
--- a/pkg/sentry/kernel/eventfd/eventfd.go
+++ b/pkg/sentry/kernel/eventfd/eventfd.go
@@ -21,15 +21,15 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/anon"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// EventOperations represents an event with the semantics of Linux's file-based event
@@ -68,7 +68,7 @@ type EventOperations struct {
// New creates a new event object with the supplied initial value and mode.
func New(ctx context.Context, initVal uint64, semMode bool) *fs.File {
// name matches fs/eventfd.c:eventfd_file_create.
- dirent := fs.NewDirent(anon.NewInode(ctx), "anon_inode:[eventfd]")
+ dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[eventfd]")
// Release the initial dirent reference after NewFile takes a reference.
defer dirent.DecRef()
return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &EventOperations{
diff --git a/pkg/sentry/kernel/eventfd/eventfd_test.go b/pkg/sentry/kernel/eventfd/eventfd_test.go
index 1159638e5..018c7f3ef 100644
--- a/pkg/sentry/kernel/eventfd/eventfd_test.go
+++ b/pkg/sentry/kernel/eventfd/eventfd_test.go
@@ -17,9 +17,9 @@ package eventfd
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestEventfd(t *testing.T) {
diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD
index 59b4a49e1..5eddca115 100644
--- a/pkg/sentry/kernel/fasync/BUILD
+++ b/pkg/sentry/kernel/fasync/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library")
go_library(
name = "fasync",
srcs = ["fasync.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/fasync",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go
index 84cd08501..6b0bb0324 100644
--- a/pkg/sentry/kernel/fasync/fasync.go
+++ b/pkg/sentry/kernel/fasync/fasync.go
@@ -18,11 +18,11 @@ package fasync
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// New creates a new FileAsync.
@@ -34,9 +34,23 @@ func New() fs.FileAsync {
//
// +stateify savable
type FileAsync struct {
- mu sync.Mutex `state:"nosave"`
- e waiter.Entry
- requester *auth.Credentials
+ // e is immutable after first use (which is protected by mu below).
+ e waiter.Entry
+
+ // regMu protects registration and unregistration actions on e.
+ //
+ // regMu must be held while registration decisions are being made
+ // through the registration action itself.
+ //
+ // Lock ordering: regMu, mu.
+ regMu sync.Mutex `state:"nosave"`
+
+ // mu protects all following fields.
+ //
+ // Lock ordering: e.mu, mu.
+ mu sync.Mutex `state:"nosave"`
+ requester *auth.Credentials
+ registered bool
// Only one of the following is allowed to be non-nil.
recipientPG *kernel.ProcessGroup
@@ -47,7 +61,7 @@ type FileAsync struct {
// Callback sends a signal.
func (a *FileAsync) Callback(e *waiter.Entry) {
a.mu.Lock()
- if a.e.Callback == nil {
+ if !a.registered {
a.mu.Unlock()
return
}
@@ -80,14 +94,21 @@ func (a *FileAsync) Callback(e *waiter.Entry) {
//
// The file must not be currently registered.
func (a *FileAsync) Register(w waiter.Waitable) {
+ a.regMu.Lock()
+ defer a.regMu.Unlock()
a.mu.Lock()
- defer a.mu.Unlock()
- if a.e.Callback != nil {
+ if a.registered {
+ a.mu.Unlock()
panic("registering already registered file")
}
- a.e.Callback = a
+ if a.e.Callback == nil {
+ a.e.Callback = a
+ }
+ a.registered = true
+
+ a.mu.Unlock()
w.EventRegister(&a.e, waiter.EventIn|waiter.EventOut|waiter.EventErr|waiter.EventHUp)
}
@@ -95,15 +116,19 @@ func (a *FileAsync) Register(w waiter.Waitable) {
//
// The file must be currently registered.
func (a *FileAsync) Unregister(w waiter.Waitable) {
+ a.regMu.Lock()
+ defer a.regMu.Unlock()
a.mu.Lock()
- defer a.mu.Unlock()
- if a.e.Callback == nil {
+ if !a.registered {
+ a.mu.Unlock()
panic("unregistering unregistered file")
}
+ a.registered = false
+
+ a.mu.Unlock()
w.EventUnregister(&a.e)
- a.e.Callback = nil
}
// Owner returns who is currently getting signals. All return values will be
diff --git a/pkg/sentry/kernel/fd_map.go b/pkg/sentry/kernel/fd_map.go
deleted file mode 100644
index c5636d233..000000000
--- a/pkg/sentry/kernel/fd_map.go
+++ /dev/null
@@ -1,364 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package kernel
-
-import (
- "bytes"
- "fmt"
- "sort"
- "sync"
- "sync/atomic"
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
-)
-
-// FDs is an ordering of FD's that can be made stable.
-type FDs []kdefs.FD
-
-func (f FDs) Len() int {
- return len(f)
-}
-
-func (f FDs) Swap(i, j int) {
- f[i], f[j] = f[j], f[i]
-}
-
-func (f FDs) Less(i, j int) bool {
- return f[i] < f[j]
-}
-
-// FDFlags define flags for an individual descriptor.
-//
-// +stateify savable
-type FDFlags struct {
- // CloseOnExec indicates the descriptor should be closed on exec.
- CloseOnExec bool
-}
-
-// ToLinuxFileFlags converts a kernel.FDFlags object to a Linux file flags
-// representation.
-func (f FDFlags) ToLinuxFileFlags() (mask uint) {
- if f.CloseOnExec {
- mask |= linux.O_CLOEXEC
- }
- return
-}
-
-// ToLinuxFDFlags converts a kernel.FDFlags object to a Linux descriptor flags
-// representation.
-func (f FDFlags) ToLinuxFDFlags() (mask uint) {
- if f.CloseOnExec {
- mask |= linux.FD_CLOEXEC
- }
- return
-}
-
-// descriptor holds the details about a file descriptor, namely a pointer the
-// file itself and the descriptor flags.
-//
-// +stateify savable
-type descriptor struct {
- file *fs.File
- flags FDFlags
-}
-
-// FDMap is used to manage File references and flags.
-//
-// +stateify savable
-type FDMap struct {
- refs.AtomicRefCount
- k *Kernel
- files map[kdefs.FD]descriptor
- mu sync.RWMutex `state:"nosave"`
- uid uint64
-}
-
-// ID returns a unique identifier for this FDMap.
-func (f *FDMap) ID() uint64 {
- return f.uid
-}
-
-// NewFDMap allocates a new FDMap that may be used by tasks in k.
-func (k *Kernel) NewFDMap() *FDMap {
- return &FDMap{
- k: k,
- files: make(map[kdefs.FD]descriptor),
- uid: atomic.AddUint64(&k.fdMapUids, 1),
- }
-}
-
-// destroy removes all of the file descriptors from the map.
-func (f *FDMap) destroy() {
- f.RemoveIf(func(*fs.File, FDFlags) bool {
- return true
- })
-}
-
-// DecRef implements RefCounter.DecRef with destructor f.destroy.
-func (f *FDMap) DecRef() {
- f.DecRefWithDestructor(f.destroy)
-}
-
-// Size returns the number of file descriptor slots currently allocated.
-func (f *FDMap) Size() int {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- return len(f.files)
-}
-
-// String is a stringer for FDMap.
-func (f *FDMap) String() string {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- var b bytes.Buffer
- for k, v := range f.files {
- n, _ := v.file.Dirent.FullName(nil /* root */)
- b.WriteString(fmt.Sprintf("\tfd:%d => name %s\n", k, n))
- }
- return b.String()
-}
-
-// NewFDFrom allocates a new FD guaranteed to be the lowest number available
-// greater than or equal to from. This property is important as Unix programs
-// tend to count on this allocation order.
-func (f *FDMap) NewFDFrom(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) (kdefs.FD, error) {
- if fd < 0 {
- // Don't accept negative FDs.
- return 0, syscall.EINVAL
- }
-
- f.mu.Lock()
- defer f.mu.Unlock()
-
- // Finds the lowest fd not in the handles map.
- lim := limitSet.Get(limits.NumberOfFiles)
- for i := fd; lim.Cur == limits.Infinity || i < kdefs.FD(lim.Cur); i++ {
- if _, ok := f.files[i]; !ok {
- file.IncRef()
- f.files[i] = descriptor{file, flags}
- return i, nil
- }
- }
-
- return -1, syscall.EMFILE
-}
-
-// NewFDAt sets the file reference for the given FD. If there is an
-// active reference for that FD, the ref count for that existing reference
-// is decremented.
-func (f *FDMap) NewFDAt(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) error {
- if fd < 0 {
- // Don't accept negative FDs.
- return syscall.EBADF
- }
-
- // In this one case we do not do a defer of the Unlock. The
- // reason is that we must have done all the work needed for
- // discarding any old open file before we return to the
- // caller. In other words, the DecRef(), below, must have
- // completed by the time we return to the caller to ensure
- // side effects are, in fact, effected. A classic example is
- // dup2(fd1, fd2); if fd2 was already open, it must be closed,
- // and we don't want to resume the caller until it is; we have
- // to block on the DecRef(). Hence we can not just do a 'go
- // oldfile.DecRef()', since there would be no guarantee that
- // it would be done before we the caller resumed. Since we
- // must wait for the DecRef() to finish, and that could take
- // time, it's best to first call f.muUnlock beore so we are
- // not blocking other uses of this FDMap on the DecRef() call.
- f.mu.Lock()
- oldDesc, oldExists := f.files[fd]
- lim := limitSet.Get(limits.NumberOfFiles).Cur
- // if we're closing one then the effective limit is one
- // more than the actual limit.
- if oldExists && lim != limits.Infinity {
- lim++
- }
- if lim != limits.Infinity && fd >= kdefs.FD(lim) {
- f.mu.Unlock()
- return syscall.EMFILE
- }
-
- file.IncRef()
- f.files[fd] = descriptor{file, flags}
- f.mu.Unlock()
-
- if oldExists {
- oldDesc.file.DecRef()
- }
- return nil
-}
-
-// SetFlags sets the flags for the given file descriptor, if it is valid.
-func (f *FDMap) SetFlags(fd kdefs.FD, flags FDFlags) {
- f.mu.Lock()
- defer f.mu.Unlock()
-
- desc, ok := f.files[fd]
- if !ok {
- return
- }
-
- f.files[fd] = descriptor{desc.file, flags}
-}
-
-// GetDescriptor returns a reference to the file and the flags for the FD. It
-// bumps its reference count as well. It returns nil if there is no File
-// for the FD, i.e. if the FD is invalid. The caller must use DecRef
-// when they are done.
-func (f *FDMap) GetDescriptor(fd kdefs.FD) (*fs.File, FDFlags) {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- if desc, ok := f.files[fd]; ok {
- desc.file.IncRef()
- return desc.file, desc.flags
- }
- return nil, FDFlags{}
-}
-
-// GetFile returns a reference to the File for the FD and bumps
-// its reference count as well. It returns nil if there is no File
-// for the FD, i.e. if the FD is invalid. The caller must use DecRef
-// when they are done.
-func (f *FDMap) GetFile(fd kdefs.FD) *fs.File {
- f.mu.RLock()
- if desc, ok := f.files[fd]; ok {
- desc.file.IncRef()
- f.mu.RUnlock()
- return desc.file
- }
- f.mu.RUnlock()
- return nil
-}
-
-// fds returns an ordering of FDs.
-func (f *FDMap) fds() FDs {
- fds := make(FDs, 0, len(f.files))
- for fd := range f.files {
- fds = append(fds, fd)
- }
- sort.Sort(fds)
- return fds
-}
-
-// GetFDs returns a list of valid fds.
-func (f *FDMap) GetFDs() FDs {
- f.mu.RLock()
- defer f.mu.RUnlock()
- return f.fds()
-}
-
-// GetRefs returns a stable slice of references to all files and bumps the
-// reference count on each. The caller must use DecRef on each reference when
-// they're done using the slice.
-func (f *FDMap) GetRefs() []*fs.File {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- fds := f.fds()
- fs := make([]*fs.File, 0, len(fds))
- for _, fd := range fds {
- desc := f.files[fd]
- desc.file.IncRef()
- fs = append(fs, desc.file)
- }
- return fs
-}
-
-// Fork returns an independent FDMap pointing to the same descriptors.
-func (f *FDMap) Fork() *FDMap {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- clone := f.k.NewFDMap()
-
- // Grab a extra reference for every file.
- for fd, desc := range f.files {
- desc.file.IncRef()
- clone.files[fd] = desc
- }
-
- // That's it!
- return clone
-}
-
-// unlock releases all file locks held by this FDMap's uid. Must only be
-// called on a non-nil *fs.File.
-func (f *FDMap) unlock(file *fs.File) {
- id := lock.UniqueID(f.ID())
- file.Dirent.Inode.LockCtx.Posix.UnlockRegion(id, lock.LockRange{0, lock.LockEOF})
-}
-
-// inotifyFileClose generates the appropriate inotify events for f being closed.
-func inotifyFileClose(f *fs.File) {
- var ev uint32
- d := f.Dirent
-
- if fs.IsDir(d.Inode.StableAttr) {
- ev |= linux.IN_ISDIR
- }
-
- if f.Flags().Write {
- ev |= linux.IN_CLOSE_WRITE
- } else {
- ev |= linux.IN_CLOSE_NOWRITE
- }
-
- d.InotifyEvent(ev, 0)
-}
-
-// Remove removes an FD from the FDMap, and returns (File, true) if a File
-// one was found. Callers are expected to decrement the reference count on
-// the File. Otherwise returns (nil, false).
-func (f *FDMap) Remove(fd kdefs.FD) (*fs.File, bool) {
- f.mu.Lock()
- desc := f.files[fd]
- delete(f.files, fd)
- f.mu.Unlock()
- if desc.file != nil {
- f.unlock(desc.file)
- inotifyFileClose(desc.file)
- return desc.file, true
- }
- return nil, false
-}
-
-// RemoveIf removes all FDs where cond is true.
-func (f *FDMap) RemoveIf(cond func(*fs.File, FDFlags) bool) {
- var removed []*fs.File
- f.mu.Lock()
- for fd, desc := range f.files {
- if desc.file != nil && cond(desc.file, desc.flags) {
- delete(f.files, fd)
- removed = append(removed, desc.file)
- }
- }
- f.mu.Unlock()
-
- for _, file := range removed {
- f.unlock(file)
- inotifyFileClose(file)
- file.DecRef()
- }
-}
diff --git a/pkg/sentry/kernel/fd_map_test.go b/pkg/sentry/kernel/fd_map_test.go
deleted file mode 100644
index 22db4c7cf..000000000
--- a/pkg/sentry/kernel/fd_map_test.go
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package kernel
-
-import (
- "testing"
-
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/filetest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
-)
-
-const (
- // maxFD is the maximum FD to try to create in the map.
- // This number of open files has been seen in the wild.
- maxFD = 2 * 1024
-)
-
-func newTestFDMap() *FDMap {
- return &FDMap{
- files: make(map[kdefs.FD]descriptor),
- }
-}
-
-// TestFDMapMany allocates maxFD FDs, i.e. maxes out the FDMap,
-// until there is no room, then makes sure that NewFDAt works
-// and also that if we remove one and add one that works too.
-func TestFDMapMany(t *testing.T) {
- file := filetest.NewTestFile(t)
- limitSet := limits.NewLimitSet()
- limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true /* privileged */)
-
- f := newTestFDMap()
- for i := 0; i < maxFD; i++ {
- if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("Allocated %v FDs but wanted to allocate %v", i, maxFD)
- }
- }
-
- if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err == nil {
- t.Fatalf("f.NewFDFrom(0, r) in full map: got nil, wanted error")
- }
-
- if err := f.NewFDAt(1, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("f.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
- }
-}
-
-// TestFDMap does a set of simple tests to make sure simple adds,
-// removes, GetRefs, and DecRefs work. The ordering is just weird
-// enough that a table-driven approach seemed clumsy.
-func TestFDMap(t *testing.T) {
- file := filetest.NewTestFile(t)
- limitSet := limits.NewLimitSet()
- limitSet.Set(limits.NumberOfFiles, limits.Limit{1, maxFD}, true /* privileged */)
-
- f := newTestFDMap()
- if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("Adding an FD to an empty 1-size map: got %v, want nil", err)
- }
-
- if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err == nil {
- t.Fatalf("Adding an FD to a filled 1-size map: got nil, wanted an error")
- }
-
- largeLimit := limits.Limit{maxFD, maxFD}
- limitSet.Set(limits.NumberOfFiles, largeLimit, true /* privileged */)
-
- if fd, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("Adding an FD to a resized map: got %v, want nil", err)
- } else if fd != kdefs.FD(1) {
- t.Fatalf("Added an FD to a resized map: got %v, want 1", fd)
- }
-
- if err := f.NewFDAt(1, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("Replacing FD 1 via f.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
- }
-
- if err := f.NewFDAt(maxFD+1, file, FDFlags{}, limitSet); err == nil {
- t.Fatalf("Using an FD that was too large via f.NewFDAt(%v, r, FDFlags{}): got nil, wanted an error", maxFD+1)
- }
-
- if ref := f.GetFile(1); ref == nil {
- t.Fatalf("f.GetFile(1): got nil, wanted %v", file)
- }
-
- if ref := f.GetFile(2); ref != nil {
- t.Fatalf("f.GetFile(2): got a %v, wanted nil", ref)
- }
-
- ref, ok := f.Remove(1)
- if !ok {
- t.Fatalf("f.Remove(1) for an existing FD: failed, want success")
- }
- ref.DecRef()
-
- if ref, ok := f.Remove(1); ok {
- ref.DecRef()
- t.Fatalf("r.Remove(1) for a removed FD: got success, want failure")
- }
-
-}
-
-func TestDescriptorFlags(t *testing.T) {
- file := filetest.NewTestFile(t)
- f := newTestFDMap()
- limitSet := limits.NewLimitSet()
- limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true /* privileged */)
-
- origFlags := FDFlags{CloseOnExec: true}
-
- if err := f.NewFDAt(2, file, origFlags, limitSet); err != nil {
- t.Fatalf("f.NewFDAt(2, r, FDFlags{}): got %v, wanted nil", err)
- }
-
- newFile, newFlags := f.GetDescriptor(2)
- if newFile == nil {
- t.Fatalf("f.GetFile(2): got a %v, wanted nil", newFile)
- }
-
- if newFlags != origFlags {
- t.Fatalf("new File flags %+v don't match original %+v", newFlags, origFlags)
- }
-}
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
new file mode 100644
index 000000000..1f3a57dc1
--- /dev/null
+++ b/pkg/sentry/kernel/fd_table.go
@@ -0,0 +1,380 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+ "sync"
+ "sync/atomic"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/lock"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+)
+
+// FDFlags defines the flags for an individual descriptor.
+//
+// +stateify savable
+type FDFlags struct {
+ // CloseOnExec indicates the descriptor should be closed on exec. It is
+ // reported as O_CLOEXEC by ToLinuxFileFlags and as FD_CLOEXEC by
+ // ToLinuxFDFlags.
+ CloseOnExec bool
+}
+
+// ToLinuxFileFlags returns the Linux file flags representation of f:
+// O_CLOEXEC when CloseOnExec is set, and zero otherwise.
+func (f FDFlags) ToLinuxFileFlags() (mask uint) {
+	if !f.CloseOnExec {
+		return 0
+	}
+	return linux.O_CLOEXEC
+}
+
+// ToLinuxFDFlags returns the Linux descriptor flags representation of f:
+// FD_CLOEXEC when CloseOnExec is set, and zero otherwise.
+func (f FDFlags) ToLinuxFDFlags() (mask uint) {
+	if !f.CloseOnExec {
+		return 0
+	}
+	return linux.FD_CLOEXEC
+}
+
+// descriptor holds the details about a file descriptor, namely a pointer to
+// the file itself and the descriptor flags.
+//
+// Note that this is immutable and can only be changed via operations on the
+// descriptorTable: FDTable.set installs a freshly allocated descriptor in the
+// slot instead of modifying an existing one.
+//
+// +stateify savable
+type descriptor struct {
+ // file is the file this descriptor refers to.
+ file *fs.File
+ // flags holds the flags for this descriptor.
+ flags FDFlags
+}
+
+// FDTable is used to manage File references and flags.
+//
+// +stateify savable
+type FDTable struct {
+ refs.AtomicRefCount
+ // k is the kernel passed to NewFDTable; Fork uses it to allocate the
+ // clone.
+ k *Kernel
+
+ // uid is a unique identifier. NewFDTable assigns it from the kernel's
+ // fdMapUids counter, and drop uses it as the owner ID when releasing
+ // POSIX locks.
+ uid uint64
+
+ // mu protects below. It is held by the operations that modify the table;
+ // lookups through get do not take it.
+ mu sync.Mutex `state:"nosave"`
+
+ // used contains the number of non-nil entries. It is updated by set and
+ // read by Size using atomic operations.
+ used int32
+
+ // descriptorTable holds descriptors.
+ descriptorTable `state:".(map[int32]descriptor)"`
+}
+
+// saveDescriptorTable flattens the table into a map from FD number to
+// descriptor, with one entry for every file visited by forEach.
+func (f *FDTable) saveDescriptorTable() map[int32]descriptor {
+	saved := make(map[int32]descriptor)
+	record := func(fd int32, file *fs.File, flags FDFlags) {
+		saved[fd] = descriptor{file: file, flags: flags}
+	}
+	f.forEach(record)
+	return saved
+}
+
+// loadDescriptorTable rebuilds the table from a map produced by
+// saveDescriptorTable.
+func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) {
+	// Start from an empty table.
+	f.init()
+
+	for fd, desc := range m {
+		// set acquires a table reference on the file. The table's
+		// reference is already accounted for in the file, so an extra
+		// one is not needed here: release the one set just took.
+		f.set(fd, desc.file, desc.flags)
+		desc.file.DecRef()
+	}
+}
+
+// drop releases the table's claim on file: it unlocks every POSIX lock
+// region held under this table's uid, emits the inotify close event for the
+// file's dirent, and finally drops the table reference.
+func (f *FDTable) drop(file *fs.File) {
+	// Release all locks owned by this table, across the whole file.
+	owner := lock.UniqueID(f.uid)
+	file.Dirent.Inode.LockCtx.Posix.UnlockRegion(owner, lock.LockRange{0, lock.LockEOF})
+
+	// Build and send the inotify close event.
+	dirent := file.Dirent
+	var ev uint32
+	if fs.IsDir(dirent.Inode.StableAttr) {
+		ev = linux.IN_ISDIR
+	}
+	switch {
+	case file.Flags().Write:
+		ev |= linux.IN_CLOSE_WRITE
+	default:
+		ev |= linux.IN_CLOSE_NOWRITE
+	}
+	dirent.InotifyEvent(ev, 0)
+
+	// Drop the table reference.
+	file.DecRef()
+}
+
+// ID returns a unique identifier for this FDTable. This is the uid that
+// NewFDTable assigns from the kernel's fdMapUids counter.
+func (f *FDTable) ID() uint64 {
+ return f.uid
+}
+
+// NewFDTable allocates a new, empty FDTable that may be used by tasks in k.
+// The table's uid is the next value of k's fdMapUids counter.
+func (k *Kernel) NewFDTable() *FDTable {
+	table := new(FDTable)
+	table.k = k
+	table.uid = atomic.AddUint64(&k.fdMapUids, 1)
+	table.init()
+	return table
+}
+
+// destroy removes every file descriptor from the table.
+func (f *FDTable) destroy() {
+	matchAll := func(*fs.File, FDFlags) bool { return true }
+	f.RemoveIf(matchAll)
+}
+
+// DecRef implements RefCounter.DecRef with destructor f.destroy, which
+// removes all descriptors from the table.
+func (f *FDTable) DecRef() {
+ f.DecRefWithDestructor(f.destroy)
+}
+
+// Size returns the number of descriptors currently installed in the table,
+// i.e. the count of non-nil entries. The counter is read atomically.
+func (f *FDTable) Size() int {
+	return int(atomic.LoadInt32(&f.used))
+}
+
+// forEach iterates over all non-nil files, in increasing FD order, calling fn
+// for each one. A reference on the file is held for the duration of the call
+// to fn and released afterwards.
+//
+// It is the caller's responsibility to acquire an appropriate lock.
+func (f *FDTable) forEach(fn func(fd int32, file *fs.File, flags FDFlags)) {
+ fd := int32(0)
+ for {
+ file, flags, ok := f.get(fd)
+ if !ok {
+ // fd is past the end of the table; nothing left to visit.
+ break
+ }
+ if file != nil {
+ if !file.TryIncRef() {
+ // fd has not been advanced, so this re-reads the same slot.
+ continue // Race caught.
+ }
+ fn(int32(fd), file, flags)
+ file.DecRef()
+ }
+ fd++
+ }
+}
+
+// String implements fmt.Stringer. It produces one line per installed
+// descriptor, giving the FD number and the full name of the file's dirent.
+func (f *FDTable) String() string {
+	var out bytes.Buffer
+	f.forEach(func(fd int32, file *fs.File, flags FDFlags) {
+		name, _ := file.Dirent.FullName(nil /* root */)
+		fmt.Fprintf(&out, "\tfd:%d => name %s\n", fd, name)
+	})
+	return out.String()
+}
+
+// NewFDs allocates new FDs guaranteed to be the lowest number available
+// greater than or equal to the fd parameter. All files will share the set
+// flags. Success is guaranteed to be all or none.
+//
+// EINVAL is returned for a negative fd. EMFILE is returned when fd is at or
+// beyond the NumberOfFiles soft limit found in ctx, or when there are not
+// enough free slots below that limit for all of files; in the latter case
+// every entry installed by this call is cleared again before returning.
+func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags FDFlags) (fds []int32, err error) {
+	if fd < 0 {
+		// Don't accept negative FDs.
+		return nil, syscall.EINVAL
+	}
+
+	// Default limit.
+	end := int32(math.MaxInt32)
+
+	// Ensure we don't get past the provided limit.
+	if limitSet := limits.FromContext(ctx); limitSet != nil {
+		lim := limitSet.Get(limits.NumberOfFiles)
+		// Only narrow end when the limit fits in an int32. Converting a
+		// larger value would truncate it or wrap it negative, producing
+		// a bogus limit; such limits are already covered by the default
+		// above.
+		if lim.Cur != limits.Infinity && lim.Cur < uint64(math.MaxInt32) {
+			end = int32(lim.Cur)
+		}
+		if fd >= end {
+			return nil, syscall.EMFILE
+		}
+	}
+
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	// Install all entries, taking the lowest free slots at or above fd.
+	for i := fd; i < end && len(fds) < len(files); i++ {
+		if d, _, _ := f.get(i); d == nil {
+			f.set(i, files[len(fds)], flags) // Set the descriptor.
+			fds = append(fds, i)             // Record the file descriptor.
+		}
+	}
+
+	// Failure? Unwind existing FDs.
+	if len(fds) < len(files) {
+		for _, i := range fds {
+			f.set(i, nil, FDFlags{}) // Zap entry.
+		}
+		return nil, syscall.EMFILE
+	}
+
+	return fds, nil
+}
+
+// NewFDAt installs file with the given flags at exactly fd. If a different
+// file is already installed there, set drops the table's reference on it.
+//
+// EBADF is returned for a negative fd, and EMFILE when fd is at or beyond
+// the NumberOfFiles soft limit found in ctx.
+func (f *FDTable) NewFDAt(ctx context.Context, fd int32, file *fs.File, flags FDFlags) error {
+	// Negative FDs are never accepted.
+	if fd < 0 {
+		return syscall.EBADF
+	}
+
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	// Enforce the limit on open files, when the context carries one.
+	limitSet := limits.FromContext(ctx)
+	if limitSet != nil {
+		lim := limitSet.Get(limits.NumberOfFiles)
+		if lim.Cur != limits.Infinity && uint64(fd) >= lim.Cur {
+			return syscall.EMFILE
+		}
+	}
+
+	// Install the entry.
+	f.set(fd, file, flags)
+	return nil
+}
+
+// SetFlags replaces the flags of the given file descriptor.
+//
+// EBADF is returned if fd is negative or no file is installed at fd;
+// otherwise the flags are updated and nil is returned.
+func (f *FDTable) SetFlags(fd int32, flags FDFlags) error {
+	// Negative FDs are never accepted.
+	if fd < 0 {
+		return syscall.EBADF
+	}
+
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	current, _, _ := f.get(fd)
+	if current != nil {
+		// Re-install the same file with the new flags.
+		f.set(fd, current, flags)
+		return nil
+	}
+
+	// No file found.
+	return syscall.EBADF
+}
+
+// Get returns a reference to the file and the flags for the FD or nil if no
+// file is defined for the given fd. Negative fds always yield nil.
+//
+// The lookup does not take f.mu.
+//
+// N.B. Callers are required to use DecRef when they are done.
+//
+//go:nosplit
+func (f *FDTable) Get(fd int32) (*fs.File, FDFlags) {
+ if fd < 0 {
+ return nil, FDFlags{}
+ }
+
+ for {
+ file, flags, _ := f.get(fd)
+ if file != nil {
+ if !file.TryIncRef() {
+ // Could not take a reference on the file just read; look
+ // the slot up again.
+ continue // Race caught.
+ }
+ // Reference acquired.
+ return file, flags
+ }
+ // No file available.
+ return nil, FDFlags{}
+ }
+}
+
+// GetFDs returns a list of valid fds, in increasing order.
+func (f *FDTable) GetFDs() []int32 {
+	// Size reads the entry count atomically; set updates that counter with
+	// atomic operations, so a plain read of f.used here would be a data
+	// race. The count is only a capacity hint for the result.
+	fds := make([]int32, 0, f.Size())
+	f.forEach(func(fd int32, file *fs.File, flags FDFlags) {
+		fds = append(fds, fd)
+	})
+	return fds
+}
+
+// GetRefs returns a slice holding every file in the table, with an extra
+// reference taken on each on behalf of the caller. The caller must use DecRef
+// on each element once finished with the slice.
+func (f *FDTable) GetRefs() []*fs.File {
+	held := make([]*fs.File, 0, f.Size())
+	collect := func(_ int32, file *fs.File, _ FDFlags) {
+		// forEach releases its own reference after this returns, so
+		// take a separate one for the caller.
+		file.IncRef()
+		held = append(held, file)
+	}
+	f.forEach(collect)
+	return held
+}
+
+// Fork returns an independent FDTable, allocated from the same kernel, that
+// contains the same files and flags at the same FD numbers.
+func (f *FDTable) Fork() *FDTable {
+	child := f.k.NewFDTable()
+
+	// set takes the table reference the child needs for each file, so
+	// nothing further is required here.
+	copyEntry := func(fd int32, file *fs.File, flags FDFlags) {
+		child.set(fd, file, flags)
+	}
+	f.forEach(copyEntry)
+	return child
+}
+
+// Remove clears the entry for fd and returns the file that was installed
+// there, or nil if fd is negative or has no file.
+//
+// N.B. The returned file carries a reference taken for the caller, who is
+// required to use DecRef when done.
+func (f *FDTable) Remove(fd int32) *fs.File {
+	if fd < 0 {
+		return nil
+	}
+
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	orig, _, _ := f.get(fd)
+	if orig == nil {
+		return nil
+	}
+
+	// Take the caller's reference before set drops the table's.
+	orig.IncRef()
+	f.set(fd, nil, FDFlags{})
+	return orig
+}
+
+// RemoveIf clears every entry for which cond reports true. cond is called
+// with f.mu held.
+func (f *FDTable) RemoveIf(cond func(*fs.File, FDFlags) bool) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	f.forEach(func(fd int32, file *fs.File, flags FDFlags) {
+		if !cond(file, flags) {
+			return
+		}
+		// Clear from table.
+		f.set(fd, nil, FDFlags{})
+	})
+}
diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go
new file mode 100644
index 000000000..2413788e7
--- /dev/null
+++ b/pkg/sentry/kernel/fd_table_test.go
@@ -0,0 +1,192 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "runtime"
+ "sync"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/filetest"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+)
+
+const (
+ // maxFD is the maximum FD to try to create in the table. runTest also
+ // uses it as the NumberOfFiles limit.
+ //
+ // This number of open files has been seen in the wild.
+ maxFD = 2 * 1024
+)
+
+// runTest builds the fixtures shared by the tests in this file and passes
+// them to fn: a context carrying a limit set whose NumberOfFiles limit is
+// maxFD, an initialized FDTable, a test file, and the limit set itself.
+func runTest(t testing.TB, fn func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet)) {
+	t.Helper() // Don't show in stacks.
+
+	// Create the limits and attach them to a test context.
+	fileLimit := limits.Limit{maxFD, maxFD}
+	limitSet := limits.NewLimitSet()
+	limitSet.Set(limits.NumberOfFiles, fileLimit, true)
+	ctx := contexttest.WithLimitSet(contexttest.Context(t), limitSet)
+
+	// Create a test file.
+	file := filetest.NewTestFile(t)
+
+	// Create the table; a bare FDTable only needs init.
+	fdTable := &FDTable{}
+	fdTable.init()
+
+	// Run the test.
+	fn(ctx, fdTable, file, limitSet)
+}
+
+// TestFDTableMany allocates maxFD FDs, i.e. maxes out the FDTable, checks
+// that one further allocation fails, and then makes sure that NewFDAt can
+// still replace an existing entry.
+func TestFDTableMany(t *testing.T) {
+	runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) {
+		single := []*fs.File{file}
+
+		// Fill every slot the limit allows.
+		for n := 0; n < maxFD; n++ {
+			_, err := fdTable.NewFDs(ctx, 0, single, FDFlags{})
+			if err != nil {
+				t.Fatalf("Allocated %v FDs but wanted to allocate %v", n, maxFD)
+			}
+		}
+
+		// The table is full, so one more must fail.
+		_, err := fdTable.NewFDs(ctx, 0, single, FDFlags{})
+		if err == nil {
+			t.Fatalf("fdTable.NewFDs(0, r) in full map: got nil, wanted error")
+		}
+
+		// Replacing an existing entry is still permitted.
+		err = fdTable.NewFDAt(ctx, 1, file, FDFlags{})
+		if err != nil {
+			t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
+		}
+	})
+}
+
+// TestFDTable does a set of simple tests to make sure simple adds, removes,
+// Gets, and DecRefs work. The ordering is just weird enough that a
+// table-driven approach seemed clumsy.
+func TestFDTable(t *testing.T) {
+	runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet) {
+		// Cap the limit at one.
+		limitSet.Set(limits.NumberOfFiles, limits.Limit{1, maxFD}, true)
+
+		if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil {
+			t.Fatalf("Adding an FD to an empty 1-size map: got %v, want nil", err)
+		}
+
+		if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil {
+			t.Fatalf("Adding an FD to a filled 1-size map: got nil, wanted an error")
+		}
+
+		// Remove the previous limit.
+		limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true)
+
+		if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil {
+			t.Fatalf("Adding an FD to a resized map: got %v, want nil", err)
+		} else if len(fds) != 1 || fds[0] != 1 {
+			t.Fatalf("Added an FD to a resized map: got %v, want {1}", fds)
+		}
+
+		if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil {
+			t.Fatalf("Replacing FD 1 via fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
+		}
+
+		if err := fdTable.NewFDAt(ctx, maxFD+1, file, FDFlags{}); err == nil {
+			t.Fatalf("Using an FD that was too large via fdTable.NewFDAt(%v, r, FDFlags{}): got nil, wanted an error", maxFD+1)
+		}
+
+		got, _ := fdTable.Get(1)
+		if got == nil {
+			t.Fatalf("fdTable.Get(1): got nil, wanted %v", file)
+		}
+		// Get hands back a reference that the caller must release.
+		got.DecRef()
+
+		if ref, _ := fdTable.Get(2); ref != nil {
+			t.Fatalf("fdTable.Get(2): got a %v, wanted nil", ref)
+		}
+
+		removed := fdTable.Remove(1)
+		if removed == nil {
+			t.Fatalf("fdTable.Remove(1) for an existing FD: failed, want success")
+		}
+		removed.DecRef()
+
+		if again := fdTable.Remove(1); again != nil {
+			// Release the unexpected reference before failing.
+			again.DecRef()
+			t.Fatalf("fdTable.Remove(1) for a removed FD: got success, want failure")
+		}
+	})
+}
+
+// TestDescriptorFlags checks that the flags passed to NewFDAt are the flags
+// later returned by Get for the same FD.
+func TestDescriptorFlags(t *testing.T) {
+	runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) {
+		want := FDFlags{CloseOnExec: true}
+		if err := fdTable.NewFDAt(ctx, 2, file, want); err != nil {
+			t.Fatalf("fdTable.NewFDAt(2, r, %+v): got %v, wanted nil", want, err)
+		}
+
+		newFile, got := fdTable.Get(2)
+		if newFile == nil {
+			t.Fatalf("fdTable.Get(2): got nil, wanted a file")
+		}
+		// Get hands back a reference that the caller must release.
+		defer newFile.DecRef()
+
+		if got != want {
+			t.Fatalf("fdTable.Get(2) flags: got %+v, wanted %+v", got, want)
+		}
+	})
+}
+
+// BenchmarkFDLookupAndDecRef measures a Get followed by a DecRef, cycling
+// over five installed descriptors from a single goroutine.
+func BenchmarkFDLookupAndDecRef(b *testing.B) {
+	b.StopTimer() // Setup.
+
+	runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) {
+		installed := []*fs.File{file, file, file, file, file}
+		fds, err := fdTable.NewFDs(ctx, 0, installed, FDFlags{})
+		if err != nil {
+			b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err)
+		}
+
+		b.StartTimer() // Benchmark.
+		for iter := 0; iter < b.N; iter++ {
+			fd := fds[iter%len(fds)]
+			found, _ := fdTable.Get(fd)
+			found.DecRef()
+		}
+	})
+}
+
+// BenchmarkFDLookupAndDecRefConcurrent measures Get followed by DecRef when
+// issued from several goroutines at once. It starts GOMAXPROCS goroutines
+// (at least four), each performing b.N/concurrency lookups over five
+// installed descriptors.
+func BenchmarkFDLookupAndDecRefConcurrent(b *testing.B) {
+	b.StopTimer() // Setup.
+
+	runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) {
+		installed := []*fs.File{file, file, file, file, file}
+		fds, err := fdTable.NewFDs(ctx, 0, installed, FDFlags{})
+		if err != nil {
+			b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err)
+		}
+
+		// Use at least four workers, and split the iterations evenly.
+		concurrency := runtime.GOMAXPROCS(0)
+		if concurrency < 4 {
+			concurrency = 4
+		}
+		each := b.N / concurrency
+
+		b.StartTimer() // Benchmark.
+		var wg sync.WaitGroup
+		wg.Add(concurrency)
+		for w := 0; w < concurrency; w++ {
+			go func() {
+				defer wg.Done()
+				for j := 0; j < each; j++ {
+					found, _ := fdTable.Get(fds[j%len(fds)])
+					found.DecRef()
+				}
+			}()
+		}
+		wg.Wait()
+	})
+}
diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go
new file mode 100644
index 000000000..e009df974
--- /dev/null
+++ b/pkg/sentry/kernel/fd_table_unsafe.go
@@ -0,0 +1,103 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "sync/atomic"
+ "unsafe"
+
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+)
+
+// descriptorTable is the storage behind FDTable. It is read with atomic
+// loads by get and modified by set.
+type descriptorTable struct {
+ // slice is a *[]unsafe.Pointer, where each element is actually a
+ // *descriptor object, updated atomically.
+ //
+ // Changes to the slice itself require holding FDTable.mu.
+ slice unsafe.Pointer `state:".(map[int32]*descriptor)"`
+}
+
+// init initializes the table by publishing a pointer to an empty slice, so
+// that get never dereferences a nil table pointer.
+func (f *FDTable) init() {
+	empty := new([]unsafe.Pointer) // Points at a nil (empty) slice.
+	atomic.StorePointer(&f.slice, unsafe.Pointer(empty))
+}
+
+// get gets a file entry, reading the table and the slot with atomic loads.
+// No reference is taken on the returned file.
+//
+// The boolean indicates whether this was in range. An in-range slot with no
+// descriptor yields (nil, FDFlags{}, true).
+//
+// fd is not checked for being negative here; the callers in this package
+// reject negative values before calling.
+//
+//go:nosplit
+func (f *FDTable) get(fd int32) (*fs.File, FDFlags, bool) {
+ slice := *(*[]unsafe.Pointer)(atomic.LoadPointer(&f.slice))
+ if fd >= int32(len(slice)) {
+ return nil, FDFlags{}, false
+ }
+ d := (*descriptor)(atomic.LoadPointer(&slice[fd]))
+ if d == nil {
+ return nil, FDFlags{}, true
+ }
+ return d.file, d.flags, true
+}
+
+// set sets an entry. Passing a nil file clears the entry.
+//
+// This handles accounting changes, as well as acquiring and releasing the
+// reference needed by the table iff the file is different.
+//
+// Precondition: mu must be held.
+func (f *FDTable) set(fd int32, file *fs.File, flags FDFlags) {
+ slice := *(*[]unsafe.Pointer)(atomic.LoadPointer(&f.slice))
+
+ // Grow the table as required: to at least fd+1 entries, and to at least
+ // double the current length. The grown slice is published through a new
+ // slice header.
+ if last := int32(len(slice)); fd >= last {
+ end := fd + 1
+ if end < 2*last {
+ end = 2 * last
+ }
+ slice = append(slice, make([]unsafe.Pointer, end-last)...)
+ atomic.StorePointer(&f.slice, unsafe.Pointer(&slice))
+ }
+
+ // Create the new element. It stays nil when file is nil, which clears
+ // the slot.
+ var d *descriptor
+ if file != nil {
+ d = &descriptor{
+ file: file,
+ flags: flags,
+ }
+ }
+
+ // Update the single element, keeping the previous descriptor for the
+ // accounting below.
+ orig := (*descriptor)(atomic.SwapPointer(&slice[fd], unsafe.Pointer(d)))
+
+ // Acquire a table reference, unless the same file was already installed
+ // in this slot (e.g. a flags-only update).
+ if file != nil && (orig == nil || file != orig.file) {
+ file.IncRef()
+ }
+
+ // Drop the table reference on the file that was replaced or removed.
+ if orig != nil && file != orig.file {
+ f.drop(orig.file)
+ }
+
+ // Adjust used: only transitions between empty and occupied change it.
+ switch {
+ case orig == nil && file != nil:
+ atomic.AddInt32(&f.used, 1)
+ case orig != nil && file == nil:
+ atomic.AddInt32(&f.used, -1)
+ }
+}
diff --git a/pkg/sentry/kernel/fs_context.go b/pkg/sentry/kernel/fs_context.go
index d8115f59a..ded27d668 100644
--- a/pkg/sentry/kernel/fs_context.go
+++ b/pkg/sentry/kernel/fs_context.go
@@ -18,8 +18,8 @@ import (
"fmt"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// FSContext contains filesystem context.
@@ -51,18 +51,20 @@ type FSContext struct {
func newFSContext(root, cwd *fs.Dirent, umask uint) *FSContext {
root.IncRef()
cwd.IncRef()
- return &FSContext{
+ f := FSContext{
root: root,
cwd: cwd,
umask: umask,
}
+ f.EnableLeakCheck("kernel.FSContext")
+ return &f
}
// destroy is the destructor for an FSContext.
//
// This will call DecRef on both root and cwd Dirents. If either call to
-// DecRef returns an error, then it will be propigated. If both calls to
-// DecRef return an error, then the one from root.DecRef will be propigated.
+// DecRef returns an error, then it will be propagated. If both calls to
+// DecRef return an error, then the one from root.DecRef will be propagated.
//
// Note that there may still be calls to WorkingDirectory() or RootDirectory()
// (that return nil). This is because valid references may still be held via
diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD
index b6af5b20b..a5cf1f627 100644
--- a/pkg/sentry/kernel/futex/BUILD
+++ b/pkg/sentry/kernel/futex/BUILD
@@ -33,7 +33,7 @@ go_library(
"futex.go",
"waiter_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/futex",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go
index bb38eb81e..278cc8143 100644
--- a/pkg/sentry/kernel/futex/futex.go
+++ b/pkg/sentry/kernel/futex/futex.go
@@ -20,10 +20,10 @@ package futex
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// KeyKind indicates the type of a Key.
@@ -729,14 +729,14 @@ func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool
}
b := m.lockBucket(&k)
- err = m.unlockPILocked(t, addr, tid, b)
+ err = m.unlockPILocked(t, addr, tid, b, &k)
k.release()
b.mu.Unlock()
return err
}
-func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket) error {
+func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket, key *Key) error {
cur, err := t.LoadUint32(addr)
if err != nil {
return err
@@ -746,7 +746,22 @@ func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *buc
return syserror.EPERM
}
- if b.waiters.Empty() {
+ var next *Waiter // Who's the next owner?
+ var next2 *Waiter // Who's the one after that?
+ for w := b.waiters.Front(); w != nil; w = w.Next() {
+ if !w.key.matches(key) {
+ continue
+ }
+
+ if next == nil {
+ next = w
+ } else {
+ next2 = w
+ break
+ }
+ }
+
+ if next == nil {
// It's safe to set 0 because there are no waiters, no new owner, and the
// executing task is the current owner (no owner died bit).
prev, err := t.CompareAndSwapUint32(addr, cur, 0)
@@ -761,12 +776,10 @@ func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *buc
return nil
}
- next := b.waiters.Front()
-
// Set next owner's TID, waiters if there are any. Resets owner died bit, if
// set, because the executing task takes over as the owner.
val := next.tid
- if next.Next() != nil {
+ if next2 != nil {
val |= linux.FUTEX_WAITERS
}
diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go
index 2de5239bf..65e5d1428 100644
--- a/pkg/sentry/kernel/futex/futex_test.go
+++ b/pkg/sentry/kernel/futex/futex_test.go
@@ -23,7 +23,7 @@ import (
"testing"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// testData implements the Target interface, and allows us to
diff --git a/pkg/sentry/kernel/ipc_namespace.go b/pkg/sentry/kernel/ipc_namespace.go
index ebe12812c..80a070d7e 100644
--- a/pkg/sentry/kernel/ipc_namespace.go
+++ b/pkg/sentry/kernel/ipc_namespace.go
@@ -15,9 +15,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/semaphore"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/shm"
)
// IPCNamespace represents an IPC namespace.
@@ -40,7 +40,7 @@ func NewIPCNamespace(userNS *auth.UserNamespace) *IPCNamespace {
}
}
-// SemaphoreRegistry returns the semanphore set registry for this namespace.
+// SemaphoreRegistry returns the semaphore set registry for this namespace.
func (i *IPCNamespace) SemaphoreRegistry() *semaphore.Registry {
return i.semaphores
}
diff --git a/pkg/sentry/kernel/kdefs/BUILD b/pkg/sentry/kernel/kdefs/BUILD
deleted file mode 100644
index 38aaca134..000000000
--- a/pkg/sentry/kernel/kdefs/BUILD
+++ /dev/null
@@ -1,10 +0,0 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "kdefs",
- srcs = ["kdefs.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs",
- visibility = ["//:sandbox"],
-)
diff --git a/pkg/sentry/kernel/kdefs/kdefs.go b/pkg/sentry/kernel/kdefs/kdefs.go
deleted file mode 100644
index 304da2032..000000000
--- a/pkg/sentry/kernel/kdefs/kdefs.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package kdefs defines common kernel definitions.
-//
-package kdefs
-
-// FD is a File Descriptor.
-type FD int32
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index f253a81d9..38b49cba2 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -39,34 +39,34 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd"
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/port"
- sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl"
- uspb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/state"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/timerfd"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port"
+ sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ uspb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/state"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// Kernel represents an emulated Linux kernel. It must be initialized by calling
@@ -155,7 +155,7 @@ type Kernel struct {
// cpuClockTicker increments cpuClock.
cpuClockTicker *ktime.Timer `state:"nosave"`
- // fdMapUids is an ever-increasing counter for generating FDMap uids.
+ // fdMapUids is an ever-increasing counter for generating FDTable uids.
//
// fdMapUids is mutable, and is accessed using atomic memory operations.
fdMapUids uint64
@@ -381,13 +381,27 @@ func (k *Kernel) SaveTo(w io.Writer) error {
// flushMountSourceRefs flushes the MountSources for all mounted filesystems
// and open FDs.
func (k *Kernel) flushMountSourceRefs() error {
- // Flush all mount sources for currently mounted filesystems.
+ // Flush all mount sources for currently mounted filesystems in the
+ // root mount namespace.
k.mounts.FlushMountSourceRefs()
+ // Some tasks may have other mount namespaces; flush those as well.
+ flushed := make(map[*fs.MountNamespace]struct{})
+ k.tasks.mu.RLock()
+ k.tasks.forEachThreadGroupLocked(func(tg *ThreadGroup) {
+ if _, ok := flushed[tg.mounts]; ok {
+ // Already flushed.
+ return
+ }
+ tg.mounts.FlushMountSourceRefs()
+ flushed[tg.mounts] = struct{}{}
+ })
+ k.tasks.mu.RUnlock()
+
// There may be some open FDs whose filesystems have been unmounted. We
// must flush those as well.
- return k.tasks.forEachFDPaused(func(desc descriptor) error {
- desc.file.Dirent.Inode.MountSource.FlushDirentRefs()
+ return k.tasks.forEachFDPaused(func(file *fs.File) error {
+ file.Dirent.Inode.MountSource.FlushDirentRefs()
return nil
})
}
@@ -396,35 +410,35 @@ func (k *Kernel) flushMountSourceRefs() error {
// task.
//
// Precondition: Must be called with the kernel paused.
-func (ts *TaskSet) forEachFDPaused(f func(descriptor) error) error {
+func (ts *TaskSet) forEachFDPaused(f func(*fs.File) error) (err error) {
ts.mu.RLock()
defer ts.mu.RUnlock()
for t := range ts.Root.tids {
// We can skip locking Task.mu here since the kernel is paused.
- if t.fds == nil {
+ if t.fdTable == nil {
continue
}
- for _, desc := range t.fds.files {
- if err := f(desc); err != nil {
- return err
+ t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) {
+ if lastErr := f(file); lastErr != nil && err == nil {
+ err = lastErr
}
- }
+ })
}
- return nil
+ return err
}
func (ts *TaskSet) flushWritesToFiles(ctx context.Context) error {
- return ts.forEachFDPaused(func(desc descriptor) error {
- if flags := desc.file.Flags(); !flags.Write {
+ return ts.forEachFDPaused(func(file *fs.File) error {
+ if flags := file.Flags(); !flags.Write {
return nil
}
- if sattr := desc.file.Dirent.Inode.StableAttr; !fs.IsFile(sattr) && !fs.IsDir(sattr) {
+ if sattr := file.Dirent.Inode.StableAttr; !fs.IsFile(sattr) && !fs.IsDir(sattr) {
return nil
}
// Here we need all metadata synced.
- syncErr := desc.file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncAll)
+ syncErr := file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncAll)
if err := fs.SaveFileFsyncError(syncErr); err != nil {
- name, _ := desc.file.Dirent.FullName(nil /* root */)
+ name, _ := file.Dirent.FullName(nil /* root */)
// Wrap this error in ErrSaveRejection
// so that it will trigger a save
// error, rather than a panic. This
@@ -469,14 +483,12 @@ func (ts *TaskSet) unregisterEpollWaiters() {
defer ts.mu.RUnlock()
for t := range ts.Root.tids {
// We can skip locking Task.mu here since the kernel is paused.
- if fdmap := t.fds; fdmap != nil {
- for _, desc := range fdmap.files {
- if desc.file != nil {
- if e, ok := desc.file.FileOperations.(*epoll.EventPoll); ok {
- e.UnregisterEpollWaiters()
- }
+ if t.fdTable != nil {
+ t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) {
+ if e, ok := file.FileOperations.(*epoll.EventPoll); ok {
+ e.UnregisterEpollWaiters()
}
- }
+ })
}
}
}
@@ -524,6 +536,8 @@ func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack) error {
}
log.Infof("Memory load took [%s].", time.Since(memoryStart))
+ log.Infof("Overall load took [%s]", time.Since(loadStart))
+
// Ensure that all pending asynchronous work is complete:
// - namedpipe opening
// - inode file opening
@@ -588,9 +602,9 @@ type CreateProcessArgs struct {
// Credentials is the initial credentials.
Credentials *auth.Credentials
- // FDMap is the initial set of file descriptors. If CreateProcess succeeds,
- // it takes a reference on FDMap.
- FDMap *FDMap
+ // FDTable is the initial set of file descriptors. If CreateProcess succeeds,
+ // it takes a reference on FDTable.
+ FDTable *FDTable
// Umask is the initial umask.
Umask uint
@@ -611,12 +625,18 @@ type CreateProcessArgs struct {
// AbstractSocketNamespace is the initial Abstract Socket namespace.
AbstractSocketNamespace *AbstractSocketNamespace
+ // MountNamespace optionally contains the mount namespace for this
+ // process. If nil, the kernel's mount namespace is used.
+ //
+ // Anyone setting MountNamespace must donate a reference (i.e.
+ // increment it).
+ MountNamespace *fs.MountNamespace
+
// Root optionally contains the dirent that serves as the root for the
// process. If nil, the mount namespace's root is used as the process'
// root.
//
- // Anyone setting Root must donate a reference (i.e. increment it) to
- // keep it alive until it is decremented by CreateProcess.
+ // Anyone setting Root must donate a reference (i.e. increment it).
Root *fs.Dirent
// ContainerID is the container that the process belongs to.
@@ -659,7 +679,7 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
return ctx.args.Credentials
case fs.CtxRoot:
if ctx.args.Root != nil {
- // Take a refernce on the root dirent that will be
+ // Take a reference on the root dirent that will be
// given to the caller.
ctx.args.Root.IncRef()
return ctx.args.Root
@@ -715,20 +735,29 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
return nil, 0, fmt.Errorf("no kernel MountNamespace")
}
- tg := k.newThreadGroup(k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
+ // Grab the mount namespace.
+ mounts := args.MountNamespace
+ if mounts == nil {
+ // If no MountNamespace was configured, then use the kernel's
+ // root mount namespace, with an extra reference that will be
+ // donated to the task.
+ mounts = k.mounts
+ mounts.IncRef()
+ }
+
+ tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
ctx := args.NewContext(k)
// Grab the root directory.
root := args.Root
if root == nil {
- root = fs.RootFromContext(ctx)
- // Is the root STILL nil?
- if root == nil {
- return nil, 0, fmt.Errorf("CreateProcessArgs.Root was not provided, and failed to get root from context")
- }
+ // If no Root was configured, then get it from the
+ // MountNamespace.
+ root = mounts.Root()
}
+ // The call to newFSContext below will take a reference on root, so we
+ // don't need to hold this one.
defer root.DecRef()
- args.Root = nil
// Grab the working directory.
remainingTraversals := uint(args.MaxSymlinkTraversals)
@@ -760,9 +789,9 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
return nil, 0, errors.New(se.String())
}
- // Take a reference on the FDMap, which will be transferred to
+ // Take a reference on the FDTable, which will be transferred to
// TaskSet.NewTask().
- args.FDMap.IncRef()
+ args.FDTable.IncRef()
// Create the task.
config := &TaskConfig{
@@ -770,7 +799,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
ThreadGroup: tg,
TaskContext: tc,
FSContext: newFSContext(root, wd, args.Umask),
- FDMap: args.FDMap,
+ FDTable: args.FDTable,
Credentials: args.Credentials,
AllowedCPUMask: sched.NewFullCPUSet(k.applicationCores),
UTSNamespace: args.UTSNamespace,
@@ -842,7 +871,7 @@ func (k *Kernel) pauseTimeLocked() {
}
// By precondition, nothing else can be interacting with PIDNamespace.tids
- // or FDMap.files, so we can iterate them without synchronization. (We
+ // or FDTable.files, so we can iterate them without synchronization. (We
// can't hold the TaskSet mutex when pausing thread group timers because
// thread group timers call ThreadGroup.SendSignal, which takes the TaskSet
// mutex, while holding the Timer mutex.)
@@ -853,14 +882,14 @@ func (k *Kernel) pauseTimeLocked() {
it.PauseTimer()
}
}
- // This means we'll iterate FDMaps shared by multiple tasks repeatedly,
+ // This means we'll iterate FDTables shared by multiple tasks repeatedly,
// but ktime.Timer.Pause is idempotent so this is harmless.
- if fdm := t.fds; fdm != nil {
- for _, desc := range fdm.files {
- if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok {
+ if t.fdTable != nil {
+ t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) {
+ if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok {
tfd.PauseTimer()
}
- }
+ })
}
}
k.timekeeper.PauseUpdates()
@@ -885,12 +914,12 @@ func (k *Kernel) resumeTimeLocked() {
it.ResumeTimer()
}
}
- if fdm := t.fds; fdm != nil {
- for _, desc := range fdm.files {
- if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok {
+ if t.fdTable != nil {
+ t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) {
+ if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok {
tfd.ResumeTimer()
}
- }
+ })
}
}
}
diff --git a/pkg/sentry/kernel/kernel_state.go b/pkg/sentry/kernel/kernel_state.go
index 48c3ff5a9..909219086 100644
--- a/pkg/sentry/kernel/kernel_state.go
+++ b/pkg/sentry/kernel/kernel_state.go
@@ -15,8 +15,8 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// saveDanglingEndpoints is invoked by stateify.
diff --git a/pkg/sentry/kernel/memevent/BUILD b/pkg/sentry/kernel/memevent/BUILD
index 347a69062..ebcfaa619 100644
--- a/pkg/sentry/kernel/memevent/BUILD
+++ b/pkg/sentry/kernel/memevent/BUILD
@@ -6,7 +6,7 @@ package(licenses = ["notice"])
go_library(
name = "memevent",
srcs = ["memory_events.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/memevent",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent",
visibility = ["//:sandbox"],
deps = [
":memory_events_go_proto",
@@ -26,7 +26,7 @@ proto_library(
go_proto_library(
name = "memory_events_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto",
proto = ":memory_events_proto",
visibility = ["//visibility:public"],
)
diff --git a/pkg/sentry/kernel/memevent/memory_events.go b/pkg/sentry/kernel/memevent/memory_events.go
index 0e2cee807..b0d98e7f0 100644
--- a/pkg/sentry/kernel/memevent/memory_events.go
+++ b/pkg/sentry/kernel/memevent/memory_events.go
@@ -20,12 +20,12 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- pb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ pb "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
)
var totalTicks = metric.MustCreateNewUint64Metric("/memory_events/ticks", false /*sync*/, "Total number of memory event periods that have elapsed since startup.")
diff --git a/pkg/sentry/kernel/pending_signals.go b/pkg/sentry/kernel/pending_signals.go
index c93f6598a..77a35b788 100644
--- a/pkg/sentry/kernel/pending_signals.go
+++ b/pkg/sentry/kernel/pending_signals.go
@@ -15,9 +15,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
const (
diff --git a/pkg/sentry/kernel/pending_signals_state.go b/pkg/sentry/kernel/pending_signals_state.go
index 2c902c7e3..ca8b4e164 100644
--- a/pkg/sentry/kernel/pending_signals_state.go
+++ b/pkg/sentry/kernel/pending_signals_state.go
@@ -15,7 +15,7 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// +stateify savable
diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD
index b07d15a2a..4d15cca85 100644
--- a/pkg/sentry/kernel/pipe/BUILD
+++ b/pkg/sentry/kernel/pipe/BUILD
@@ -27,7 +27,7 @@ go_library(
"reader_writer.go",
"writer.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/pipe",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/pipe/buffer.go b/pkg/sentry/kernel/pipe/buffer.go
index 4360dc44f..69ef2a720 100644
--- a/pkg/sentry/kernel/pipe/buffer.go
+++ b/pkg/sentry/kernel/pipe/buffer.go
@@ -17,7 +17,7 @@ package pipe
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
)
// buffer encapsulates a queueable byte buffer.
diff --git a/pkg/sentry/kernel/pipe/buffer_test.go b/pkg/sentry/kernel/pipe/buffer_test.go
index 4b7dbc43f..ee1b90115 100644
--- a/pkg/sentry/kernel/pipe/buffer_test.go
+++ b/pkg/sentry/kernel/pipe/buffer_test.go
@@ -18,7 +18,7 @@ import (
"testing"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
func TestBufferSize(t *testing.T) {
diff --git a/pkg/sentry/kernel/pipe/device.go b/pkg/sentry/kernel/pipe/device.go
index eb59e15a1..89f5d9342 100644
--- a/pkg/sentry/kernel/pipe/device.go
+++ b/pkg/sentry/kernel/pipe/device.go
@@ -14,7 +14,7 @@
package pipe
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// pipeDevice is used for all pipe files.
var pipeDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go
index dc7da529e..a2dc72204 100644
--- a/pkg/sentry/kernel/pipe/node.go
+++ b/pkg/sentry/kernel/pipe/node.go
@@ -17,12 +17,12 @@ package pipe
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/amutex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/amutex"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// inodeOperations implements fs.InodeOperations for pipes.
diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go
index 9a946b380..adbad7764 100644
--- a/pkg/sentry/kernel/pipe/node_test.go
+++ b/pkg/sentry/kernel/pipe/node_test.go
@@ -18,11 +18,11 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
type sleeper struct {
@@ -63,7 +63,7 @@ var perms fs.FilePermissions = fs.FilePermissions{
func testOpenOrDie(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, doneChan chan<- struct{}) (*fs.File, error) {
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe})
- d := fs.NewDirent(inode, "pipe")
+ d := fs.NewDirent(ctx, inode, "pipe")
file, err := n.GetFile(ctx, d, flags)
if err != nil {
t.Fatalf("open with flags %+v failed: %v", flags, err)
@@ -76,7 +76,7 @@ func testOpenOrDie(ctx context.Context, t *testing.T, n fs.InodeOperations, flag
func testOpen(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, resChan chan<- openResult) (*fs.File, error) {
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe})
- d := fs.NewDirent(inode, "pipe")
+ d := fs.NewDirent(ctx, inode, "pipe")
file, err := n.GetFile(ctx, d, flags)
if resChan != nil {
resChan <- openResult{file, err}
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index 73438dc62..247e2928e 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -21,11 +21,11 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -39,19 +39,6 @@ const (
MaximumPipeSize = 8 << 20
)
-// Sizer is an interface for setting and getting the size of a pipe.
-//
-// It is implemented by Pipe and, through embedding, all other types.
-type Sizer interface {
- // PipeSize returns the pipe capacity in bytes.
- PipeSize() int64
-
- // SetPipeSize sets the new pipe capacity in bytes.
- //
- // The new size is returned (which may be capped).
- SetPipeSize(int64) (int64, error)
-}
-
// Pipe is an encapsulation of a platform-independent pipe.
// It manages a buffered byte queue shared between a reader/writer
// pair.
@@ -150,8 +137,8 @@ func NewConnectedPipe(ctx context.Context, sizeBytes, atomicIOBytes int64) (*fs.
InodeID: ino,
BlockSize: int64(atomicIOBytes),
}
- ms := fs.NewPseudoMountSource()
- d := fs.NewDirent(fs.NewInode(iops, ms, sattr), fmt.Sprintf("pipe:[%d]", ino))
+ ms := fs.NewPseudoMountSource(ctx)
+ d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, ms, sattr), fmt.Sprintf("pipe:[%d]", ino))
// The p.Open calls below will each take a reference on the Dirent. We
// must drop the one we already have.
defer d.DecRef()
@@ -162,6 +149,7 @@ func NewConnectedPipe(ctx context.Context, sizeBytes, atomicIOBytes int64) (*fs.
//
// Precondition: at least one of flags.Read or flags.Write must be set.
func (p *Pipe) Open(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) *fs.File {
+ flags.NonSeekable = true
switch {
case flags.Read && flags.Write:
p.rOpen()
@@ -398,15 +386,15 @@ func (p *Pipe) queued() int64 {
return p.size
}
-// PipeSize implements PipeSizer.PipeSize.
-func (p *Pipe) PipeSize() int64 {
+// FifoSize implements fs.FifoSizer.FifoSize.
+func (p *Pipe) FifoSize(context.Context, *fs.File) (int64, error) {
p.mu.Lock()
defer p.mu.Unlock()
- return p.max
+ return p.max, nil
}
-// SetPipeSize implements PipeSize.SetPipeSize.
-func (p *Pipe) SetPipeSize(size int64) (int64, error) {
+// SetFifoSize implements fs.FifoSizer.SetFifoSize.
+func (p *Pipe) SetFifoSize(size int64) (int64, error) {
if size < 0 {
return 0, syserror.EINVAL
}
diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go
index 298c6587b..e3a14b665 100644
--- a/pkg/sentry/kernel/pipe/pipe_test.go
+++ b/pkg/sentry/kernel/pipe/pipe_test.go
@@ -18,10 +18,10 @@ import (
"bytes"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestPipeRW(t *testing.T) {
diff --git a/pkg/sentry/kernel/pipe/reader.go b/pkg/sentry/kernel/pipe/reader.go
index 656be824d..7724b4452 100644
--- a/pkg/sentry/kernel/pipe/reader.go
+++ b/pkg/sentry/kernel/pipe/reader.go
@@ -15,7 +15,7 @@
package pipe
import (
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Reader satisfies the fs.FileOperations interface for read-only pipes.
diff --git a/pkg/sentry/kernel/pipe/reader_writer.go b/pkg/sentry/kernel/pipe/reader_writer.go
index e560b9be9..f69dbf27b 100644
--- a/pkg/sentry/kernel/pipe/reader_writer.go
+++ b/pkg/sentry/kernel/pipe/reader_writer.go
@@ -18,13 +18,13 @@ import (
"math"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// ReaderWriter satisfies the FileOperations interface and services both
@@ -77,7 +77,7 @@ func (rw *ReaderWriter) Readiness(mask waiter.EventMask) waiter.EventMask {
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (rw *ReaderWriter) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (rw *ReaderWriter) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
// Switch on ioctl request.
switch int(args[1].Int()) {
case linux.FIONREAD:
diff --git a/pkg/sentry/kernel/pipe/writer.go b/pkg/sentry/kernel/pipe/writer.go
index 8d5b68541..5bc6aa931 100644
--- a/pkg/sentry/kernel/pipe/writer.go
+++ b/pkg/sentry/kernel/pipe/writer.go
@@ -15,7 +15,7 @@
package pipe
import (
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Writer satisfies the fs.FileOperations interface for write-only pipes.
diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go
index a016b4087..c5d095af7 100644
--- a/pkg/sentry/kernel/posixtimer.go
+++ b/pkg/sentry/kernel/posixtimer.go
@@ -17,10 +17,10 @@ package kernel
import (
"math"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// IntervalTimer represents a POSIX interval timer as described by
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index 193447b17..3be171cdc 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -17,11 +17,11 @@ package kernel
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// ptraceOptions are the subset of options controlling a task's ptrace behavior
diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go
index 048eeaa3f..5514cf432 100644
--- a/pkg/sentry/kernel/ptrace_amd64.go
+++ b/pkg/sentry/kernel/ptrace_amd64.go
@@ -17,9 +17,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// ptraceArch implements arch-specific ptrace commands.
diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go
index 4899c813f..0acdf769d 100644
--- a/pkg/sentry/kernel/ptrace_arm64.go
+++ b/pkg/sentry/kernel/ptrace_arm64.go
@@ -17,9 +17,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// ptraceArch implements arch-specific ptrace commands.
diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go
index c4fb2c56c..24ea002ba 100644
--- a/pkg/sentry/kernel/rseq.go
+++ b/pkg/sentry/kernel/rseq.go
@@ -15,9 +15,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Restartable sequences, as described in https://lwn.net/Articles/650333/.
diff --git a/pkg/sentry/kernel/sched/BUILD b/pkg/sentry/kernel/sched/BUILD
index 184e8a35b..1725b8562 100644
--- a/pkg/sentry/kernel/sched/BUILD
+++ b/pkg/sentry/kernel/sched/BUILD
@@ -8,7 +8,7 @@ go_library(
"cpuset.go",
"sched.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/sched",
visibility = ["//pkg/sentry:internal"],
)
diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go
index cc75eb08a..2347dcf36 100644
--- a/pkg/sentry/kernel/seccomp.go
+++ b/pkg/sentry/kernel/seccomp.go
@@ -17,12 +17,12 @@ package kernel
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/bpf"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const maxSyscallFilterInstructions = 1 << 15
diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD
index 840943ca8..36edf10f3 100644
--- a/pkg/sentry/kernel/semaphore/BUILD
+++ b/pkg/sentry/kernel/semaphore/BUILD
@@ -21,7 +21,7 @@ go_library(
"semaphore.go",
"waiter_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/semaphore",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index 9d0620e02..93fe68a3e 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -19,13 +19,13 @@ import (
"fmt"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -86,7 +86,7 @@ type Set struct {
dead bool
}
-// sem represents a single semanphore from a set.
+// sem represents a single semaphore from a set.
//
// +stateify savable
type sem struct {
diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go
index abfcd0fb4..c235f6ca4 100644
--- a/pkg/sentry/kernel/semaphore/semaphore_test.go
+++ b/pkg/sentry/kernel/semaphore/semaphore_test.go
@@ -17,11 +17,11 @@ package semaphore
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func executeOps(ctx context.Context, t *testing.T, set *Set, ops []linux.Sembuf, block bool) chan struct{} {
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index 610e199da..81fcd8258 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -15,10 +15,10 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SessionID is the public identifier.
@@ -294,6 +294,7 @@ func (tg *ThreadGroup) createSession() error {
id: SessionID(id),
leader: tg,
}
+ s.refs.EnableLeakCheck("kernel.Session")
// Create a new ProcessGroup, belonging to that Session.
// This also has a single reference (assigned below).
@@ -307,6 +308,7 @@ func (tg *ThreadGroup) createSession() error {
session: s,
ancestors: 0,
}
+ pg.refs.EnableLeakCheck("kernel.ProcessGroup")
// Tie them and return the result.
s.processGroups.PushBack(pg)
@@ -378,11 +380,13 @@ func (tg *ThreadGroup) CreateProcessGroup() error {
// We manually adjust the ancestors if the parent is in the same
// session.
tg.processGroup.session.incRef()
- pg := &ProcessGroup{
+ pg := ProcessGroup{
id: ProcessGroupID(id),
originator: tg,
session: tg.processGroup.session,
}
+ pg.refs.EnableLeakCheck("kernel.ProcessGroup")
+
if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session {
pg.ancestors++
}
@@ -390,20 +394,20 @@ func (tg *ThreadGroup) CreateProcessGroup() error {
// Assign the new process group; adjust children.
oldParentPG := tg.parentPG()
tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
- childTG.processGroup.incRefWithParent(pg)
+ childTG.processGroup.incRefWithParent(&pg)
childTG.processGroup.decRefWithParent(oldParentPG)
})
tg.processGroup.decRefWithParent(oldParentPG)
- tg.processGroup = pg
+ tg.processGroup = &pg
// Add the new process group to the session.
- pg.session.processGroups.PushBack(pg)
+ pg.session.processGroups.PushBack(&pg)
// Ensure this translation is added to all namespaces.
for ns := tg.pidns; ns != nil; ns = ns.parent {
local := ns.tgids[tg]
- ns.pgids[pg] = ProcessGroupID(local)
- ns.processGroups[ProcessGroupID(local)] = pg
+ ns.pgids[&pg] = ProcessGroupID(local)
+ ns.processGroups[ProcessGroupID(local)] = &pg
}
return nil
diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD
index bc2089872..aa7471eb6 100644
--- a/pkg/sentry/kernel/shm/BUILD
+++ b/pkg/sentry/kernel/shm/BUILD
@@ -8,7 +8,7 @@ go_library(
"device.go",
"shm.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/shm",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/shm/device.go b/pkg/sentry/kernel/shm/device.go
index 3cb759072..6b0d5818b 100644
--- a/pkg/sentry/kernel/shm/device.go
+++ b/pkg/sentry/kernel/shm/device.go
@@ -14,7 +14,7 @@
package shm
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// shmDevice is the kernel shm device.
var shmDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index 00393b5f0..5bd610f68 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -37,19 +37,19 @@ import (
"fmt"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Key represents a shm segment key. Analogous to a file name.
@@ -224,6 +224,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi
creatorPID: pid,
changeTime: ktime.NowFromContext(ctx),
}
+ shm.EnableLeakCheck("kernel.Shm")
// Find the next available ID.
for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
diff --git a/pkg/sentry/kernel/signal.go b/pkg/sentry/kernel/signal.go
index b528ec0dc..02eede93d 100644
--- a/pkg/sentry/kernel/signal.go
+++ b/pkg/sentry/kernel/signal.go
@@ -17,10 +17,10 @@ package kernel
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
)
// SignalPanic is used to panic the running threads. It is a signal which
diff --git a/pkg/sentry/kernel/signal_handlers.go b/pkg/sentry/kernel/signal_handlers.go
index ce8bcb5e5..a16f3d57f 100644
--- a/pkg/sentry/kernel/signal_handlers.go
+++ b/pkg/sentry/kernel/signal_handlers.go
@@ -17,8 +17,8 @@ package kernel
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// SignalHandlers holds information about signal actions.
diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go
index 27cd3728b..220fa73a2 100644
--- a/pkg/sentry/kernel/syscalls.go
+++ b/pkg/sentry/kernel/syscalls.go
@@ -19,10 +19,10 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/bits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// maxSyscallNum is the highest supported syscall number.
diff --git a/pkg/sentry/kernel/syslog.go b/pkg/sentry/kernel/syslog.go
index 175d1b247..8227ecf1d 100644
--- a/pkg/sentry/kernel/syslog.go
+++ b/pkg/sentry/kernel/syslog.go
@@ -67,6 +67,7 @@ func (s *syslog) Log() []byte {
"Creating process schedule...",
"Generating random numbers by fair dice roll...",
"Rewriting operating system in Javascript...",
+ "Reticulating splines...",
"Consulting tar man page...",
"Forking spaghetti code...",
"Checking naughty and nice process list...",
diff --git a/pkg/sentry/kernel/table_test.go b/pkg/sentry/kernel/table_test.go
index 3f2b042c8..32cf47e05 100644
--- a/pkg/sentry/kernel/table_test.go
+++ b/pkg/sentry/kernel/table_test.go
@@ -17,8 +17,8 @@ package kernel
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
const (
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 4d889422f..e91f82bb3 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -18,24 +18,24 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bpf"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/third_party/gvsync"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/third_party/gvsync"
)
// Task represents a thread of execution in the untrusted app. It
@@ -236,15 +236,15 @@ type Task struct {
// tc is protected by mu, and is owned by the task goroutine.
tc TaskContext
- // fsc is the task's filesystem context.
+ // fsContext is the task's filesystem context.
//
- // fsc is protected by mu, and is owned by the task goroutine.
- fsc *FSContext
+ // fsContext is protected by mu, and is owned by the task goroutine.
+ fsContext *FSContext
- // fds is the task's file descriptor table.
+ // fdTable is the task's file descriptor table.
//
- // fds is protected by mu, and is owned by the task goroutine.
- fds *FDMap
+ // fdTable is protected by mu, and is owned by the task goroutine.
+ fdTable *FDTable
// If vforkParent is not nil, it is the task that created this task with
// vfork() or clone(CLONE_VFORK), and should have its vforkStop ended when
@@ -386,10 +386,11 @@ type Task struct {
// creds is the task's credentials.
//
- // creds is protected by mu, however the value itself is immutable and can
- // only be changed by a copy. After reading the pointer, access will
- // proceed outside the scope of mu. creds is owned by the task goroutine.
- creds *auth.Credentials
+ // creds.Load() may be called without synchronization. creds.Store() is
+ // serialized by mu. creds is owned by the task goroutine. All
+ // auth.Credentials objects that creds may point to, or have pointed to
+ // in the past, must be treated as immutable.
+ creds auth.AtomicPtrCredentials
// utsns is the task's UTS namespace.
//
@@ -597,11 +598,11 @@ func (t *Task) Value(key interface{}) interface{} {
case CtxTask:
return t
case auth.CtxCredentials:
- return t.creds
+ return t.Credentials()
case context.CtxThreadGroupID:
return int32(t.ThreadGroup().ID())
case fs.CtxRoot:
- return t.fsc.RootDirectory()
+ return t.fsContext.RootDirectory()
case fs.CtxDirentCacheLimiter:
return t.k.DirentCacheLimiter
case inet.CtxStack:
@@ -665,9 +666,9 @@ func (t *Task) SyscallRestartBlock() SyscallRestartBlock {
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) IsChrooted() bool {
- realRoot := t.k.mounts.Root()
+ realRoot := t.tg.mounts.Root()
defer realRoot.DecRef()
- root := t.fsc.RootDirectory()
+ root := t.fsContext.RootDirectory()
if root != nil {
defer root.DecRef()
}
@@ -688,29 +689,68 @@ func (t *Task) TaskContext() *TaskContext {
// Precondition: The caller must be running on the task goroutine, or t.mu must
// be locked.
func (t *Task) FSContext() *FSContext {
- return t.fsc
+ return t.fsContext
}
-// FDMap returns t's FDMap. FDMap does not take an additional reference on the
-// returned FDMap.
+// FDTable returns t's FDTable. FDTable does not take an additional reference
+// on the returned FDTable.
//
// Precondition: The caller must be running on the task goroutine, or t.mu must
// be locked.
-func (t *Task) FDMap() *FDMap {
- return t.fds
+func (t *Task) FDTable() *FDTable {
+ return t.fdTable
+}
+
+// GetFile is a convenience wrapper for t.FDTable().Get.
+//
+// Precondition: same as FDTable.
+func (t *Task) GetFile(fd int32) *fs.File {
+ f, _ := t.fdTable.Get(fd)
+ return f
+}
+
+// NewFDs is a convenience wrapper for t.FDTable().NewFDs.
+//
+// This automatically passes the task as the context.
+//
+// Precondition: same as FDTable.
+func (t *Task) NewFDs(fd int32, files []*fs.File, flags FDFlags) ([]int32, error) {
+ return t.fdTable.NewFDs(t, fd, files, flags)
+}
+
+// NewFDFrom is a convenience wrapper for t.FDTable().NewFDs with a single file.
+//
+// This automatically passes the task as the context.
+//
+// Precondition: same as FDTable.
+func (t *Task) NewFDFrom(fd int32, file *fs.File, flags FDFlags) (int32, error) {
+ fds, err := t.fdTable.NewFDs(t, fd, []*fs.File{file}, flags)
+ if err != nil {
+ return 0, err
+ }
+ return fds[0], nil
+}
+
+// NewFDAt is a convenience wrapper for t.FDTable().NewFDAt.
+//
+// This automatically passes the task as the context.
+//
+// Precondition: same as FDTable.
+func (t *Task) NewFDAt(fd int32, file *fs.File, flags FDFlags) error {
+ return t.fdTable.NewFDAt(t, fd, file, flags)
}
// WithMuLocked executes f with t.mu locked.
func (t *Task) WithMuLocked(f func(*Task)) {
t.mu.Lock()
- defer t.mu.Unlock()
f(t)
+ t.mu.Unlock()
}
// MountNamespace returns t's MountNamespace. MountNamespace does not take an
// additional reference on the returned MountNamespace.
func (t *Task) MountNamespace() *fs.MountNamespace {
- return t.k.mounts
+ return t.tg.mounts
}
// AbstractSockets returns t's AbstractSocketNamespace.
diff --git a/pkg/sentry/kernel/task_acct.go b/pkg/sentry/kernel/task_acct.go
index 1ca2a82eb..5f3e60fe8 100644
--- a/pkg/sentry/kernel/task_acct.go
+++ b/pkg/sentry/kernel/task_acct.go
@@ -17,11 +17,11 @@ package kernel
// Accounting, limits, timers.
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Getitimer implements getitimer(2).
diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go
index 1c76c4d84..2a2e6f662 100644
--- a/pkg/sentry/kernel/task_block.go
+++ b/pkg/sentry/kernel/task_block.go
@@ -17,8 +17,8 @@ package kernel
import (
"time"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// BlockWithTimeout blocks t until an event is received from C, the application
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index bba8ddd39..0916fd658 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -15,10 +15,10 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bpf"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SharingOptions controls what resources are shared by a new task created by
@@ -214,20 +214,20 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
}
}
- var fsc *FSContext
+ var fsContext *FSContext
if opts.NewFSContext {
- fsc = t.fsc.Fork()
+ fsContext = t.fsContext.Fork()
} else {
- fsc = t.fsc
- fsc.IncRef()
+ fsContext = t.fsContext
+ fsContext.IncRef()
}
- var fds *FDMap
+ var fdTable *FDTable
if opts.NewFiles {
- fds = t.fds.Fork()
+ fdTable = t.fdTable.Fork()
} else {
- fds = t.fds
- fds.IncRef()
+ fdTable = t.fdTable
+ fdTable.IncRef()
}
pidns := t.tg.pidns
@@ -238,11 +238,12 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
}
tg := t.tg
if opts.NewThreadGroup {
+ tg.mounts.IncRef()
sh := t.tg.signalHandlers
if opts.NewSignalHandlers {
sh = sh.Fork()
}
- tg = t.k.newThreadGroup(pidns, sh, opts.TerminationSignal, tg.limits.GetCopy(), t.k.monotonicClock)
+ tg = t.k.newThreadGroup(tg.mounts, pidns, sh, opts.TerminationSignal, tg.limits.GetCopy(), t.k.monotonicClock)
}
cfg := &TaskConfig{
@@ -250,8 +251,8 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
ThreadGroup: tg,
SignalMask: t.SignalMask(),
TaskContext: tc,
- FSContext: fsc,
- FDMap: fds,
+ FSContext: fsContext,
+ FDTable: fdTable,
Credentials: creds,
Niceness: t.Niceness(),
NetworkNamespaced: t.netns,
@@ -424,6 +425,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
if opts.NewAddressSpace || opts.NewSignalHandlers {
return syserror.EINVAL
}
+ creds := t.Credentials()
if opts.NewThreadGroup {
t.tg.signalHandlers.mu.Lock()
if t.tg.tasksCount != 1 {
@@ -438,8 +440,6 @@ func (t *Task) Unshare(opts *SharingOptions) error {
if t.IsChrooted() {
return syserror.EPERM
}
- // This temporary is needed because Go.
- creds := t.Credentials()
newUserNS, err := creds.NewChildUserNamespace()
if err != nil {
return err
@@ -448,6 +448,8 @@ func (t *Task) Unshare(opts *SharingOptions) error {
if err != nil {
return err
}
+ // Need to reload creds, because t.SetUserNamespace() changed task credentials.
+ creds = t.Credentials()
}
haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN)
if opts.NewPIDNamespace {
@@ -472,7 +474,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
}
// Note that this must happen after NewUserNamespace, so the
// new user namespace is used if there is one.
- t.utsns = t.utsns.Clone(t.creds.UserNamespace)
+ t.utsns = t.utsns.Clone(creds.UserNamespace)
}
if opts.NewIPCNamespace {
if !haveCapSysAdmin {
@@ -481,24 +483,24 @@ func (t *Task) Unshare(opts *SharingOptions) error {
}
// Note that "If CLONE_NEWIPC is set, then create the process in a new IPC
// namespace"
- t.ipcns = NewIPCNamespace(t.creds.UserNamespace)
+ t.ipcns = NewIPCNamespace(creds.UserNamespace)
}
- var oldfds *FDMap
+ var oldFDTable *FDTable
if opts.NewFiles {
- oldfds = t.fds
- t.fds = oldfds.Fork()
+ oldFDTable = t.fdTable
+ t.fdTable = oldFDTable.Fork()
}
- var oldfsc *FSContext
+ var oldFSContext *FSContext
if opts.NewFSContext {
- oldfsc = t.fsc
- t.fsc = oldfsc.Fork()
+ oldFSContext = t.fsContext
+ t.fsContext = oldFSContext.Fork()
}
t.mu.Unlock()
- if oldfds != nil {
- oldfds.DecRef()
+ if oldFDTable != nil {
+ oldFDTable.DecRef()
}
- if oldfsc != nil {
- oldfsc.DecRef()
+ if oldFSContext != nil {
+ oldFSContext.DecRef()
}
return nil
}
diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go
index bbd294141..54b1676b0 100644
--- a/pkg/sentry/kernel/task_context.go
+++ b/pkg/sentry/kernel/task_context.go
@@ -17,16 +17,16 @@ package kernel
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
)
var errNoSyscalls = syserr.New("no syscall table found", linux.ENOEXEC)
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index 35d5cb90c..17a089b90 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -65,11 +65,11 @@ package kernel
// """
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// execStop is a TaskStop that a task sets on itself when it wants to execve
@@ -195,7 +195,7 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
t.tg.pidns.owner.mu.Unlock()
// Remove FDs with the CloseOnExec flag set.
- t.fds.RemoveIf(func(file *fs.File, flags FDFlags) bool {
+ t.fdTable.RemoveIf(func(file *fs.File, flags FDFlags) bool {
return flags.CloseOnExec
})
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index 158e665d3..535f03e50 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -29,11 +29,11 @@ import (
"fmt"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// An ExitStatus is a value communicated from an exiting task or thread group
@@ -265,8 +265,8 @@ func (*runExitMain) execute(t *Task) taskRunState {
// Releasing the MM unblocks a blocked CLONE_VFORK parent.
t.unstopVforkParent()
- t.fsc.DecRef()
- t.fds.DecRef()
+ t.fsContext.DecRef()
+ t.fdTable.DecRef()
// If this is the last task to exit from the thread group, release the
// thread group's resources.
diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go
index f98097c2c..c211b5b74 100644
--- a/pkg/sentry/kernel/task_futex.go
+++ b/pkg/sentry/kernel/task_futex.go
@@ -15,8 +15,8 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Futex returns t's futex manager.
@@ -34,14 +34,14 @@ func (t *Task) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) {
})
}
-// CompareAndSwapUint32 implemets futex.Target.CompareAndSwapUint32.
+// CompareAndSwapUint32 implements futex.Target.CompareAndSwapUint32.
func (t *Task) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) {
return t.MemoryManager().CompareAndSwapUint32(t, addr, old, new, usermem.IOOpts{
AddressSpaceActive: true,
})
}
-// LoadUint32 implemets futex.Target.LoadUint32.
+// LoadUint32 implements futex.Target.LoadUint32.
func (t *Task) LoadUint32(addr usermem.Addr) (uint32, error) {
return t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{
AddressSpaceActive: true,
diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go
index ec95f78d0..78ff14b20 100644
--- a/pkg/sentry/kernel/task_identity.go
+++ b/pkg/sentry/kernel/task_identity.go
@@ -15,40 +15,32 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Credentials returns t's credentials.
//
// This value must be considered immutable.
func (t *Task) Credentials() *auth.Credentials {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.creds
+ return t.creds.Load()
}
// UserNamespace returns the user namespace associated with the task.
func (t *Task) UserNamespace() *auth.UserNamespace {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.creds.UserNamespace
+ return t.Credentials().UserNamespace
}
// HasCapabilityIn checks if the task has capability cp in user namespace ns.
func (t *Task) HasCapabilityIn(cp linux.Capability, ns *auth.UserNamespace) bool {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.creds.HasCapabilityIn(cp, ns)
+ return t.Credentials().HasCapabilityIn(cp, ns)
}
// HasCapability checks if the task has capability cp in its user namespace.
func (t *Task) HasCapability(cp linux.Capability) bool {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.creds.HasCapability(cp)
+ return t.Credentials().HasCapability(cp)
}
// SetUID implements the semantics of setuid(2).
@@ -57,9 +49,12 @@ func (t *Task) SetUID(uid auth.UID) error {
if !uid.Ok() {
return syserror.EINVAL
}
+
t.mu.Lock()
defer t.mu.Unlock()
- kuid := t.creds.UserNamespace.MapToKUID(uid)
+
+ creds := t.Credentials()
+ kuid := creds.UserNamespace.MapToKUID(uid)
if !kuid.Ok() {
return syserror.EINVAL
}
@@ -67,17 +62,17 @@ func (t *Task) SetUID(uid auth.UID) error {
// effective UID of the caller is root (more precisely: if the caller has
// the CAP_SETUID capability), the real UID and saved set-user-ID are also
// set." - setuid(2)
- if t.creds.HasCapability(linux.CAP_SETUID) {
+ if creds.HasCapability(linux.CAP_SETUID) {
t.setKUIDsUncheckedLocked(kuid, kuid, kuid)
return nil
}
// "EPERM: The user is not privileged (Linux: does not have the CAP_SETUID
// capability) and uid does not match the real UID or saved set-user-ID of
// the calling process."
- if kuid != t.creds.RealKUID && kuid != t.creds.SavedKUID {
+ if kuid != creds.RealKUID && kuid != creds.SavedKUID {
return syserror.EPERM
}
- t.setKUIDsUncheckedLocked(t.creds.RealKUID, kuid, t.creds.SavedKUID)
+ t.setKUIDsUncheckedLocked(creds.RealKUID, kuid, creds.SavedKUID)
return nil
}
@@ -87,37 +82,38 @@ func (t *Task) SetREUID(r, e auth.UID) error {
defer t.mu.Unlock()
// "Supplying a value of -1 for either the real or effective user ID forces
// the system to leave that ID unchanged." - setreuid(2)
- newR := t.creds.RealKUID
+ creds := t.Credentials()
+ newR := creds.RealKUID
if r.Ok() {
- newR = t.creds.UserNamespace.MapToKUID(r)
+ newR = creds.UserNamespace.MapToKUID(r)
if !newR.Ok() {
return syserror.EINVAL
}
}
- newE := t.creds.EffectiveKUID
+ newE := creds.EffectiveKUID
if e.Ok() {
- newE = t.creds.UserNamespace.MapToKUID(e)
+ newE = creds.UserNamespace.MapToKUID(e)
if !newE.Ok() {
return syserror.EINVAL
}
}
- if !t.creds.HasCapability(linux.CAP_SETUID) {
+ if !creds.HasCapability(linux.CAP_SETUID) {
// "Unprivileged processes may only set the effective user ID to the
// real user ID, the effective user ID, or the saved set-user-ID."
- if newE != t.creds.RealKUID && newE != t.creds.EffectiveKUID && newE != t.creds.SavedKUID {
+ if newE != creds.RealKUID && newE != creds.EffectiveKUID && newE != creds.SavedKUID {
return syserror.EPERM
}
// "Unprivileged users may only set the real user ID to the real user
// ID or the effective user ID."
- if newR != t.creds.RealKUID && newR != t.creds.EffectiveKUID {
+ if newR != creds.RealKUID && newR != creds.EffectiveKUID {
return syserror.EPERM
}
}
// "If the real user ID is set (i.e., ruid is not -1) or the effective user
// ID is set to a value not equal to the previous real user ID, the saved
// set-user-ID will be set to the new effective user ID."
- newS := t.creds.SavedKUID
- if r.Ok() || (e.Ok() && newE != t.creds.EffectiveKUID) {
+ newS := creds.SavedKUID
+ if r.Ok() || (e.Ok() && newE != creds.EffectiveKUID) {
newS = newE
}
t.setKUIDsUncheckedLocked(newR, newE, newS)
@@ -136,23 +132,24 @@ func (t *Task) SetRESUID(r, e, s auth.UID) error {
// arguments equals -1, the corresponding value is not changed." -
// setresuid(2)
var err error
- newR := t.creds.RealKUID
+ creds := t.Credentials()
+ newR := creds.RealKUID
if r.Ok() {
- newR, err = t.creds.UseUID(r)
+ newR, err = creds.UseUID(r)
if err != nil {
return err
}
}
- newE := t.creds.EffectiveKUID
+ newE := creds.EffectiveKUID
if e.Ok() {
- newE, err = t.creds.UseUID(e)
+ newE, err = creds.UseUID(e)
if err != nil {
return err
}
}
- newS := t.creds.SavedKUID
+ newS := creds.SavedKUID
if s.Ok() {
- newS, err = t.creds.UseUID(s)
+ newS, err = creds.UseUID(s)
if err != nil {
return err
}
@@ -163,10 +160,10 @@ func (t *Task) SetRESUID(r, e, s auth.UID) error {
// Preconditions: t.mu must be locked.
func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
- root := t.creds.UserNamespace.MapToKUID(auth.RootUID)
- oldR, oldE, oldS := t.creds.RealKUID, t.creds.EffectiveKUID, t.creds.SavedKUID
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.RealKUID, t.creds.EffectiveKUID, t.creds.SavedKUID = newR, newE, newS
+ creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
+ root := creds.UserNamespace.MapToKUID(auth.RootUID)
+ oldR, oldE, oldS := creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID
+ creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID = newR, newE, newS
// "1. If one or more of the real, effective or saved set user IDs was
// previously 0, and as a result of the UID changes all of these IDs have a
@@ -184,9 +181,9 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
// being cleared." (A thread's effective capability set is always
// cleared when such a credential change is made,
// regardless of the setting of the "keep capabilities" flag.)
- if !t.creds.KeepCaps {
- t.creds.PermittedCaps = 0
- t.creds.EffectiveCaps = 0
+ if !creds.KeepCaps {
+ creds.PermittedCaps = 0
+ creds.EffectiveCaps = 0
}
}
// """
@@ -197,9 +194,9 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
// permitted set is copied to the effective set.
// """
if oldE == root && newE != root {
- t.creds.EffectiveCaps = 0
+ creds.EffectiveCaps = 0
} else if oldE != root && newE == root {
- t.creds.EffectiveCaps = t.creds.PermittedCaps
+ creds.EffectiveCaps = creds.PermittedCaps
}
// "4. If the filesystem user ID is changed from 0 to nonzero (see
// setfsuid(2)), then the following capabilities are cleared from the
@@ -220,6 +217,7 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
// Not documented, but compare Linux's kernel/cred.c:commit_creds().
t.parentDeathSignal = 0
}
+ t.creds.Store(creds)
}
// SetGID implements the semantics of setgid(2).
@@ -227,20 +225,23 @@ func (t *Task) SetGID(gid auth.GID) error {
if !gid.Ok() {
return syserror.EINVAL
}
+
t.mu.Lock()
defer t.mu.Unlock()
- kgid := t.creds.UserNamespace.MapToKGID(gid)
+
+ creds := t.Credentials()
+ kgid := creds.UserNamespace.MapToKGID(gid)
if !kgid.Ok() {
return syserror.EINVAL
}
- if t.creds.HasCapability(linux.CAP_SETGID) {
+ if creds.HasCapability(linux.CAP_SETGID) {
t.setKGIDsUncheckedLocked(kgid, kgid, kgid)
return nil
}
- if kgid != t.creds.RealKGID && kgid != t.creds.SavedKGID {
+ if kgid != creds.RealKGID && kgid != creds.SavedKGID {
return syserror.EPERM
}
- t.setKGIDsUncheckedLocked(t.creds.RealKGID, kgid, t.creds.SavedKGID)
+ t.setKGIDsUncheckedLocked(creds.RealKGID, kgid, creds.SavedKGID)
return nil
}
@@ -248,30 +249,32 @@ func (t *Task) SetGID(gid auth.GID) error {
func (t *Task) SetREGID(r, e auth.GID) error {
t.mu.Lock()
defer t.mu.Unlock()
- newR := t.creds.RealKGID
+
+ creds := t.Credentials()
+ newR := creds.RealKGID
if r.Ok() {
- newR = t.creds.UserNamespace.MapToKGID(r)
+ newR = creds.UserNamespace.MapToKGID(r)
if !newR.Ok() {
return syserror.EINVAL
}
}
- newE := t.creds.EffectiveKGID
+ newE := creds.EffectiveKGID
if e.Ok() {
- newE = t.creds.UserNamespace.MapToKGID(e)
+ newE = creds.UserNamespace.MapToKGID(e)
if !newE.Ok() {
return syserror.EINVAL
}
}
- if !t.creds.HasCapability(linux.CAP_SETGID) {
- if newE != t.creds.RealKGID && newE != t.creds.EffectiveKGID && newE != t.creds.SavedKGID {
+ if !creds.HasCapability(linux.CAP_SETGID) {
+ if newE != creds.RealKGID && newE != creds.EffectiveKGID && newE != creds.SavedKGID {
return syserror.EPERM
}
- if newR != t.creds.RealKGID && newR != t.creds.EffectiveKGID {
+ if newR != creds.RealKGID && newR != creds.EffectiveKGID {
return syserror.EPERM
}
}
- newS := t.creds.SavedKGID
- if r.Ok() || (e.Ok() && newE != t.creds.EffectiveKGID) {
+ newS := creds.SavedKGID
+ if r.Ok() || (e.Ok() && newE != creds.EffectiveKGID) {
newS = newE
}
t.setKGIDsUncheckedLocked(newR, newE, newS)
@@ -280,26 +283,29 @@ func (t *Task) SetREGID(r, e auth.GID) error {
// SetRESGID implements the semantics of the setresgid(2) syscall.
func (t *Task) SetRESGID(r, e, s auth.GID) error {
+ var err error
+
t.mu.Lock()
defer t.mu.Unlock()
- var err error
- newR := t.creds.RealKGID
+
+ creds := t.Credentials()
+ newR := creds.RealKGID
if r.Ok() {
- newR, err = t.creds.UseGID(r)
+ newR, err = creds.UseGID(r)
if err != nil {
return err
}
}
- newE := t.creds.EffectiveKGID
+ newE := creds.EffectiveKGID
if e.Ok() {
- newE, err = t.creds.UseGID(e)
+ newE, err = creds.UseGID(e)
if err != nil {
return err
}
}
- newS := t.creds.SavedKGID
+ newS := creds.SavedKGID
if s.Ok() {
- newS, err = t.creds.UseGID(s)
+ newS, err = creds.UseGID(s)
if err != nil {
return err
}
@@ -309,9 +315,9 @@ func (t *Task) SetRESGID(r, e, s auth.GID) error {
}
func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
- oldE := t.creds.EffectiveKGID
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.RealKGID, t.creds.EffectiveKGID, t.creds.SavedKGID = newR, newE, newS
+ creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
+ oldE := creds.EffectiveKGID
+ creds.RealKGID, creds.EffectiveKGID, creds.SavedKGID = newR, newE, newS
if oldE != newE {
// "[dumpability] is reset to the current value contained in
@@ -327,6 +333,7 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
// kernel/cred.c:commit_creds().
t.parentDeathSignal = 0
}
+ t.creds.Store(creds)
}
// SetExtraGIDs attempts to change t's supplemental groups. All IDs are
@@ -334,19 +341,21 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
func (t *Task) SetExtraGIDs(gids []auth.GID) error {
t.mu.Lock()
defer t.mu.Unlock()
- if !t.creds.HasCapability(linux.CAP_SETGID) {
+ creds := t.Credentials()
+ if !creds.HasCapability(linux.CAP_SETGID) {
return syserror.EPERM
}
kgids := make([]auth.KGID, len(gids))
for i, gid := range gids {
- kgid := t.creds.UserNamespace.MapToKGID(gid)
+ kgid := creds.UserNamespace.MapToKGID(gid)
if !kgid.Ok() {
return syserror.EINVAL
}
kgids[i] = kgid
}
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.ExtraKGIDs = kgids
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+ creds.ExtraKGIDs = kgids
+ t.creds.Store(creds)
return nil
}
@@ -360,27 +369,29 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili
if effective & ^permitted != 0 {
return syserror.EPERM
}
+ creds := t.Credentials()
// "It is also a limiting superset for the capabilities that may be added
// to the inheritable set by a thread that does not have the CAP_SETPCAP
// capability in its effective set."
- if !t.creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(t.creds.InheritableCaps|t.creds.PermittedCaps) != 0) {
+ if !creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(creds.InheritableCaps|creds.PermittedCaps) != 0) {
return syserror.EPERM
}
// "If a thread drops a capability from its permitted set, it can never
// reacquire that capability (unless it execve(2)s ..."
- if permitted & ^t.creds.PermittedCaps != 0 {
+ if permitted & ^creds.PermittedCaps != 0 {
return syserror.EPERM
}
// "... if a capability is not in the bounding set, then a thread can't add
// this capability to its inheritable set, even if it was in its permitted
// capabilities ..."
- if inheritable & ^(t.creds.InheritableCaps|t.creds.BoundingCaps) != 0 {
+ if inheritable & ^(creds.InheritableCaps|creds.BoundingCaps) != 0 {
return syserror.EPERM
}
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.PermittedCaps = permitted
- t.creds.InheritableCaps = inheritable
- t.creds.EffectiveCaps = effective
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+ creds.PermittedCaps = permitted
+ creds.InheritableCaps = inheritable
+ creds.EffectiveCaps = effective
+ t.creds.Store(creds)
return nil
}
@@ -389,11 +400,13 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili
func (t *Task) DropBoundingCapability(cp linux.Capability) error {
t.mu.Lock()
defer t.mu.Unlock()
- if !t.creds.HasCapability(linux.CAP_SETPCAP) {
+ creds := t.Credentials()
+ if !creds.HasCapability(linux.CAP_SETPCAP) {
return syserror.EPERM
}
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.BoundingCaps &^= auth.CapabilitySetOf(cp)
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+ creds.BoundingCaps &^= auth.CapabilitySetOf(cp)
+ t.creds.Store(creds)
return nil
}
@@ -402,31 +415,33 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error {
t.mu.Lock()
defer t.mu.Unlock()
+ creds := t.Credentials()
// "A process reassociating itself with a user namespace must have the
// CAP_SYS_ADMIN capability in the target user namespace." - setns(2)
//
// If t just created ns, then t.creds is guaranteed to have CAP_SYS_ADMIN
// in ns (by rule 3 in auth.Credentials.HasCapability).
- if !t.creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) {
+ if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) {
return syserror.EPERM
}
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.UserNamespace = ns
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+ creds.UserNamespace = ns
// "The child process created by clone(2) with the CLONE_NEWUSER flag
// starts out with a complete set of capabilities in the new user
// namespace. Likewise, a process that creates a new user namespace using
// unshare(2) or joins an existing user namespace using setns(2) gains a
// full set of capabilities in that namespace."
- t.creds.PermittedCaps = auth.AllCapabilities
- t.creds.InheritableCaps = 0
- t.creds.EffectiveCaps = auth.AllCapabilities
- t.creds.BoundingCaps = auth.AllCapabilities
+ creds.PermittedCaps = auth.AllCapabilities
+ creds.InheritableCaps = 0
+ creds.EffectiveCaps = auth.AllCapabilities
+ creds.BoundingCaps = auth.AllCapabilities
// "A call to clone(2), unshare(2), or setns(2) using the CLONE_NEWUSER
// flag sets the "securebits" flags (see capabilities(7)) to their default
// values (all flags disabled) in the child (for clone(2)) or caller (for
// unshare(2), or setns(2)." - user_namespaces(7)
- t.creds.KeepCaps = false
+ creds.KeepCaps = false
+ t.creds.Store(creds)
return nil
}
@@ -435,8 +450,9 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error {
func (t *Task) SetKeepCaps(k bool) {
t.mu.Lock()
defer t.mu.Unlock()
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.KeepCaps = k
+ creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
+ creds.KeepCaps = k
+ t.creds.Store(creds)
}
// updateCredsForExec updates t.creds to reflect an execve().
@@ -512,15 +528,16 @@ func (t *Task) updateCredsForExecLocked() {
// the effective user ID.
var newPermitted auth.CapabilitySet // since F(inheritable) == F(permitted) == 0
fileEffective := false
- root := t.creds.UserNamespace.MapToKUID(auth.RootUID)
- if t.creds.EffectiveKUID == root || t.creds.RealKUID == root {
- newPermitted = t.creds.InheritableCaps | t.creds.BoundingCaps
- if t.creds.EffectiveKUID == root {
+ creds := t.Credentials()
+ root := creds.UserNamespace.MapToKUID(auth.RootUID)
+ if creds.EffectiveKUID == root || creds.RealKUID == root {
+ newPermitted = creds.InheritableCaps | creds.BoundingCaps
+ if creds.EffectiveKUID == root {
fileEffective = true
}
}
- t.creds = t.creds.Fork() // See doc for creds.
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
// Now we enter poorly-documented, somewhat confusing territory. (The
// accompanying comment in Linux's security/commoncap.c:cap_bprm_set_creds
@@ -562,27 +579,28 @@ func (t *Task) updateCredsForExecLocked() {
// But since no_new_privs is always set (A3 is always true), this becomes
// much simpler. If B1 and B2 are false, C2 is a no-op. If B3 is false, C1
// is a no-op. So we can just do C1 and C2 unconditionally.
- if t.creds.EffectiveKUID != t.creds.RealKUID || t.creds.EffectiveKGID != t.creds.RealKGID {
- t.creds.EffectiveKUID = t.creds.RealKUID
- t.creds.EffectiveKGID = t.creds.RealKGID
+ if creds.EffectiveKUID != creds.RealKUID || creds.EffectiveKGID != creds.RealKGID {
+ creds.EffectiveKUID = creds.RealKUID
+ creds.EffectiveKGID = creds.RealKGID
t.parentDeathSignal = 0
}
// (Saved set-user-ID is always set to the new effective user ID, and saved
// set-group-ID is always set to the new effective group ID, regardless of
// the above.)
- t.creds.SavedKUID = t.creds.RealKUID
- t.creds.SavedKGID = t.creds.RealKGID
- t.creds.PermittedCaps &= newPermitted
+ creds.SavedKUID = creds.RealKUID
+ creds.SavedKGID = creds.RealKGID
+ creds.PermittedCaps &= newPermitted
if fileEffective {
- t.creds.EffectiveCaps = t.creds.PermittedCaps
+ creds.EffectiveCaps = creds.PermittedCaps
} else {
- t.creds.EffectiveCaps = 0
+ creds.EffectiveCaps = 0
}
// prctl(2): The "keep capabilities" value will be reset to 0 on subsequent
// calls to execve(2).
- t.creds.KeepCaps = false
+ creds.KeepCaps = false
// "The bounding set is inherited at fork(2) from the thread's parent, and
// is preserved across an execve(2)". So we're done.
+ t.creds.Store(creds)
}
diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go
index e0e57e8bd..a29e9b9eb 100644
--- a/pkg/sentry/kernel/task_log.go
+++ b/pkg/sentry/kernel/task_log.go
@@ -18,8 +18,8 @@ import (
"fmt"
"sort"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
const (
@@ -63,7 +63,7 @@ func (t *Task) DebugDumpState() {
if mm := t.MemoryManager(); mm != nil {
t.Debugf("Mappings:\n%s", mm)
}
- t.Debugf("FDMap:\n%s", t.fds)
+ t.Debugf("FDTable:\n%s", t.fdTable)
}
// debugDumpRegisters logs register state at log level debug.
diff --git a/pkg/sentry/kernel/task_net.go b/pkg/sentry/kernel/task_net.go
index 04c684c1a..172a31e1d 100644
--- a/pkg/sentry/kernel/task_net.go
+++ b/pkg/sentry/kernel/task_net.go
@@ -15,7 +15,7 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
)
// IsNetworkNamespaced returns true if t is in a non-root network namespace.
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index a79101a18..c92266c59 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -19,13 +19,13 @@ import (
"runtime"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// A taskRunState is a reified state in the task state machine. See README.md
diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go
index 1c94ab11b..e76c069b0 100644
--- a/pkg/sentry/kernel/task_sched.go
+++ b/pkg/sentry/kernel/task_sched.go
@@ -22,13 +22,13 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// TaskGoroutineState is a coarse representation of the current execution
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index 654cf7525..266959a07 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -21,13 +21,13 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SignalAction is an internal signal action.
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index b42531e57..a88bf3951 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -15,13 +15,13 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// TaskConfig defines the configuration of a new Task (see below).
@@ -52,9 +52,10 @@ type TaskConfig struct {
// succeeds.
FSContext *FSContext
- // FDMap is the FDMap of the new task. A reference must be held on FDMap,
- // which is transferred to TaskSet.NewTask whether or not it succeeds.
- FDMap *FDMap
+ // FDTable is the FDTable of the new task. A reference must be held on
+ // FDTable, which is transferred to TaskSet.NewTask whether or not it
+ // succeeds.
+ FDTable *FDTable
// Credentials is the Credentials of the new task.
Credentials *auth.Credentials
@@ -90,7 +91,7 @@ func (ts *TaskSet) NewTask(cfg *TaskConfig) (*Task, error) {
if err != nil {
cfg.TaskContext.release()
cfg.FSContext.DecRef()
- cfg.FDMap.DecRef()
+ cfg.FDTable.DecRef()
return nil, err
}
return t, nil
@@ -112,14 +113,13 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
signalMask: cfg.SignalMask,
signalStack: arch.SignalStack{Flags: arch.SignalStackFlagDisable},
tc: *tc,
- fsc: cfg.FSContext,
- fds: cfg.FDMap,
+ fsContext: cfg.FSContext,
+ fdTable: cfg.FDTable,
p: cfg.Kernel.Platform.NewContext(),
k: cfg.Kernel,
ptraceTracees: make(map[*Task]struct{}),
allowedCPUMask: cfg.AllowedCPUMask.Copy(),
ioUsage: &usage.IO{},
- creds: cfg.Credentials,
niceness: cfg.Niceness,
netns: cfg.NetworkNamespaced,
utsns: cfg.UTSNamespace,
@@ -129,6 +129,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
futexWaiter: futex.NewWaiter(),
containerID: cfg.ContainerID,
}
+ t.creds.Store(cfg.Credentials)
t.endStopCond.L = &t.tg.signalHandlers.mu
t.ptraceTracer.Store((*Task)(nil))
// We don't construct t.blockingTimer until Task.run(); see that function
diff --git a/pkg/sentry/kernel/task_stop.go b/pkg/sentry/kernel/task_stop.go
index e735a5dd0..10c6e455c 100644
--- a/pkg/sentry/kernel/task_stop.go
+++ b/pkg/sentry/kernel/task_stop.go
@@ -172,7 +172,7 @@ func (t *Task) beginStopLocked() {
}
}
-// endStopLocked decerements t.stopCount to indicate that an existing internal
+// endStopLocked decrements t.stopCount to indicate that an existing internal
// or external stop no longer applies to t.
//
// Preconditions: The signal mutex must be locked.
diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go
index a9283d0df..b543d536a 100644
--- a/pkg/sentry/kernel/task_syscall.go
+++ b/pkg/sentry/kernel/task_syscall.go
@@ -19,13 +19,13 @@ import (
"os"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bits"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SyscallRestartErrno represents an ERESTART* errno defined in the Linux kernel's
diff --git a/pkg/sentry/kernel/task_test.go b/pkg/sentry/kernel/task_test.go
index b895361d0..cfcde9a7a 100644
--- a/pkg/sentry/kernel/task_test.go
+++ b/pkg/sentry/kernel/task_test.go
@@ -17,7 +17,7 @@ package kernel
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
)
func TestTaskCPU(t *testing.T) {
diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go
index 461bd7316..518bfe1bd 100644
--- a/pkg/sentry/kernel/task_usermem.go
+++ b/pkg/sentry/kernel/task_usermem.go
@@ -17,9 +17,9 @@ package kernel
import (
"math"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// MAX_RW_COUNT is the maximum size in bytes of a single read or write.
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 8bd53928e..2a97e3e8e 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -18,10 +18,11 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
)
// A ThreadGroup is a logical grouping of tasks that has widespread
@@ -236,13 +237,21 @@ type ThreadGroup struct {
// rscr is the thread group's RSEQ critical region.
rscr atomic.Value `state:".(*RSEQCriticalRegion)"`
+
+ // mounts is the thread group's mount namespace. This does not really
+ // correspond to a "mount namespace" in Linux, but is more like a
+ // complete VFS that need not be shared between processes. See the
+ // comment in mounts.go for more information.
+ //
+ // mounts is immutable.
+ mounts *fs.MountNamespace
}
// newThreadGroup returns a new, empty thread group in PID namespace ns. The
// thread group leader will send its parent terminationSignal when it exits.
// The new thread group isn't visible to the system until a task has been
// created inside of it by a successful call to TaskSet.NewTask.
-func (k *Kernel) newThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminationSignal linux.Signal, limits *limits.LimitSet, monotonicClock *timekeeperClock) *ThreadGroup {
+func (k *Kernel) newThreadGroup(mounts *fs.MountNamespace, ns *PIDNamespace, sh *SignalHandlers, terminationSignal linux.Signal, limits *limits.LimitSet, monotonicClock *timekeeperClock) *ThreadGroup {
tg := &ThreadGroup{
threadGroupNode: threadGroupNode{
pidns: ns,
@@ -251,6 +260,7 @@ func (k *Kernel) newThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminatio
terminationSignal: terminationSignal,
ioUsage: &usage.IO{},
limits: limits,
+ mounts: mounts,
}
tg.itimerRealTimer = ktime.NewTimer(k.monotonicClock, &itimerRealListener{tg: tg})
tg.timers = make(map[linux.TimerID]*IntervalTimer)
@@ -258,7 +268,7 @@ func (k *Kernel) newThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminatio
return tg
}
-// saveRscr is invopked by stateify.
+// saveRscr is invoked by stateify.
func (tg *ThreadGroup) saveRscr() *RSEQCriticalRegion {
return tg.rscr.Load().(*RSEQCriticalRegion)
}
@@ -298,6 +308,7 @@ func (tg *ThreadGroup) release() {
for _, it := range its {
it.DestroyTimer()
}
+ tg.mounts.DecRef()
}
// forEachChildThreadGroupLocked iterates over all child ThreadGroups.
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index 656bbd46c..b21b182fc 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -18,8 +18,8 @@ import (
"fmt"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// TasksLimit is the maximum number of threads for untrusted application.
diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD
index 584f7c7cc..9beae4b31 100644
--- a/pkg/sentry/kernel/time/BUILD
+++ b/pkg/sentry/kernel/time/BUILD
@@ -8,7 +8,7 @@ go_library(
"context.go",
"time.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/time",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/time/context.go b/pkg/sentry/kernel/time/context.go
index c0660d362..8ef483dd3 100644
--- a/pkg/sentry/kernel/time/context.go
+++ b/pkg/sentry/kernel/time/context.go
@@ -15,7 +15,7 @@
package time
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the time package's type for context.Context.Value keys.
diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go
index 3846cf1ea..aa6c75d25 100644
--- a/pkg/sentry/kernel/time/time.go
+++ b/pkg/sentry/kernel/time/time.go
@@ -22,9 +22,9 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Events that may be generated by a Clock.
@@ -142,6 +142,11 @@ func (t Time) Timeval() linux.Timeval {
return linux.NsecToTimeval(t.Nanoseconds())
}
+// StatxTimestamp converts Time to a Linux statx_timestamp.
+func (t Time) StatxTimestamp() linux.StatxTimestamp {
+ return linux.NsecToStatxTimestamp(t.Nanoseconds())
+}
+
// Add adds the duration of d to t.
func (t Time) Add(d time.Duration) Time {
if t.ns > 0 && d.Nanoseconds() > math.MaxInt64-int64(t.ns) {
diff --git a/pkg/sentry/kernel/timekeeper.go b/pkg/sentry/kernel/timekeeper.go
index 505a4fa4f..76417342a 100644
--- a/pkg/sentry/kernel/timekeeper.go
+++ b/pkg/sentry/kernel/timekeeper.go
@@ -19,11 +19,11 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/log"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/log"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
)
// Timekeeper manages all of the kernel clocks.
@@ -122,7 +122,7 @@ func (t *Timekeeper) SetClocks(c sentrytime.Clocks) {
//
// In a restored sentry, monotonic time jumps forward by approximately
// the same amount as real time. There are no guarantees here, we are
- // just making a best-effort attempt to to make it appear that the app
+ // just making a best-effort attempt to make it appear that the app
// was simply not scheduled for a long period, rather than that the
// real time clock was changed.
//
diff --git a/pkg/sentry/kernel/timekeeper_state.go b/pkg/sentry/kernel/timekeeper_state.go
index 6ce358a05..8e961c832 100644
--- a/pkg/sentry/kernel/timekeeper_state.go
+++ b/pkg/sentry/kernel/timekeeper_state.go
@@ -15,7 +15,7 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/time"
)
// beforeSave is invoked by stateify.
diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go
index a92ad689e..849c5b646 100644
--- a/pkg/sentry/kernel/timekeeper_test.go
+++ b/pkg/sentry/kernel/timekeeper_test.go
@@ -17,12 +17,12 @@ package kernel
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// mockClocks is a sentrytime.Clocks that simply returns the times in the
diff --git a/pkg/sentry/kernel/uts_namespace.go b/pkg/sentry/kernel/uts_namespace.go
index 96fe3cbb9..0a563e715 100644
--- a/pkg/sentry/kernel/uts_namespace.go
+++ b/pkg/sentry/kernel/uts_namespace.go
@@ -17,7 +17,7 @@ package kernel
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
// UTSNamespace represents a UTS namespace, a holder of two system identifiers:
diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go
index d40ad74f4..fdd10c56c 100644
--- a/pkg/sentry/kernel/vdso.go
+++ b/pkg/sentry/kernel/vdso.go
@@ -17,11 +17,11 @@ package kernel
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// vdsoParams are the parameters exposed to the VDSO.