summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/sentry/device/BUILD4
-rw-r--r--pkg/sentry/device/device.go111
-rw-r--r--pkg/sentry/kernel/BUILD2
-rw-r--r--pkg/sentry/kernel/kernel.go3
-rw-r--r--pkg/sentry/kernel/kernel_state.go11
-rw-r--r--test/syscalls/linux/stat.cc33
6 files changed, 139 insertions, 25 deletions
diff --git a/pkg/sentry/device/BUILD b/pkg/sentry/device/BUILD
index 01de708d3..4ccf0674d 100644
--- a/pkg/sentry/device/BUILD
+++ b/pkg/sentry/device/BUILD
@@ -1,7 +1,7 @@
-load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
-
package(licenses = ["notice"])
+load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
+
go_library(
name = "device",
srcs = ["device.go"],
diff --git a/pkg/sentry/device/device.go b/pkg/sentry/device/device.go
index 27e4eb258..ae4fa1d93 100644
--- a/pkg/sentry/device/device.go
+++ b/pkg/sentry/device/device.go
@@ -14,11 +14,6 @@
// Package device defines reserved virtual kernel devices and structures
// for managing them.
-//
-// Saving and restoring devices is not necessary if the devices are initialized
-// as package global variables. Package initialization happens in a single goroutine
-// and in a deterministic order, so minor device numbers will be assigned in the
-// same order as packages are loaded.
package device
import (
@@ -30,7 +25,83 @@ import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
)
+// Registry tracks all simple devices and related state on the system for
+// save/restore.
+//
+// The set of devices across save/restore must remain consistent. That is, no
+// devices may be created or removed on restore relative to the saved
+// system. Practically, this means do not create new devices specifically as
+// part of restore.
+//
+// +stateify savable
+type Registry struct {
+ // lastAnonDeviceMinor is the last minor device number used for an anonymous
+ // device. Must be accessed atomically.
+ lastAnonDeviceMinor uint64
+
+ // mu protects the fields below.
+ mu sync.Mutex `state:"nosave"`
+
+ devices map[ID]*Device
+}
+
+// SimpleDevices is the system-wide simple device registry. This is
+// saved/restored by kernel.Kernel, but defined here to allow access without
+// depending on the kernel package. See kernel.Kernel.deviceRegistry.
+var SimpleDevices = newRegistry()
+
+func newRegistry() *Registry {
+ return &Registry{
+ devices: make(map[ID]*Device),
+ }
+}
+
+// newAnonID assigns a major and minor number to an anonymous device ID.
+func (r *Registry) newAnonID() ID {
+ return ID{
+ // Anon devices always have a major number of 0.
+ Major: 0,
+ // Use the next minor number.
+ Minor: atomic.AddUint64(&r.lastAnonDeviceMinor, 1),
+ }
+}
+
+// newAnonDevice allocates a new anonymous device with a unique minor device
+// number, and registers it with r.
+func (r *Registry) newAnonDevice() *Device {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ d := &Device{
+ ID: r.newAnonID(),
+ }
+ r.devices[d.ID] = d
+ return d
+}
+
+// LoadFrom initializes the internal state of all devices in r from other. The
+// set of devices in both registries must match. Devices may not be created or
+// destroyed across save/restore.
+func (r *Registry) LoadFrom(other *Registry) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ other.mu.Lock()
+ defer other.mu.Unlock()
+ if len(r.devices) != len(other.devices) {
+ panic(fmt.Sprintf("Devices were added or removed when restoring the registry:\nnew:\n%+v\nold:\n%+v", r.devices, other.devices))
+ }
+ for id, otherD := range other.devices {
+ ourD, ok := r.devices[id]
+ if !ok {
+ panic(fmt.Sprintf("Device %+v could not be restored as it wasn't defined in the new registry", otherD))
+ }
+ ourD.loadFrom(otherD)
+ }
+ atomic.StoreUint64(&r.lastAnonDeviceMinor, atomic.LoadUint64(&other.lastAnonDeviceMinor))
+}
+
// ID identifies a device.
+//
+// +stateify savable
type ID struct {
Major uint64
Minor uint64
@@ -41,18 +112,12 @@ func (i *ID) DeviceID() uint64 {
return uint64(linux.MakeDeviceID(uint16(i.Major), uint32(i.Minor)))
}
-// nextAnonDeviceMinor is the next minor number for a new anonymous device.
-// Must be accessed atomically.
-var nextAnonDeviceMinor uint64
-
// NewAnonDevice creates a new anonymous device. Packages that require an anonymous
// device should initialize the device in a global variable in a file called device.go:
//
// var myDevice = device.NewAnonDevice()
func NewAnonDevice() *Device {
- return &Device{
- ID: newAnonID(),
- }
+ return SimpleDevices.newAnonDevice()
}
// NewAnonMultiDevice creates a new multi-keyed anonymous device. Packages that require
@@ -62,21 +127,13 @@ func NewAnonDevice() *Device {
// var myDevice = device.NewAnonMultiDevice()
func NewAnonMultiDevice() *MultiDevice {
return &MultiDevice{
- ID: newAnonID(),
- }
-}
-
-// newAnonID assigns a major and minor number to an anonymous device ID.
-func newAnonID() ID {
- return ID{
- // Anon devices always have a major number of 0.
- Major: 0,
- // Use the next minor number.
- Minor: atomic.AddUint64(&nextAnonDeviceMinor, 1),
+ ID: SimpleDevices.newAnonID(),
}
}
// Device is a simple virtual kernel device.
+//
+// +stateify savable
type Device struct {
ID
@@ -84,6 +141,14 @@ type Device struct {
last uint64
}
+// loadFrom initializes d from other. The IDs of both devices must match.
+func (d *Device) loadFrom(other *Device) {
+ if d.ID != other.ID {
+ panic(fmt.Sprintf("Attempting to initialize a device %+v from %+v, but device IDs don't match", d, other))
+ }
+ atomic.StoreUint64(&d.last, atomic.LoadUint64(&other.last))
+}
+
// NextIno generates a new inode number
func (d *Device) NextIno() uint64 {
return atomic.AddUint64(&d.last, 1)
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 4d34bc733..99a2fd964 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -137,6 +137,7 @@ go_library(
importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel",
imports = [
"gvisor.googlesource.com/gvisor/pkg/bpf",
+ "gvisor.googlesource.com/gvisor/pkg/sentry/device",
"gvisor.googlesource.com/gvisor/pkg/tcpip",
],
visibility = ["//:sandbox"],
@@ -156,6 +157,7 @@ go_library(
"//pkg/secio",
"//pkg/sentry/arch",
"//pkg/sentry/context",
+ "//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/lock",
"//pkg/sentry/fs/timerfd",
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index f5cbd6c23..f7f471aaa 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -185,6 +185,9 @@ type Kernel struct {
// socketTable is used to track all sockets on the system. Protected by
// extMu.
socketTable map[int]map[*refs.WeakRef]struct{}
+
+ // deviceRegistry is used to save/restore device.SimpleDevices.
+ deviceRegistry struct{} `state:".(*device.Registry)"`
}
// InitKernelArgs holds arguments to Init.
diff --git a/pkg/sentry/kernel/kernel_state.go b/pkg/sentry/kernel/kernel_state.go
index a0a69b498..aae6f9ad2 100644
--- a/pkg/sentry/kernel/kernel_state.go
+++ b/pkg/sentry/kernel/kernel_state.go
@@ -15,6 +15,7 @@
package kernel
import (
+ "gvisor.googlesource.com/gvisor/pkg/sentry/device"
"gvisor.googlesource.com/gvisor/pkg/tcpip"
)
@@ -29,3 +30,13 @@ func (k *Kernel) loadDanglingEndpoints(es []tcpip.Endpoint) {
tcpip.AddDanglingEndpoint(e)
}
}
+
+// saveDeviceRegistry is invoked by stateify.
+func (k *Kernel) saveDeviceRegistry() *device.Registry {
+ return device.SimpleDevices
+}
+
+// loadDeviceRegistry is invoked by stateify.
+func (k *Kernel) loadDeviceRegistry(r *device.Registry) {
+ device.SimpleDevices.LoadFrom(r)
+}
diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc
index f96da5706..553fb7e56 100644
--- a/test/syscalls/linux/stat.cc
+++ b/test/syscalls/linux/stat.cc
@@ -429,6 +429,39 @@ TEST_F(StatTest, LstatELOOPPath) {
ASSERT_THAT(lstat(path.c_str(), &s), SyscallFailsWithErrno(ELOOP));
}
+// Ensure that inode allocation for anonymous devices work correctly across
+// save/restore. In particular, inode numbers should be unique across S/R.
+TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) {
+ // Use sockets as a convenient way to create inodes on an anonymous device.
+ int fd;
+ ASSERT_THAT(fd = socket(AF_UNIX, SOCK_STREAM, 0), SyscallSucceeds());
+ FileDescriptor fd1(fd);
+ MaybeSave();
+ ASSERT_THAT(fd = socket(AF_UNIX, SOCK_STREAM, 0), SyscallSucceeds());
+ FileDescriptor fd2(fd);
+
+ struct stat st1;
+ struct stat st2;
+ ASSERT_THAT(fstat(fd1.get(), &st1), SyscallSucceeds());
+ ASSERT_THAT(fstat(fd2.get(), &st2), SyscallSucceeds());
+
+ // The two fds should have different inode numbers. Specifically, since fd2
+ // was created later, it should have a higher inode number.
+ EXPECT_GT(st2.st_ino, st1.st_ino);
+
+ // Verify again after another S/R cycle. The inode numbers should remain the
+ // same.
+ MaybeSave();
+
+ struct stat st1_after;
+ struct stat st2_after;
+ ASSERT_THAT(fstat(fd1.get(), &st1_after), SyscallSucceeds());
+ ASSERT_THAT(fstat(fd2.get(), &st2_after), SyscallSucceeds());
+
+ EXPECT_EQ(st1_after.st_ino, st1.st_ino);
+ EXPECT_EQ(st2_after.st_ino, st2.st_ino);
+}
+
} // namespace
} // namespace testing