summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/device/device.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/device/device.go')
-rw-r--r--pkg/sentry/device/device.go266
1 files changed, 266 insertions, 0 deletions
diff --git a/pkg/sentry/device/device.go b/pkg/sentry/device/device.go
new file mode 100644
index 000000000..458d03b30
--- /dev/null
+++ b/pkg/sentry/device/device.go
@@ -0,0 +1,266 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package device defines reserved virtual kernel devices and structures
+// for managing them.
+package device
+
+import (
+ "bytes"
+ "fmt"
+ "sync"
+ "sync/atomic"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+)
+
+// Registry tracks all simple devices and related state on the system for
+// save/restore. Anonymous devices are added to it by newAnonDevice.
+//
+// The set of devices across save/restore must remain consistent: LoadFrom
+// panics unless the restored registry and the saved one hold exactly the
+// same device IDs. Practically, this means do not create new devices
+// specifically as part of restore.
+//
+// +stateify savable
+type Registry struct {
+ // lastAnonDeviceMinor is the last minor device number handed out by
+ // newAnonID. Must be accessed atomically; it is not guarded by mu.
+ lastAnonDeviceMinor uint64
+
+ // mu protects the fields below. It is excluded from saved state.
+ mu sync.Mutex `state:"nosave"`
+
+ devices map[ID]*Device // devices maps each registered device's ID to the device.
+}
+
+// SimpleDevices is the system-wide simple device registry, used by NewAnonDevice
+// and NewAnonMultiDevice. It is saved/restored by kernel.Kernel, but defined here
+// to avoid depending on the kernel package. See kernel.Kernel.deviceRegistry.
+var SimpleDevices = newRegistry()
+
+// newRegistry returns an empty Registry whose device map is allocated and
+// ready to accept registrations.
+func newRegistry() *Registry {
+	reg := new(Registry)
+	reg.devices = map[ID]*Device{}
+	return reg
+}
+
+// newAnonID assigns a major and minor number to an anonymous device ID.
+func (r *Registry) newAnonID() ID {
+ return ID{
+ // Anon devices always have a major number of 0.
+ Major: 0,
+ // Use the next minor number.
+ Minor: atomic.AddUint64(&r.lastAnonDeviceMinor, 1),
+ }
+}
+
+// newAnonDevice creates an anonymous device with a unique minor device number
+// and records it in r.devices under that ID. r.mu is held for the duration of
+// the ID allocation and the map insertion.
+func (r *Registry) newAnonDevice() *Device {
+	dev := new(Device)
+
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	dev.ID = r.newAnonID()
+	r.devices[dev.ID] = dev
+	return dev
+}
+
+// LoadFrom copies the saved state of every device in other into the matching
+// device in r, then adopts other's anonymous minor-number counter.
+//
+// Both registries must contain the same set of device IDs: LoadFrom panics if
+// the device counts differ or if a device in other has no counterpart in r.
+// Devices may not be created or destroyed across save/restore.
+//
+// r.mu is acquired before other.mu, and both are held until LoadFrom returns.
+func (r *Registry) LoadFrom(other *Registry) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	other.mu.Lock()
+	defer other.mu.Unlock()
+
+	if ours, theirs := len(r.devices), len(other.devices); ours != theirs {
+		panic(fmt.Sprintf("Devices were added or removed when restoring the registry:\nnew:\n%+v\nold:\n%+v", r.devices, other.devices))
+	}
+
+	for id, saved := range other.devices {
+		current, found := r.devices[id]
+		if found {
+			current.loadFrom(saved)
+			continue
+		}
+		panic(fmt.Sprintf("Device %+v could not be restored as it wasn't defined in the new registry", saved))
+	}
+
+	// Carry the counter over so new anonymous IDs continue after the saved ones.
+	savedMinor := atomic.LoadUint64(&other.lastAnonDeviceMinor)
+	atomic.StoreUint64(&r.lastAnonDeviceMinor, savedMinor)
+}
+
+// ID identifies a device by its major and minor numbers; Registry uses it as a map key.
+//
+// +stateify savable
+type ID struct {
+ Major uint64 // Major is the major device number; anonymous devices use 0.
+ Minor uint64 // Minor is the minor device number.
+}
+
+// DeviceID packs the major and minor numbers into a single standard device
+// number via linux.MakeDeviceID. Major is narrowed to 16 bits and Minor to 32
+// bits before packing.
+func (i *ID) DeviceID() uint64 {
+	major := uint16(i.Major)
+	minor := uint32(i.Minor)
+	return uint64(linux.MakeDeviceID(major, minor))
+}
+
+// NewAnonDevice allocates an anonymous device and registers it with
+// SimpleDevices. A package that needs one should create it once, in a global
+// variable declared in a file called device.go:
+//
+//	var myDevice = device.NewAnonDevice()
+func NewAnonDevice() *Device {
+	dev := SimpleDevices.newAnonDevice()
+	return dev
+}
+
+// NewAnonMultiDevice allocates a multi-keyed anonymous device whose ID is
+// drawn from SimpleDevices. Unlike NewAnonDevice, the result is not added to
+// the registry's device map. A package that needs one should create it once,
+// in a global variable declared in a file called device.go:
+//
+//	var myDevice = device.NewAnonMultiDevice()
+func NewAnonMultiDevice() *MultiDevice {
+	md := new(MultiDevice)
+	md.ID = SimpleDevices.newAnonID()
+	return md
+}
+
+// Device is a simple virtual kernel device that hands out inode numbers via NextIno.
+//
+// +stateify savable
+type Device struct {
+ ID
+
+ // last is the last inode number returned by NextIno. Must be accessed atomically.
+ last uint64
+}
+
+// loadFrom copies the inode counter of other into d. It panics if the two
+// devices do not carry the same ID.
+func (d *Device) loadFrom(other *Device) {
+	if d.ID == other.ID {
+		saved := atomic.LoadUint64(&other.last)
+		atomic.StoreUint64(&d.last, saved)
+		return
+	}
+	panic(fmt.Sprintf("Attempting to initialize a device %+v from %+v, but device IDs don't match", d, other))
+}
+
+// NextIno returns the next inode number for d by atomically incrementing the
+// device's counter. A freshly created device yields 1 on the first call.
+func (d *Device) NextIno() uint64 {
+	ino := atomic.AddUint64(&d.last, 1)
+	return ino
+}
+
+// MultiDeviceKey provides a hashable key for a MultiDevice. The key consists
+// of a raw device and inode for a resource, which must consistently identify
+// the unique resource. It may optionally include a secondary device if
+// appropriate.
+//
+// Note that using the path is not enough, because filesystems may rename a file
+// to a different backing resource, at which point the path points to a different
+// entity. Using only the inode is also not enough because the inode is assumed
+// to be unique only within the device on which the resource exists.
+type MultiDeviceKey struct {
+ Device uint64 // Device is the raw device on which the resource exists.
+ SecondaryDevice string // SecondaryDevice optionally names a secondary device.
+ Inode uint64 // Inode is the resource's inode within Device.
+}
+
+// String stringifies the key.
+func (m MultiDeviceKey) String() string {
+ return fmt.Sprintf("key{device: %d, sdevice: %s, inode: %d}", m.Device, m.SecondaryDevice, m.Inode)
+}
+
+// MultiDevice allows for remapping resources that come from a variety of raw
+// devices into a single device. Its device ID must not be reused by another
+// device; NewAnonMultiDevice allocates a fresh anonymous ID for this purpose.
+type MultiDevice struct {
+ ID
+
+ mu sync.Mutex // mu is held by Map and Load while they use the fields below.
+ last uint64 // last is the most recent virtual inode allocated by Map.
+ cache map[MultiDeviceKey]uint64 // cache maps a key to its virtual inode; created lazily.
+ rcache map[uint64]MultiDeviceKey // rcache is the reverse of cache: virtual inode to key.
+}
+
+// String stringifies MultiDevice.
+func (m *MultiDevice) String() string {
+ buf := bytes.NewBuffer(nil)
+ buf.WriteString("cache{")
+ for k, v := range m.cache {
+ buf.WriteString(fmt.Sprintf("%s -> %d, ", k, v))
+ }
+ buf.WriteString("}")
+ return buf.String()
+}
+
+// Map translates a raw device and inode, given as key, into the inode space
+// of the MultiDevice and returns the resulting virtual inode. Mapping the
+// same key again returns the same virtual inode. A new key receives the
+// first number after m.last that has not been reserved through Load.
+func (m *MultiDevice) Map(key MultiDeviceKey) uint64 {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	// Both maps are created together on first use.
+	if m.cache == nil {
+		m.cache = map[MultiDeviceKey]uint64{}
+		m.rcache = map[uint64]MultiDeviceKey{}
+	}
+
+	if ino, known := m.cache[key]; known {
+		return ino
+	}
+
+	// Advance past any virtual inodes that are already taken, e.g. ones that
+	// were reserved by Load.
+	next := m.last + 1
+	for _, reserved := m.rcache[next]; reserved; _, reserved = m.rcache[next] {
+		next++
+	}
+
+	// next is free: record it in both directions and remember it as the
+	// latest allocation.
+	m.last = next
+	m.cache[key] = next
+	m.rcache[next] = key
+	return next
+}
+
+// Load records a mapping from key to the virtual inode value, reserving
+// value so that Map will not hand it out to a different key.
+//
+// By design, inodes start from 1 and continue until max uint64. This means
+// that the zero value, which is often the uninitialized value, can be rejected
+// as invalid.
+//
+// Load returns false if value is 0 or if key is already mapped to a different
+// virtual inode, and true once the mapping is stored. It panics if value is
+// already reserved for a different key.
+func (m *MultiDevice) Load(key MultiDeviceKey, value uint64) bool {
+	if value == 0 {
+		// Zero is the uninitialized value and never a valid virtual inode.
+		return false
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	// Both maps are created together on first use.
+	if m.cache == nil {
+		m.cache = map[MultiDeviceKey]uint64{}
+		m.rcache = map[uint64]MultiDeviceKey{}
+	}
+
+	// A key that is already mapped may only be loaded again with the same value.
+	if existing, ok := m.cache[key]; ok && existing != value {
+		return false
+	}
+	// A value that is already reserved must belong to this very key.
+	if owner, ok := m.rcache[value]; ok && owner != key {
+		// Should never happen.
+		panic("MultiDevice's caches are inconsistent")
+	}
+
+	// Store the forward mapping, and the reverse one so that Map skips value.
+	m.cache[key], m.rcache[value] = value, key
+	return true
+}