// Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package device defines reserved virtual kernel devices and structures // for managing them. package device import ( "bytes" "fmt" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sync" ) // Registry tracks all simple devices and related state on the system for // save/restore. // // The set of devices across save/restore must remain consistent. That is, no // devices may be created or removed on restore relative to the saved // system. Practically, this means do not create new devices specifically as // part of restore. // // +stateify savable type Registry struct { // lastAnonDeviceMinor is the last minor device number used for an anonymous // device. Must be accessed atomically. lastAnonDeviceMinor uint64 // mu protects the fields below. mu sync.Mutex `state:"nosave"` devices map[ID]*Device } // SimpleDevices is the system-wide simple device registry. This is // saved/restored by kernel.Kernel, but defined here to allow access without // depending on the kernel package. See kernel.Kernel.deviceRegistry. var SimpleDevices = newRegistry() func newRegistry() *Registry { return &Registry{ devices: make(map[ID]*Device), } } // newAnonID assigns a major and minor number to an anonymous device ID. func (r *Registry) newAnonID() ID { return ID{ // Anon devices always have a major number of 0. Major: 0, // Use the next minor number. Minor: atomic.AddUint64(&r.lastAnonDeviceMinor, 1), } } // newAnonDevice allocates a new anonymous device with a unique minor device // number, and registers it with r. func (r *Registry) newAnonDevice() *Device { r.mu.Lock() defer r.mu.Unlock() d := &Device{ ID: r.newAnonID(), } r.devices[d.ID] = d return d } // LoadFrom initializes the internal state of all devices in r from other. The // set of devices in both registries must match. Devices may not be created or // destroyed across save/restore. func (r *Registry) LoadFrom(other *Registry) { r.mu.Lock() defer r.mu.Unlock() other.mu.Lock() defer other.mu.Unlock() if len(r.devices) != len(other.devices) { panic(fmt.Sprintf("Devices were added or removed when restoring the registry:\nnew:\n%+v\nold:\n%+v", r.devices, other.devices)) } for id, otherD := range other.devices { ourD, ok := r.devices[id] if !ok { panic(fmt.Sprintf("Device %+v could not be restored as it wasn't defined in the new registry", otherD)) } ourD.loadFrom(otherD) } atomic.StoreUint64(&r.lastAnonDeviceMinor, atomic.LoadUint64(&other.lastAnonDeviceMinor)) } // ID identifies a device. // // +stateify savable type ID struct { Major uint64 Minor uint64 } // DeviceID formats a major and minor device number into a standard device number. func (i *ID) DeviceID() uint64 { return uint64(linux.MakeDeviceID(uint16(i.Major), uint32(i.Minor))) } // NewAnonDevice creates a new anonymous device. Packages that require an anonymous // device should initialize the device in a global variable in a file called device.go: // // var myDevice = device.NewAnonDevice() func NewAnonDevice() *Device { return SimpleDevices.newAnonDevice() } // NewAnonMultiDevice creates a new multi-keyed anonymous device. Packages that require // a multi-key anonymous device should initialize the device in a global variable in a // file called device.go: // // var myDevice = device.NewAnonMultiDevice() func NewAnonMultiDevice() *MultiDevice { return &MultiDevice{ ID: SimpleDevices.newAnonID(), } } // Device is a simple virtual kernel device. // // +stateify savable type Device struct { ID // last is the last generated inode. last uint64 } // loadFrom initializes d from other. The IDs of both devices must match. func (d *Device) loadFrom(other *Device) { if d.ID != other.ID { panic(fmt.Sprintf("Attempting to initialize a device %+v from %+v, but device IDs don't match", d, other)) } atomic.StoreUint64(&d.last, atomic.LoadUint64(&other.last)) } // NextIno generates a new inode number func (d *Device) NextIno() uint64 { return atomic.AddUint64(&d.last, 1) } // MultiDeviceKey provides a hashable key for a MultiDevice. The key consists // of a raw device and inode for a resource, which must consistently identify // the unique resource. It may optionally include a secondary device if // appropriate. // // Note that using the path is not enough, because filesystems may rename a file // to a different backing resource, at which point the path points to a different // entity. Using only the inode is also not enough because the inode is assumed // to be unique only within the device on which the resource exists. type MultiDeviceKey struct { Device uint64 SecondaryDevice string Inode uint64 } // String stringifies the key. func (m MultiDeviceKey) String() string { return fmt.Sprintf("key{device: %d, sdevice: %s, inode: %d}", m.Device, m.SecondaryDevice, m.Inode) } // MultiDevice allows for remapping resources that come from a variety of raw // devices into a single device. The device ID should be one of the static // Device IDs above and cannot be reused. type MultiDevice struct { ID mu sync.Mutex last uint64 cache map[MultiDeviceKey]uint64 rcache map[uint64]MultiDeviceKey } // String stringifies MultiDevice. func (m *MultiDevice) String() string { m.mu.Lock() defer m.mu.Unlock() buf := bytes.NewBuffer(nil) buf.WriteString("cache{") for k, v := range m.cache { buf.WriteString(fmt.Sprintf("%s -> %d, ", k, v)) } buf.WriteString("}") return buf.String() } // Map maps a raw device and inode into the inode space of MultiDevice, // returning a virtualized inode. Raw devices and inodes can be reused; // in this case, the same virtual inode will be returned. func (m *MultiDevice) Map(key MultiDeviceKey) uint64 { m.mu.Lock() defer m.mu.Unlock() if m.cache == nil { m.cache = make(map[MultiDeviceKey]uint64) m.rcache = make(map[uint64]MultiDeviceKey) } id, ok := m.cache[key] if ok { return id } // Step over reserved entries that may have been loaded. idx := m.last + 1 for { if _, ok := m.rcache[idx]; !ok { break } idx++ } // We found a non-reserved entry, use it. m.last = idx m.cache[key] = m.last m.rcache[m.last] = key return m.last } // Load loads a raw device and inode into MultiDevice inode mappings // with value as the virtual inode. // // By design, inodes start from 1 and continue until max uint64. This means // that the zero value, which is often the uninitialized value, can be rejected // as invalid. func (m *MultiDevice) Load(key MultiDeviceKey, value uint64) bool { // Reject the uninitialized value; see comment above. if value == 0 { return false } m.mu.Lock() defer m.mu.Unlock() if m.cache == nil { m.cache = make(map[MultiDeviceKey]uint64) m.rcache = make(map[uint64]MultiDeviceKey) } if val, exists := m.cache[key]; exists && val != value { return false } if k, exists := m.rcache[value]; exists && k != key { // Should never happen. panic("MultiDevice's caches are inconsistent") } // Cache value at key. m.cache[key] = value // Prevent value from being used by new inode mappings. m.rcache[value] = key return true }