diff options
Diffstat (limited to 'pkg/sentry/fs/mount.go')
-rw-r--r-- | pkg/sentry/fs/mount.go | 298 |
1 files changed, 298 insertions, 0 deletions
diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go new file mode 100644 index 000000000..a2943b097 --- /dev/null +++ b/pkg/sentry/fs/mount.go @@ -0,0 +1,298 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "bytes" + "fmt" + "sync" + "sync/atomic" + + "gvisor.googlesource.com/gvisor/pkg/refs" +) + +// DirentOperations provide file systems greater control over how long a Dirent stays pinned +// in core. Implementations must not take Dirent.mu. +type DirentOperations interface { + // Revalidate returns true if the Dirent is stale and its InodeOperations needs to be reloaded. Revalidate + // will never be called on a Dirent that is mounted. + Revalidate(dirent *Dirent) bool + + // Keep returns true if the Dirent should be kept in memory for as long as possible + // beyond any active references. + Keep(dirent *Dirent) bool +} + +// MountSourceOperations contains filesystem specific operations. +type MountSourceOperations interface { + // TODO: Add: + // + // StatFS() (Info, error) + // BlockSize() int64 + // FS() Filesystem + + // DirentOperations provide optional extra management of Dirents. + DirentOperations + + // Destroy destroys the MountSource. + Destroy() + + // Below are MountSourceOperations that do not conform to Linux. + + // ResetInodeMappings clears all mappings of Inodes before SaveInodeMapping + // is called. + ResetInodeMappings() + + // SaveInodeMappings is called during saving to store, for each reachable + // Inode in the mounted filesystem, a mapping of Inode.StableAttr.InodeID + // to the Inode's path relative to its mount point. If an Inode is + // reachable at more than one path due to hard links, it is unspecified + // which path is mapped. Filesystems that do not use this information to + // restore inodes can make SaveInodeMappings a no-op. + SaveInodeMapping(inode *Inode, path string) +} + +// InodeMappings defines a fmt.Stringer MountSource Inode mappings. +type InodeMappings map[uint64]string + +// String implements fmt.Stringer.String. +func (i InodeMappings) String() string { + var mappingsBuf bytes.Buffer + mappingsBuf.WriteString("\n") + for ino, name := range i { + mappingsBuf.WriteString(fmt.Sprintf("\t%q\t\tinode number %d\n", name, ino)) + } + return mappingsBuf.String() +} + +// MountSource represents a source of file objects. +// +// MountSource corresponds to struct super_block in Linux. +// +// A mount source may represent a physical device (or a partition of a physical +// device) or a virtual source of files such as procfs for a specific PID +// namespace. There should be only one mount source per logical device. E.g. +// there should be only procfs mount source for a given PID namespace. +// +// A mount source represents files as inodes. Every inode belongs to exactly +// one mount source. Each file object may only be represented using one inode +// object in a sentry instance. +// +// This is an amalgamation of structs super_block, vfsmount, and mount, while +// MountSourceOperations is akin to struct super_operations. +// +// Hence, mount source also contains common mounted file system state, such as +// mount flags, the root Dirent, and children mounts. For now, this +// amalgamation implies that a mount source cannot be shared by multiple mounts +// (e.g. cannot be mounted at different locations). +// +// TODO: Move mount-specific information out of MountSource. +type MountSource struct { + refs.AtomicRefCount + + // MountSourceOperations defines filesystem specific behavior. + MountSourceOperations + + // Filesystem is the filesystem backing the mount. Can be nil if there + // is no filesystem backing the mount. + Filesystem Filesystem + + // Flags are the flags that this filesystem was mounted with. + Flags MountSourceFlags + + // fscache keeps Dirents pinned beyond application references to them. + // It must be flushed before kernel.SaveTo. + fscache *DirentCache `state:"nosave"` + + // direntRefs is the sum of references on all Dirents in this MountSource. + // + // direntRefs is increased when a Dirent in MountSource is IncRef'd, and + // decreased when a Dirent in MountSource is DecRef'd. + // + // To cleanly unmount a MountSource, one must check that no direntRefs are + // held anymore. To check, one must hold root.parent.dirMu of the + // MountSource's root Dirent before reading direntRefs to prevent further + // walks to Dirents in this MountSource. + // + // direntRefs must be atomically changed. + direntRefs uint64 + + // mu protects the fields below, which are set by the MountNamespace + // during MountSource/Unmount. + mu sync.Mutex `state:"nosave"` + + // id is a unique id for this mount. + id uint64 + + // root is the root Dirent of this mount. + root *Dirent + + // parent is the parent MountSource, or nil if this MountSource is the root. + parent *MountSource + + // children are the child MountSources of this MountSource. + children map[*MountSource]struct{} +} + +// defaultDirentCacheSize is the number of Dirents that the VFS can hold an extra +// reference on. +const defaultDirentCacheSize uint64 = 1000 + +// NewMountSource returns a new MountSource. Filesystem may be nil if there is no +// filesystem backing the mount. +func NewMountSource(mops MountSourceOperations, filesystem Filesystem, flags MountSourceFlags) *MountSource { + return &MountSource{ + MountSourceOperations: mops, + Flags: flags, + Filesystem: filesystem, + fscache: NewDirentCache(defaultDirentCacheSize), + children: make(map[*MountSource]struct{}), + } +} + +// Parent returns the parent mount, or nil if this mount is the root. +func (msrc *MountSource) Parent() *MountSource { + msrc.mu.Lock() + defer msrc.mu.Unlock() + return msrc.parent +} + +// ID returns the ID of this mount. +func (msrc *MountSource) ID() uint64 { + msrc.mu.Lock() + defer msrc.mu.Unlock() + return msrc.id +} + +// Children returns the (immediate) children of this MountSource. +func (msrc *MountSource) Children() []*MountSource { + msrc.mu.Lock() + defer msrc.mu.Unlock() + + ms := make([]*MountSource, 0, len(msrc.children)) + for c := range msrc.children { + ms = append(ms, c) + } + return ms +} + +// Submounts returns all mounts that are descendants of this mount. +func (msrc *MountSource) Submounts() []*MountSource { + var ms []*MountSource + for _, c := range msrc.Children() { + ms = append(ms, c) + ms = append(ms, c.Submounts()...) + } + return ms +} + +// Root returns the root dirent of this mount. +func (msrc *MountSource) Root() *Dirent { + msrc.mu.Lock() + defer msrc.mu.Unlock() + return msrc.root +} + +// DirentRefs returns the current mount direntRefs. +func (msrc *MountSource) DirentRefs() uint64 { + return atomic.LoadUint64(&msrc.direntRefs) +} + +// IncDirentRefs increases direntRefs. +func (msrc *MountSource) IncDirentRefs() { + atomic.AddUint64(&msrc.direntRefs, 1) +} + +// DecDirentRefs decrements direntRefs. +func (msrc *MountSource) DecDirentRefs() { + if atomic.AddUint64(&msrc.direntRefs, ^uint64(0)) == ^uint64(0) { + panic("Decremented zero mount reference direntRefs") + } +} + +func (msrc *MountSource) destroy() { + if c := msrc.DirentRefs(); c != 0 { + panic(fmt.Sprintf("MountSource with non-zero direntRefs is being destroyed: %d", c)) + } + msrc.MountSourceOperations.Destroy() +} + +// DecRef drops a reference on the MountSource. +func (msrc *MountSource) DecRef() { + msrc.DecRefWithDestructor(msrc.destroy) +} + +// FlushDirentRefs drops all references held by the MountSource on Dirents. +func (msrc *MountSource) FlushDirentRefs() { + msrc.fscache.Invalidate() +} + +// NewCachingMountSource returns a generic mount that will cache dirents +// aggressively. Filesystem may be nil if there is no backing filesystem. +func NewCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource { + return NewMountSource(&SimpleMountSourceOperations{ + keep: true, + }, filesystem, flags) +} + +// NewNonCachingMountSource returns a generic mount that will never cache dirents. +// Filesystem may be nil if there is no backing filesystem. +func NewNonCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource { + return NewMountSource(&SimpleMountSourceOperations{ + keep: false, + }, filesystem, flags) +} + +// SimpleMountSourceOperations implements MountSourceOperations. +type SimpleMountSourceOperations struct { + keep bool +} + +// Revalidate implements MountSourceOperations.Revalidate. +func (*SimpleMountSourceOperations) Revalidate(*Dirent) bool { + return false +} + +// Keep implements MountSourceOperations.Keep. +func (smo *SimpleMountSourceOperations) Keep(*Dirent) bool { + return smo.keep +} + +// ResetInodeMappings implements MountSourceOperations.ResetInodeMappings. +func (*SimpleMountSourceOperations) ResetInodeMappings() {} + +// SaveInodeMapping implements MountSourceOperations.SaveInodeMapping. +func (*SimpleMountSourceOperations) SaveInodeMapping(*Inode, string) {} + +// Destroy implements MountSourceOperations.Destroy. +func (*SimpleMountSourceOperations) Destroy() {} + +// Info defines attributes of a filesystem. +type Info struct { + // Type is the filesystem type magic value. + Type uint64 + + // TotalBlocks is the total data blocks in the filesystem. + TotalBlocks uint64 + + // FreeBlocks is the number of free blocks available. + FreeBlocks uint64 + + // TotalFiles is the total file nodes in the filesystem. + TotalFiles uint64 + + // FreeFiles is the number of free file nodes. + FreeFiles uint64 +} |