diff options
Diffstat (limited to 'pkg/sentry/vfs')
-rw-r--r-- | pkg/sentry/vfs/BUILD | 37 | ||||
-rw-r--r-- | pkg/sentry/vfs/README.md | 9 | ||||
-rw-r--r-- | pkg/sentry/vfs/file_description.go | 47 | ||||
-rw-r--r-- | pkg/sentry/vfs/filesystem.go | 37 | ||||
-rw-r--r-- | pkg/sentry/vfs/g3doc/inotify.md | 18 | ||||
-rw-r--r-- | pkg/sentry/vfs/mount.go | 21 | ||||
-rw-r--r-- | pkg/sentry/vfs/mount_unsafe.go | 2 |
7 files changed, 64 insertions, 107 deletions
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 642769e7c..8093ca55c 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -27,6 +27,39 @@ go_template_instance( }, ) +go_template_instance( + name = "file_description_refs", + out = "file_description_refs.go", + package = "vfs", + prefix = "FileDescription", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "FileDescription", + }, +) + +go_template_instance( + name = "mount_namespace_refs", + out = "mount_namespace_refs.go", + package = "vfs", + prefix = "MountNamespace", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "MountNamespace", + }, +) + +go_template_instance( + name = "filesystem_refs", + out = "filesystem_refs.go", + package = "vfs", + prefix = "Filesystem", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "Filesystem", + }, +) + go_library( name = "vfs", srcs = [ @@ -40,12 +73,15 @@ go_library( "event_list.go", "file_description.go", "file_description_impl_util.go", + "file_description_refs.go", "filesystem.go", "filesystem_impl_util.go", + "filesystem_refs.go", "filesystem_type.go", "inotify.go", "lock.go", "mount.go", + "mount_namespace_refs.go", "mount_unsafe.go", "options.go", "pathname.go", @@ -63,6 +99,7 @@ go_library( "//pkg/fspath", "//pkg/gohacks", "//pkg/log", + "//pkg/refs", "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/fs", diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md index 4b9faf2ea..5aad31b78 100644 --- a/pkg/sentry/vfs/README.md +++ b/pkg/sentry/vfs/README.md @@ -184,12 +184,3 @@ This construction, which is essentially a type-safe analogue to Linux's - File locking - `O_ASYNC` - -- Reference counts in the `vfs` package do not use the `refs` package since - `refs.AtomicRefCount` adds 64 bytes of overhead to each 8-byte reference - count, resulting in considerable cache bloat. 24 bytes of this overhead is - for weak reference support, which have poor performance and will not be used - by VFS2. The remaining 40 bytes is to store a descriptive string and stack - trace for reference leak checking; we can support reference leak checking - without incurring this space overhead by including the applicable - information directly in finalizers for applicable types. diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 33910e095..22a54fa48 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -38,9 +38,7 @@ import ( // // FileDescription is analogous to Linux's struct file. type FileDescription struct { - // refs is the reference count. refs is accessed using atomic memory - // operations. - refs int64 + FileDescriptionRefs // flagsMu protects statusFlags and asyncHandler below. flagsMu sync.Mutex @@ -131,7 +129,7 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou } } - fd.refs = 1 + fd.EnableLeakCheck() // Remove "file creation flags" to mirror the behavior from file.f_flags in // fs/open.c:do_dentry_open. @@ -149,30 +147,9 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou return nil } -// IncRef increments fd's reference count. -func (fd *FileDescription) IncRef() { - atomic.AddInt64(&fd.refs, 1) -} - -// TryIncRef increments fd's reference count and returns true. If fd's -// reference count is already zero, TryIncRef does nothing and returns false. -// -// TryIncRef does not require that a reference is held on fd. -func (fd *FileDescription) TryIncRef() bool { - for { - refs := atomic.LoadInt64(&fd.refs) - if refs <= 0 { - return false - } - if atomic.CompareAndSwapInt64(&fd.refs, refs, refs+1) { - return true - } - } -} - // DecRef decrements fd's reference count. func (fd *FileDescription) DecRef(ctx context.Context) { - if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 { + fd.FileDescriptionRefs.DecRef(func() { // Unregister fd from all epoll instances. fd.epollMu.Lock() epolls := fd.epolls @@ -208,15 +185,7 @@ func (fd *FileDescription) DecRef(ctx context.Context) { } fd.asyncHandler = nil fd.flagsMu.Unlock() - } else if refs < 0 { - panic("FileDescription.DecRef() called without holding a reference") - } -} - -// Refs returns the current number of references. The returned count -// is inherently racy and is unsafe to use without external synchronization. -func (fd *FileDescription) Refs() int64 { - return atomic.LoadInt64(&fd.refs) + }) } // Mount returns the mount on which fd was opened. It does not take a reference @@ -851,7 +820,7 @@ func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) FileAsyn // FileReadWriteSeeker is a helper struct to pass a FileDescription as // io.Reader/io.Writer/io.ReadSeeker/etc. type FileReadWriteSeeker struct { - Fd *FileDescription + FD *FileDescription Ctx context.Context ROpts ReadOptions WOpts WriteOptions @@ -860,18 +829,18 @@ type FileReadWriteSeeker struct { // Read implements io.ReadWriteSeeker.Read. func (f *FileReadWriteSeeker) Read(p []byte) (int, error) { dst := usermem.BytesIOSequence(p) - ret, err := f.Fd.Read(f.Ctx, dst, f.ROpts) + ret, err := f.FD.Read(f.Ctx, dst, f.ROpts) return int(ret), err } // Seek implements io.ReadWriteSeeker.Seek. func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) { - return f.Fd.Seek(f.Ctx, offset, int32(whence)) + return f.FD.Seek(f.Ctx, offset, int32(whence)) } // Write implements io.ReadWriteSeeker.Write. func (f *FileReadWriteSeeker) Write(p []byte) (int, error) { buf := usermem.BytesIOSequence(p) - ret, err := f.Fd.Write(f.Ctx, buf, f.WOpts) + ret, err := f.FD.Write(f.Ctx, buf, f.WOpts) return int(ret), err } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index 2c60cfab2..46851f638 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -15,8 +15,6 @@ package vfs import ( - "sync/atomic" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" @@ -34,9 +32,7 @@ import ( // // +stateify savable type Filesystem struct { - // refs is the reference count. refs is accessed using atomic memory - // operations. - refs int64 + FilesystemRefs // vfs is the VirtualFilesystem that uses this Filesystem. vfs is // immutable. @@ -52,7 +48,7 @@ type Filesystem struct { // Init must be called before first use of fs. func (fs *Filesystem) Init(vfsObj *VirtualFilesystem, fsType FilesystemType, impl FilesystemImpl) { - fs.refs = 1 + fs.EnableLeakCheck() fs.vfs = vfsObj fs.fsType = fsType fs.impl = impl @@ -76,39 +72,14 @@ func (fs *Filesystem) Impl() FilesystemImpl { return fs.impl } -// IncRef increments fs' reference count. -func (fs *Filesystem) IncRef() { - if atomic.AddInt64(&fs.refs, 1) <= 1 { - panic("Filesystem.IncRef() called without holding a reference") - } -} - -// TryIncRef increments fs' reference count and returns true. If fs' reference -// count is zero, TryIncRef does nothing and returns false. -// -// TryIncRef does not require that a reference is held on fs. -func (fs *Filesystem) TryIncRef() bool { - for { - refs := atomic.LoadInt64(&fs.refs) - if refs <= 0 { - return false - } - if atomic.CompareAndSwapInt64(&fs.refs, refs, refs+1) { - return true - } - } -} - // DecRef decrements fs' reference count. func (fs *Filesystem) DecRef(ctx context.Context) { - if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 { + fs.FilesystemRefs.DecRef(func() { fs.vfs.filesystemsMu.Lock() delete(fs.vfs.filesystems, fs) fs.vfs.filesystemsMu.Unlock() fs.impl.Release(ctx) - } else if refs < 0 { - panic("Filesystem.decRef() called without holding a reference") - } + }) } // FilesystemImpl contains implementation details for a Filesystem. diff --git a/pkg/sentry/vfs/g3doc/inotify.md b/pkg/sentry/vfs/g3doc/inotify.md index e7da49faa..833db213f 100644 --- a/pkg/sentry/vfs/g3doc/inotify.md +++ b/pkg/sentry/vfs/g3doc/inotify.md @@ -28,9 +28,9 @@ The set of all watches held on a single file (i.e., the watch target) is stored in vfs.Watches. Each watch will belong to a different inotify instance (an instance can only have one watch on any watch target). The watches are stored in a map indexed by their vfs.Inotify owner’s id. Hard links and file descriptions -to a single file will all share the same vfs.Watches. Activity on the target -causes its vfs.Watches to generate notifications on its watches’ inotify -instances. +to a single file will all share the same vfs.Watches (with the exception of the +gofer filesystem, described in a later section). Activity on the target causes +its vfs.Watches to generate notifications on its watches’ inotify instances. ### vfs.Watch @@ -103,12 +103,12 @@ inotify: unopened p9 file (and possibly an open FID), through which the Sentry interacts with the gofer. * *Solution:* Because there is no inode structure stored in the sandbox, - inotify watches must be held on the dentry. This would be an issue in - the presence of hard links, where multiple dentries would need to share - the same set of watches, but in VFS2, we do not support the internal - creation of hard links on gofer fs. As a result, we make the assumption - that every dentry corresponds to a unique inode. However, the next point - raises an issue with this assumption: + inotify watches must be held on the dentry. For the purposes of inotify, + we assume that every dentry corresponds to a unique inode, which may + cause unexpected behavior in the presence of hard links, where multiple + dentries should share the same set of watches. Indeed, it is impossible + for us to be absolutely sure whether dentries correspond to the same + file or not, due to the following point: * **The Sentry cannot always be aware of hard links on the remote filesystem.** There is no way for us to confirm whether two files on the remote filesystem are actually links to the same inode. QIDs and inodes are diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index cd5456eef..db5fb3bb1 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -128,16 +128,14 @@ func (mnt *Mount) Options() MountOptions { // // +stateify savable type MountNamespace struct { + MountNamespaceRefs + // Owner is the usernamespace that owns this mount namespace. Owner *auth.UserNamespace // root is the MountNamespace's root mount. root is immutable. root *Mount - // refs is the reference count. refs is accessed using atomic memory - // operations. - refs int64 - // mountpoints maps all Dentries which are mount points in this namespace // to the number of Mounts for which they are mount points. mountpoints is // protected by VirtualFilesystem.mountMu. @@ -168,9 +166,9 @@ func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth } mntns := &MountNamespace{ Owner: creds.UserNamespace, - refs: 1, mountpoints: make(map[*Dentry]uint32), } + mntns.EnableLeakCheck() mntns.root = newMount(vfs, fs, root, mntns, &MountOptions{}) return mntns, nil } @@ -509,17 +507,10 @@ func (mnt *Mount) DecRef(ctx context.Context) { } } -// IncRef increments mntns' reference count. -func (mntns *MountNamespace) IncRef() { - if atomic.AddInt64(&mntns.refs, 1) <= 1 { - panic("MountNamespace.IncRef() called without holding a reference") - } -} - // DecRef decrements mntns' reference count. func (mntns *MountNamespace) DecRef(ctx context.Context) { vfs := mntns.root.fs.VirtualFilesystem() - if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 { + mntns.MountNamespaceRefs.DecRef(func() { vfs.mountMu.Lock() vfs.mounts.seq.BeginWrite() vdsToDecRef, mountsToDecRef := vfs.umountRecursiveLocked(mntns.root, &umountRecursiveOptions{ @@ -533,9 +524,7 @@ func (mntns *MountNamespace) DecRef(ctx context.Context) { for _, mnt := range mountsToDecRef { mnt.DecRef(ctx) } - } else if refs < 0 { - panic("MountNamespace.DecRef() called without holding a reference") - } + }) } // getMountAt returns the last Mount in the stack mounted at (mnt, d). It takes diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go index 777d631cb..da2a2e9c4 100644 --- a/pkg/sentry/vfs/mount_unsafe.go +++ b/pkg/sentry/vfs/mount_unsafe.go @@ -13,7 +13,7 @@ // limitations under the License. // +build go1.12 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. |