summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/vfs
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/vfs')
-rw-r--r--pkg/sentry/vfs/BUILD37
-rw-r--r--pkg/sentry/vfs/README.md9
-rw-r--r--pkg/sentry/vfs/file_description.go47
-rw-r--r--pkg/sentry/vfs/filesystem.go37
-rw-r--r--pkg/sentry/vfs/g3doc/inotify.md18
-rw-r--r--pkg/sentry/vfs/mount.go21
-rw-r--r--pkg/sentry/vfs/mount_unsafe.go2
7 files changed, 64 insertions, 107 deletions
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index 642769e7c..8093ca55c 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -27,6 +27,39 @@ go_template_instance(
},
)
+go_template_instance(
+ name = "file_description_refs",
+ out = "file_description_refs.go",
+ package = "vfs",
+ prefix = "FileDescription",
+ template = "//pkg/refs_vfs2:refs_template",
+ types = {
+ "T": "FileDescription",
+ },
+)
+
+go_template_instance(
+ name = "mount_namespace_refs",
+ out = "mount_namespace_refs.go",
+ package = "vfs",
+ prefix = "MountNamespace",
+ template = "//pkg/refs_vfs2:refs_template",
+ types = {
+ "T": "MountNamespace",
+ },
+)
+
+go_template_instance(
+ name = "filesystem_refs",
+ out = "filesystem_refs.go",
+ package = "vfs",
+ prefix = "Filesystem",
+ template = "//pkg/refs_vfs2:refs_template",
+ types = {
+ "T": "Filesystem",
+ },
+)
+
go_library(
name = "vfs",
srcs = [
@@ -40,12 +73,15 @@ go_library(
"event_list.go",
"file_description.go",
"file_description_impl_util.go",
+ "file_description_refs.go",
"filesystem.go",
"filesystem_impl_util.go",
+ "filesystem_refs.go",
"filesystem_type.go",
"inotify.go",
"lock.go",
"mount.go",
+ "mount_namespace_refs.go",
"mount_unsafe.go",
"options.go",
"pathname.go",
@@ -63,6 +99,7 @@ go_library(
"//pkg/fspath",
"//pkg/gohacks",
"//pkg/log",
+ "//pkg/refs",
"//pkg/safemem",
"//pkg/sentry/arch",
"//pkg/sentry/fs",
diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md
index 4b9faf2ea..5aad31b78 100644
--- a/pkg/sentry/vfs/README.md
+++ b/pkg/sentry/vfs/README.md
@@ -184,12 +184,3 @@ This construction, which is essentially a type-safe analogue to Linux's
- File locking
- `O_ASYNC`
-
-- Reference counts in the `vfs` package do not use the `refs` package since
- `refs.AtomicRefCount` adds 64 bytes of overhead to each 8-byte reference
- count, resulting in considerable cache bloat. 24 bytes of this overhead is
- for weak reference support, which have poor performance and will not be used
- by VFS2. The remaining 40 bytes is to store a descriptive string and stack
- trace for reference leak checking; we can support reference leak checking
- without incurring this space overhead by including the applicable
- information directly in finalizers for applicable types.
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 33910e095..22a54fa48 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -38,9 +38,7 @@ import (
//
// FileDescription is analogous to Linux's struct file.
type FileDescription struct {
- // refs is the reference count. refs is accessed using atomic memory
- // operations.
- refs int64
+ FileDescriptionRefs
// flagsMu protects statusFlags and asyncHandler below.
flagsMu sync.Mutex
@@ -131,7 +129,7 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou
}
}
- fd.refs = 1
+ fd.EnableLeakCheck()
// Remove "file creation flags" to mirror the behavior from file.f_flags in
// fs/open.c:do_dentry_open.
@@ -149,30 +147,9 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou
return nil
}
-// IncRef increments fd's reference count.
-func (fd *FileDescription) IncRef() {
- atomic.AddInt64(&fd.refs, 1)
-}
-
-// TryIncRef increments fd's reference count and returns true. If fd's
-// reference count is already zero, TryIncRef does nothing and returns false.
-//
-// TryIncRef does not require that a reference is held on fd.
-func (fd *FileDescription) TryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&fd.refs)
- if refs <= 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&fd.refs, refs, refs+1) {
- return true
- }
- }
-}
-
// DecRef decrements fd's reference count.
func (fd *FileDescription) DecRef(ctx context.Context) {
- if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 {
+ fd.FileDescriptionRefs.DecRef(func() {
// Unregister fd from all epoll instances.
fd.epollMu.Lock()
epolls := fd.epolls
@@ -208,15 +185,7 @@ func (fd *FileDescription) DecRef(ctx context.Context) {
}
fd.asyncHandler = nil
fd.flagsMu.Unlock()
- } else if refs < 0 {
- panic("FileDescription.DecRef() called without holding a reference")
- }
-}
-
-// Refs returns the current number of references. The returned count
-// is inherently racy and is unsafe to use without external synchronization.
-func (fd *FileDescription) Refs() int64 {
- return atomic.LoadInt64(&fd.refs)
+ })
}
// Mount returns the mount on which fd was opened. It does not take a reference
@@ -851,7 +820,7 @@ func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) FileAsyn
// FileReadWriteSeeker is a helper struct to pass a FileDescription as
// io.Reader/io.Writer/io.ReadSeeker/etc.
type FileReadWriteSeeker struct {
- Fd *FileDescription
+ FD *FileDescription
Ctx context.Context
ROpts ReadOptions
WOpts WriteOptions
@@ -860,18 +829,18 @@ type FileReadWriteSeeker struct {
// Read implements io.ReadWriteSeeker.Read.
func (f *FileReadWriteSeeker) Read(p []byte) (int, error) {
dst := usermem.BytesIOSequence(p)
- ret, err := f.Fd.Read(f.Ctx, dst, f.ROpts)
+ ret, err := f.FD.Read(f.Ctx, dst, f.ROpts)
return int(ret), err
}
// Seek implements io.ReadWriteSeeker.Seek.
func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) {
- return f.Fd.Seek(f.Ctx, offset, int32(whence))
+ return f.FD.Seek(f.Ctx, offset, int32(whence))
}
// Write implements io.ReadWriteSeeker.Write.
func (f *FileReadWriteSeeker) Write(p []byte) (int, error) {
buf := usermem.BytesIOSequence(p)
- ret, err := f.Fd.Write(f.Ctx, buf, f.WOpts)
+ ret, err := f.FD.Write(f.Ctx, buf, f.WOpts)
return int(ret), err
}
diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go
index 2c60cfab2..46851f638 100644
--- a/pkg/sentry/vfs/filesystem.go
+++ b/pkg/sentry/vfs/filesystem.go
@@ -15,8 +15,6 @@
package vfs
import (
- "sync/atomic"
-
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
@@ -34,9 +32,7 @@ import (
//
// +stateify savable
type Filesystem struct {
- // refs is the reference count. refs is accessed using atomic memory
- // operations.
- refs int64
+ FilesystemRefs
// vfs is the VirtualFilesystem that uses this Filesystem. vfs is
// immutable.
@@ -52,7 +48,7 @@ type Filesystem struct {
// Init must be called before first use of fs.
func (fs *Filesystem) Init(vfsObj *VirtualFilesystem, fsType FilesystemType, impl FilesystemImpl) {
- fs.refs = 1
+ fs.EnableLeakCheck()
fs.vfs = vfsObj
fs.fsType = fsType
fs.impl = impl
@@ -76,39 +72,14 @@ func (fs *Filesystem) Impl() FilesystemImpl {
return fs.impl
}
-// IncRef increments fs' reference count.
-func (fs *Filesystem) IncRef() {
- if atomic.AddInt64(&fs.refs, 1) <= 1 {
- panic("Filesystem.IncRef() called without holding a reference")
- }
-}
-
-// TryIncRef increments fs' reference count and returns true. If fs' reference
-// count is zero, TryIncRef does nothing and returns false.
-//
-// TryIncRef does not require that a reference is held on fs.
-func (fs *Filesystem) TryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&fs.refs)
- if refs <= 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&fs.refs, refs, refs+1) {
- return true
- }
- }
-}
-
// DecRef decrements fs' reference count.
func (fs *Filesystem) DecRef(ctx context.Context) {
- if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 {
+ fs.FilesystemRefs.DecRef(func() {
fs.vfs.filesystemsMu.Lock()
delete(fs.vfs.filesystems, fs)
fs.vfs.filesystemsMu.Unlock()
fs.impl.Release(ctx)
- } else if refs < 0 {
- panic("Filesystem.decRef() called without holding a reference")
- }
+ })
}
// FilesystemImpl contains implementation details for a Filesystem.
diff --git a/pkg/sentry/vfs/g3doc/inotify.md b/pkg/sentry/vfs/g3doc/inotify.md
index e7da49faa..833db213f 100644
--- a/pkg/sentry/vfs/g3doc/inotify.md
+++ b/pkg/sentry/vfs/g3doc/inotify.md
@@ -28,9 +28,9 @@ The set of all watches held on a single file (i.e., the watch target) is stored
in vfs.Watches. Each watch will belong to a different inotify instance (an
instance can only have one watch on any watch target). The watches are stored in
a map indexed by their vfs.Inotify owner’s id. Hard links and file descriptions
-to a single file will all share the same vfs.Watches. Activity on the target
-causes its vfs.Watches to generate notifications on its watches’ inotify
-instances.
+to a single file will all share the same vfs.Watches (with the exception of the
+gofer filesystem, described in a later section). Activity on the target causes
+its vfs.Watches to generate notifications on its watches’ inotify instances.
### vfs.Watch
@@ -103,12 +103,12 @@ inotify:
unopened p9 file (and possibly an open FID), through which the Sentry
interacts with the gofer.
* *Solution:* Because there is no inode structure stored in the sandbox,
- inotify watches must be held on the dentry. This would be an issue in
- the presence of hard links, where multiple dentries would need to share
- the same set of watches, but in VFS2, we do not support the internal
- creation of hard links on gofer fs. As a result, we make the assumption
- that every dentry corresponds to a unique inode. However, the next point
- raises an issue with this assumption:
+ inotify watches must be held on the dentry. For the purposes of inotify,
+ we assume that every dentry corresponds to a unique inode, which may
+ cause unexpected behavior in the presence of hard links, where multiple
+ dentries should share the same set of watches. Indeed, it is impossible
+ for us to be absolutely sure whether dentries correspond to the same
+ file or not, due to the following point:
* **The Sentry cannot always be aware of hard links on the remote
filesystem.** There is no way for us to confirm whether two files on the
remote filesystem are actually links to the same inode. QIDs and inodes are
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index cd5456eef..db5fb3bb1 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -128,16 +128,14 @@ func (mnt *Mount) Options() MountOptions {
//
// +stateify savable
type MountNamespace struct {
+ MountNamespaceRefs
+
// Owner is the usernamespace that owns this mount namespace.
Owner *auth.UserNamespace
// root is the MountNamespace's root mount. root is immutable.
root *Mount
- // refs is the reference count. refs is accessed using atomic memory
- // operations.
- refs int64
-
// mountpoints maps all Dentries which are mount points in this namespace
// to the number of Mounts for which they are mount points. mountpoints is
// protected by VirtualFilesystem.mountMu.
@@ -168,9 +166,9 @@ func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth
}
mntns := &MountNamespace{
Owner: creds.UserNamespace,
- refs: 1,
mountpoints: make(map[*Dentry]uint32),
}
+ mntns.EnableLeakCheck()
mntns.root = newMount(vfs, fs, root, mntns, &MountOptions{})
return mntns, nil
}
@@ -509,17 +507,10 @@ func (mnt *Mount) DecRef(ctx context.Context) {
}
}
-// IncRef increments mntns' reference count.
-func (mntns *MountNamespace) IncRef() {
- if atomic.AddInt64(&mntns.refs, 1) <= 1 {
- panic("MountNamespace.IncRef() called without holding a reference")
- }
-}
-
// DecRef decrements mntns' reference count.
func (mntns *MountNamespace) DecRef(ctx context.Context) {
vfs := mntns.root.fs.VirtualFilesystem()
- if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 {
+ mntns.MountNamespaceRefs.DecRef(func() {
vfs.mountMu.Lock()
vfs.mounts.seq.BeginWrite()
vdsToDecRef, mountsToDecRef := vfs.umountRecursiveLocked(mntns.root, &umountRecursiveOptions{
@@ -533,9 +524,7 @@ func (mntns *MountNamespace) DecRef(ctx context.Context) {
for _, mnt := range mountsToDecRef {
mnt.DecRef(ctx)
}
- } else if refs < 0 {
- panic("MountNamespace.DecRef() called without holding a reference")
- }
+ })
}
// getMountAt returns the last Mount in the stack mounted at (mnt, d). It takes
diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go
index 777d631cb..da2a2e9c4 100644
--- a/pkg/sentry/vfs/mount_unsafe.go
+++ b/pkg/sentry/vfs/mount_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.16
+// +build !go1.17
// Check go:linkname function signatures when updating Go version.