Diffstat (limited to 'pkg/sentry/fs')
-rw-r--r-- pkg/sentry/fs/copy_up.go | 12
-rw-r--r-- pkg/sentry/fs/copy_up_test.go | 4
-rw-r--r-- pkg/sentry/fs/dev/net_tun.go | 4
-rw-r--r-- pkg/sentry/fs/dirent.go | 110
-rw-r--r-- pkg/sentry/fs/dirent_cache.go | 3
-rw-r--r-- pkg/sentry/fs/dirent_refs_test.go | 16
-rw-r--r-- pkg/sentry/fs/dirent_state.go | 3
-rw-r--r-- pkg/sentry/fs/fdpipe/pipe.go | 2
-rw-r--r-- pkg/sentry/fs/fdpipe/pipe_opener_test.go | 16
-rw-r--r-- pkg/sentry/fs/fdpipe/pipe_test.go | 18
-rw-r--r-- pkg/sentry/fs/file.go | 21
-rw-r--r-- pkg/sentry/fs/file_operations.go | 3
-rw-r--r-- pkg/sentry/fs/file_overlay.go | 22
-rw-r--r-- pkg/sentry/fs/filesystems.go | 14
-rw-r--r-- pkg/sentry/fs/fs.go | 3
-rw-r--r-- pkg/sentry/fs/fsutil/BUILD | 7
-rw-r--r-- pkg/sentry/fs/fsutil/dirty_set.go | 7
-rw-r--r-- pkg/sentry/fs/fsutil/file.go | 4
-rw-r--r-- pkg/sentry/fs/fsutil/file_range_set.go | 15
-rw-r--r-- pkg/sentry/fs/fsutil/frame_ref_set.go | 10
-rw-r--r-- pkg/sentry/fs/fsutil/host_file_mapper.go | 5
-rw-r--r-- pkg/sentry/fs/fsutil/host_mappable.go | 19
-rw-r--r-- pkg/sentry/fs/fsutil/inode_cached.go | 25
-rw-r--r-- pkg/sentry/fs/g3doc/.gitignore | 1
-rw-r--r-- pkg/sentry/fs/g3doc/fuse.md | 263
-rw-r--r-- pkg/sentry/fs/gofer/file.go | 4
-rw-r--r-- pkg/sentry/fs/gofer/gofer_test.go | 8
-rw-r--r-- pkg/sentry/fs/gofer/handles.go | 5
-rw-r--r-- pkg/sentry/fs/gofer/inode.go | 7
-rw-r--r-- pkg/sentry/fs/gofer/path.go | 6
-rw-r--r-- pkg/sentry/fs/gofer/session.go | 16
-rw-r--r-- pkg/sentry/fs/gofer/session_state.go | 3
-rw-r--r-- pkg/sentry/fs/gofer/socket.go | 6
-rw-r--r-- pkg/sentry/fs/host/BUILD | 1
-rw-r--r-- pkg/sentry/fs/host/control.go | 2
-rw-r--r-- pkg/sentry/fs/host/file.go | 4
-rw-r--r-- pkg/sentry/fs/host/inode.go | 3
-rw-r--r-- pkg/sentry/fs/host/inode_test.go | 2
-rw-r--r-- pkg/sentry/fs/host/socket.go | 20
-rw-r--r-- pkg/sentry/fs/host/socket_iovec.go | 7
-rw-r--r-- pkg/sentry/fs/host/socket_test.go | 38
-rw-r--r-- pkg/sentry/fs/host/tty.go | 10
-rw-r--r-- pkg/sentry/fs/host/wait_test.go | 2
-rw-r--r-- pkg/sentry/fs/inode.go | 11
-rw-r--r-- pkg/sentry/fs/inode_inotify.go | 5
-rw-r--r-- pkg/sentry/fs/inode_overlay.go | 30
-rw-r--r-- pkg/sentry/fs/inode_overlay_test.go | 8
-rw-r--r-- pkg/sentry/fs/inotify.go | 8
-rw-r--r-- pkg/sentry/fs/inotify_watch.go | 9
-rw-r--r-- pkg/sentry/fs/lock/lock.go | 41
-rw-r--r-- pkg/sentry/fs/lock/lock_set_functions.go | 8
-rw-r--r-- pkg/sentry/fs/lock/lock_test.go | 111
-rw-r--r-- pkg/sentry/fs/mount.go | 12
-rw-r--r-- pkg/sentry/fs/mount_overlay.go | 6
-rw-r--r-- pkg/sentry/fs/mount_test.go | 29
-rw-r--r-- pkg/sentry/fs/mounts.go | 102
-rw-r--r-- pkg/sentry/fs/mounts_test.go | 2
-rw-r--r-- pkg/sentry/fs/overlay.go | 10
-rw-r--r-- pkg/sentry/fs/proc/fds.go | 18
-rw-r--r-- pkg/sentry/fs/proc/mounts.go | 8
-rw-r--r-- pkg/sentry/fs/proc/net.go | 12
-rw-r--r-- pkg/sentry/fs/proc/proc.go | 2
-rw-r--r-- pkg/sentry/fs/proc/sys_net.go | 95
-rw-r--r-- pkg/sentry/fs/proc/task.go | 4
-rw-r--r-- pkg/sentry/fs/ramfs/dir.go | 18
-rw-r--r-- pkg/sentry/fs/ramfs/tree_test.go | 2
-rw-r--r-- pkg/sentry/fs/timerfd/timerfd.go | 4
-rw-r--r-- pkg/sentry/fs/tmpfs/file_test.go | 2
-rw-r--r-- pkg/sentry/fs/tty/dir.go | 8
-rw-r--r-- pkg/sentry/fs/tty/fs.go | 2
-rw-r--r-- pkg/sentry/fs/tty/master.go | 8
-rw-r--r-- pkg/sentry/fs/tty/slave.go | 4
-rw-r--r-- pkg/sentry/fs/user/BUILD | 8
-rw-r--r-- pkg/sentry/fs/user/path.go | 170
-rw-r--r-- pkg/sentry/fs/user/user.go | 10
-rw-r--r-- pkg/sentry/fs/user/user_test.go | 8
76 files changed, 977 insertions, 549 deletions
diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go
index ab1424c95..735452b07 100644
--- a/pkg/sentry/fs/copy_up.go
+++ b/pkg/sentry/fs/copy_up.go
@@ -201,7 +201,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
parentUpper := parent.Inode.overlay.upper
root := RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
// Create the file in the upper filesystem and get an Inode for it.
@@ -212,7 +212,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
log.Warningf("copy up failed to create file: %v", err)
return syserror.EIO
}
- defer childFile.DecRef()
+ defer childFile.DecRef(ctx)
childUpperInode = childFile.Dirent.Inode
case Directory:
@@ -226,7 +226,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
cleanupUpper(ctx, parentUpper, next.name, werr)
return syserror.EIO
}
- defer childUpper.DecRef()
+ defer childUpper.DecRef(ctx)
childUpperInode = childUpper.Inode
case Symlink:
@@ -246,7 +246,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
cleanupUpper(ctx, parentUpper, next.name, werr)
return syserror.EIO
}
- defer childUpper.DecRef()
+ defer childUpper.DecRef(ctx)
childUpperInode = childUpper.Inode
default:
@@ -352,14 +352,14 @@ func copyContentsLocked(ctx context.Context, upper *Inode, lower *Inode, size in
if err != nil {
return err
}
- defer upperFile.DecRef()
+ defer upperFile.DecRef(ctx)
// Get a handle to the lower filesystem, which we will read from.
lowerFile, err := overlayFile(ctx, lower, FileFlags{Read: true})
if err != nil {
return err
}
- defer lowerFile.DecRef()
+ defer lowerFile.DecRef(ctx)
// Use a buffer pool to minimize allocations.
buf := copyUpBuffers.Get().([]byte)
diff --git a/pkg/sentry/fs/copy_up_test.go b/pkg/sentry/fs/copy_up_test.go
index 91792d9fe..c7a11eec1 100644
--- a/pkg/sentry/fs/copy_up_test.go
+++ b/pkg/sentry/fs/copy_up_test.go
@@ -126,7 +126,7 @@ func makeOverlayTestFiles(t *testing.T) []*overlayTestFile {
if err != nil {
t.Fatalf("failed to create file %q: %v", name, err)
}
- defer f.DecRef()
+ defer f.DecRef(ctx)
relname, _ := f.Dirent.FullName(lowerRoot)
@@ -171,7 +171,7 @@ func makeOverlayTestFiles(t *testing.T) []*overlayTestFile {
if err != nil {
t.Fatalf("failed to find %q: %v", f.name, err)
}
- defer d.DecRef()
+ defer d.DecRef(ctx)
f.File, err = d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true})
if err != nil {
diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go
index dc7ad075a..ec474e554 100644
--- a/pkg/sentry/fs/dev/net_tun.go
+++ b/pkg/sentry/fs/dev/net_tun.go
@@ -80,8 +80,8 @@ type netTunFileOperations struct {
var _ fs.FileOperations = (*netTunFileOperations)(nil)
// Release implements fs.FileOperations.Release.
-func (fops *netTunFileOperations) Release() {
- fops.device.Release()
+func (fops *netTunFileOperations) Release(ctx context.Context) {
+ fops.device.Release(ctx)
}
// Ioctl implements fs.FileOperations.Ioctl.
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go
index 65be12175..a2f751068 100644
--- a/pkg/sentry/fs/dirent.go
+++ b/pkg/sentry/fs/dirent.go
@@ -325,7 +325,7 @@ func (d *Dirent) SyncAll(ctx context.Context) {
for _, w := range d.children {
if child := w.Get(); child != nil {
child.(*Dirent).SyncAll(ctx)
- child.DecRef()
+ child.DecRef(ctx)
}
}
}
@@ -451,7 +451,7 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl
// which don't hold a hard reference on their parent (their parent holds a
// hard reference on them, and they contain virtually no state). But this is
// good house-keeping.
- child.DecRef()
+ child.DecRef(ctx)
return nil, syscall.ENOENT
}
@@ -468,20 +468,20 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl
// their pins on the child. Inotify doesn't properly support filesystems that
// revalidate dirents (since watches are lost on revalidation), but if we fail
// to unpin the watches child will never be GCed.
- cd.Inode.Watches.Unpin(cd)
+ cd.Inode.Watches.Unpin(ctx, cd)
// This child needs to be revalidated, fallthrough to unhash it. Make sure
// to not leak a reference from Get().
//
// Note that previous lookups may still have a reference to this stale child;
// this can't be helped, but we can ensure that *new* lookups are up-to-date.
- child.DecRef()
+ child.DecRef(ctx)
}
// Either our weak reference expired or we need to revalidate it. Unhash child first, we're
// about to replace it.
delete(d.children, name)
- w.Drop()
+ w.Drop(ctx)
}
// Slow path: load the InodeOperations into memory. Since this is a hot path and the lookup may be
@@ -512,12 +512,12 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl
// There are active references to the existing child, prefer it to the one we
// retrieved from Lookup. Likely the Lookup happened very close to the insertion
// of child, so considering one stale over the other is fairly arbitrary.
- c.DecRef()
+ c.DecRef(ctx)
// The child that was installed could be negative.
if cd.IsNegative() {
// If so, don't leak a reference and short circuit.
- child.DecRef()
+ child.DecRef(ctx)
return nil, syscall.ENOENT
}
@@ -531,7 +531,7 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl
// we did the Inode.Lookup. Fully drop the weak reference and fallback to using the child
// we looked up.
delete(d.children, name)
- w.Drop()
+ w.Drop(ctx)
}
// Give the looked up child a parent. We cannot kick out entries, since we just checked above
@@ -587,7 +587,7 @@ func (d *Dirent) exists(ctx context.Context, root *Dirent, name string) bool {
return false
}
// Child exists.
- child.DecRef()
+ child.DecRef(ctx)
return true
}
@@ -622,7 +622,7 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi
}
child := file.Dirent
- d.finishCreate(child, name)
+ d.finishCreate(ctx, child, name)
// Return the reference and the new file. When the last reference to
// the file is dropped, file.Dirent may no longer be cached.
@@ -631,7 +631,7 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi
// finishCreate validates the created file, adds it as a child of this dirent,
// and notifies any watchers.
-func (d *Dirent) finishCreate(child *Dirent, name string) {
+func (d *Dirent) finishCreate(ctx context.Context, child *Dirent, name string) {
// Sanity check c, its name must be consistent.
if child.name != name {
panic(fmt.Sprintf("create from %q to %q returned unexpected name %q", d.name, name, child.name))
@@ -650,14 +650,14 @@ func (d *Dirent) finishCreate(child *Dirent, name string) {
panic(fmt.Sprintf("hashed child %q over a positive child", child.name))
}
// Don't leak a reference.
- old.DecRef()
+ old.DecRef(ctx)
// Drop d's reference.
- old.DecRef()
+ old.DecRef(ctx)
}
// Finally drop the useless weak reference on the floor.
- w.Drop()
+ w.Drop(ctx)
}
d.Inode.Watches.Notify(name, linux.IN_CREATE, 0)
@@ -686,17 +686,17 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c
panic(fmt.Sprintf("hashed over a positive child %q", old.(*Dirent).name))
}
// Don't leak a reference.
- old.DecRef()
+ old.DecRef(ctx)
// Drop d's reference.
- old.DecRef()
+ old.DecRef(ctx)
}
// Unhash the negative Dirent, name needs to exist now.
delete(d.children, name)
// Finally drop the useless weak reference on the floor.
- w.Drop()
+ w.Drop(ctx)
}
// Execute the create operation.
@@ -756,7 +756,7 @@ func (d *Dirent) Bind(ctx context.Context, root *Dirent, name string, data trans
if e != nil {
return e
}
- d.finishCreate(childDir, name)
+ d.finishCreate(ctx, childDir, name)
return nil
})
if err == syscall.EEXIST {
@@ -901,7 +901,7 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent,
// references to children.
//
// Preconditions: d.mu must be held.
-func (d *Dirent) flush() {
+func (d *Dirent) flush(ctx context.Context) {
expired := make(map[string]*refs.WeakRef)
for n, w := range d.children {
// Call flush recursively on each child before removing our
@@ -912,7 +912,7 @@ func (d *Dirent) flush() {
if !cd.IsNegative() {
// Flush the child.
cd.mu.Lock()
- cd.flush()
+ cd.flush(ctx)
cd.mu.Unlock()
// Allow the file system to drop extra references on child.
@@ -920,13 +920,13 @@ func (d *Dirent) flush() {
}
// Don't leak a reference.
- child.DecRef()
+ child.DecRef(ctx)
}
// Check if the child dirent is closed, and mark it as expired if it is.
// We must call w.Get() again here, since the child could have been closed
// by the calls to flush() and cache.Remove() in the above if-block.
if child := w.Get(); child != nil {
- child.DecRef()
+ child.DecRef(ctx)
} else {
expired[n] = w
}
@@ -935,7 +935,7 @@ func (d *Dirent) flush() {
// Remove expired entries.
for n, w := range expired {
delete(d.children, n)
- w.Drop()
+ w.Drop(ctx)
}
}
@@ -977,7 +977,7 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err
if !ok {
panic("mount must mount over an existing dirent")
}
- weakRef.Drop()
+ weakRef.Drop(ctx)
// Note that even though `d` is now hidden, it still holds a reference
// to its parent.
@@ -1002,13 +1002,13 @@ func (d *Dirent) unmount(ctx context.Context, replacement *Dirent) error {
if !ok {
panic("mount must mount over an existing dirent")
}
- weakRef.Drop()
+ weakRef.Drop(ctx)
// d is not reachable anymore, and hence not mounted anymore.
d.mounted = false
// Drop mount reference.
- d.DecRef()
+ d.DecRef(ctx)
return nil
}
@@ -1029,7 +1029,7 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath
// Child does not exist.
return err
}
- defer child.DecRef()
+ defer child.DecRef(ctx)
// Remove cannot remove directories.
if IsDir(child.Inode.StableAttr) {
@@ -1055,7 +1055,7 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath
atomic.StoreInt32(&child.deleted, 1)
if w, ok := d.children[name]; ok {
delete(d.children, name)
- w.Drop()
+ w.Drop(ctx)
}
// Allow the file system to drop extra references on child.
@@ -1067,7 +1067,7 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath
// inode may have other links. If this was the last link, the events for the
// watch removal will be queued by the inode destructor.
child.Inode.Watches.MarkUnlinked()
- child.Inode.Watches.Unpin(child)
+ child.Inode.Watches.Unpin(ctx, child)
d.Inode.Watches.Notify(name, linux.IN_DELETE, 0)
return nil
@@ -1100,7 +1100,7 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string)
// Child does not exist.
return err
}
- defer child.DecRef()
+ defer child.DecRef(ctx)
// RemoveDirectory can only remove directories.
if !IsDir(child.Inode.StableAttr) {
@@ -1121,7 +1121,7 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string)
atomic.StoreInt32(&child.deleted, 1)
if w, ok := d.children[name]; ok {
delete(d.children, name)
- w.Drop()
+ w.Drop(ctx)
}
// Allow the file system to drop extra references on child.
@@ -1130,14 +1130,14 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string)
// Finally, let inotify know the child is being unlinked. Drop any extra
// refs from inotify to this child dirent.
child.Inode.Watches.MarkUnlinked()
- child.Inode.Watches.Unpin(child)
+ child.Inode.Watches.Unpin(ctx, child)
d.Inode.Watches.Notify(name, linux.IN_ISDIR|linux.IN_DELETE, 0)
return nil
}
// destroy closes this node and all children.
-func (d *Dirent) destroy() {
+func (d *Dirent) destroy(ctx context.Context) {
if d.IsNegative() {
// Nothing to tear-down and no parent references to drop, since a negative
// Dirent does not take a references on its parent, has no Inode and no children.
@@ -1153,19 +1153,19 @@ func (d *Dirent) destroy() {
if c.(*Dirent).IsNegative() {
// The parent holds both weak and strong refs in the case of
// negative dirents.
- c.DecRef()
+ c.DecRef(ctx)
}
// Drop the reference we just acquired in WeakRef.Get.
- c.DecRef()
+ c.DecRef(ctx)
}
- w.Drop()
+ w.Drop(ctx)
}
d.children = nil
allDirents.remove(d)
// Drop our reference to the Inode.
- d.Inode.DecRef()
+ d.Inode.DecRef(ctx)
// Allow the Dirent to be GC'ed after this point, since the Inode may still
// be referenced after the Dirent is destroyed (for instance by filesystem
@@ -1175,7 +1175,7 @@ func (d *Dirent) destroy() {
// Drop the reference we have on our parent if we took one. renameMu doesn't need to be
// held because d can't be reparented without any references to it left.
if d.parent != nil {
- d.parent.DecRef()
+ d.parent.DecRef(ctx)
}
}
@@ -1201,14 +1201,14 @@ func (d *Dirent) TryIncRef() bool {
// DecRef decreases the Dirent's refcount and drops its reference on its mount.
//
// DecRef implements RefCounter.DecRef with destructor d.destroy.
-func (d *Dirent) DecRef() {
+func (d *Dirent) DecRef(ctx context.Context) {
if d.Inode != nil {
// Keep mount around, since DecRef may destroy d.Inode.
msrc := d.Inode.MountSource
- d.DecRefWithDestructor(d.destroy)
+ d.DecRefWithDestructor(ctx, d.destroy)
msrc.DecDirentRefs()
} else {
- d.DecRefWithDestructor(d.destroy)
+ d.DecRefWithDestructor(ctx, d.destroy)
}
}
@@ -1359,7 +1359,7 @@ func (d *Dirent) MayDelete(ctx context.Context, root *Dirent, name string) error
if err != nil {
return err
}
- defer victim.DecRef()
+ defer victim.DecRef(ctx)
return d.mayDelete(ctx, victim)
}
@@ -1411,7 +1411,7 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string
if err != nil {
return err
}
- defer renamed.DecRef()
+ defer renamed.DecRef(ctx)
// Check that the renamed dirent is deletable.
if err := oldParent.mayDelete(ctx, renamed); err != nil {
@@ -1453,13 +1453,13 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string
// Check that we can delete replaced.
if err := newParent.mayDelete(ctx, replaced); err != nil {
- replaced.DecRef()
+ replaced.DecRef(ctx)
return err
}
// Target should not be an ancestor of source.
if oldParent.descendantOf(replaced) {
- replaced.DecRef()
+ replaced.DecRef(ctx)
// Note that Linux returns EINVAL if the source is an
// ancestor of target, but ENOTEMPTY if the target is
@@ -1470,7 +1470,7 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string
// Check that replaced is not a mount point.
if replaced.isMountPointLocked() {
- replaced.DecRef()
+ replaced.DecRef(ctx)
return syscall.EBUSY
}
@@ -1478,11 +1478,11 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string
oldIsDir := IsDir(renamed.Inode.StableAttr)
newIsDir := IsDir(replaced.Inode.StableAttr)
if !newIsDir && oldIsDir {
- replaced.DecRef()
+ replaced.DecRef(ctx)
return syscall.ENOTDIR
}
if !oldIsDir && newIsDir {
- replaced.DecRef()
+ replaced.DecRef(ctx)
return syscall.EISDIR
}
@@ -1493,13 +1493,13 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string
// open across renames is currently broken for multiple
// reasons, so we flush all references on the replaced node and
// its children.
- replaced.Inode.Watches.Unpin(replaced)
+ replaced.Inode.Watches.Unpin(ctx, replaced)
replaced.mu.Lock()
- replaced.flush()
+ replaced.flush(ctx)
replaced.mu.Unlock()
// Done with replaced.
- replaced.DecRef()
+ replaced.DecRef(ctx)
}
if err := renamed.Inode.Rename(ctx, oldParent, renamed, newParent, newName, replaced != nil); err != nil {
@@ -1513,14 +1513,14 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string
// can't destroy oldParent (and try to retake its lock) because
// Rename's caller must be holding a reference.
newParent.IncRef()
- oldParent.DecRef()
+ oldParent.DecRef(ctx)
}
if w, ok := newParent.children[newName]; ok {
- w.Drop()
+ w.Drop(ctx)
delete(newParent.children, newName)
}
if w, ok := oldParent.children[oldName]; ok {
- w.Drop()
+ w.Drop(ctx)
delete(oldParent.children, oldName)
}
@@ -1551,7 +1551,7 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string
// Same as replaced.flush above.
renamed.mu.Lock()
- renamed.flush()
+ renamed.flush(ctx)
renamed.mu.Unlock()
return nil
diff --git a/pkg/sentry/fs/dirent_cache.go b/pkg/sentry/fs/dirent_cache.go
index 33de32c69..7d9dd717e 100644
--- a/pkg/sentry/fs/dirent_cache.go
+++ b/pkg/sentry/fs/dirent_cache.go
@@ -17,6 +17,7 @@ package fs
import (
"fmt"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sync"
)
@@ -101,7 +102,7 @@ func (c *DirentCache) remove(d *Dirent) {
panic(fmt.Sprintf("trying to remove %v, which is not in the dirent cache", d))
}
c.list.Remove(d)
- d.DecRef()
+ d.DecRef(context.Background())
c.currentSize--
if c.limit != nil {
c.limit.dec()
diff --git a/pkg/sentry/fs/dirent_refs_test.go b/pkg/sentry/fs/dirent_refs_test.go
index 98d69c6f2..176b894ba 100644
--- a/pkg/sentry/fs/dirent_refs_test.go
+++ b/pkg/sentry/fs/dirent_refs_test.go
@@ -51,7 +51,7 @@ func TestWalkPositive(t *testing.T) {
t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 1)
}
- d.DecRef()
+ d.DecRef(ctx)
if got := root.ReadRefs(); got != 1 {
t.Fatalf("root has a ref count of %d, want %d", got, 1)
@@ -61,7 +61,7 @@ func TestWalkPositive(t *testing.T) {
t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 0)
}
- root.flush()
+ root.flush(ctx)
if got := len(root.children); got != 0 {
t.Fatalf("root has %d children, want %d", got, 0)
@@ -114,7 +114,7 @@ func TestWalkNegative(t *testing.T) {
t.Fatalf("child has a ref count of %d, want %d", got, 2)
}
- child.DecRef()
+ child.DecRef(ctx)
if got := child.(*Dirent).ReadRefs(); got != 1 {
t.Fatalf("child has a ref count of %d, want %d", got, 1)
@@ -124,7 +124,7 @@ func TestWalkNegative(t *testing.T) {
t.Fatalf("root has %d children, want %d", got, 1)
}
- root.DecRef()
+ root.DecRef(ctx)
if got := root.ReadRefs(); got != 0 {
t.Fatalf("root has a ref count of %d, want %d", got, 0)
@@ -351,9 +351,9 @@ func TestRemoveExtraRefs(t *testing.T) {
t.Fatalf("dirent has a ref count of %d, want %d", got, 1)
}
- d.DecRef()
+ d.DecRef(ctx)
- test.root.flush()
+ test.root.flush(ctx)
if got := len(test.root.children); got != 0 {
t.Errorf("root has %d children, want %d", got, 0)
@@ -403,8 +403,8 @@ func TestRenameExtraRefs(t *testing.T) {
t.Fatalf("Rename got error %v, want nil", err)
}
- oldParent.flush()
- newParent.flush()
+ oldParent.flush(ctx)
+ newParent.flush(ctx)
// Expect to have only active references.
if got := renamed.ReadRefs(); got != 1 {
diff --git a/pkg/sentry/fs/dirent_state.go b/pkg/sentry/fs/dirent_state.go
index f623d6c0e..67a35f0b2 100644
--- a/pkg/sentry/fs/dirent_state.go
+++ b/pkg/sentry/fs/dirent_state.go
@@ -18,6 +18,7 @@ import (
"fmt"
"sync/atomic"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/refs"
)
@@ -48,7 +49,7 @@ func (d *Dirent) saveChildren() map[string]*Dirent {
for name, w := range d.children {
if rc := w.Get(); rc != nil {
// Drop the reference count obtain in w.Get()
- rc.DecRef()
+ rc.DecRef(context.Background())
cd := rc.(*Dirent)
if cd.IsNegative() {
diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go
index 9fce177ad..b99199798 100644
--- a/pkg/sentry/fs/fdpipe/pipe.go
+++ b/pkg/sentry/fs/fdpipe/pipe.go
@@ -115,7 +115,7 @@ func (p *pipeOperations) Readiness(mask waiter.EventMask) (eventMask waiter.Even
}
// Release implements fs.FileOperations.Release.
-func (p *pipeOperations) Release() {
+func (p *pipeOperations) Release(context.Context) {
fdnotifier.RemoveFD(int32(p.file.FD()))
p.file.Close()
p.file = nil
diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go
index e556da48a..b9cec4b13 100644
--- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go
+++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go
@@ -182,7 +182,7 @@ func TestTryOpen(t *testing.T) {
// Cleanup the state of the pipe, and remove the fd from the
// fdnotifier. Sadly this needed to maintain the correctness
// of other tests because the fdnotifier is global.
- pipeOps.Release()
+ pipeOps.Release(ctx)
}
continue
}
@@ -191,7 +191,7 @@ func TestTryOpen(t *testing.T) {
}
if pipeOps != nil {
// Same as above.
- pipeOps.Release()
+ pipeOps.Release(ctx)
}
}
}
@@ -279,7 +279,7 @@ func TestPipeOpenUnblocksEventually(t *testing.T) {
pipeOps, err := Open(ctx, opener, flags)
if pipeOps != nil {
// Same as TestTryOpen.
- pipeOps.Release()
+ pipeOps.Release(ctx)
}
// Check that the partner opened the file successfully.
@@ -325,7 +325,7 @@ func TestCopiedReadAheadBuffer(t *testing.T) {
ctx := contexttest.Context(t)
pipeOps, err := pipeOpenState.TryOpen(ctx, opener, fs.FileFlags{Read: true})
if pipeOps != nil {
- pipeOps.Release()
+ pipeOps.Release(ctx)
t.Fatalf("open(%s, %o) got file, want nil", name, syscall.O_RDONLY)
}
if err != syserror.ErrWouldBlock {
@@ -351,7 +351,7 @@ func TestCopiedReadAheadBuffer(t *testing.T) {
if pipeOps == nil {
t.Fatalf("open(%s, %o) got nil file, want not nil", name, syscall.O_RDONLY)
}
- defer pipeOps.Release()
+ defer pipeOps.Release(ctx)
if err != nil {
t.Fatalf("open(%s, %o) got error %v, want nil", name, syscall.O_RDONLY, err)
@@ -471,14 +471,14 @@ func TestPipeHangup(t *testing.T) {
f := <-fdchan
if f < 0 {
t.Errorf("%s: partner routine got fd %d, want > 0", test.desc, f)
- pipeOps.Release()
+ pipeOps.Release(ctx)
continue
}
if test.hangupSelf {
// Hangup self and assert that our partner got the expected hangup
// error.
- pipeOps.Release()
+ pipeOps.Release(ctx)
if test.flags.Read {
// Partner is writer.
@@ -490,7 +490,7 @@ func TestPipeHangup(t *testing.T) {
} else {
// Hangup our partner and expect us to get the hangup error.
syscall.Close(f)
- defer pipeOps.Release()
+ defer pipeOps.Release(ctx)
if test.flags.Read {
assertReaderHungup(t, test.desc, pipeOps.(*pipeOperations).file)
diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go
index a0082ecca..1c9e82562 100644
--- a/pkg/sentry/fs/fdpipe/pipe_test.go
+++ b/pkg/sentry/fs/fdpipe/pipe_test.go
@@ -98,10 +98,11 @@ func TestNewPipe(t *testing.T) {
}
f := fd.New(gfd)
- p, err := newPipeOperations(contexttest.Context(t), nil, test.flags, f, test.readAheadBuffer)
+ ctx := contexttest.Context(t)
+ p, err := newPipeOperations(ctx, nil, test.flags, f, test.readAheadBuffer)
if p != nil {
// This is necessary to remove the fd from the global fd notifier.
- defer p.Release()
+ defer p.Release(ctx)
} else {
// If there is no p to DecRef on, because newPipeOperations failed, then the
// file still needs to be closed.
@@ -153,13 +154,14 @@ func TestPipeDestruction(t *testing.T) {
syscall.Close(fds[1])
// Test the read end, but it doesn't really matter which.
- p, err := newPipeOperations(contexttest.Context(t), nil, fs.FileFlags{Read: true}, f, nil)
+ ctx := contexttest.Context(t)
+ p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, f, nil)
if err != nil {
f.Close()
t.Fatalf("newPipeOperations got error %v, want nil", err)
}
// Drop our only reference, which should trigger the destructor.
- p.Release()
+ p.Release(ctx)
if fdnotifier.HasFD(int32(fds[0])) {
t.Fatalf("after DecRef fdnotifier has fd %d, want no longer registered", fds[0])
@@ -282,7 +284,7 @@ func TestPipeRequest(t *testing.T) {
if err != nil {
t.Fatalf("%s: newPipeOperations got error %v, want nil", test.desc, err)
}
- defer p.Release()
+ defer p.Release(ctx)
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe})
file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p)
@@ -334,7 +336,7 @@ func TestPipeReadAheadBuffer(t *testing.T) {
rfile.Close()
t.Fatalf("newPipeOperations got error %v, want nil", err)
}
- defer p.Release()
+ defer p.Release(ctx)
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{
Type: fs.Pipe,
@@ -380,7 +382,7 @@ func TestPipeReadsAccumulate(t *testing.T) {
}
// Don't forget to remove the fd from the fd notifier. Otherwise other tests will
// likely be borked, because it's global :(
- defer p.Release()
+ defer p.Release(ctx)
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{
Type: fs.Pipe,
@@ -448,7 +450,7 @@ func TestPipeWritesAccumulate(t *testing.T) {
}
// Don't forget to remove the fd from the fd notifier. Otherwise other tests
// will likely be borked, because it's global :(
- defer p.Release()
+ defer p.Release(ctx)
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{
Type: fs.Pipe,
diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go
index 846252c89..72ea70fcf 100644
--- a/pkg/sentry/fs/file.go
+++ b/pkg/sentry/fs/file.go
@@ -142,17 +142,17 @@ func NewFile(ctx context.Context, dirent *Dirent, flags FileFlags, fops FileOper
}
// DecRef destroys the File when it is no longer referenced.
-func (f *File) DecRef() {
- f.DecRefWithDestructor(func() {
+func (f *File) DecRef(ctx context.Context) {
+ f.DecRefWithDestructor(ctx, func(context.Context) {
// Drop BSD style locks.
lockRng := lock.LockRange{Start: 0, End: lock.LockEOF}
- f.Dirent.Inode.LockCtx.BSD.UnlockRegion(lock.UniqueID(f.UniqueID), lockRng)
+ f.Dirent.Inode.LockCtx.BSD.UnlockRegion(f, lockRng)
// Release resources held by the FileOperations.
- f.FileOperations.Release()
+ f.FileOperations.Release(ctx)
// Release a reference on the Dirent.
- f.Dirent.DecRef()
+ f.Dirent.DecRef(ctx)
// Only unregister if we are currently registered. There is nothing
// to register if f.async is nil (this happens when async mode is
@@ -310,7 +310,6 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
if !f.mu.Lock(ctx) {
return 0, syserror.ErrInterrupted
}
-
unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
// Handle append mode.
if f.Flags().Append {
@@ -355,7 +354,6 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
// offset."
unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
defer unlockAppendMu()
-
if f.Flags().Append {
if err := f.offsetForAppend(ctx, &offset); err != nil {
return 0, err
@@ -374,9 +372,10 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
return f.FileOperations.Write(ctx, f, src, offset)
}
-// offsetForAppend sets the given offset to the end of the file.
+// offsetForAppend atomically sets the given offset to the end of the file.
//
-// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing.
+// Precondition: the file.Dirent.Inode.appendMu mutex should be held for
+// writing.
func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
uattr, err := f.Dirent.Inode.UnstableAttr(ctx)
if err != nil {
@@ -386,7 +385,7 @@ func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
}
// Update the offset.
- *offset = uattr.Size
+ atomic.StoreInt64(offset, uattr.Size)
return nil
}
@@ -461,7 +460,7 @@ func (f *File) UnstableAttr(ctx context.Context) (UnstableAttr, error) {
func (f *File) MappedName(ctx context.Context) string {
root := RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
name, _ := f.Dirent.FullName(root)
return name
diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go
index beba0f771..305c0f840 100644
--- a/pkg/sentry/fs/file_operations.go
+++ b/pkg/sentry/fs/file_operations.go
@@ -67,7 +67,7 @@ type SpliceOpts struct {
// - File.Flags(): This value may change during the operation.
type FileOperations interface {
// Release release resources held by FileOperations.
- Release()
+ Release(ctx context.Context)
// Waitable defines how this File can be waited on for read and
// write readiness.
@@ -160,6 +160,7 @@ type FileOperations interface {
// refer.
//
// Preconditions: The AddressSpace (if any) that io refers to is activated.
+ // Must only be called from a task goroutine.
Ioctl(ctx context.Context, file *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error)
}
diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go
index dcc1df38f..9dc58d5ff 100644
--- a/pkg/sentry/fs/file_overlay.go
+++ b/pkg/sentry/fs/file_overlay.go
@@ -54,7 +54,7 @@ func overlayFile(ctx context.Context, inode *Inode, flags FileFlags) (*File, err
// Drop the extra reference on the Dirent. Now there's only one reference
// on the dirent, either owned by f (if non-nil), or the Dirent is about
// to be destroyed (if GetFile failed).
- dirent.DecRef()
+ dirent.DecRef(ctx)
return f, err
}
@@ -89,12 +89,12 @@ type overlayFileOperations struct {
}
// Release implements FileOperations.Release.
-func (f *overlayFileOperations) Release() {
+func (f *overlayFileOperations) Release(ctx context.Context) {
if f.upper != nil {
- f.upper.DecRef()
+ f.upper.DecRef(ctx)
}
if f.lower != nil {
- f.lower.DecRef()
+ f.lower.DecRef(ctx)
}
}
@@ -164,7 +164,7 @@ func (f *overlayFileOperations) Seek(ctx context.Context, file *File, whence See
func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, serializer DentrySerializer) (int64, error) {
root := RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
dirCtx := &DirCtx{
@@ -497,7 +497,7 @@ func readdirOne(ctx context.Context, d *Dirent) (map[string]DentAttr, error) {
if err != nil {
return nil, err
}
- defer dir.DecRef()
+ defer dir.DecRef(ctx)
// Use a stub serializer to read the entries into memory.
stubSerializer := &CollectEntriesSerializer{}
@@ -521,10 +521,10 @@ type overlayMappingIdentity struct {
}
// DecRef implements AtomicRefCount.DecRef.
-func (omi *overlayMappingIdentity) DecRef() {
- omi.AtomicRefCount.DecRefWithDestructor(func() {
- omi.overlayFile.DecRef()
- omi.id.DecRef()
+func (omi *overlayMappingIdentity) DecRef(ctx context.Context) {
+ omi.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
+ omi.overlayFile.DecRef(ctx)
+ omi.id.DecRef(ctx)
})
}
@@ -544,7 +544,7 @@ func (omi *overlayMappingIdentity) InodeID() uint64 {
func (omi *overlayMappingIdentity) MappedName(ctx context.Context) string {
root := RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
name, _ := omi.overlayFile.Dirent.FullName(root)
return name
diff --git a/pkg/sentry/fs/filesystems.go b/pkg/sentry/fs/filesystems.go
index 084da2a8d..d41f30bbb 100644
--- a/pkg/sentry/fs/filesystems.go
+++ b/pkg/sentry/fs/filesystems.go
@@ -87,20 +87,6 @@ func RegisterFilesystem(f Filesystem) {
filesystems.registered[f.Name()] = f
}
-// UnregisterFilesystem removes a file system from the global set. To keep the
-// file system set compatible with save/restore, UnregisterFilesystem must be
-// called before save/restore methods.
-//
-// For instance, packages may unregister their file system after it is mounted.
-// This makes sense for pseudo file systems that should not be visible or
-// mountable. See whitelistfs in fs/host/fs.go for one example.
-func UnregisterFilesystem(name string) {
- filesystems.mu.Lock()
- defer filesystems.mu.Unlock()
-
- delete(filesystems.registered, name)
-}
-
// FindFilesystem returns a Filesystem registered at name or (nil, false) if name
// is not a file system type that can be found in /proc/filesystems.
func FindFilesystem(name string) (Filesystem, bool) {
diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go
index bdba6efe5..d2dbff268 100644
--- a/pkg/sentry/fs/fs.go
+++ b/pkg/sentry/fs/fs.go
@@ -42,9 +42,10 @@
// Dirent.dirMu
// Dirent.mu
// DirentCache.mu
-// Locks in InodeOperations implementations or overlayEntry
// Inode.Watches.mu (see `Inotify` for other lock ordering)
// MountSource.mu
+// Inode.appendMu
+// Locks in InodeOperations implementations or overlayEntry
//
// If multiple Dirent or MountSource locks must be taken, locks in the parent must be
// taken before locks in their children.
diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD
index 789369220..5fb419bcd 100644
--- a/pkg/sentry/fs/fsutil/BUILD
+++ b/pkg/sentry/fs/fsutil/BUILD
@@ -8,7 +8,6 @@ go_template_instance(
out = "dirty_set_impl.go",
imports = {
"memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
- "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
},
package = "fsutil",
prefix = "Dirty",
@@ -25,14 +24,14 @@ go_template_instance(
name = "frame_ref_set_impl",
out = "frame_ref_set_impl.go",
imports = {
- "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
+ "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
},
package = "fsutil",
prefix = "FrameRef",
template = "//pkg/segment:generic_set",
types = {
"Key": "uint64",
- "Range": "platform.FileRange",
+ "Range": "memmap.FileRange",
"Value": "uint64",
"Functions": "FrameRefSetFunctions",
},
@@ -43,7 +42,6 @@ go_template_instance(
out = "file_range_set_impl.go",
imports = {
"memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
- "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
},
package = "fsutil",
prefix = "FileRange",
@@ -86,7 +84,6 @@ go_library(
"//pkg/sentry/kernel/time",
"//pkg/sentry/memmap",
"//pkg/sentry/pgalloc",
- "//pkg/sentry/platform",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/usage",
"//pkg/state",
diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go
index c6cd45087..2c9446c1d 100644
--- a/pkg/sentry/fs/fsutil/dirty_set.go
+++ b/pkg/sentry/fs/fsutil/dirty_set.go
@@ -20,7 +20,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -159,7 +158,7 @@ func (ds *DirtySet) AllowClean(mr memmap.MappableRange) {
// repeatedly until all bytes have been written. max is the true size of the
// cached object; offsets beyond max will not be passed to writeAt, even if
// they are marked dirty.
-func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, dirty *DirtySet, max uint64, mem memmap.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
var changedDirty bool
defer func() {
if changedDirty {
@@ -194,7 +193,7 @@ func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet
// successful partial write, SyncDirtyAll will call it repeatedly until all
// bytes have been written. max is the true size of the cached object; offsets
// beyond max will not be passed to writeAt, even if they are marked dirty.
-func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max uint64, mem memmap.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
dseg := dirty.FirstSegment()
for dseg.Ok() {
if err := syncDirtyRange(ctx, dseg.Range(), cache, max, mem, writeAt); err != nil {
@@ -210,7 +209,7 @@ func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max
}
// Preconditions: mr must be page-aligned.
-func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, max uint64, mem memmap.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
for cseg := cache.LowerBoundSegment(mr.Start); cseg.Ok() && cseg.Start() < mr.End; cseg = cseg.NextSegment() {
wbr := cseg.Range().Intersect(mr)
if max < wbr.Start {
diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go
index 08695391c..dc9efa5df 100644
--- a/pkg/sentry/fs/fsutil/file.go
+++ b/pkg/sentry/fs/fsutil/file.go
@@ -31,7 +31,7 @@ import (
type FileNoopRelease struct{}
// Release is a no-op.
-func (FileNoopRelease) Release() {}
+func (FileNoopRelease) Release(context.Context) {}
// SeekWithDirCursor is used to implement fs.FileOperations.Seek. If dirCursor
// is not nil and the seek was on a directory, the cursor will be updated.
@@ -296,7 +296,7 @@ func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, d *fs.Diren
func (sdfo *StaticDirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
root := fs.RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
dirCtx := &fs.DirCtx{
Serializer: serializer,
diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go
index 5643cdac9..bbafebf03 100644
--- a/pkg/sentry/fs/fsutil/file_range_set.go
+++ b/pkg/sentry/fs/fsutil/file_range_set.go
@@ -23,13 +23,12 @@ import (
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
- "gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/usermem"
)
// FileRangeSet maps offsets into a memmap.Mappable to offsets into a
-// platform.File. It is used to implement Mappables that store data in
+// memmap.File. It is used to implement Mappables that store data in
// sparsely-allocated memory.
//
// type FileRangeSet <generated by go_generics>
@@ -65,20 +64,20 @@ func (FileRangeSetFunctions) Split(mr memmap.MappableRange, frstart uint64, spli
}
// FileRange returns the FileRange mapped by seg.
-func (seg FileRangeIterator) FileRange() platform.FileRange {
+func (seg FileRangeIterator) FileRange() memmap.FileRange {
return seg.FileRangeOf(seg.Range())
}
// FileRangeOf returns the FileRange mapped by mr.
//
// Preconditions: seg.Range().IsSupersetOf(mr). mr.Length() != 0.
-func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) platform.FileRange {
+func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) memmap.FileRange {
frstart := seg.Value() + (mr.Start - seg.Start())
- return platform.FileRange{frstart, frstart + mr.Length()}
+ return memmap.FileRange{frstart, frstart + mr.Length()}
}
// Fill attempts to ensure that all memmap.Mappable offsets in required are
-// mapped to a platform.File offset, by allocating from mf with the given
+// mapped to a memmap.File offset, by allocating from mf with the given
// memory usage kind and invoking readAt to store data into memory. (If readAt
// returns a successful partial read, Fill will call it repeatedly until all
// bytes have been read.) EOF is handled consistently with the requirements of
@@ -141,7 +140,7 @@ func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.Map
}
// Drop removes segments for memmap.Mappable offsets in mr, freeing the
-// corresponding platform.FileRanges.
+// corresponding memmap.FileRanges.
//
// Preconditions: mr must be page-aligned.
func (frs *FileRangeSet) Drop(mr memmap.MappableRange, mf *pgalloc.MemoryFile) {
@@ -154,7 +153,7 @@ func (frs *FileRangeSet) Drop(mr memmap.MappableRange, mf *pgalloc.MemoryFile) {
}
// DropAll removes all segments in mr, freeing the corresponding
-// platform.FileRanges.
+// memmap.FileRanges.
func (frs *FileRangeSet) DropAll(mf *pgalloc.MemoryFile) {
for seg := frs.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
mf.DecRef(seg.FileRange())
diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go
index dd6f5aba6..a808894df 100644
--- a/pkg/sentry/fs/fsutil/frame_ref_set.go
+++ b/pkg/sentry/fs/fsutil/frame_ref_set.go
@@ -17,7 +17,7 @@ package fsutil
import (
"math"
- "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/usage"
)
@@ -39,7 +39,7 @@ func (FrameRefSetFunctions) ClearValue(val *uint64) {
}
// Merge implements segment.Functions.Merge.
-func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) {
+func (FrameRefSetFunctions) Merge(_ memmap.FileRange, val1 uint64, _ memmap.FileRange, val2 uint64) (uint64, bool) {
if val1 != val2 {
return 0, false
}
@@ -47,13 +47,13 @@ func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.
}
// Split implements segment.Functions.Split.
-func (FrameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) {
+func (FrameRefSetFunctions) Split(_ memmap.FileRange, val uint64, _ uint64) (uint64, uint64) {
return val, val
}
// IncRefAndAccount adds a reference on the range fr. All newly inserted segments
// are accounted as host page cache memory mappings.
-func (refs *FrameRefSet) IncRefAndAccount(fr platform.FileRange) {
+func (refs *FrameRefSet) IncRefAndAccount(fr memmap.FileRange) {
seg, gap := refs.Find(fr.Start)
for {
switch {
@@ -74,7 +74,7 @@ func (refs *FrameRefSet) IncRefAndAccount(fr platform.FileRange) {
// DecRefAndAccount removes a reference on the range fr and untracks segments
// that are removed from memory accounting.
-func (refs *FrameRefSet) DecRefAndAccount(fr platform.FileRange) {
+func (refs *FrameRefSet) DecRefAndAccount(fr memmap.FileRange) {
seg := refs.FindSegment(fr.Start)
for seg.Ok() && seg.Start() < fr.End {
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go
index e82afd112..ef0113b52 100644
--- a/pkg/sentry/fs/fsutil/host_file_mapper.go
+++ b/pkg/sentry/fs/fsutil/host_file_mapper.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -126,7 +125,7 @@ func (f *HostFileMapper) DecRefOn(mr memmap.MappableRange) {
// offsets in fr or until the next call to UnmapAll.
//
// Preconditions: The caller must hold a reference on all offsets in fr.
-func (f *HostFileMapper) MapInternal(fr platform.FileRange, fd int, write bool) (safemem.BlockSeq, error) {
+func (f *HostFileMapper) MapInternal(fr memmap.FileRange, fd int, write bool) (safemem.BlockSeq, error) {
chunks := ((fr.End + chunkMask) >> chunkShift) - (fr.Start >> chunkShift)
f.mapsMu.Lock()
defer f.mapsMu.Unlock()
@@ -146,7 +145,7 @@ func (f *HostFileMapper) MapInternal(fr platform.FileRange, fd int, write bool)
}
// Preconditions: f.mapsMu must be locked.
-func (f *HostFileMapper) forEachMappingBlockLocked(fr platform.FileRange, fd int, write bool, fn func(safemem.Block)) error {
+func (f *HostFileMapper) forEachMappingBlockLocked(fr memmap.FileRange, fd int, write bool, fn func(safemem.Block)) error {
prot := syscall.PROT_READ
if write {
prot |= syscall.PROT_WRITE
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
index 78fec553e..c15d8a946 100644
--- a/pkg/sentry/fs/fsutil/host_mappable.go
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -21,18 +21,17 @@ import (
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
)
-// HostMappable implements memmap.Mappable and platform.File over a
+// HostMappable implements memmap.Mappable and memmap.File over a
// CachedFileObject.
//
// Lock order (compare the lock order model in mm/mm.go):
// truncateMu ("fs locks")
// mu ("memmap.Mappable locks not taken by Translate")
-// ("platform.File locks")
+// ("memmap.File locks")
// backingFile ("CachedFileObject locks")
//
// +stateify savable
@@ -124,24 +123,24 @@ func (h *HostMappable) NotifyChangeFD() error {
return nil
}
-// MapInternal implements platform.File.MapInternal.
-func (h *HostMappable) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+// MapInternal implements memmap.File.MapInternal.
+func (h *HostMappable) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
return h.hostFileMapper.MapInternal(fr, h.backingFile.FD(), at.Write)
}
-// FD implements platform.File.FD.
+// FD implements memmap.File.FD.
func (h *HostMappable) FD() int {
return h.backingFile.FD()
}
-// IncRef implements platform.File.IncRef.
-func (h *HostMappable) IncRef(fr platform.FileRange) {
+// IncRef implements memmap.File.IncRef.
+func (h *HostMappable) IncRef(fr memmap.FileRange) {
mr := memmap.MappableRange{Start: fr.Start, End: fr.End}
h.hostFileMapper.IncRefOn(mr)
}
-// DecRef implements platform.File.DecRef.
-func (h *HostMappable) DecRef(fr platform.FileRange) {
+// DecRef implements memmap.File.DecRef.
+func (h *HostMappable) DecRef(fr memmap.FileRange) {
mr := memmap.MappableRange{Start: fr.Start, End: fr.End}
h.hostFileMapper.DecRefOn(mr)
}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index 800c8b4e1..fe8b0b6ac 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -26,7 +26,6 @@ import (
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
- "gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
@@ -934,7 +933,7 @@ func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange
// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
func (c *CachingInodeOperations) InvalidateUnsavable(ctx context.Context) error {
- // Whether we have a host fd (and consequently what platform.File is
+ // Whether we have a host fd (and consequently what memmap.File is
// mapped) can change across save/restore, so invalidate all translations
// unconditionally.
c.mapsMu.Lock()
@@ -999,10 +998,10 @@ func (c *CachingInodeOperations) Evict(ctx context.Context, er pgalloc.Evictable
}
}
-// IncRef implements platform.File.IncRef. This is used when we directly map an
-// underlying host fd and CachingInodeOperations is used as the platform.File
+// IncRef implements memmap.File.IncRef. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the memmap.File
// during translation.
-func (c *CachingInodeOperations) IncRef(fr platform.FileRange) {
+func (c *CachingInodeOperations) IncRef(fr memmap.FileRange) {
// Hot path. Avoid defers.
c.dataMu.Lock()
seg, gap := c.refs.Find(fr.Start)
@@ -1024,10 +1023,10 @@ func (c *CachingInodeOperations) IncRef(fr platform.FileRange) {
}
}
-// DecRef implements platform.File.DecRef. This is used when we directly map an
-// underlying host fd and CachingInodeOperations is used as the platform.File
+// DecRef implements memmap.File.DecRef. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the memmap.File
// during translation.
-func (c *CachingInodeOperations) DecRef(fr platform.FileRange) {
+func (c *CachingInodeOperations) DecRef(fr memmap.FileRange) {
// Hot path. Avoid defers.
c.dataMu.Lock()
seg := c.refs.FindSegment(fr.Start)
@@ -1046,15 +1045,15 @@ func (c *CachingInodeOperations) DecRef(fr platform.FileRange) {
c.dataMu.Unlock()
}
-// MapInternal implements platform.File.MapInternal. This is used when we
+// MapInternal implements memmap.File.MapInternal. This is used when we
// directly map an underlying host fd and CachingInodeOperations is used as the
-// platform.File during translation.
-func (c *CachingInodeOperations) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+// memmap.File during translation.
+func (c *CachingInodeOperations) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
return c.hostFileMapper.MapInternal(fr, c.backingFile.FD(), at.Write)
}
-// FD implements platform.File.FD. This is used when we directly map an
-// underlying host fd and CachingInodeOperations is used as the platform.File
+// FD implements memmap.File.FD. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the memmap.File
// during translation.
func (c *CachingInodeOperations) FD() int {
return c.backingFile.FD()
diff --git a/pkg/sentry/fs/g3doc/.gitignore b/pkg/sentry/fs/g3doc/.gitignore
new file mode 100644
index 000000000..2d19fc766
--- /dev/null
+++ b/pkg/sentry/fs/g3doc/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/pkg/sentry/fs/g3doc/fuse.md b/pkg/sentry/fs/g3doc/fuse.md
new file mode 100644
index 000000000..2ca84dd74
--- /dev/null
+++ b/pkg/sentry/fs/g3doc/fuse.md
@@ -0,0 +1,263 @@
+# Foreword
+
+This document describes an ongoing project to support FUSE filesystems within
+the sentry. This is intended to become the final documentation for this
+subsystem, and is therefore written in the past tense. However, FUSE support is
+currently incomplete and the document will be updated as things progress.
+
+# FUSE: Filesystem in Userspace
+
+The sentry supports dispatching filesystem operations to a FUSE server,
+allowing a FUSE filesystem to be used with a sandbox.
+
+## Overview
+
+FUSE has two main components:
+
+1. A client kernel driver (canonically `fuse.ko` in Linux), which forwards
+ filesystem operations (usually initiated by syscalls) to the server.
+
+2. A server, which is a userspace daemon that implements the actual filesystem.
+
+The sentry implements the client component, which allows a server daemon running
+within the sandbox to implement a filesystem within the sandbox.
+
+A FUSE filesystem is initialized with `mount(2)`, typically with the help of a
+utility like `fusermount(1)`. Various mount options exist for establishing
+ownership and access permissions on the filesystem, but the most important mount
+option is a file descriptor used to establish communication between the client
+and server.
+
+The FUSE device FD is obtained by opening `/dev/fuse`. During regular operation,
+the client and server use the FUSE protocol described in `fuse(4)` to service
+filesystem operations. See the "Protocol" section below for more information
+about this protocol. The core of the sentry support for FUSE is the client-side
+implementation of this protocol.
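+
+To make this concrete, the sketch below shows roughly what a mount utility such
+as `fusermount(1)` does to obtain the device FD and hand it to `mount(2)`. This
+is illustrative only and not sentry code; the function name, target path and
+option values are assumptions for the example:
+
+```go
+// Minimal sketch of the FUSE mount handshake from the server side.
+package fusesketch
+
+import (
+	"fmt"
+
+	"golang.org/x/sys/unix"
+)
+
+func mountFUSE(target string) (int, error) {
+	// The communication channel is a file descriptor on the FUSE device.
+	devFD, err := unix.Open("/dev/fuse", unix.O_RDWR, 0)
+	if err != nil {
+		return -1, err
+	}
+	// The FD is handed to the client via the "fd=" mount option;
+	// rootmode, user_id and group_id are also required.
+	data := fmt.Sprintf("fd=%d,rootmode=40000,user_id=0,group_id=0", devFD)
+	if err := unix.Mount("fuse", target, "fuse", 0, data); err != nil {
+		unix.Close(devFD)
+		return -1, err
+	}
+	return devFD, nil
+}
+```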
+
+## FUSE in the Sentry
+
+The sentry's FUSE client targets VFS2 and has the following components:
+
+- An implementation of `/dev/fuse`.
+
+- A VFS2 filesystem for mapping syscalls to FUSE ops. Since we're targeting
+ VFS2, one point of contention may be the lack of inodes in VFS2. We can
+ tentatively implement a kernfs-based filesystem to bridge the gap in APIs.
+ The kernfs base functionality can serve the role of the Linux inode cache
+ and the filesystem can map VFS2 syscalls to kernfs inode operations; see
+ the `kernfs.Inode` interface.
+
+The FUSE protocol lends itself well to marshaling with `go_marshal`. The various
+request and response packets can be defined in the ABI package and converted to
+and from the wire format using `go_marshal`.
+
+### Design Goals
+
+- While filesystem performance is always important, the sentry's FUSE support
+ is primarily concerned with compatibility, with performance as a secondary
+ concern.
+
+- Avoiding deadlocks from a hung server daemon.
+
+- Consider the potential for denial of service from a malicious server daemon.
+ Protecting itself from userspace is already a design goal for the sentry,
+ but needs additional consideration for FUSE. Normally, an operating system
+ doesn't rely on userspace to make progress with filesystem operations. Since
+ this changes with FUSE, it opens up the possibility of creating a chain of
+ dependencies controlled by userspace, which could affect an entire sandbox.
+ For example: a FUSE op can block a syscall, which could be holding a
+ subsystem lock, which can then block another task goroutine.
+
+### Milestones
+
+Below are some broad goals to aim for while implementing FUSE in the sentry.
+Many FUSE ops can be grouped into broad categories of functionality, and most
+ops can be implemented in parallel.
+
+#### Minimal client that can mount a trivial FUSE filesystem
+
+- Implement `/dev/fuse` - a character device used to establish an FD for
+ communication between the sentry and the server daemon.
+
+- Implement basic FUSE ops like `FUSE_INIT`, `FUSE_DESTROY`.
+
+#### Read-only mount with basic file operations
+
+- Implement the majority of file, directory and file descriptor FUSE ops. For
+ this milestone, we can skip uncommon or complex operations like mmap, mknod,
+ file locking, poll, and extended attributes. We can stub these out along
+ with any ops that modify the filesystem. The exact list of required ops is
+ to be determined, but the goal is to mount a real filesystem as read-only,
+ and be able to read contents from the filesystem in the sentry.
+
+#### Full read-write support
+
+- Implement the remaining FUSE ops and decide if we can omit rarely used
+ operations like ioctl.
+
+# Appendix
+
+## FUSE Protocol
+
+The FUSE protocol is a request-response protocol. All requests are initiated by
+the client. The wire-format for the protocol is raw C structs serialized to
+memory.
+
+All FUSE requests begin with the following request header:
+
+```c
+struct fuse_in_header {
+ uint32_t len; // Length of the request, including this header.
+ uint32_t opcode; // Requested operation.
+ uint64_t unique; // A unique identifier for this request.
+ uint64_t nodeid; // ID of the filesystem object being operated on.
+ uint32_t uid; // UID of the requesting process.
+ uint32_t gid; // GID of the requesting process.
+ uint32_t pid; // PID of the requesting process.
+ uint32_t padding;
+};
+```
+
+The request is then followed by a payload specific to the `opcode`.
+
+All responses begin with this response header:
+
+```c
+struct fuse_out_header {
+ uint32_t len; // Length of the response, including this header.
+ int32_t error; // Status of the request, 0 if success.
+ uint64_t unique; // The unique identifier from the corresponding request.
+};
+```
+
+The response payload also depends on the request `opcode`. If `error != 0`, the
+response payload must be empty.
+
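+As a minimal sketch of the framing (the helper name is illustrative, and a
+little-endian host is assumed since the wire format is native-endian), decoding
+a response buffer might look like:
+
+```go
+package fuse
+
+import (
+    "encoding/binary"
+    "fmt"
+)
+
+// parseResponse decodes a fuse_out_header and any trailing payload from a
+// raw response buffer.
+func parseResponse(buf []byte) (unique uint64, errno int32, payload []byte, err error) {
+    const hdrSize = 16 // len (4) + error (4) + unique (8).
+    if len(buf) < hdrSize {
+        return 0, 0, nil, fmt.Errorf("short response: %d bytes", len(buf))
+    }
+    length := binary.LittleEndian.Uint32(buf[0:4])
+    errno = int32(binary.LittleEndian.Uint32(buf[4:8]))
+    unique = binary.LittleEndian.Uint64(buf[8:16])
+    if uint32(len(buf)) < length {
+        return 0, 0, nil, fmt.Errorf("truncated response")
+    }
+    // A non-zero error status means the payload must be empty.
+    if errno != 0 {
+        return unique, errno, nil, nil
+    }
+    return unique, errno, buf[hdrSize:length], nil
+}
+```
+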
+### Operations
+
+The following is a list of all FUSE operations used in `fuse_in_header.opcode`
+as of Linux v4.4, and a brief description of their purpose. These are defined in
+`uapi/linux/fuse.h`. Many of these have a corresponding request and response
+payload struct; `fuse(4)` has details for some of these. We also note how these
+operations map to the sentry virtual filesystem.
+
+#### FUSE meta-operations
+
+These operations are specific to FUSE and don't have a corresponding action in a
+generic filesystem.
+
+- `FUSE_INIT`: Initializes a new FUSE filesystem and is the first message sent
+  by the client after mount. It is used for version and feature negotiation
+  and is related to `mount(2)`. A sketch of its payloads appears after this
+  list.
+- `FUSE_DESTROY`: Tears down a FUSE filesystem; related to `umount(2)`.
+- `FUSE_INTERRUPT`: Interrupts an in-flight operation, specified by the
+ `fuse_in_header.unique` value provided in the corresponding request header.
+ The client can send at most one of these per request, and will enter an
+ uninterruptible wait for a reply. The server is expected to reply promptly.
+- `FUSE_FORGET`: A hint to the server that it should evict the indicated node
+  from any caches. This is wired up to `(struct super_operations).evict_inode`
+  in Linux, which is in turn hooked into the inode cache shrinker, typically
+  triggered by system memory pressure.
+- `FUSE_BATCH_FORGET`: Batch version of `FUSE_FORGET`.
+
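+For illustration, the `FUSE_INIT` payloads could be declared roughly as follows
+(mirroring `struct fuse_init_in` and `struct fuse_init_out` from
+`uapi/linux/fuse.h`; the exact `fuse_init_out` layout depends on the negotiated
+minor version, so this should be checked against the targeted header, and the
+Go names are illustrative):
+
+```go
+// FUSEInitIn mirrors struct fuse_init_in: the client's protocol version,
+// readahead limit and requested feature flags.
+//
+// +marshal
+type FUSEInitIn struct {
+    Major        uint32
+    Minor        uint32
+    MaxReadahead uint32
+    Flags        uint32
+}
+
+// FUSEInitOut mirrors struct fuse_init_out: the server's negotiated
+// version, feature flags and limits.
+//
+// +marshal
+type FUSEInitOut struct {
+    Major               uint32
+    Minor               uint32
+    MaxReadahead        uint32
+    Flags               uint32
+    MaxBackground       uint16
+    CongestionThreshold uint16
+    MaxWrite            uint32
+    TimeGran            uint32
+    Unused              [9]uint32
+}
+```
+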
+#### Filesystem Syscalls
+
+These FUSE ops map directly to an equivalent filesystem syscall, or family of
+syscalls. The relevant syscalls have a similar name to the operation, unless
+otherwise noted.
+
+Node creation:
+
+- `FUSE_MKNOD`
+- `FUSE_MKDIR`
+- `FUSE_CREATE`: This is equivalent to `open(2)` and `creat(2)`, which
+ atomically creates and opens a node.
+
+Node attributes and extended attributes:
+
+- `FUSE_GETATTR`
+- `FUSE_SETATTR`
+- `FUSE_SETXATTR`
+- `FUSE_GETXATTR`
+- `FUSE_LISTXATTR`
+- `FUSE_REMOVEXATTR`
+
+Node link manipulation:
+
+- `FUSE_READLINK`
+- `FUSE_LINK`
+- `FUSE_SYMLINK`
+- `FUSE_UNLINK`
+
+Directory operations:
+
+- `FUSE_RMDIR`
+- `FUSE_RENAME`
+- `FUSE_RENAME2`
+- `FUSE_OPENDIR`: `open(2)` for directories.
+- `FUSE_RELEASEDIR`: `close(2)` for directories.
+- `FUSE_READDIR`
+- `FUSE_READDIRPLUS`
+- `FUSE_FSYNCDIR`: `fsync(2)` for directories.
+- `FUSE_LOOKUP`: Establishes a unique identifier for a FS node. This is
+  reminiscent of `VirtualFilesystem.GetDentryAt` in that it resolves a path
+  component to a node. However, the returned identifier is opaque to the
+  client. The server must remember this mapping, as this is how the client
+  will reference the node in the future; a sketch of the client-side
+  bookkeeping appears after this list.
+
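+A sketch of that client-side bookkeeping (illustrative only; the actual sentry
+type would embed `kernfs.Inode` and cache attributes):
+
+```go
+// fuseInode records the opaque identifier returned by FUSE_LOOKUP. The
+// nodeid is echoed back verbatim in fuse_in_header.nodeid for every
+// subsequent operation on this node.
+type fuseInode struct {
+    nodeID     uint64 // Opaque server identifier from fuse_entry_out.nodeid.
+    generation uint64 // Distinguishes reuse of the same nodeid by the server.
+}
+```
+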
+File operations:
+
+- `FUSE_OPEN`: `open(2)` for files.
+- `FUSE_RELEASE`: `close(2)` for files.
+- `FUSE_FSYNC`
+- `FUSE_FALLOCATE`
+- `FUSE_SETUPMAPPING`: Creates a memory map on a file for `mmap(2)`.
+- `FUSE_REMOVEMAPPING`: Removes a memory map for `munmap(2)`.
+
+File locking:
+
+- `FUSE_GETLK`
+- `FUSE_SETLK`
+- `FUSE_SETLKW`
+- `FUSE_COPY_FILE_RANGE`
+
+File descriptor operations:
+
+- `FUSE_IOCTL`
+- `FUSE_POLL`
+- `FUSE_LSEEK`
+
+Filesystem operations:
+
+- `FUSE_STATFS`
+
+#### Permissions
+
+- `FUSE_ACCESS` is used to check if a node is accessible, as part of many
+ syscall implementations. Maps to `vfs.FilesystemImpl.AccessAt` in the
+ sentry.
+
+#### I/O Operations
+
+These ops are used to read and write file pages. They back both regular I/O
+syscalls like `read(2)` and `write(2)` and memory-mapped I/O via `mmap(2)`; a
+request payload sketch follows the list below.
+
+- `FUSE_READ`
+- `FUSE_WRITE`
+
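+As a sketch of the request payload (mirroring `struct fuse_read_in` from
+`uapi/linux/fuse.h`; the Go naming is illustrative), a `FUSE_READ` identifies
+an open handle and a byte range, and the response payload is simply the data
+read:
+
+```go
+// FUSEReadIn mirrors struct fuse_read_in, the payload of FUSE_READ. The
+// response has no typed struct; its payload is the raw file data.
+//
+// +marshal
+type FUSEReadIn struct {
+    Fh        uint64 // File handle returned by FUSE_OPEN.
+    Offset    uint64 // Byte offset to read from.
+    Size      uint32 // Number of bytes requested.
+    ReadFlags uint32
+    LockOwner uint64
+    Flags     uint32 // Open flags of the file description.
+    Padding   uint32
+}
+```
+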
+#### Miscellaneous
+
+- `FUSE_FLUSH`: Used by the client to indicate when a file descriptor is
+ closed. Distinct from `FUSE_FSYNC`, which corresponds to an `fsync(2)`
+ syscall from the user. Maps to `vfs.FileDescriptorImpl.Release` in the
+ sentry.
+- `FUSE_BMAP`: Old address space API for block defrag. Probably not needed.
+- `FUSE_NOTIFY_REPLY`: [TODO: what does this do?]
+
+# References
+
+- [fuse(4) Linux manual page](https://www.man7.org/linux/man-pages/man4/fuse.4.html)
+- [Linux kernel FUSE documentation](https://www.kernel.org/doc/html/latest/filesystems/fuse.html)
+- [The reference implementation of the Linux FUSE (Filesystem in Userspace)
+ interface](https://github.com/libfuse/libfuse)
+- [The kernel interface of FUSE](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fuse.h)
diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go
index b2fcab127..c0bc63a32 100644
--- a/pkg/sentry/fs/gofer/file.go
+++ b/pkg/sentry/fs/gofer/file.go
@@ -114,7 +114,7 @@ func NewFile(ctx context.Context, dirent *fs.Dirent, name string, flags fs.FileF
}
// Release implements fs.FileOpeations.Release.
-func (f *fileOperations) Release() {
+func (f *fileOperations) Release(context.Context) {
f.handles.DecRef()
}
@@ -122,7 +122,7 @@ func (f *fileOperations) Release() {
func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
root := fs.RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
dirCtx := &fs.DirCtx{
diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go
index 2df2fe889..326fed954 100644
--- a/pkg/sentry/fs/gofer/gofer_test.go
+++ b/pkg/sentry/fs/gofer/gofer_test.go
@@ -232,7 +232,7 @@ func TestRevalidation(t *testing.T) {
// We must release the dirent, of the test will fail
// with a reference leak. This is tracked by p9test.
- defer dirent.DecRef()
+ defer dirent.DecRef(ctx)
// Walk again. Depending on the cache policy, we may
// get a new dirent.
@@ -246,7 +246,7 @@ func TestRevalidation(t *testing.T) {
if !test.preModificationWantReload && dirent != newDirent {
t.Errorf("Lookup with cachePolicy=%s got new dirent %+v, wanted old dirent %+v", test.cachePolicy, newDirent, dirent)
}
- newDirent.DecRef() // See above.
+ newDirent.DecRef(ctx) // See above.
// Modify the underlying mocked file's modification
// time for the next walk that occurs.
@@ -287,7 +287,7 @@ func TestRevalidation(t *testing.T) {
if test.postModificationWantUpdatedAttrs && gotModTimeSeconds != nowSeconds {
t.Fatalf("Lookup with cachePolicy=%s got new modification time %v, wanted %v", test.cachePolicy, gotModTimeSeconds, nowSeconds)
}
- newDirent.DecRef() // See above.
+ newDirent.DecRef(ctx) // See above.
// Remove the file from the remote fs, subsequent walks
// should now fail to find anything.
@@ -303,7 +303,7 @@ func TestRevalidation(t *testing.T) {
t.Errorf("Lookup with cachePolicy=%s got new dirent and error %v, wanted old dirent and nil error", test.cachePolicy, err)
}
if err == nil {
- newDirent.DecRef() // See above.
+ newDirent.DecRef(ctx) // See above.
}
})
}
diff --git a/pkg/sentry/fs/gofer/handles.go b/pkg/sentry/fs/gofer/handles.go
index fc14249be..f324dbf26 100644
--- a/pkg/sentry/fs/gofer/handles.go
+++ b/pkg/sentry/fs/gofer/handles.go
@@ -47,7 +47,8 @@ type handles struct {
// DecRef drops a reference on handles.
func (h *handles) DecRef() {
- h.DecRefWithDestructor(func() {
+ ctx := context.Background()
+ h.DecRefWithDestructor(ctx, func(context.Context) {
if h.Host != nil {
if h.isHostBorrowed {
h.Host.Release()
@@ -57,7 +58,7 @@ func (h *handles) DecRef() {
}
}
}
- if err := h.File.close(context.Background()); err != nil {
+ if err := h.File.close(ctx); err != nil {
log.Warningf("error closing p9 file: %v", err)
}
})
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
index a016c896e..3a225fd39 100644
--- a/pkg/sentry/fs/gofer/inode.go
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -441,8 +441,9 @@ func (i *inodeOperations) Release(ctx context.Context) {
// asynchronously.
//
// We use AsyncWithContext to avoid needing to allocate an extra
- // anonymous function on the heap.
- fs.AsyncWithContext(ctx, i.fileState.Release)
+ // anonymous function on the heap. We must use background context
+ // because the async work cannot happen on the task context.
+ fs.AsyncWithContext(context.Background(), i.fileState.Release)
}
// Mappable implements fs.InodeOperations.Mappable.
@@ -640,7 +641,7 @@ func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset,
// WriteOut implements fs.InodeOperations.WriteOut.
func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
- if !i.session().cachePolicy.cacheUAttrs(inode) {
+ if inode.MountSource.Flags.ReadOnly || !i.session().cachePolicy.cacheUAttrs(inode) {
return nil
}
diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go
index cf9800100..3c66dc3c2 100644
--- a/pkg/sentry/fs/gofer/path.go
+++ b/pkg/sentry/fs/gofer/path.go
@@ -168,7 +168,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string
// Construct the positive Dirent.
d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
- defer d.DecRef()
+ defer d.DecRef(ctx)
// Construct the new file, caching the handles if allowed.
h := handles{
@@ -371,7 +371,7 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string
// Find out if file being deleted is a socket or pipe that needs to be
// removed from endpoint map.
if d, err := i.Lookup(ctx, dir, name); err == nil {
- defer d.DecRef()
+ defer d.DecRef(ctx)
if fs.IsSocket(d.Inode.StableAttr) || fs.IsPipe(d.Inode.StableAttr) {
switch iops := d.Inode.InodeOperations.(type) {
@@ -392,7 +392,7 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string
return err
}
if key != nil {
- i.session().overrides.remove(*key)
+ i.session().overrides.remove(ctx, *key)
}
i.touchModificationAndStatusChangeTime(ctx, dir)
diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go
index b5efc86f2..7cf3522ff 100644
--- a/pkg/sentry/fs/gofer/session.go
+++ b/pkg/sentry/fs/gofer/session.go
@@ -89,10 +89,10 @@ func (e *overrideMaps) addPipe(key device.MultiDeviceKey, d *fs.Dirent, inode *f
// remove deletes the key from the maps.
//
// Precondition: maps must have been locked with 'lock'.
-func (e *overrideMaps) remove(key device.MultiDeviceKey) {
+func (e *overrideMaps) remove(ctx context.Context, key device.MultiDeviceKey) {
endpoint := e.keyMap[key]
delete(e.keyMap, key)
- endpoint.dirent.DecRef()
+ endpoint.dirent.DecRef(ctx)
}
// lock blocks other addition and removal operations from happening while
@@ -197,7 +197,7 @@ type session struct {
}
// Destroy tears down the session.
-func (s *session) Destroy() {
+func (s *session) Destroy(ctx context.Context) {
s.client.Close()
}
@@ -329,7 +329,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
s.client, err = p9.NewClient(conn, s.msize, s.version)
if err != nil {
// Drop our reference on the session, it needs to be torn down.
- s.DecRef()
+ s.DecRef(ctx)
return nil, err
}
@@ -340,7 +340,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
ctx.UninterruptibleSleepFinish(false)
if err != nil {
// Same as above.
- s.DecRef()
+ s.DecRef(ctx)
return nil, err
}
@@ -348,7 +348,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
if err != nil {
s.attach.close(ctx)
// Same as above, but after we execute the Close request.
- s.DecRef()
+ s.DecRef(ctx)
return nil, err
}
@@ -393,13 +393,13 @@ func (s *session) fillKeyMap(ctx context.Context) error {
// fillPathMap populates paths for overrides from dirents in direntMap
// before save.
-func (s *session) fillPathMap() error {
+func (s *session) fillPathMap(ctx context.Context) error {
unlock := s.overrides.lock()
defer unlock()
for _, endpoint := range s.overrides.keyMap {
mountRoot := endpoint.dirent.MountRoot()
- defer mountRoot.DecRef()
+ defer mountRoot.DecRef(ctx)
dirPath, _ := endpoint.dirent.FullName(mountRoot)
if dirPath == "" {
return fmt.Errorf("error getting path from dirent")
diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go
index 2d398b753..48b423dd8 100644
--- a/pkg/sentry/fs/gofer/session_state.go
+++ b/pkg/sentry/fs/gofer/session_state.go
@@ -26,7 +26,8 @@ import (
// beforeSave is invoked by stateify.
func (s *session) beforeSave() {
if s.overrides != nil {
- if err := s.fillPathMap(); err != nil {
+ ctx := &dummyClockContext{context.Background()}
+ if err := s.fillPathMap(ctx); err != nil {
panic("failed to save paths to override map before saving" + err.Error())
}
}
diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go
index 40f2c1cad..8a1c69ac2 100644
--- a/pkg/sentry/fs/gofer/socket.go
+++ b/pkg/sentry/fs/gofer/socket.go
@@ -134,14 +134,14 @@ func (e *endpoint) UnidirectionalConnect(ctx context.Context) (transport.Connect
// We don't need the receiver.
c.CloseRecv()
- c.Release()
+ c.Release(ctx)
return c, nil
}
// Release implements transport.BoundEndpoint.Release.
-func (e *endpoint) Release() {
- e.inode.DecRef()
+func (e *endpoint) Release(ctx context.Context) {
+ e.inode.DecRef(ctx)
}
// Passcred implements transport.BoundEndpoint.Passcred.
diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD
index aabce6cc9..d41d23a43 100644
--- a/pkg/sentry/fs/host/BUILD
+++ b/pkg/sentry/fs/host/BUILD
@@ -30,6 +30,7 @@ go_library(
"//pkg/context",
"//pkg/fd",
"//pkg/fdnotifier",
+ "//pkg/iovec",
"//pkg/log",
"//pkg/refs",
"//pkg/safemem",
diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go
index 39299b7e4..0d8d36afa 100644
--- a/pkg/sentry/fs/host/control.go
+++ b/pkg/sentry/fs/host/control.go
@@ -57,7 +57,7 @@ func (c *scmRights) Clone() transport.RightsControlMessage {
}
// Release implements transport.RightsControlMessage.Release.
-func (c *scmRights) Release() {
+func (c *scmRights) Release(ctx context.Context) {
for _, fd := range c.fds {
syscall.Close(fd)
}
diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go
index 3e48b8b2c..86d1a87f0 100644
--- a/pkg/sentry/fs/host/file.go
+++ b/pkg/sentry/fs/host/file.go
@@ -110,7 +110,7 @@ func newFileFromDonatedFD(ctx context.Context, donated int, saveable, isTTY bool
name := fmt.Sprintf("host:[%d]", inode.StableAttr.InodeID)
dirent := fs.NewDirent(ctx, inode, name)
- defer dirent.DecRef()
+ defer dirent.DecRef(ctx)
if isTTY {
return newTTYFile(ctx, dirent, flags, iops), nil
@@ -169,7 +169,7 @@ func (f *fileOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
root := fs.RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
dirCtx := &fs.DirCtx{
Serializer: serializer,
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 62f1246aa..fbfba1b58 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -368,6 +368,9 @@ func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset,
// WriteOut implements fs.InodeOperations.WriteOut.
func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
+ if inode.MountSource.Flags.ReadOnly {
+ return nil
+ }
// Have we been using host kernel metadata caches?
if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) {
// Then the metadata is already up to date on the host.
diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go
index c507f57eb..41a23b5da 100644
--- a/pkg/sentry/fs/host/inode_test.go
+++ b/pkg/sentry/fs/host/inode_test.go
@@ -36,7 +36,7 @@ func TestCloseFD(t *testing.T) {
if err != nil {
t.Fatalf("Failed to create File: %v", err)
}
- file.DecRef()
+ file.DecRef(ctx)
s := make([]byte, 10)
if c, err := syscall.Read(p[0], s); c != 0 || err != nil {
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index b6e94583e..a2f3d5918 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/fdnotifier"
- "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/socket/control"
@@ -39,11 +38,6 @@ import (
// LINT.IfChange
-// maxSendBufferSize is the maximum host send buffer size allowed for endpoint.
-//
-// N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max).
-const maxSendBufferSize = 8 << 20
-
// ConnectedEndpoint is a host FD backed implementation of
// transport.ConnectedEndpoint and transport.Receiver.
//
@@ -103,10 +97,6 @@ func (c *ConnectedEndpoint) init() *syserr.Error {
if err != nil {
return syserr.FromError(err)
}
- if sndbuf > maxSendBufferSize {
- log.Warningf("Socket send buffer too large: %d", sndbuf)
- return syserr.ErrInvalidEndpointState
- }
c.stype = linux.SockType(stype)
c.sndbuf = int64(sndbuf)
@@ -204,7 +194,7 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error)
}
// Send implements transport.ConnectedEndpoint.Send.
-func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) {
+func (c *ConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) {
c.mu.RLock()
defer c.mu.RUnlock()
@@ -281,7 +271,7 @@ func (c *ConnectedEndpoint) EventUpdate() {
}
// Recv implements transport.Receiver.Recv.
-func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+func (c *ConnectedEndpoint) Recv(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
c.mu.RLock()
defer c.mu.RUnlock()
@@ -328,7 +318,7 @@ func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek
}
// close releases all resources related to the endpoint.
-func (c *ConnectedEndpoint) close() {
+func (c *ConnectedEndpoint) close(context.Context) {
fdnotifier.RemoveFD(int32(c.file.FD()))
c.file.Close()
c.file = nil
@@ -384,8 +374,8 @@ func (c *ConnectedEndpoint) RecvMaxQueueSize() int64 {
}
// Release implements transport.ConnectedEndpoint.Release and transport.Receiver.Release.
-func (c *ConnectedEndpoint) Release() {
- c.ref.DecRefWithDestructor(c.close)
+func (c *ConnectedEndpoint) Release(ctx context.Context) {
+ c.ref.DecRefWithDestructor(ctx, c.close)
}
// CloseUnread implements transport.ConnectedEndpoint.CloseUnread.
diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go
index 5c18dbd5e..905afb50d 100644
--- a/pkg/sentry/fs/host/socket_iovec.go
+++ b/pkg/sentry/fs/host/socket_iovec.go
@@ -17,15 +17,12 @@ package host
import (
"syscall"
- "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/iovec"
"gvisor.dev/gvisor/pkg/syserror"
)
// LINT.IfChange
-// maxIovs is the maximum number of iovecs to pass to the host.
-var maxIovs = linux.UIO_MAXIOV
-
// copyToMulti copies as many bytes from src to dst as possible.
func copyToMulti(dst [][]byte, src []byte) {
for _, d := range dst {
@@ -76,7 +73,7 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec
}
}
- if iovsRequired > maxIovs {
+ if iovsRequired > iovec.MaxIovs {
// The kernel will reject our call if we pass this many iovs.
// Use a single intermediate buffer instead.
b := make([]byte, stopLen)
diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go
index affdbcacb..9d58ea448 100644
--- a/pkg/sentry/fs/host/socket_test.go
+++ b/pkg/sentry/fs/host/socket_test.go
@@ -67,11 +67,12 @@ func TestSocketIsBlocking(t *testing.T) {
if fl&syscall.O_NONBLOCK == syscall.O_NONBLOCK {
t.Fatalf("Expected socket %v to be blocking", pair[1])
}
- sock, err := newSocket(contexttest.Context(t), pair[0], false)
+ ctx := contexttest.Context(t)
+ sock, err := newSocket(ctx, pair[0], false)
if err != nil {
t.Fatalf("newSocket(%v) failed => %v", pair[0], err)
}
- defer sock.DecRef()
+ defer sock.DecRef(ctx)
// Test that the socket now is non-blocking.
if fl, err = getFl(pair[0]); err != nil {
t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[0], err)
@@ -93,11 +94,12 @@ func TestSocketWritev(t *testing.T) {
if err != nil {
t.Fatalf("host socket creation failed: %v", err)
}
- socket, err := newSocket(contexttest.Context(t), pair[0], false)
+ ctx := contexttest.Context(t)
+ socket, err := newSocket(ctx, pair[0], false)
if err != nil {
t.Fatalf("newSocket(%v) => %v", pair[0], err)
}
- defer socket.DecRef()
+ defer socket.DecRef(ctx)
buf := []byte("hello world\n")
n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(buf))
if err != nil {
@@ -115,11 +117,12 @@ func TestSocketWritevLen0(t *testing.T) {
if err != nil {
t.Fatalf("host socket creation failed: %v", err)
}
- socket, err := newSocket(contexttest.Context(t), pair[0], false)
+ ctx := contexttest.Context(t)
+ socket, err := newSocket(ctx, pair[0], false)
if err != nil {
t.Fatalf("newSocket(%v) => %v", pair[0], err)
}
- defer socket.DecRef()
+ defer socket.DecRef(ctx)
n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(nil))
if err != nil {
t.Fatalf("socket writev failed: %v", err)
@@ -136,11 +139,12 @@ func TestSocketSendMsgLen0(t *testing.T) {
if err != nil {
t.Fatalf("host socket creation failed: %v", err)
}
- sfile, err := newSocket(contexttest.Context(t), pair[0], false)
+ ctx := contexttest.Context(t)
+ sfile, err := newSocket(ctx, pair[0], false)
if err != nil {
t.Fatalf("newSocket(%v) => %v", pair[0], err)
}
- defer sfile.DecRef()
+ defer sfile.DecRef(ctx)
s := sfile.FileOperations.(socket.Socket)
n, terr := s.SendMsg(nil, usermem.BytesIOSequence(nil), []byte{}, 0, false, ktime.Time{}, socket.ControlMessages{})
@@ -158,18 +162,19 @@ func TestListen(t *testing.T) {
if err != nil {
t.Fatalf("syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) => %v", err)
}
- sfile1, err := newSocket(contexttest.Context(t), pair[0], false)
+ ctx := contexttest.Context(t)
+ sfile1, err := newSocket(ctx, pair[0], false)
if err != nil {
t.Fatalf("newSocket(%v) => %v", pair[0], err)
}
- defer sfile1.DecRef()
+ defer sfile1.DecRef(ctx)
socket1 := sfile1.FileOperations.(socket.Socket)
- sfile2, err := newSocket(contexttest.Context(t), pair[1], false)
+ sfile2, err := newSocket(ctx, pair[1], false)
if err != nil {
t.Fatalf("newSocket(%v) => %v", pair[1], err)
}
- defer sfile2.DecRef()
+ defer sfile2.DecRef(ctx)
socket2 := sfile2.FileOperations.(socket.Socket)
// Socketpairs can not be listened to.
@@ -185,11 +190,11 @@ func TestListen(t *testing.T) {
if err != nil {
t.Fatalf("syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) => %v", err)
}
- sfile3, err := newSocket(contexttest.Context(t), sock, false)
+ sfile3, err := newSocket(ctx, sock, false)
if err != nil {
t.Fatalf("newSocket(%v) => %v", sock, err)
}
- defer sfile3.DecRef()
+ defer sfile3.DecRef(ctx)
socket3 := sfile3.FileOperations.(socket.Socket)
// This socket is not bound so we can't listen on it.
@@ -237,9 +242,10 @@ func TestRelease(t *testing.T) {
}
c := &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f)}
want := &ConnectedEndpoint{queue: c.queue}
- want.ref.DecRef()
+ ctx := contexttest.Context(t)
+ want.ref.DecRef(ctx)
fdnotifier.AddFD(int32(c.file.FD()), nil)
- c.Release()
+ c.Release(ctx)
if !reflect.DeepEqual(c, want) {
t.Errorf("got = %#v, want = %#v", c, want)
}
diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
index cb91355ab..b5229098c 100644
--- a/pkg/sentry/fs/host/tty.go
+++ b/pkg/sentry/fs/host/tty.go
@@ -113,12 +113,12 @@ func (t *TTYFileOperations) Write(ctx context.Context, file *fs.File, src userme
}
// Release implements fs.FileOperations.Release.
-func (t *TTYFileOperations) Release() {
+func (t *TTYFileOperations) Release(ctx context.Context) {
t.mu.Lock()
t.fgProcessGroup = nil
t.mu.Unlock()
- t.fileOperations.Release()
+ t.fileOperations.Release(ctx)
}
// Ioctl implements fs.FileOperations.Ioctl.
@@ -308,9 +308,9 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e
task := kernel.TaskFromContext(ctx)
if task == nil {
// No task? Linux does not have an analog for this case, but
- // tty_check_change is more of a blacklist of cases than a
- // whitelist, and is surprisingly permissive. Allowing the
- // change seems most appropriate.
+ // tty_check_change only blocks specific cases and is
+ // surprisingly permissive. Allowing the change seems
+ // appropriate.
return nil
}
diff --git a/pkg/sentry/fs/host/wait_test.go b/pkg/sentry/fs/host/wait_test.go
index ce397a5e3..c143f4ce2 100644
--- a/pkg/sentry/fs/host/wait_test.go
+++ b/pkg/sentry/fs/host/wait_test.go
@@ -39,7 +39,7 @@ func TestWait(t *testing.T) {
t.Fatalf("NewFile failed: %v", err)
}
- defer file.DecRef()
+ defer file.DecRef(ctx)
r := file.Readiness(waiter.EventIn)
if r != 0 {
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index a34fbc946..b79cd9877 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -96,13 +96,12 @@ func NewInode(ctx context.Context, iops InodeOperations, msrc *MountSource, satt
}
// DecRef drops a reference on the Inode.
-func (i *Inode) DecRef() {
- i.DecRefWithDestructor(i.destroy)
+func (i *Inode) DecRef(ctx context.Context) {
+ i.DecRefWithDestructor(ctx, i.destroy)
}
// destroy releases the Inode and releases the msrc reference taken.
-func (i *Inode) destroy() {
- ctx := context.Background()
+func (i *Inode) destroy(ctx context.Context) {
if err := i.WriteOut(ctx); err != nil {
// FIXME(b/65209558): Mark as warning again once noatime is
// properly supported.
@@ -122,12 +121,12 @@ func (i *Inode) destroy() {
i.Watches.targetDestroyed()
if i.overlay != nil {
- i.overlay.release()
+ i.overlay.release(ctx)
} else {
i.InodeOperations.Release(ctx)
}
- i.MountSource.DecRef()
+ i.MountSource.DecRef(ctx)
}
// Mappable calls i.InodeOperations.Mappable.
diff --git a/pkg/sentry/fs/inode_inotify.go b/pkg/sentry/fs/inode_inotify.go
index efd3c962b..9911a00c2 100644
--- a/pkg/sentry/fs/inode_inotify.go
+++ b/pkg/sentry/fs/inode_inotify.go
@@ -17,6 +17,7 @@ package fs
import (
"fmt"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sync"
)
@@ -136,11 +137,11 @@ func (w *Watches) Notify(name string, events, cookie uint32) {
}
// Unpin unpins dirent from all watches in this set.
-func (w *Watches) Unpin(d *Dirent) {
+func (w *Watches) Unpin(ctx context.Context, d *Dirent) {
w.mu.RLock()
defer w.mu.RUnlock()
for _, watch := range w.ws {
- watch.Unpin(d)
+ watch.Unpin(ctx, d)
}
}
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 537c8d257..dc2e353d9 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -85,7 +85,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
upperInode = child.Inode
upperInode.IncRef()
}
- child.DecRef()
+ child.DecRef(ctx)
}
// Are we done?
@@ -108,7 +108,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
entry, err := newOverlayEntry(ctx, upperInode, nil, false)
if err != nil {
// Don't leak resources.
- upperInode.DecRef()
+ upperInode.DecRef(ctx)
parent.copyMu.RUnlock()
return nil, false, err
}
@@ -129,7 +129,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
if err != nil && err != syserror.ENOENT {
// Don't leak resources.
if upperInode != nil {
- upperInode.DecRef()
+ upperInode.DecRef(ctx)
}
parent.copyMu.RUnlock()
return nil, false, err
@@ -152,7 +152,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
}
}
}
- child.DecRef()
+ child.DecRef(ctx)
}
}
@@ -183,7 +183,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
// unnecessary because we don't need to copy-up and we will always
// operate (e.g. read/write) on the upper Inode.
if !IsDir(upperInode.StableAttr) {
- lowerInode.DecRef()
+ lowerInode.DecRef(ctx)
lowerInode = nil
}
}
@@ -194,10 +194,10 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
// Well, not quite, we failed at the last moment, how depressing.
// Be sure not to leak resources.
if upperInode != nil {
- upperInode.DecRef()
+ upperInode.DecRef(ctx)
}
if lowerInode != nil {
- lowerInode.DecRef()
+ lowerInode.DecRef(ctx)
}
parent.copyMu.RUnlock()
return nil, false, err
@@ -248,7 +248,7 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st
// user) will clobber the real path for the underlying Inode.
upperFile.Dirent.Inode.IncRef()
upperDirent := NewTransientDirent(upperFile.Dirent.Inode)
- upperFile.Dirent.DecRef()
+ upperFile.Dirent.DecRef(ctx)
upperFile.Dirent = upperDirent
// Create the overlay inode and dirent. We need this to construct the
@@ -259,7 +259,7 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st
// The overlay file created below with NewFile will take a reference on
// the overlayDirent, and it should be the only thing holding a
// reference at the time of creation, so we must drop this reference.
- defer overlayDirent.DecRef()
+ defer overlayDirent.DecRef(ctx)
// Create a new overlay file that wraps the upper file.
flags.Pread = upperFile.Flags().Pread
@@ -399,7 +399,7 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena
if !replaced.IsNegative() && IsDir(replaced.Inode.StableAttr) {
children, err := readdirOne(ctx, replaced)
if err != nil {
- replaced.DecRef()
+ replaced.DecRef(ctx)
return err
}
@@ -407,12 +407,12 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena
// included among the returned children, so we don't
// need to bother checking for them.
if len(children) > 0 {
- replaced.DecRef()
+ replaced.DecRef(ctx)
return syserror.ENOTEMPTY
}
}
- replaced.DecRef()
+ replaced.DecRef(ctx)
}
}
@@ -455,12 +455,12 @@ func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name stri
// Grab the inode and drop the dirent, we don't need it.
inode := d.Inode
inode.IncRef()
- d.DecRef()
+ d.DecRef(ctx)
// Create a new overlay entry and dirent for the socket.
entry, err := newOverlayEntry(ctx, inode, nil, false)
if err != nil {
- inode.DecRef()
+ inode.DecRef(ctx)
return nil, err
}
// Use the parent's MountSource, since that corresponds to the overlay,
@@ -672,7 +672,7 @@ func overlayGetlink(ctx context.Context, o *overlayEntry) (*Dirent, error) {
// ground and claim that jumping around the filesystem like this
// is not supported.
name, _ := dirent.FullName(nil)
- dirent.DecRef()
+ dirent.DecRef(ctx)
// Claim that the path is not accessible.
err = syserror.EACCES
diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go
index 389c219d6..aa9851b26 100644
--- a/pkg/sentry/fs/inode_overlay_test.go
+++ b/pkg/sentry/fs/inode_overlay_test.go
@@ -316,7 +316,7 @@ func TestCacheFlush(t *testing.T) {
t.Fatalf("NewMountNamespace failed: %v", err)
}
root := mns.Root()
- defer root.DecRef()
+ defer root.DecRef(ctx)
ctx = &rootContext{
Context: ctx,
@@ -345,7 +345,7 @@ func TestCacheFlush(t *testing.T) {
}
// Drop the file reference.
- file.DecRef()
+ file.DecRef(ctx)
// Dirent should have 2 refs left.
if got, want := dirent.ReadRefs(), 2; int(got) != want {
@@ -361,7 +361,7 @@ func TestCacheFlush(t *testing.T) {
}
// Drop our ref.
- dirent.DecRef()
+ dirent.DecRef(ctx)
// We should be back to zero refs.
if got, want := dirent.ReadRefs(), 0; int(got) != want {
@@ -398,7 +398,7 @@ func (d *dir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags
if err != nil {
return nil, err
}
- defer file.DecRef()
+ defer file.DecRef(ctx)
// Wrap the file's FileOperations in a dirFile.
fops := &dirFile{
FileOperations: file.FileOperations,
diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go
index e3a715c1f..c5c07d564 100644
--- a/pkg/sentry/fs/inotify.go
+++ b/pkg/sentry/fs/inotify.go
@@ -80,7 +80,7 @@ func NewInotify(ctx context.Context) *Inotify {
// Release implements FileOperations.Release. Release removes all watches and
// frees all resources for an inotify instance.
-func (i *Inotify) Release() {
+func (i *Inotify) Release(ctx context.Context) {
// We need to hold i.mu to avoid a race with concurrent calls to
// Inotify.targetDestroyed from Watches. There's no risk of Watches
// accessing this Inotify after the destructor ends, because we remove all
@@ -93,7 +93,7 @@ func (i *Inotify) Release() {
// the owner's destructor.
w.target.Watches.Remove(w.ID())
// Don't leak any references to the target, held by pins in the watch.
- w.destroy()
+ w.destroy(ctx)
}
}
@@ -321,7 +321,7 @@ func (i *Inotify) AddWatch(target *Dirent, mask uint32) int32 {
//
// RmWatch looks up an inotify watch for the given 'wd' and configures the
// target dirent to stop sending events to this inotify instance.
-func (i *Inotify) RmWatch(wd int32) error {
+func (i *Inotify) RmWatch(ctx context.Context, wd int32) error {
i.mu.Lock()
// Find the watch we were asked to removed.
@@ -346,7 +346,7 @@ func (i *Inotify) RmWatch(wd int32) error {
i.queueEvent(newEvent(watch.wd, "", linux.IN_IGNORED, 0))
// Remove all pins.
- watch.destroy()
+ watch.destroy(ctx)
return nil
}
diff --git a/pkg/sentry/fs/inotify_watch.go b/pkg/sentry/fs/inotify_watch.go
index 900cba3ca..605423d22 100644
--- a/pkg/sentry/fs/inotify_watch.go
+++ b/pkg/sentry/fs/inotify_watch.go
@@ -18,6 +18,7 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sync"
)
@@ -105,12 +106,12 @@ func (w *Watch) Pin(d *Dirent) {
// Unpin drops any extra refs held on dirent due to a previous Pin
// call. Calling Unpin multiple times for the same dirent, or on a dirent
// without a corresponding Pin call is a no-op.
-func (w *Watch) Unpin(d *Dirent) {
+func (w *Watch) Unpin(ctx context.Context, d *Dirent) {
w.mu.Lock()
defer w.mu.Unlock()
if w.pins[d] {
delete(w.pins, d)
- d.DecRef()
+ d.DecRef(ctx)
}
}
@@ -125,11 +126,11 @@ func (w *Watch) TargetDestroyed() {
// this watch. Destroy does not cause any new events to be generated. The caller
// is responsible for ensuring there are no outstanding references to this
// watch.
-func (w *Watch) destroy() {
+func (w *Watch) destroy(ctx context.Context) {
w.mu.Lock()
defer w.mu.Unlock()
for d := range w.pins {
- d.DecRef()
+ d.DecRef(ctx)
}
w.pins = nil
}
diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go
index 926538d90..8a5d9c7eb 100644
--- a/pkg/sentry/fs/lock/lock.go
+++ b/pkg/sentry/fs/lock/lock.go
@@ -62,7 +62,7 @@ import (
type LockType int
// UniqueID is a unique identifier of the holder of a regional file lock.
-type UniqueID uint64
+type UniqueID interface{}
const (
// ReadLock describes a POSIX regional file lock to be taken
@@ -98,12 +98,7 @@ type Lock struct {
// If len(Readers) > 0 then HasWriter must be false.
Readers map[UniqueID]bool
- // HasWriter indicates that this is a write lock held by a single
- // UniqueID.
- HasWriter bool
-
- // Writer is only valid if HasWriter is true. It identifies a
- // single write lock holder.
+ // Writer holds the writer unique ID. It's nil if there are no writers.
Writer UniqueID
}
@@ -186,7 +181,6 @@ func makeLock(uid UniqueID, t LockType) Lock {
case ReadLock:
value.Readers[uid] = true
case WriteLock:
- value.HasWriter = true
value.Writer = uid
default:
panic(fmt.Sprintf("makeLock: invalid lock type %d", t))
@@ -196,10 +190,7 @@ func makeLock(uid UniqueID, t LockType) Lock {
// isHeld returns true if uid is a holder of Lock.
func (l Lock) isHeld(uid UniqueID) bool {
- if l.HasWriter && l.Writer == uid {
- return true
- }
- return l.Readers[uid]
+ return l.Writer == uid || l.Readers[uid]
}
// lock sets uid as a holder of a typed lock on Lock.
@@ -214,20 +205,20 @@ func (l *Lock) lock(uid UniqueID, t LockType) {
}
// We cannot downgrade a write lock to a read lock unless the
// uid is the same.
- if l.HasWriter {
+ if l.Writer != nil {
if l.Writer != uid {
panic(fmt.Sprintf("lock: cannot downgrade write lock to read lock for uid %d, writer is %d", uid, l.Writer))
}
// Ensure that there is only one reader if upgrading.
l.Readers = make(map[UniqueID]bool)
// Ensure that there is no longer a writer.
- l.HasWriter = false
+ l.Writer = nil
}
l.Readers[uid] = true
return
case WriteLock:
// If we are already the writer, then this is a no-op.
- if l.HasWriter && l.Writer == uid {
+ if l.Writer == uid {
return
}
// We can only upgrade a read lock to a write lock if there
@@ -243,7 +234,6 @@ func (l *Lock) lock(uid UniqueID, t LockType) {
}
// Ensure that there is only a writer.
l.Readers = make(map[UniqueID]bool)
- l.HasWriter = true
l.Writer = uid
default:
panic(fmt.Sprintf("lock: invalid lock type %d", t))
@@ -277,9 +267,8 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool {
switch t {
case ReadLock:
return l.lockable(r, func(value Lock) bool {
- // If there is no writer, there's no problem adding
- // another reader.
- if !value.HasWriter {
+ // If there is no writer, there's no problem adding another reader.
+ if value.Writer == nil {
return true
}
// If there is a writer, then it must be the same uid
@@ -289,10 +278,9 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool {
case WriteLock:
return l.lockable(r, func(value Lock) bool {
// If there are only readers.
- if !value.HasWriter {
- // Then this uid can only take a write lock if
- // this is a private upgrade, meaning that the
- // only reader is uid.
+ if value.Writer == nil {
+ // Then this uid can only take a write lock if this is a private
+ // upgrade, meaning that the only reader is uid.
return len(value.Readers) == 1 && value.Readers[uid]
}
// If the uid is already a writer on this region, then
@@ -304,7 +292,8 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool {
}
}
-// lock returns true if uid took a lock of type t on the entire range of LockRange.
+// lock returns true if uid took a lock of type t on the entire range of
+// LockRange.
//
// Preconditions: r.Start <= r.End (will panic otherwise).
func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool {
@@ -339,7 +328,7 @@ func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool {
seg, _ = l.SplitUnchecked(seg, r.End)
}
- // Set the lock on the segment. This is guaranteed to
+ // Set the lock on the segment. This is guaranteed to
// always be safe, given canLock above.
value := seg.ValuePtr()
value.lock(uid, t)
@@ -386,7 +375,7 @@ func (l *LockSet) unlock(uid UniqueID, r LockRange) {
value := seg.Value()
var remove bool
- if value.HasWriter && value.Writer == uid {
+ if value.Writer == uid {
// If we are unlocking a writer, then since there can
// only ever be one writer and no readers, then this
// lock should always be removed from the set.
diff --git a/pkg/sentry/fs/lock/lock_set_functions.go b/pkg/sentry/fs/lock/lock_set_functions.go
index 8a3ace0c1..50a16e662 100644
--- a/pkg/sentry/fs/lock/lock_set_functions.go
+++ b/pkg/sentry/fs/lock/lock_set_functions.go
@@ -44,14 +44,9 @@ func (lockSetFunctions) Merge(r1 LockRange, val1 Lock, r2 LockRange, val2 Lock)
return Lock{}, false
}
}
- if val1.HasWriter != val2.HasWriter {
+ if val1.Writer != val2.Writer {
return Lock{}, false
}
- if val1.HasWriter {
- if val1.Writer != val2.Writer {
- return Lock{}, false
- }
- }
return val1, true
}
@@ -62,7 +57,6 @@ func (lockSetFunctions) Split(r LockRange, val Lock, split uint64) (Lock, Lock)
for k, v := range val.Readers {
val0.Readers[k] = v
}
- val0.HasWriter = val.HasWriter
val0.Writer = val.Writer
return val, val0
diff --git a/pkg/sentry/fs/lock/lock_test.go b/pkg/sentry/fs/lock/lock_test.go
index ba002aeb7..fad90984b 100644
--- a/pkg/sentry/fs/lock/lock_test.go
+++ b/pkg/sentry/fs/lock/lock_test.go
@@ -42,9 +42,6 @@ func equals(e0, e1 []entry) bool {
if !reflect.DeepEqual(e0[i].LockRange, e1[i].LockRange) {
return false
}
- if e0[i].Lock.HasWriter != e1[i].Lock.HasWriter {
- return false
- }
if e0[i].Lock.Writer != e1[i].Lock.Writer {
return false
}
@@ -105,7 +102,7 @@ func TestCanLock(t *testing.T) {
LockRange: LockRange{2048, 3072},
},
{
- Lock: Lock{HasWriter: true, Writer: 1},
+ Lock: Lock{Writer: 1},
LockRange: LockRange{3072, 4096},
},
})
@@ -241,7 +238,7 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
after: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, LockEOF},
},
},
@@ -254,7 +251,7 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, LockEOF},
},
},
@@ -273,7 +270,7 @@ func TestSetLock(t *testing.T) {
LockRange: LockRange{0, 4096},
},
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -301,7 +298,7 @@ func TestSetLock(t *testing.T) {
// 0 4096 max uint64
after: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, 4096},
},
{
@@ -318,7 +315,7 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, LockEOF},
},
},
@@ -550,7 +547,7 @@ func TestSetLock(t *testing.T) {
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{1024, 4096},
},
{
@@ -594,7 +591,7 @@ func TestSetLock(t *testing.T) {
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{1024, 3072},
},
{
@@ -633,7 +630,7 @@ func TestSetLock(t *testing.T) {
// 0 1024 2048 4096 max uint64
before: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, 1024},
},
{
@@ -663,11 +660,11 @@ func TestSetLock(t *testing.T) {
// 0 1024 max uint64
after: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{1024, LockEOF},
},
},
@@ -675,28 +672,30 @@ func TestSetLock(t *testing.T) {
}
for _, test := range tests {
- l := fill(test.before)
+ t.Run(test.name, func(t *testing.T) {
+ l := fill(test.before)
- r := LockRange{Start: test.start, End: test.end}
- success := l.lock(test.uid, test.lockType, r)
- var got []entry
- for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
- got = append(got, entry{
- Lock: seg.Value(),
- LockRange: seg.Range(),
- })
- }
+ r := LockRange{Start: test.start, End: test.end}
+ success := l.lock(test.uid, test.lockType, r)
+ var got []entry
+ for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ got = append(got, entry{
+ Lock: seg.Value(),
+ LockRange: seg.Range(),
+ })
+ }
- if success != test.success {
- t.Errorf("%s: setlock(%v, %+v, %d, %d) got success %v, want %v", test.name, test.before, r, test.uid, test.lockType, success, test.success)
- continue
- }
+ if success != test.success {
+ t.Errorf("setlock(%v, %+v, %d, %d) got success %v, want %v", test.before, r, test.uid, test.lockType, success, test.success)
+ return
+ }
- if success {
- if !equals(got, test.after) {
- t.Errorf("%s: got set %+v, want %+v", test.name, got, test.after)
+ if success {
+ if !equals(got, test.after) {
+ t.Errorf("got set %+v, want %+v", got, test.after)
+ }
}
- }
+ })
}
}
@@ -782,7 +781,7 @@ func TestUnlock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, LockEOF},
},
},
@@ -824,7 +823,7 @@ func TestUnlock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, LockEOF},
},
},
@@ -837,7 +836,7 @@ func TestUnlock(t *testing.T) {
// 0 4096 max uint64
after: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -876,7 +875,7 @@ func TestUnlock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, LockEOF},
},
},
@@ -889,7 +888,7 @@ func TestUnlock(t *testing.T) {
// 0 4096
after: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, 4096},
},
},
@@ -906,7 +905,7 @@ func TestUnlock(t *testing.T) {
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{1024, 4096},
},
{
@@ -974,7 +973,7 @@ func TestUnlock(t *testing.T) {
// 0 1024 4096 max uint64
before: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, 1024},
},
{
@@ -991,7 +990,7 @@ func TestUnlock(t *testing.T) {
// 0 8 4096 max uint64
after: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, 8},
},
{
@@ -1008,7 +1007,7 @@ func TestUnlock(t *testing.T) {
// 0 1024 4096 max uint64
before: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, 1024},
},
{
@@ -1025,7 +1024,7 @@ func TestUnlock(t *testing.T) {
// 0 1024 4096 8192 max uint64
after: []entry{
{
- Lock: Lock{HasWriter: true, Writer: 0},
+ Lock: Lock{Writer: 0},
LockRange: LockRange{0, 1024},
},
{
@@ -1041,19 +1040,21 @@ func TestUnlock(t *testing.T) {
}
for _, test := range tests {
- l := fill(test.before)
+ t.Run(test.name, func(t *testing.T) {
+ l := fill(test.before)
- r := LockRange{Start: test.start, End: test.end}
- l.unlock(test.uid, r)
- var got []entry
- for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
- got = append(got, entry{
- Lock: seg.Value(),
- LockRange: seg.Range(),
- })
- }
- if !equals(got, test.after) {
- t.Errorf("%s: got set %+v, want %+v", test.name, got, test.after)
- }
+ r := LockRange{Start: test.start, End: test.end}
+ l.unlock(test.uid, r)
+ var got []entry
+ for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ got = append(got, entry{
+ Lock: seg.Value(),
+ LockRange: seg.Range(),
+ })
+ }
+ if !equals(got, test.after) {
+ t.Errorf("got set %+v, want %+v", got, test.after)
+ }
+ })
}
}
diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go
index 37bae6810..ee69b10e8 100644
--- a/pkg/sentry/fs/mount.go
+++ b/pkg/sentry/fs/mount.go
@@ -51,7 +51,7 @@ type MountSourceOperations interface {
DirentOperations
// Destroy destroys the MountSource.
- Destroy()
+ Destroy(ctx context.Context)
// Below are MountSourceOperations that do not conform to Linux.
@@ -165,16 +165,16 @@ func (msrc *MountSource) DecDirentRefs() {
}
}
-func (msrc *MountSource) destroy() {
+func (msrc *MountSource) destroy(ctx context.Context) {
if c := msrc.DirentRefs(); c != 0 {
panic(fmt.Sprintf("MountSource with non-zero direntRefs is being destroyed: %d", c))
}
- msrc.MountSourceOperations.Destroy()
+ msrc.MountSourceOperations.Destroy(ctx)
}
// DecRef drops a reference on the MountSource.
-func (msrc *MountSource) DecRef() {
- msrc.DecRefWithDestructor(msrc.destroy)
+func (msrc *MountSource) DecRef(ctx context.Context) {
+ msrc.DecRefWithDestructor(ctx, msrc.destroy)
}
// FlushDirentRefs drops all references held by the MountSource on Dirents.
@@ -264,7 +264,7 @@ func (*SimpleMountSourceOperations) ResetInodeMappings() {}
func (*SimpleMountSourceOperations) SaveInodeMapping(*Inode, string) {}
// Destroy implements MountSourceOperations.Destroy.
-func (*SimpleMountSourceOperations) Destroy() {}
+func (*SimpleMountSourceOperations) Destroy(context.Context) {}
// Info defines attributes of a filesystem.
type Info struct {
diff --git a/pkg/sentry/fs/mount_overlay.go b/pkg/sentry/fs/mount_overlay.go
index 78e35b1e6..7badc75d6 100644
--- a/pkg/sentry/fs/mount_overlay.go
+++ b/pkg/sentry/fs/mount_overlay.go
@@ -115,9 +115,9 @@ func (o *overlayMountSourceOperations) SaveInodeMapping(inode *Inode, path strin
}
// Destroy drops references on the upper and lower MountSource.
-func (o *overlayMountSourceOperations) Destroy() {
- o.upper.DecRef()
- o.lower.DecRef()
+func (o *overlayMountSourceOperations) Destroy(ctx context.Context) {
+ o.upper.DecRef(ctx)
+ o.lower.DecRef(ctx)
}
// type overlayFilesystem is the filesystem for overlay mounts.
diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go
index a3d10770b..6c296f5d0 100644
--- a/pkg/sentry/fs/mount_test.go
+++ b/pkg/sentry/fs/mount_test.go
@@ -18,6 +18,7 @@ import (
"fmt"
"testing"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
)
@@ -32,13 +33,13 @@ func cacheReallyContains(cache *DirentCache, d *Dirent) bool {
return false
}
-func mountPathsAre(root *Dirent, got []*Mount, want ...string) error {
+func mountPathsAre(ctx context.Context, root *Dirent, got []*Mount, want ...string) error {
gotPaths := make(map[string]struct{}, len(got))
gotStr := make([]string, len(got))
for i, g := range got {
if groot := g.Root(); groot != nil {
name, _ := groot.FullName(root)
- groot.DecRef()
+ groot.DecRef(ctx)
gotStr[i] = name
gotPaths[name] = struct{}{}
}
@@ -69,7 +70,7 @@ func TestMountSourceOnlyCachedOnce(t *testing.T) {
t.Fatalf("NewMountNamespace failed: %v", err)
}
rootDirent := mm.Root()
- defer rootDirent.DecRef()
+ defer rootDirent.DecRef(ctx)
// Get a child of the root which we will mount over. Note that the
// MockInodeOperations causes Walk to always succeed.
@@ -125,7 +126,7 @@ func TestAllMountsUnder(t *testing.T) {
t.Fatalf("NewMountNamespace failed: %v", err)
}
rootDirent := mm.Root()
- defer rootDirent.DecRef()
+ defer rootDirent.DecRef(ctx)
// Add mounts at the following paths:
paths := []string{
@@ -150,14 +151,14 @@ func TestAllMountsUnder(t *testing.T) {
if err := mm.Mount(ctx, d, submountInode); err != nil {
t.Fatalf("could not mount at %q: %v", p, err)
}
- d.DecRef()
+ d.DecRef(ctx)
}
// mm root should contain all submounts (and does not include the root mount).
rootMnt := mm.FindMount(rootDirent)
submounts := mm.AllMountsUnder(rootMnt)
allPaths := append(paths, "/")
- if err := mountPathsAre(rootDirent, submounts, allPaths...); err != nil {
+ if err := mountPathsAre(ctx, rootDirent, submounts, allPaths...); err != nil {
t.Error(err)
}
@@ -181,9 +182,9 @@ func TestAllMountsUnder(t *testing.T) {
if err != nil {
t.Fatalf("could not find path %q in mount manager: %v", "/foo", err)
}
- defer d.DecRef()
+ defer d.DecRef(ctx)
submounts = mm.AllMountsUnder(mm.FindMount(d))
- if err := mountPathsAre(rootDirent, submounts, "/foo", "/foo/bar", "/foo/qux", "/foo/bar/baz"); err != nil {
+ if err := mountPathsAre(ctx, rootDirent, submounts, "/foo", "/foo/bar", "/foo/qux", "/foo/bar/baz"); err != nil {
t.Error(err)
}
@@ -193,9 +194,9 @@ func TestAllMountsUnder(t *testing.T) {
if err != nil {
t.Fatalf("could not find path %q in mount manager: %v", "/waldo", err)
}
- defer waldo.DecRef()
+ defer waldo.DecRef(ctx)
submounts = mm.AllMountsUnder(mm.FindMount(waldo))
- if err := mountPathsAre(rootDirent, submounts, "/waldo"); err != nil {
+ if err := mountPathsAre(ctx, rootDirent, submounts, "/waldo"); err != nil {
t.Error(err)
}
}
@@ -212,7 +213,7 @@ func TestUnmount(t *testing.T) {
t.Fatalf("NewMountNamespace failed: %v", err)
}
rootDirent := mm.Root()
- defer rootDirent.DecRef()
+ defer rootDirent.DecRef(ctx)
// Add mounts at the following paths:
paths := []string{
@@ -240,7 +241,7 @@ func TestUnmount(t *testing.T) {
if err := mm.Mount(ctx, d, submountInode); err != nil {
t.Fatalf("could not mount at %q: %v", p, err)
}
- d.DecRef()
+ d.DecRef(ctx)
}
allPaths := make([]string, len(paths)+1)
@@ -259,13 +260,13 @@ func TestUnmount(t *testing.T) {
if err := mm.Unmount(ctx, d, false); err != nil {
t.Fatalf("could not unmount at %q: %v", p, err)
}
- d.DecRef()
+ d.DecRef(ctx)
// Remove the path that has been unmounted and the check that the remaining
// mounts are still there.
allPaths = allPaths[:len(allPaths)-1]
submounts := mm.AllMountsUnder(rootMnt)
- if err := mountPathsAre(rootDirent, submounts, allPaths...); err != nil {
+ if err := mountPathsAre(ctx, rootDirent, submounts, allPaths...); err != nil {
t.Error(err)
}
}
diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go
index b414ddaee..d741c4339 100644
--- a/pkg/sentry/fs/mounts.go
+++ b/pkg/sentry/fs/mounts.go
@@ -17,13 +17,9 @@ package fs
import (
"fmt"
"math"
- "path"
- "strings"
"syscall"
- "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sync"
@@ -238,7 +234,7 @@ func (mns *MountNamespace) flushMountSourceRefsLocked() {
// After destroy is called, the MountNamespace may continue to be referenced (for
// example via /proc/mounts), but should free all resources and shouldn't have
// Find* methods called.
-func (mns *MountNamespace) destroy() {
+func (mns *MountNamespace) destroy(ctx context.Context) {
mns.mu.Lock()
defer mns.mu.Unlock()
@@ -251,13 +247,13 @@ func (mns *MountNamespace) destroy() {
for _, mp := range mns.mounts {
// Drop the mount reference on all mounted dirents.
for ; mp != nil; mp = mp.previous {
- mp.root.DecRef()
+ mp.root.DecRef(ctx)
}
}
mns.mounts = nil
// Drop reference on the root.
- mns.root.DecRef()
+ mns.root.DecRef(ctx)
// Ensure that root cannot be accessed via this MountNamespace any
// more.
@@ -269,8 +265,8 @@ func (mns *MountNamespace) destroy() {
}
// DecRef implements RefCounter.DecRef with destructor mns.destroy.
-func (mns *MountNamespace) DecRef() {
- mns.DecRefWithDestructor(mns.destroy)
+func (mns *MountNamespace) DecRef(ctx context.Context) {
+ mns.DecRefWithDestructor(ctx, mns.destroy)
}
// withMountLocked prevents further walks to `node`, because `node` is about to
@@ -316,7 +312,7 @@ func (mns *MountNamespace) Mount(ctx context.Context, mountPoint *Dirent, inode
if err != nil {
return err
}
- defer replacement.DecRef()
+ defer replacement.DecRef(ctx)
// Set the mount's root dirent and id.
parentMnt := mns.findMountLocked(mountPoint)
@@ -398,7 +394,7 @@ func (mns *MountNamespace) Unmount(ctx context.Context, node *Dirent, detachOnly
panic(fmt.Sprintf("Last mount in the chain must be a undo mount: %+v", prev))
}
// Drop mount reference taken at the end of MountNamespace.Mount.
- prev.root.DecRef()
+ prev.root.DecRef(ctx)
} else {
mns.mounts[prev.root] = prev
}
@@ -500,11 +496,11 @@ func (mns *MountNamespace) FindLink(ctx context.Context, root, wd *Dirent, path
// non-directory root is hopeless.
if current != root {
if !IsDir(current.Inode.StableAttr) {
- current.DecRef() // Drop reference from above.
+ current.DecRef(ctx) // Drop reference from above.
return nil, syserror.ENOTDIR
}
if err := current.Inode.CheckPermission(ctx, PermMask{Execute: true}); err != nil {
- current.DecRef() // Drop reference from above.
+ current.DecRef(ctx) // Drop reference from above.
return nil, err
}
}
@@ -515,12 +511,12 @@ func (mns *MountNamespace) FindLink(ctx context.Context, root, wd *Dirent, path
// Allow failed walks to cache the dirent, because no
// children will acquire a reference at the end.
current.maybeExtendReference()
- current.DecRef()
+ current.DecRef(ctx)
return nil, err
}
// Drop old reference.
- current.DecRef()
+ current.DecRef(ctx)
if remainder != "" {
// Ensure it's resolved, unless it's the last level.
@@ -574,11 +570,11 @@ func (mns *MountNamespace) resolve(ctx context.Context, root, node *Dirent, rema
case nil:
// Make sure we didn't exhaust the traversal budget.
if *remainingTraversals == 0 {
- target.DecRef()
+ target.DecRef(ctx)
return nil, syscall.ELOOP
}
- node.DecRef() // Drop the original reference.
+ node.DecRef(ctx) // Drop the original reference.
return target, nil
case syscall.ENOLINK:
@@ -586,7 +582,7 @@ func (mns *MountNamespace) resolve(ctx context.Context, root, node *Dirent, rema
return node, nil
case ErrResolveViaReadlink:
- defer node.DecRef() // See above.
+ defer node.DecRef(ctx) // See above.
// First, check if we should traverse.
if *remainingTraversals == 0 {
@@ -612,7 +608,7 @@ func (mns *MountNamespace) resolve(ctx context.Context, root, node *Dirent, rema
return d, err
default:
- node.DecRef() // Drop for err; see above.
+ node.DecRef(ctx) // Drop for err; see above.
// Propagate the error.
return nil, err
@@ -625,71 +621,3 @@ func (mns *MountNamespace) SyncAll(ctx context.Context) {
defer mns.mu.Unlock()
mns.root.SyncAll(ctx)
}
-
-// ResolveExecutablePath resolves the given executable name given a set of
-// paths that might contain it.
-func (mns *MountNamespace) ResolveExecutablePath(ctx context.Context, wd, name string, paths []string) (string, error) {
- // Absolute paths can be used directly.
- if path.IsAbs(name) {
- return name, nil
- }
-
- // Paths with '/' in them should be joined to the working directory, or
- // to the root if working directory is not set.
- if strings.IndexByte(name, '/') > 0 {
- if wd == "" {
- wd = "/"
- }
- if !path.IsAbs(wd) {
- return "", fmt.Errorf("working directory %q must be absolute", wd)
- }
- return path.Join(wd, name), nil
- }
-
- // Otherwise, We must lookup the name in the paths, starting from the
- // calling context's root directory.
- root := RootFromContext(ctx)
- if root == nil {
- // Caller has no root. Don't bother traversing anything.
- return "", syserror.ENOENT
- }
- defer root.DecRef()
- for _, p := range paths {
- binPath := path.Join(p, name)
- traversals := uint(linux.MaxSymlinkTraversals)
- d, err := mns.FindInode(ctx, root, nil, binPath, &traversals)
- if err == syserror.ENOENT || err == syserror.EACCES {
- // Didn't find it here.
- continue
- }
- if err != nil {
- return "", err
- }
- defer d.DecRef()
-
- // Check that it is a regular file.
- if !IsRegular(d.Inode.StableAttr) {
- continue
- }
-
- // Check whether we can read and execute the found file.
- if err := d.Inode.CheckPermission(ctx, PermMask{Read: true, Execute: true}); err != nil {
- log.Infof("Found executable at %q, but user cannot execute it: %v", binPath, err)
- continue
- }
- return path.Join("/", p, name), nil
- }
- return "", syserror.ENOENT
-}
-
-// GetPath returns the PATH as a slice of strings given the environment
-// variables.
-func GetPath(env []string) []string {
- const prefix = "PATH="
- for _, e := range env {
- if strings.HasPrefix(e, prefix) {
- return strings.Split(strings.TrimPrefix(e, prefix), ":")
- }
- }
- return nil
-}
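The dominant change in mounts.go (and throughout this diff) is threading a context.Context through reference counting: DecRef, DecRefWithDestructor, and the destroy destructor now all take ctx. Below is a minimal caller sketch, assuming only helpers visible in this diff (fs.RootFromContext, MountNamespace.FindInode, linux.MaxSymlinkTraversals); the lookupExecutable function and the example package are hypothetical and not part of this change.

package example // hypothetical, for illustration only

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/sentry/fs"
	"gvisor.dev/gvisor/pkg/syserror"
)

// lookupExecutable shows what a caller looks like after this change: every
// DecRef receives the context so destructors such as MountNamespace.destroy
// can use it during teardown.
func lookupExecutable(ctx context.Context, mns *fs.MountNamespace, name string) error {
	root := fs.RootFromContext(ctx)
	if root == nil {
		return syserror.ENOENT
	}
	defer root.DecRef(ctx) // Was root.DecRef() before this change.

	maxTraversals := uint(linux.MaxSymlinkTraversals)
	d, err := mns.FindInode(ctx, root, nil, name, &maxTraversals)
	if err != nil {
		return err
	}
	defer d.DecRef(ctx)
	return nil
}

The signature change exists because destructors invoked from DecRef may themselves need to drop further context-aware references, so the context has to be supplied at every call site.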
diff --git a/pkg/sentry/fs/mounts_test.go b/pkg/sentry/fs/mounts_test.go
index a69b41468..975d6cbc9 100644
--- a/pkg/sentry/fs/mounts_test.go
+++ b/pkg/sentry/fs/mounts_test.go
@@ -51,7 +51,7 @@ func TestFindLink(t *testing.T) {
}
root := mm.Root()
- defer root.DecRef()
+ defer root.DecRef(ctx)
foo, err := root.Walk(ctx, root, "foo")
if err != nil {
t.Fatalf("Error walking to foo: %v", err)
diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go
index a8ae7d81d..35013a21b 100644
--- a/pkg/sentry/fs/overlay.go
+++ b/pkg/sentry/fs/overlay.go
@@ -107,7 +107,7 @@ func NewOverlayRoot(ctx context.Context, upper *Inode, lower *Inode, flags Mount
msrc := newOverlayMountSource(ctx, upper.MountSource, lower.MountSource, flags)
overlay, err := newOverlayEntry(ctx, upper, lower, true)
if err != nil {
- msrc.DecRef()
+ msrc.DecRef(ctx)
return nil, err
}
@@ -130,7 +130,7 @@ func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode,
msrc := newOverlayMountSource(ctx, upperMS, lower.MountSource, flags)
overlay, err := newOverlayEntry(ctx, nil, lower, true)
if err != nil {
- msrc.DecRef()
+ msrc.DecRef(ctx)
return nil, err
}
return newOverlayInode(ctx, overlay, msrc), nil
@@ -230,16 +230,16 @@ func newOverlayEntry(ctx context.Context, upper *Inode, lower *Inode, lowerExist
}, nil
}
-func (o *overlayEntry) release() {
+func (o *overlayEntry) release(ctx context.Context) {
// We drop a reference on upper and lower file system Inodes
// rather than releasing them, because in-memory filesystems
// may hold an extra reference to these Inodes so that they
// stay in memory.
if o.upper != nil {
- o.upper.DecRef()
+ o.upper.DecRef(ctx)
}
if o.lower != nil {
- o.lower.DecRef()
+ o.lower.DecRef(ctx)
}
}
diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go
index 35972e23c..45523adf8 100644
--- a/pkg/sentry/fs/proc/fds.go
+++ b/pkg/sentry/fs/proc/fds.go
@@ -56,11 +56,11 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF
// readDescriptors reads fds in the task starting at offset, and calls the
// toDentAttr callback for each to get a DentAttr, which it then emits. This is
// a helper for implementing fs.InodeOperations.Readdir.
-func readDescriptors(t *kernel.Task, c *fs.DirCtx, offset int64, toDentAttr func(int) fs.DentAttr) (int64, error) {
+func readDescriptors(ctx context.Context, t *kernel.Task, c *fs.DirCtx, offset int64, toDentAttr func(int) fs.DentAttr) (int64, error) {
var fds []int32
t.WithMuLocked(func(t *kernel.Task) {
if fdTable := t.FDTable(); fdTable != nil {
- fds = fdTable.GetFDs()
+ fds = fdTable.GetFDs(ctx)
}
})
@@ -116,7 +116,7 @@ func (f *fd) GetFile(context.Context, *fs.Dirent, fs.FileFlags) (*fs.File, error
func (f *fd) Readlink(ctx context.Context, _ *fs.Inode) (string, error) {
root := fs.RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
n, _ := f.file.Dirent.FullName(root)
return n, nil
@@ -135,13 +135,7 @@ func (f *fd) Truncate(context.Context, *fs.Inode, int64) error {
func (f *fd) Release(ctx context.Context) {
f.Symlink.Release(ctx)
- f.file.DecRef()
-}
-
-// Close releases the reference on the file.
-func (f *fd) Close() error {
- f.file.DecRef()
- return nil
+ f.file.DecRef(ctx)
}
// fdDir is an InodeOperations for /proc/TID/fd.
@@ -227,7 +221,7 @@ func (f *fdDirFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySer
if f.isInfoFile {
typ = fs.Symlink
}
- return readDescriptors(f.t, dirCtx, file.Offset(), func(fd int) fs.DentAttr {
+ return readDescriptors(ctx, f.t, dirCtx, file.Offset(), func(fd int) fs.DentAttr {
return fs.GenericDentAttr(typ, device.ProcDevice)
})
}
@@ -261,7 +255,7 @@ func (fdid *fdInfoDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs
// locks, and other data. For now we only have flags.
// See https://www.kernel.org/doc/Documentation/filesystems/proc.txt
flags := file.Flags().ToLinux() | fdFlags.ToLinuxFileFlags()
- file.DecRef()
+ file.DecRef(ctx)
contents := []byte(fmt.Sprintf("flags:\t0%o\n", flags))
return newStaticProcInode(ctx, dir.MountSource, contents)
})
diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go
index 1fc9c703c..6a63c47b3 100644
--- a/pkg/sentry/fs/proc/mounts.go
+++ b/pkg/sentry/fs/proc/mounts.go
@@ -47,7 +47,7 @@ func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) {
// The task has been destroyed. Nothing to show here.
return
}
- defer rootDir.DecRef()
+ defer rootDir.DecRef(t)
mnt := t.MountNamespace().FindMount(rootDir)
if mnt == nil {
@@ -64,7 +64,7 @@ func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) {
continue // No longer valid.
}
mountPath, desc := mroot.FullName(rootDir)
- mroot.DecRef()
+ mroot.DecRef(t)
if !desc {
// MountSources that are not descendants of the chroot jail are ignored.
continue
@@ -97,7 +97,7 @@ func (mif *mountInfoFile) ReadSeqFileData(ctx context.Context, handle seqfile.Se
if mroot == nil {
return // No longer valid.
}
- defer mroot.DecRef()
+ defer mroot.DecRef(ctx)
// Format:
// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
@@ -216,7 +216,7 @@ func (mf *mountsFile) ReadSeqFileData(ctx context.Context, handle seqfile.SeqHan
if root == nil {
return // No longer valid.
}
- defer root.DecRef()
+ defer root.DecRef(ctx)
flags := root.Inode.MountSource.Flags
opts := "rw"
diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go
index bd18177d4..83a43aa26 100644
--- a/pkg/sentry/fs/proc/net.go
+++ b/pkg/sentry/fs/proc/net.go
@@ -419,7 +419,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
}
sfile := s.(*fs.File)
if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX {
- s.DecRef()
+ s.DecRef(ctx)
// Not a unix socket.
continue
}
@@ -479,7 +479,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
}
fmt.Fprintf(&buf, "\n")
- s.DecRef()
+ s.DecRef(ctx)
}
data := []seqfile.SeqData{
@@ -574,7 +574,7 @@ func commonReadSeqFileDataTCP(ctx context.Context, n seqfile.SeqHandle, k *kerne
panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
}
if family, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) {
- s.DecRef()
+ s.DecRef(ctx)
// Not tcp4 sockets.
continue
}
@@ -664,7 +664,7 @@ func commonReadSeqFileDataTCP(ctx context.Context, n seqfile.SeqHandle, k *kerne
fmt.Fprintf(&buf, "\n")
- s.DecRef()
+ s.DecRef(ctx)
}
data := []seqfile.SeqData{
@@ -752,7 +752,7 @@ func (n *netUDP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
}
if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM {
- s.DecRef()
+ s.DecRef(ctx)
// Not udp4 socket.
continue
}
@@ -822,7 +822,7 @@ func (n *netUDP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
fmt.Fprintf(&buf, "\n")
- s.DecRef()
+ s.DecRef(ctx)
}
data := []seqfile.SeqData{
diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go
index c659224a7..77e0e1d26 100644
--- a/pkg/sentry/fs/proc/proc.go
+++ b/pkg/sentry/fs/proc/proc.go
@@ -213,7 +213,7 @@ func (rpf *rootProcFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dent
// Add dot and dotdot.
root := fs.RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
dot, dotdot := file.Dirent.GetDotAttrs(root)
names = append(names, ".", "..")
diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go
index 702fdd392..8615b60f0 100644
--- a/pkg/sentry/fs/proc/sys_net.go
+++ b/pkg/sentry/fs/proc/sys_net.go
@@ -272,6 +272,96 @@ func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSeque
return n, f.tcpSack.stack.SetTCPSACKEnabled(*f.tcpSack.enabled)
}
+// +stateify savable
+type tcpRecovery struct {
+ fsutil.SimpleFileInode
+
+ stack inet.Stack `state:"wait"`
+ recovery inet.TCPLossRecovery
+}
+
+func newTCPRecoveryInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
+ ts := &tcpRecovery{
+ SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
+ stack: s,
+ }
+ sattr := fs.StableAttr{
+ DeviceID: device.ProcDevice.DeviceID(),
+ InodeID: device.ProcDevice.NextIno(),
+ BlockSize: usermem.PageSize,
+ Type: fs.SpecialFile,
+ }
+ return fs.NewInode(ctx, ts, msrc, sattr)
+}
+
+// Truncate implements fs.InodeOperations.Truncate.
+func (*tcpRecovery) Truncate(context.Context, *fs.Inode, int64) error {
+ return nil
+}
+
+// GetFile implements fs.InodeOperations.GetFile.
+func (r *tcpRecovery) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+ flags.Pread = true
+ flags.Pwrite = true
+ return fs.NewFile(ctx, dirent, flags, &tcpRecoveryFile{
+ tcpRecovery: r,
+ stack: r.stack,
+ }), nil
+}
+
+// +stateify savable
+type tcpRecoveryFile struct {
+ fsutil.FileGenericSeek `state:"nosave"`
+ fsutil.FileNoIoctl `state:"nosave"`
+ fsutil.FileNoMMap `state:"nosave"`
+ fsutil.FileNoSplice `state:"nosave"`
+ fsutil.FileNoopRelease `state:"nosave"`
+ fsutil.FileNoopFlush `state:"nosave"`
+ fsutil.FileNoopFsync `state:"nosave"`
+ fsutil.FileNotDirReaddir `state:"nosave"`
+ fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
+
+ tcpRecovery *tcpRecovery
+
+ stack inet.Stack `state:"wait"`
+}
+
+// Read implements fs.FileOperations.Read.
+func (f *tcpRecoveryFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
+ if offset != 0 {
+ return 0, io.EOF
+ }
+
+ recovery, err := f.stack.TCPRecovery()
+ if err != nil {
+ return 0, err
+ }
+ f.tcpRecovery.recovery = recovery
+ s := fmt.Sprintf("%d\n", f.tcpRecovery.recovery)
+ n, err := dst.CopyOut(ctx, []byte(s))
+ return int64(n), err
+}
+
+// Write implements fs.FileOperations.Write.
+func (f *tcpRecoveryFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
+ if src.NumBytes() == 0 {
+ return 0, nil
+ }
+ src = src.TakeFirst(usermem.PageSize - 1)
+
+ var v int32
+ n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
+ if err != nil {
+ return 0, err
+ }
+ f.tcpRecovery.recovery = inet.TCPLossRecovery(v)
+ if err := f.tcpRecovery.stack.SetTCPRecovery(f.tcpRecovery.recovery); err != nil {
+ return 0, err
+ }
+ return n, nil
+}
+
func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
// The following files are simple stubs until they are implemented in
// netstack, most of these files are configuration related. We use the
@@ -351,6 +441,11 @@ func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s ine
contents["tcp_wmem"] = newTCPMemInode(ctx, msrc, s, tcpWMem)
}
+ // Add tcp_recovery.
+ if _, err := s.TCPRecovery(); err == nil {
+ contents["tcp_recovery"] = newTCPRecoveryInode(ctx, msrc, s)
+ }
+
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
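The new inode above surfaces the netstack TCP loss-recovery setting at /proc/sys/net/ipv4/tcp_recovery, and it is only registered when Stack.TCPRecovery succeeds. A guest-side sketch of how an application might exercise the file follows; the written value is an arbitrary example, and the accepted range is whatever inet.TCPLossRecovery defines. Reads go through tcpRecoveryFile.Read (Stack.TCPRecovery); writes are parsed as an int32 and stored via Stack.SetTCPRecovery.

package main

import (
	"fmt"
	"io/ioutil"
)

func main() {
	const p = "/proc/sys/net/ipv4/tcp_recovery"
	// Read the current setting as the single decimal value the file exposes.
	if cur, err := ioutil.ReadFile(p); err == nil {
		fmt.Printf("tcp_recovery = %s", cur)
	}
	// Write an example value; it is stored via Stack.SetTCPRecovery.
	if err := ioutil.WriteFile(p, []byte("1\n"), 0644); err != nil {
		fmt.Println("write failed:", err)
	}
}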
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
index 4bbe90198..9cf7f2a62 100644
--- a/pkg/sentry/fs/proc/task.go
+++ b/pkg/sentry/fs/proc/task.go
@@ -185,7 +185,7 @@ func (f *subtasksFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dentry
// Serialize "." and "..".
root := fs.RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
dot, dotdot := file.Dirent.GetDotAttrs(root)
if err := dirCtx.DirEmit(".", dot); err != nil {
@@ -295,7 +295,7 @@ func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
if err != nil {
return "", err
}
- defer exec.DecRef()
+ defer exec.DecRef(ctx)
return exec.PathnameWithDeleted(ctx), nil
}
diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go
index bfa304552..f4fcddecb 100644
--- a/pkg/sentry/fs/ramfs/dir.go
+++ b/pkg/sentry/fs/ramfs/dir.go
@@ -219,7 +219,7 @@ func (d *Dir) Remove(ctx context.Context, _ *fs.Inode, name string) error {
}
// Remove our reference on the inode.
- inode.DecRef()
+ inode.DecRef(ctx)
return nil
}
@@ -250,7 +250,7 @@ func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) err
}
// Remove our reference on the inode.
- inode.DecRef()
+ inode.DecRef(ctx)
return nil
}
@@ -326,7 +326,7 @@ func (d *Dir) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.F
// Create the Dirent and corresponding file.
created := fs.NewDirent(ctx, inode, name)
- defer created.DecRef()
+ defer created.DecRef(ctx)
return created.Inode.GetFile(ctx, created, flags)
}
@@ -412,11 +412,11 @@ func (*Dir) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, ol
}
// Release implements fs.InodeOperation.Release.
-func (d *Dir) Release(_ context.Context) {
+func (d *Dir) Release(ctx context.Context) {
// Drop references on all children.
d.mu.Lock()
for _, i := range d.children {
- i.DecRef()
+ i.DecRef(ctx)
}
d.mu.Unlock()
}
@@ -456,7 +456,7 @@ func (dfo *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirC
func (dfo *dirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
root := fs.RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
dirCtx := &fs.DirCtx{
Serializer: serializer,
@@ -473,13 +473,13 @@ func hasChildren(ctx context.Context, inode *fs.Inode) (bool, error) {
// dropped when that dirent is destroyed.
inode.IncRef()
d := fs.NewTransientDirent(inode)
- defer d.DecRef()
+ defer d.DecRef(ctx)
file, err := inode.GetFile(ctx, d, fs.FileFlags{Read: true})
if err != nil {
return false, err
}
- defer file.DecRef()
+ defer file.DecRef(ctx)
ser := &fs.CollectEntriesSerializer{}
if err := file.Readdir(ctx, ser); err != nil {
@@ -530,7 +530,7 @@ func Rename(ctx context.Context, oldParent fs.InodeOperations, oldName string, n
if err != nil {
return err
}
- inode.DecRef()
+ inode.DecRef(ctx)
}
// Be careful, we may have already grabbed this mutex above.
diff --git a/pkg/sentry/fs/ramfs/tree_test.go b/pkg/sentry/fs/ramfs/tree_test.go
index a6ed8b2c5..3e0d1e07e 100644
--- a/pkg/sentry/fs/ramfs/tree_test.go
+++ b/pkg/sentry/fs/ramfs/tree_test.go
@@ -67,7 +67,7 @@ func TestMakeDirectoryTree(t *testing.T) {
continue
}
root := mm.Root()
- defer mm.DecRef()
+ defer mm.DecRef(ctx)
for _, p := range test.subdirs {
maxTraversals := uint(0)
diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go
index 88c344089..f362ca9b6 100644
--- a/pkg/sentry/fs/timerfd/timerfd.go
+++ b/pkg/sentry/fs/timerfd/timerfd.go
@@ -55,7 +55,7 @@ type TimerOperations struct {
func NewFile(ctx context.Context, c ktime.Clock) *fs.File {
dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[timerfd]")
// Release the initial dirent reference after NewFile takes a reference.
- defer dirent.DecRef()
+ defer dirent.DecRef(ctx)
tops := &TimerOperations{}
tops.timer = ktime.NewTimer(c, tops)
// Timerfds reject writes, but the Write flag must be set in order to
@@ -65,7 +65,7 @@ func NewFile(ctx context.Context, c ktime.Clock) *fs.File {
}
// Release implements fs.FileOperations.Release.
-func (t *TimerOperations) Release() {
+func (t *TimerOperations) Release(context.Context) {
t.timer.Destroy()
}
diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go
index aaba35502..d4d613ea9 100644
--- a/pkg/sentry/fs/tmpfs/file_test.go
+++ b/pkg/sentry/fs/tmpfs/file_test.go
@@ -46,7 +46,7 @@ func newFile(ctx context.Context) *fs.File {
func TestGrow(t *testing.T) {
ctx := contexttest.Context(t)
f := newFile(ctx)
- defer f.DecRef()
+ defer f.DecRef(ctx)
abuf := bytes.Repeat([]byte{'a'}, 68)
n, err := f.Pwritev(ctx, usermem.BytesIOSequence(abuf), 0)
diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go
index 108654827..463f6189e 100644
--- a/pkg/sentry/fs/tty/dir.go
+++ b/pkg/sentry/fs/tty/dir.go
@@ -132,7 +132,7 @@ func (d *dirInodeOperations) Release(ctx context.Context) {
d.mu.Lock()
defer d.mu.Unlock()
- d.master.DecRef()
+ d.master.DecRef(ctx)
if len(d.slaves) != 0 {
panic(fmt.Sprintf("devpts directory still contains active terminals: %+v", d))
}
@@ -263,7 +263,7 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e
}
// masterClose is called when the master end of t is closed.
-func (d *dirInodeOperations) masterClose(t *Terminal) {
+func (d *dirInodeOperations) masterClose(ctx context.Context, t *Terminal) {
d.mu.Lock()
defer d.mu.Unlock()
@@ -277,7 +277,7 @@ func (d *dirInodeOperations) masterClose(t *Terminal) {
panic(fmt.Sprintf("Terminal %+v doesn't exist in %+v?", t, d))
}
- s.DecRef()
+ s.DecRef(ctx)
delete(d.slaves, t.n)
d.dentryMap.Remove(strconv.FormatUint(uint64(t.n), 10))
}
@@ -322,7 +322,7 @@ func (df *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCt
func (df *dirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
root := fs.RootFromContext(ctx)
if root != nil {
- defer root.DecRef()
+ defer root.DecRef(ctx)
}
dirCtx := &fs.DirCtx{
Serializer: serializer,
diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go
index 8fe05ebe5..2d4d44bf3 100644
--- a/pkg/sentry/fs/tty/fs.go
+++ b/pkg/sentry/fs/tty/fs.go
@@ -108,4 +108,4 @@ func (superOperations) ResetInodeMappings() {}
func (superOperations) SaveInodeMapping(*fs.Inode, string) {}
// Destroy implements MountSourceOperations.Destroy.
-func (superOperations) Destroy() {}
+func (superOperations) Destroy(context.Context) {}
diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go
index fe07fa929..e00746017 100644
--- a/pkg/sentry/fs/tty/master.go
+++ b/pkg/sentry/fs/tty/master.go
@@ -75,7 +75,7 @@ func newMasterInode(ctx context.Context, d *dirInodeOperations, owner fs.FileOwn
}
// Release implements fs.InodeOperations.Release.
-func (mi *masterInodeOperations) Release(ctx context.Context) {
+func (mi *masterInodeOperations) Release(context.Context) {
}
// Truncate implements fs.InodeOperations.Truncate.
@@ -120,9 +120,9 @@ type masterFileOperations struct {
var _ fs.FileOperations = (*masterFileOperations)(nil)
// Release implements fs.FileOperations.Release.
-func (mf *masterFileOperations) Release() {
- mf.d.masterClose(mf.t)
- mf.t.DecRef()
+func (mf *masterFileOperations) Release(ctx context.Context) {
+ mf.d.masterClose(ctx, mf.t)
+ mf.t.DecRef(ctx)
}
// EventRegister implements waiter.Waitable.EventRegister.
diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go
index 9871f6fc6..7c7292687 100644
--- a/pkg/sentry/fs/tty/slave.go
+++ b/pkg/sentry/fs/tty/slave.go
@@ -71,7 +71,7 @@ func newSlaveInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owne
// Release implements fs.InodeOperations.Release.
func (si *slaveInodeOperations) Release(ctx context.Context) {
- si.t.DecRef()
+ si.t.DecRef(ctx)
}
// Truncate implements fs.InodeOperations.Truncate.
@@ -106,7 +106,7 @@ type slaveFileOperations struct {
var _ fs.FileOperations = (*slaveFileOperations)(nil)
// Release implements fs.FileOperations.Release.
-func (sf *slaveFileOperations) Release() {
+func (sf *slaveFileOperations) Release(context.Context) {
}
// EventRegister implements waiter.Waitable.EventRegister.
diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD
index f37f979f1..66e949c95 100644
--- a/pkg/sentry/fs/user/BUILD
+++ b/pkg/sentry/fs/user/BUILD
@@ -4,15 +4,21 @@ package(licenses = ["notice"])
go_library(
name = "user",
- srcs = ["user.go"],
+ srcs = [
+ "path.go",
+ "user.go",
+ ],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/log",
"//pkg/sentry/fs",
+ "//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
+ "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fs/user/path.go b/pkg/sentry/fs/user/path.go
new file mode 100644
index 000000000..2f5a43b84
--- /dev/null
+++ b/pkg/sentry/fs/user/path.go
@@ -0,0 +1,170 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package user
+
+import (
+ "fmt"
+ "path"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// ResolveExecutablePath resolves the given executable name using the working
+// directory and environment.
+func ResolveExecutablePath(ctx context.Context, args *kernel.CreateProcessArgs) (string, error) {
+ name := args.Filename
+ if len(name) == 0 {
+ if len(args.Argv) == 0 {
+ return "", fmt.Errorf("no filename or command provided")
+ }
+ name = args.Argv[0]
+ }
+
+ // Absolute paths can be used directly.
+ if path.IsAbs(name) {
+ return name, nil
+ }
+
+ // Paths with '/' in them should be joined to the working directory, or
+ // to the root if working directory is not set.
+ if strings.IndexByte(name, '/') > 0 {
+ wd := args.WorkingDirectory
+ if wd == "" {
+ wd = "/"
+ }
+ if !path.IsAbs(wd) {
+ return "", fmt.Errorf("working directory %q must be absolute", wd)
+ }
+ return path.Join(wd, name), nil
+ }
+
+	// Otherwise, we must look up the name in the paths.
+ paths := getPath(args.Envv)
+ if kernel.VFS2Enabled {
+ f, err := resolveVFS2(ctx, args.Credentials, args.MountNamespaceVFS2, paths, name)
+ if err != nil {
+ return "", fmt.Errorf("error finding executable %q in PATH %v: %v", name, paths, err)
+ }
+ return f, nil
+ }
+
+ f, err := resolve(ctx, args.MountNamespace, paths, name)
+ if err != nil {
+ return "", fmt.Errorf("error finding executable %q in PATH %v: %v", name, paths, err)
+ }
+ return f, nil
+}
+
+func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name string) (string, error) {
+ root := fs.RootFromContext(ctx)
+ if root == nil {
+ // Caller has no root. Don't bother traversing anything.
+ return "", syserror.ENOENT
+ }
+ defer root.DecRef(ctx)
+ for _, p := range paths {
+ if !path.IsAbs(p) {
+			// Relative paths aren't safe; no one should be using them.
+ log.Warningf("Skipping relative path %q in $PATH", p)
+ continue
+ }
+
+ binPath := path.Join(p, name)
+ traversals := uint(linux.MaxSymlinkTraversals)
+ d, err := mns.FindInode(ctx, root, nil, binPath, &traversals)
+ if err == syserror.ENOENT || err == syserror.EACCES {
+ // Didn't find it here.
+ continue
+ }
+ if err != nil {
+ return "", err
+ }
+ defer d.DecRef(ctx)
+
+ // Check that it is a regular file.
+ if !fs.IsRegular(d.Inode.StableAttr) {
+ continue
+ }
+
+ // Check whether we can read and execute the found file.
+ if err := d.Inode.CheckPermission(ctx, fs.PermMask{Read: true, Execute: true}); err != nil {
+ log.Infof("Found executable at %q, but user cannot execute it: %v", binPath, err)
+ continue
+ }
+ return path.Join("/", p, name), nil
+ }
+
+ // Couldn't find it.
+ return "", syserror.ENOENT
+}
+
+func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, paths []string, name string) (string, error) {
+ root := mns.Root()
+ defer root.DecRef(ctx)
+ for _, p := range paths {
+ if !path.IsAbs(p) {
+			// Relative paths aren't safe; no one should be using them.
+ log.Warningf("Skipping relative path %q in $PATH", p)
+ continue
+ }
+
+ binPath := path.Join(p, name)
+ pop := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(binPath),
+ FollowFinalSymlink: true,
+ }
+ opts := &vfs.OpenOptions{
+ FileExec: true,
+ Flags: linux.O_RDONLY,
+ }
+ dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts)
+ if err == syserror.ENOENT || err == syserror.EACCES {
+ // Didn't find it here.
+ continue
+ }
+ if err != nil {
+ return "", err
+ }
+ dentry.DecRef(ctx)
+
+ return binPath, nil
+ }
+
+ // Couldn't find it.
+ return "", syserror.ENOENT
+}
+
+// getPath returns the PATH as a slice of strings given the environment
+// variables.
+func getPath(env []string) []string {
+ const prefix = "PATH="
+ for _, e := range env {
+ if strings.HasPrefix(e, prefix) {
+ return strings.Split(strings.TrimPrefix(e, prefix), ":")
+ }
+ }
+ return nil
+}
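getPath is unexported, so the sketch below simply replicates its parsing to show how the PATH list consumed by resolve and resolveVFS2 is derived from the environment slice; the sample environment is made up.

package main

import (
	"fmt"
	"strings"
)

// getPath mirrors the unexported helper added above: it returns the first
// PATH= entry from the environment, split on ':'.
func getPath(env []string) []string {
	const prefix = "PATH="
	for _, e := range env {
		if strings.HasPrefix(e, prefix) {
			return strings.Split(strings.TrimPrefix(e, prefix), ":")
		}
	}
	return nil
}

func main() {
	env := []string{"HOME=/root", "PATH=/usr/local/bin:/usr/bin:/bin"}
	fmt.Println(getPath(env)) // Prints [/usr/local/bin /usr/bin /bin].
}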
diff --git a/pkg/sentry/fs/user/user.go b/pkg/sentry/fs/user/user.go
index fe7f67c00..936fd3932 100644
--- a/pkg/sentry/fs/user/user.go
+++ b/pkg/sentry/fs/user/user.go
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// Package user contains methods for resolving filesystem paths based on the
+// user and their environment.
package user
import (
@@ -60,7 +62,7 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.K
// doesn't exist we will return the default home directory.
return defaultHome, nil
}
- defer dirent.DecRef()
+ defer dirent.DecRef(ctx)
// Check read permissions on the file.
if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil {
@@ -79,7 +81,7 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.K
if err != nil {
return "", err
}
- defer f.DecRef()
+ defer f.DecRef(ctx)
r := &fileReader{
Ctx: ctx,
@@ -103,7 +105,7 @@ func getExecUserHomeVFS2(ctx context.Context, mns *vfs.MountNamespace, uid auth.
const defaultHome = "/"
root := mns.Root()
- defer root.DecRef()
+ defer root.DecRef(ctx)
creds := auth.CredentialsFromContext(ctx)
@@ -121,7 +123,7 @@ func getExecUserHomeVFS2(ctx context.Context, mns *vfs.MountNamespace, uid auth.
if err != nil {
return defaultHome, nil
}
- defer fd.DecRef()
+ defer fd.DecRef(ctx)
r := &fileReaderVFS2{
ctx: ctx,
diff --git a/pkg/sentry/fs/user/user_test.go b/pkg/sentry/fs/user/user_test.go
index 7d8e9ac7c..12b786224 100644
--- a/pkg/sentry/fs/user/user_test.go
+++ b/pkg/sentry/fs/user/user_test.go
@@ -39,7 +39,7 @@ func createEtcPasswd(ctx context.Context, root *fs.Dirent, contents string, mode
if err != nil {
return err
}
- defer etc.DecRef()
+ defer etc.DecRef(ctx)
switch mode.FileType() {
case 0:
// Don't create anything.
@@ -49,7 +49,7 @@ func createEtcPasswd(ctx context.Context, root *fs.Dirent, contents string, mode
if err != nil {
return err
}
- defer passwd.DecRef()
+ defer passwd.DecRef(ctx)
if _, err := passwd.Writev(ctx, usermem.BytesIOSequence([]byte(contents))); err != nil {
return err
}
@@ -110,9 +110,9 @@ func TestGetExecUserHome(t *testing.T) {
if err != nil {
t.Fatalf("NewMountNamespace failed: %v", err)
}
- defer mns.DecRef()
+ defer mns.DecRef(ctx)
root := mns.Root()
- defer root.DecRef()
+ defer root.DecRef(ctx)
ctx = fs.WithRoot(ctx, root)
if err := createEtcPasswd(ctx, root, tc.passwdContents, tc.passwdMode); err != nil {