summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorDean Deng <deandeng@google.com>2020-06-27 14:38:20 -0700
committergVisor bot <gvisor-bot@google.com>2020-06-27 14:39:41 -0700
commit02d552d07c4415978d2ce418fb16baf238d0ff78 (patch)
tree37c414e743978b9f86453d66fa926abeffce1093
parent691c04278ee6cf579e2b2dafb28e39861ce21bb9 (diff)
Support sticky bit in vfs2.
Updates #2923. PiperOrigin-RevId: 318648128
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go65
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/directory.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go9
-rw-r--r--pkg/sentry/vfs/permissions.go14
-rw-r--r--test/syscalls/BUILD1
-rw-r--r--test/syscalls/linux/sticky.cc57
8 files changed, 131 insertions, 27 deletions
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 73bac738d..51082359d 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -16,6 +16,7 @@ package gofer
import (
"sync"
+ "sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
@@ -464,21 +465,61 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
defer mntns.DecRef()
parent.dirMu.Lock()
defer parent.dirMu.Unlock()
+
child, ok := parent.children[name]
if ok && child == nil {
return syserror.ENOENT
}
- // We only need a dentry representing the file at name if it can be a mount
- // point. If child is nil, then it can't be a mount point. If child is
- // non-nil but stale, the actual file can't be a mount point either; we
- // detect this case by just speculatively calling PrepareDeleteDentry and
- // only revalidating the dentry if that fails (indicating that the existing
- // dentry is a mount point).
+
+ sticky := atomic.LoadUint32(&parent.mode)&linux.ModeSticky != 0
+ if sticky {
+ if !ok {
+ // If the sticky bit is set, we need to retrieve the child to determine
+ // whether removing it is allowed.
+ child, err = fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
+ if err != nil {
+ return err
+ }
+ } else if child != nil && !child.cachedMetadataAuthoritative() {
+ // Make sure the dentry representing the file at name is up to date
+ // before examining its metadata.
+ child, err = fs.revalidateChildLocked(ctx, vfsObj, parent, name, child, &ds)
+ if err != nil {
+ return err
+ }
+ }
+ if err := parent.mayDelete(rp.Credentials(), child); err != nil {
+ return err
+ }
+ }
+
+ // If a child dentry exists, prepare to delete it. This should fail if it is
+ // a mount point. We detect mount points by speculatively calling
+ // PrepareDeleteDentry, which fails if child is a mount point. However, we
+ // may need to revalidate the file in this case to make sure that it has not
+ // been deleted or replaced on the remote fs, in which case the mount point
+ // will have disappeared. If calling PrepareDeleteDentry fails again on the
+ // up-to-date dentry, we can be sure that it is a mount point.
+ //
+ // Also note that if child is nil, then it can't be a mount point.
if child != nil {
+ // Hold child.dirMu so we can check child.children and
+ // child.syntheticChildren. We don't access these fields until a bit later,
+ // but locking child.dirMu after calling vfs.PrepareDeleteDentry() would
+ // create an inconsistent lock ordering between dentry.dirMu and
+ // vfs.Dentry.mu (in the VFS lock order, it would make dentry.dirMu both "a
+ // FilesystemImpl lock" and "a lock acquired by a FilesystemImpl between
+ // PrepareDeleteDentry and CommitDeleteDentry). To avoid this, lock
+ // child.dirMu before calling PrepareDeleteDentry.
child.dirMu.Lock()
defer child.dirMu.Unlock()
if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
- if parent.cachedMetadataAuthoritative() {
+ // We can skip revalidation in several cases:
+ // - We are not in InteropModeShared
+ // - The parent directory is synthetic, in which case the child must also
+ // be synthetic
+ // - We already updated the child during the sticky bit check above
+ if parent.cachedMetadataAuthoritative() || sticky {
return err
}
child, err = fs.revalidateChildLocked(ctx, vfsObj, parent, name, child, &ds)
@@ -1100,7 +1141,8 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
return err
}
}
- if err := oldParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
+ creds := rp.Credentials()
+ if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
return err
}
vfsObj := rp.VirtualFilesystem()
@@ -1115,12 +1157,15 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if renamed == nil {
return syserror.ENOENT
}
+ if err := oldParent.mayDelete(creds, renamed); err != nil {
+ return err
+ }
if renamed.isDir() {
if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
return syserror.EINVAL
}
if oldParent != newParent {
- if err := renamed.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
+ if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil {
return err
}
}
@@ -1131,7 +1176,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
if oldParent != newParent {
- if err := newParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
+ if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
return err
}
newParent.dirMu.Lock()
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index b0b6d8c64..71c8d3ae1 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1003,6 +1003,10 @@ func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes)
return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid)))
}
+func (d *dentry) mayDelete(creds *auth.Credentials, child *dentry) error {
+ return vfs.CheckDeleteSticky(creds, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&child.uid)))
+}
+
func dentryUIDFromP9UID(uid p9.UID) uint32 {
if !uid.Ok() {
return uint32(auth.OverflowUID)
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 53aec4918..4cb885d87 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -471,6 +471,8 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *vfs.De
if err := o.checkExistingLocked(name, child); err != nil {
return err
}
+
+ // TODO(gvisor.dev/issue/3027): Check sticky bit before removing.
o.removeLocked(name)
return nil
}
@@ -518,6 +520,8 @@ func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, c
if err := o.checkExistingLocked(oldname, child); err != nil {
return nil, err
}
+
+ // TODO(gvisor.dev/issue/3027): Check sticky bit before removing.
replaced := dst.replaceChildLocked(newname, child)
return replaced, nil
}
diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go
index b172abc15..0a1ad4765 100644
--- a/pkg/sentry/fsimpl/tmpfs/directory.go
+++ b/pkg/sentry/fsimpl/tmpfs/directory.go
@@ -81,6 +81,10 @@ func (dir *directory) removeChildLocked(child *dentry) {
dir.iterMu.Unlock()
}
+func (dir *directory) mayDelete(creds *auth.Credentials, child *dentry) error {
+ return vfs.CheckDeleteSticky(creds, linux.FileMode(atomic.LoadUint32(&dir.inode.mode)), auth.KUID(atomic.LoadUint32(&child.inode.uid)))
+}
+
type directoryFD struct {
fileDescription
vfs.DirectoryFileDescriptionDefaultImpl
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index f766b7ce2..71ac7b8e6 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -492,6 +492,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if !ok {
return syserror.ENOENT
}
+ if err := oldParentDir.mayDelete(rp.Credentials(), renamed); err != nil {
+ return err
+ }
// Note that we don't need to call rp.CheckMount(), since if renamed is a
// mount point then we want to rename the mount point, not anything in the
// mounted filesystem.
@@ -606,6 +609,9 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
if !ok {
return syserror.ENOENT
}
+ if err := parentDir.mayDelete(rp.Credentials(), child); err != nil {
+ return err
+ }
childDir, ok := child.inode.impl.(*directory)
if !ok {
return syserror.ENOTDIR
@@ -716,6 +722,9 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
if !ok {
return syserror.ENOENT
}
+ if err := parentDir.mayDelete(rp.Credentials(), child); err != nil {
+ return err
+ }
if child.inode.isDir() {
return syserror.EISDIR
}
diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go
index afe2be8d7..9cb050597 100644
--- a/pkg/sentry/vfs/permissions.go
+++ b/pkg/sentry/vfs/permissions.go
@@ -230,6 +230,20 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, stat *linux.Stat
return nil
}
+// CheckDeleteSticky checks whether the sticky bit is set on a directory with
+// the given file mode, and if so, checks whether creds has permission to
+// remove a file owned by childKUID from a directory with the given mode.
+// CheckDeleteSticky is consistent with fs/linux.h:check_sticky().
+func CheckDeleteSticky(creds *auth.Credentials, parentMode linux.FileMode, childKUID auth.KUID) error {
+ if parentMode&linux.ModeSticky == 0 {
+ return nil
+ }
+ if CanActAsOwner(creds, childKUID) {
+ return nil
+ }
+ return syserror.EPERM
+}
+
// CanActAsOwner returns true if creds can act as the owner of a file with the
// given owning UID, consistent with Linux's
// fs/inode.c:inode_owner_or_capable().
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index c4fff0ac8..36c178e4a 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -942,6 +942,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:sticky_test",
+ vfs2 = "True",
)
syscall_test(
diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc
index 92eec0449..39f4fb801 100644
--- a/test/syscalls/linux/sticky.cc
+++ b/test/syscalls/linux/sticky.cc
@@ -40,11 +40,14 @@ namespace {
TEST(StickyTest, StickyBitPermDenied) {
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
- auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
- EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
- const FileDescriptor dirfd =
- ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY));
- ASSERT_THAT(mkdirat(dirfd.get(), "NewDir", 0755), SyscallSucceeds());
+ const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(parent.path(), "some content", 0755));
+ const TempPath dir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755));
+ const TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo(parent.path(), file.path()));
// Drop privileges and change IDs only in child thread, or else this parent
// thread won't be able to open some log files after the test ends.
@@ -62,18 +65,26 @@ TEST(StickyTest, StickyBitPermDenied) {
syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1),
SyscallSucceeds());
- EXPECT_THAT(unlinkat(dirfd.get(), "NewDir", AT_REMOVEDIR),
+ std::string new_path = NewTempAbsPath();
+ EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()),
SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(unlink(file.path().c_str()), SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(unlink(link.path().c_str()), SyscallFailsWithErrno(EPERM));
});
}
TEST(StickyTest, StickyBitSameUID) {
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
- auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
- EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
- std::string path = JoinPath(dir.path(), "NewDir");
- ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+ const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(parent.path(), "some content", 0755));
+ const TempPath dir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755));
+ const TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo(parent.path(), file.path()));
// Drop privileges and change IDs only in child thread, or else this parent
// thread won't be able to open some log files after the test ends.
@@ -89,18 +100,26 @@ TEST(StickyTest, StickyBitSameUID) {
SyscallSucceeds());
// We still have the same EUID.
- EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds());
+ std::string new_path = NewTempAbsPath();
+ EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()),
+ SyscallSucceeds());
+ EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds());
+ EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds());
});
}
TEST(StickyTest, StickyBitCapFOWNER) {
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
- auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
- EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
- const FileDescriptor dirfd =
- ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY));
- ASSERT_THAT(mkdirat(dirfd.get(), "NewDir", 0755), SyscallSucceeds());
+ const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(parent.path(), "some content", 0755));
+ const TempPath dir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755));
+ const TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo(parent.path(), file.path()));
// Drop privileges and change IDs only in child thread, or else this parent
// thread won't be able to open some log files after the test ends.
@@ -117,8 +136,12 @@ TEST(StickyTest, StickyBitCapFOWNER) {
SyscallSucceeds());
EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true));
- EXPECT_THAT(unlinkat(dirfd.get(), "NewDir", AT_REMOVEDIR),
+ std::string new_path = NewTempAbsPath();
+ EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()),
SyscallSucceeds());
+ EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds());
+ EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds());
});
}
} // namespace