diff options
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 33 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/device_file.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/directory.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/filesystem.go | 16 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/named_pipe.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/regular_file.go | 13 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/socket_file.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/symlink.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/tmpfs.go | 72 | ||||
-rw-r--r-- | pkg/sentry/vfs/permissions.go | 17 | ||||
-rw-r--r-- | test/syscalls/BUILD | 6 | ||||
-rw-r--r-- | test/syscalls/linux/setgid.cc | 15 |
12 files changed, 139 insertions, 53 deletions
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 8139bff76..6b890a39c 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -294,22 +294,41 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut return err } + clearSID := false stat := opts.Stat + if stat.Mask&linux.STATX_UID != 0 { + atomic.StoreUint32(&a.uid, stat.UID) + clearSID = true + } + if stat.Mask&linux.STATX_GID != 0 { + atomic.StoreUint32(&a.gid, stat.GID) + clearSID = true + } if stat.Mask&linux.STATX_MODE != 0 { for { old := atomic.LoadUint32(&a.mode) - new := old | uint32(stat.Mode & ^uint16(linux.S_IFMT)) - if swapped := atomic.CompareAndSwapUint32(&a.mode, old, new); swapped { + ft := old & linux.S_IFMT + newMode := ft | uint32(stat.Mode & ^uint16(linux.S_IFMT)) + if clearSID { + newMode = vfs.ClearSUIDAndSGID(newMode) + } + if swapped := atomic.CompareAndSwapUint32(&a.mode, old, newMode); swapped { + clearSID = false break } } } - if stat.Mask&linux.STATX_UID != 0 { - atomic.StoreUint32(&a.uid, stat.UID) - } - if stat.Mask&linux.STATX_GID != 0 { - atomic.StoreUint32(&a.gid, stat.GID) + // We may have to clear the SUID/SGID bits, but didn't do so as part of + // STATX_MODE. + if clearSID { + for { + old := atomic.LoadUint32(&a.mode) + newMode := vfs.ClearSUIDAndSGID(old) + if swapped := atomic.CompareAndSwapUint32(&a.mode, old, newMode); swapped { + break + } + } } now := ktime.NowFromContext(ctx).Nanoseconds() diff --git a/pkg/sentry/fsimpl/tmpfs/device_file.go b/pkg/sentry/fsimpl/tmpfs/device_file.go index 9129d35b7..616ae6730 100644 --- a/pkg/sentry/fsimpl/tmpfs/device_file.go +++ b/pkg/sentry/fsimpl/tmpfs/device_file.go @@ -30,7 +30,7 @@ type deviceFile struct { minor uint32 } -func (fs *filesystem) newDeviceFile(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, kind vfs.DeviceKind, major, minor uint32) *inode { +func (fs *filesystem) newDeviceFile(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, kind vfs.DeviceKind, major, minor uint32, parentDir *directory) *inode { file := &deviceFile{ kind: kind, major: major, @@ -44,7 +44,7 @@ func (fs *filesystem) newDeviceFile(kuid auth.KUID, kgid auth.KGID, mode linux.F default: panic(fmt.Sprintf("invalid DeviceKind: %v", kind)) } - file.inode.init(file, fs, kuid, kgid, mode) + file.inode.init(file, fs, kuid, kgid, mode, parentDir) file.inode.nlink = 1 // from parent directory return &file.inode } diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index 417ac2eff..e8d256495 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -49,9 +49,9 @@ type directory struct { childList dentryList } -func (fs *filesystem) newDirectory(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode) *directory { +func (fs *filesystem) newDirectory(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, parentDir *directory) *directory { dir := &directory{} - dir.inode.init(dir, fs, kuid, kgid, linux.S_IFDIR|mode) + dir.inode.init(dir, fs, kuid, kgid, linux.S_IFDIR|mode, parentDir) dir.inode.nlink = 2 // from "." and parent directory or ".." for root dir.dentry.inode = &dir.inode dir.dentry.vfsd.Init(&dir.dentry) diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 453e41d11..4f675c21e 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -277,7 +277,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return syserror.EMLINK } parentDir.inode.incLinksLocked() // from child's ".." - childDir := fs.newDirectory(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode) + childDir := fs.newDirectory(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, parentDir) parentDir.insertChildLocked(&childDir.dentry, name) return nil }) @@ -290,15 +290,15 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v var childInode *inode switch opts.Mode.FileType() { case linux.S_IFREG: - childInode = fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode) + childInode = fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, parentDir) case linux.S_IFIFO: - childInode = fs.newNamedPipe(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode) + childInode = fs.newNamedPipe(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, parentDir) case linux.S_IFBLK: - childInode = fs.newDeviceFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, vfs.BlockDevice, opts.DevMajor, opts.DevMinor) + childInode = fs.newDeviceFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, vfs.BlockDevice, opts.DevMajor, opts.DevMinor, parentDir) case linux.S_IFCHR: - childInode = fs.newDeviceFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, vfs.CharDevice, opts.DevMajor, opts.DevMinor) + childInode = fs.newDeviceFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, vfs.CharDevice, opts.DevMajor, opts.DevMinor, parentDir) case linux.S_IFSOCK: - childInode = fs.newSocketFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, opts.Endpoint) + childInode = fs.newSocketFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, opts.Endpoint, parentDir) default: return syserror.EINVAL } @@ -387,7 +387,7 @@ afterTrailingSymlink: defer rp.Mount().EndWrite() // Create and open the child. creds := rp.Credentials() - child := fs.newDentry(fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode)) + child := fs.newDentry(fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, parentDir)) parentDir.insertChildLocked(child, name) child.IncRef() defer child.DecRef(ctx) @@ -727,7 +727,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { return fs.doCreateAt(ctx, rp, false /* dir */, func(parentDir *directory, name string) error { creds := rp.Credentials() - child := fs.newDentry(fs.newSymlink(creds.EffectiveKUID, creds.EffectiveKGID, 0777, target)) + child := fs.newDentry(fs.newSymlink(creds.EffectiveKUID, creds.EffectiveKGID, 0777, target, parentDir)) parentDir.insertChildLocked(child, name) return nil }) diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go index 57e7b57b0..65c2cbf86 100644 --- a/pkg/sentry/fsimpl/tmpfs/named_pipe.go +++ b/pkg/sentry/fsimpl/tmpfs/named_pipe.go @@ -30,9 +30,9 @@ type namedPipe struct { // Preconditions: // * fs.mu must be locked. // * rp.Mount().CheckBeginWrite() has been called successfully. -func (fs *filesystem) newNamedPipe(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode) *inode { +func (fs *filesystem) newNamedPipe(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, parentDir *directory) *inode { file := &namedPipe{pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize)} - file.inode.init(file, fs, kuid, kgid, linux.S_IFIFO|mode) + file.inode.init(file, fs, kuid, kgid, linux.S_IFIFO|mode, parentDir) file.inode.nlink = 1 // Only the parent has a link. return &file.inode } diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 82a743ff3..a6d161882 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -91,13 +91,13 @@ type regularFile struct { size uint64 } -func (fs *filesystem) newRegularFile(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode) *inode { +func (fs *filesystem) newRegularFile(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, parentDir *directory) *inode { file := ®ularFile{ memFile: fs.mfp.MemoryFile(), memoryUsageKind: usage.Tmpfs, seals: linux.F_SEAL_SEAL, } - file.inode.init(file, fs, kuid, kgid, linux.S_IFREG|mode) + file.inode.init(file, fs, kuid, kgid, linux.S_IFREG|mode, parentDir) file.inode.nlink = 1 // from parent directory return &file.inode } @@ -116,7 +116,7 @@ func newUnlinkedRegularFileDescription(ctx context.Context, creds *auth.Credenti panic("tmpfs.newUnlinkedRegularFileDescription() called with non-tmpfs mount") } - inode := fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, 0777) + inode := fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, 0777, nil /* parentDir */) d := fs.newDentry(inode) defer d.DecRef(ctx) d.name = name @@ -443,6 +443,13 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off rw := getRegularFileReadWriter(f, offset) n, err := src.CopyInTo(ctx, rw) f.inode.touchCMtimeLocked() + for { + old := atomic.LoadUint32(&f.inode.mode) + new := vfs.ClearSUIDAndSGID(old) + if swapped := atomic.CompareAndSwapUint32(&f.inode.mode, old, new); swapped { + break + } + } putRegularFileReadWriter(rw) return n, n + offset, err } diff --git a/pkg/sentry/fsimpl/tmpfs/socket_file.go b/pkg/sentry/fsimpl/tmpfs/socket_file.go index 5699d5975..6112279b0 100644 --- a/pkg/sentry/fsimpl/tmpfs/socket_file.go +++ b/pkg/sentry/fsimpl/tmpfs/socket_file.go @@ -28,9 +28,9 @@ type socketFile struct { ep transport.BoundEndpoint } -func (fs *filesystem) newSocketFile(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, ep transport.BoundEndpoint) *inode { +func (fs *filesystem) newSocketFile(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, ep transport.BoundEndpoint, parentDir *directory) *inode { file := &socketFile{ep: ep} - file.inode.init(file, fs, kuid, kgid, mode) + file.inode.init(file, fs, kuid, kgid, mode, parentDir) file.inode.nlink = 1 // from parent directory return &file.inode } diff --git a/pkg/sentry/fsimpl/tmpfs/symlink.go b/pkg/sentry/fsimpl/tmpfs/symlink.go index a102a2ee2..6a83296bc 100644 --- a/pkg/sentry/fsimpl/tmpfs/symlink.go +++ b/pkg/sentry/fsimpl/tmpfs/symlink.go @@ -25,11 +25,11 @@ type symlink struct { target string // immutable } -func (fs *filesystem) newSymlink(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, target string) *inode { +func (fs *filesystem) newSymlink(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, target string, parentDir *directory) *inode { link := &symlink{ target: target, } - link.inode.init(link, fs, kuid, kgid, linux.S_IFLNK|mode) + link.inode.init(link, fs, kuid, kgid, linux.S_IFLNK|mode, parentDir) link.inode.nlink = 1 // from parent directory return &link.inode } diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index b32c54e20..a01e413e0 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -190,11 +190,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt var root *dentry switch rootFileType { case linux.S_IFREG: - root = fs.newDentry(fs.newRegularFile(rootKUID, rootKGID, rootMode)) + root = fs.newDentry(fs.newRegularFile(rootKUID, rootKGID, rootMode, nil /* parentDir */)) case linux.S_IFLNK: - root = fs.newDentry(fs.newSymlink(rootKUID, rootKGID, rootMode, tmpfsOpts.RootSymlinkTarget)) + root = fs.newDentry(fs.newSymlink(rootKUID, rootKGID, rootMode, tmpfsOpts.RootSymlinkTarget, nil /* parentDir */)) case linux.S_IFDIR: - root = &fs.newDirectory(rootKUID, rootKGID, rootMode).dentry + root = &fs.newDirectory(rootKUID, rootKGID, rootMode, nil /* parentDir */).dentry default: fs.vfsfs.DecRef(ctx) return nil, nil, fmt.Errorf("invalid tmpfs root file type: %#o", rootFileType) @@ -385,10 +385,19 @@ type inode struct { const maxLinks = math.MaxUint32 -func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth.KGID, mode linux.FileMode) { +func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, parentDir *directory) { if mode.FileType() == 0 { panic("file type is required in FileMode") } + + // Inherit the group and setgid bit as in fs/inode.c:inode_init_owner(). + if parentDir != nil && parentDir.inode.mode&linux.S_ISGID == linux.S_ISGID { + kgid = auth.KGID(parentDir.inode.gid) + if mode&linux.S_IFDIR == linux.S_IFDIR { + mode |= linux.S_ISGID + } + } + i.fs = fs i.mode = uint32(mode) i.uid = uint32(kuid) @@ -519,26 +528,15 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs. if err := vfs.CheckSetStat(ctx, creds, opts, mode, auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil { return err } + i.mu.Lock() defer i.mu.Unlock() var ( needsMtimeBump bool needsCtimeBump bool ) + clearSID := false mask := stat.Mask - if mask&linux.STATX_MODE != 0 { - ft := atomic.LoadUint32(&i.mode) & linux.S_IFMT - atomic.StoreUint32(&i.mode, ft|uint32(stat.Mode&^linux.S_IFMT)) - needsCtimeBump = true - } - if mask&linux.STATX_UID != 0 { - atomic.StoreUint32(&i.uid, stat.UID) - needsCtimeBump = true - } - if mask&linux.STATX_GID != 0 { - atomic.StoreUint32(&i.gid, stat.GID) - needsCtimeBump = true - } if mask&linux.STATX_SIZE != 0 { switch impl := i.impl.(type) { case *regularFile: @@ -547,6 +545,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs. return err } if updated { + clearSID = true needsMtimeBump = true needsCtimeBump = true } @@ -556,6 +555,31 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs. return syserror.EINVAL } } + if mask&linux.STATX_UID != 0 { + atomic.StoreUint32(&i.uid, stat.UID) + needsCtimeBump = true + clearSID = true + } + if mask&linux.STATX_GID != 0 { + atomic.StoreUint32(&i.gid, stat.GID) + needsCtimeBump = true + clearSID = true + } + if mask&linux.STATX_MODE != 0 { + for { + old := atomic.LoadUint32(&i.mode) + ft := old & linux.S_IFMT + newMode := ft | uint32(stat.Mode & ^uint16(linux.S_IFMT)) + if clearSID { + newMode = vfs.ClearSUIDAndSGID(newMode) + } + if swapped := atomic.CompareAndSwapUint32(&i.mode, old, newMode); swapped { + clearSID = false + break + } + } + needsCtimeBump = true + } now := i.fs.clock.Now().Nanoseconds() if mask&linux.STATX_ATIME != 0 { if stat.Atime.Nsec == linux.UTIME_NOW { @@ -584,6 +608,20 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs. // Ignore the ctime bump, since we just set it ourselves. needsCtimeBump = false } + + // We may have to clear the SUID/SGID bits, but didn't do so as part of + // STATX_MODE. + if clearSID { + for { + old := atomic.LoadUint32(&i.mode) + newMode := vfs.ClearSUIDAndSGID(old) + if swapped := atomic.CompareAndSwapUint32(&i.mode, old, newMode); swapped { + break + } + } + needsCtimeBump = true + } + if needsMtimeBump { atomic.StoreInt64(&i.mtime, now) } diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go index db6146fd2..b7704874f 100644 --- a/pkg/sentry/vfs/permissions.go +++ b/pkg/sentry/vfs/permissions.go @@ -326,3 +326,20 @@ func CheckXattrPermissions(creds *auth.Credentials, ats AccessTypes, mode linux. } return nil } + +// ClearSUIDAndSGID clears the setuid and/or setgid bits after a chown or write. +// Depending on the mode, neither bit, only the setuid bit, or both are cleared. +func ClearSUIDAndSGID(mode uint32) uint32 { + // Directories don't have their bits changed. + if mode&linux.ModeDirectory == linux.ModeDirectory { + return mode + } + + // Changing owners always disables the setuid bit. It disables + // the setgid bit when the file is executable. + mode &= ^uint32(linux.ModeSetUID) + if sgid := uint32(linux.ModeSetGID | linux.ModeGroupExec); mode&sgid == sgid { + mode &= ^uint32(linux.ModeSetGID) + } + return mode +} diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index d6658898d..9adb1cea3 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -89,7 +89,7 @@ syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:chown_test", - use_tmpfs = True, # chwon tests require gofer to be running as root. + use_tmpfs = True, # chown tests require gofer to be running as root. ) syscall_test( @@ -557,7 +557,11 @@ syscall_test( ) syscall_test( + add_overlay = True, test = "//test/syscalls/linux:setgid_test", + # setgid tests require the gofer's user namespace to have multiple groups, + # but bazel only provides one. + use_tmpfs = True, ) syscall_test( diff --git a/test/syscalls/linux/setgid.cc b/test/syscalls/linux/setgid.cc index bfd91ba4f..cd030b094 100644 --- a/test/syscalls/linux/setgid.cc +++ b/test/syscalls/linux/setgid.cc @@ -86,7 +86,7 @@ class SetgidDirTest : public ::testing::Test { original_gid_ = getegid(); // TODO(b/175325250): Enable when setgid directories are supported. - SKIP_IF(IsRunningOnGvisor()); + SKIP_IF(IsRunningWithVFS1()); SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETGID))); temp_dir_ = ASSERT_NO_ERRNO_AND_VALUE( @@ -305,9 +305,7 @@ struct FileModeTestcase { class FileModeTest : public ::testing::TestWithParam<FileModeTestcase> {}; TEST_P(FileModeTest, WriteToFile) { - // TODO(b/175325250): Enable when setgid directories are supported. - SKIP_IF(IsRunningOnGvisor()); - + SKIP_IF(IsRunningWithVFS1()); auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0777 /* mode */)); auto path = JoinPath(temp_dir.path(), GetParam().name); @@ -330,9 +328,7 @@ TEST_P(FileModeTest, WriteToFile) { } TEST_P(FileModeTest, TruncateFile) { - // TODO(b/175325250): Enable when setgid directories are supported. - SKIP_IF(IsRunningOnGvisor()); - + SKIP_IF(IsRunningWithVFS1()); auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0777 /* mode */)); auto path = JoinPath(temp_dir.path(), GetParam().name); @@ -343,6 +339,11 @@ TEST_P(FileModeTest, TruncateFile) { ASSERT_THAT(fstat(fd.get(), &stats), SyscallSucceeds()); EXPECT_EQ(stats.st_mode & kDirmodeMask, GetParam().mode); + // Write something to the file, as truncating an empty file is a no-op. + constexpr char c = 'M'; + ASSERT_THAT(write(fd.get(), &c, sizeof(c)), + SyscallSucceedsWithValue(sizeof(c))); + // For security reasons, truncating the file clears the SUID bit, and clears // the SGID bit when the group executable bit is unset (which is not a true // SGID binary). |