diff options
Diffstat (limited to 'pkg/sentry/fsimpl')
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/base.go | 21 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/cgroupfs.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/filesystem.go | 22 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/gofer.go | 23 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/filesystem.go | 13 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/overlay/filesystem.go | 16 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks_sys.go | 1 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/testutil/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/testutil/kernel.go | 10 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/filesystem.go | 21 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/tmpfs.go | 57 |
11 files changed, 115 insertions, 75 deletions
diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go index 6512e9cdb..fe9871bdd 100644 --- a/pkg/sentry/fsimpl/cgroupfs/base.go +++ b/pkg/sentry/fsimpl/cgroupfs/base.go @@ -133,6 +133,17 @@ func (c *cgroupInode) Controllers() []kernel.CgroupController { return c.fs.kcontrollers } +// tasks returns a snapshot of the tasks inside the cgroup. +func (c *cgroupInode) tasks() []*kernel.Task { + c.fs.tasksMu.RLock() + defer c.fs.tasksMu.RUnlock() + ts := make([]*kernel.Task, 0, len(c.ts)) + for t := range c.ts { + ts = append(ts, t) + } + return ts +} + // Enter implements kernel.CgroupImpl.Enter. func (c *cgroupInode) Enter(t *kernel.Task) { c.fs.tasksMu.Lock() @@ -163,10 +174,7 @@ func (d *cgroupProcsData) Generate(ctx context.Context, buf *bytes.Buffer) error pgids := make(map[kernel.ThreadID]struct{}) - d.fs.tasksMu.RLock() - defer d.fs.tasksMu.RUnlock() - - for task := range d.ts { + for _, task := range d.tasks() { // Map dedups pgid, since iterating over all tasks produces multiple // entries for the group leaders. if pgid := currPidns.IDOfThreadGroup(task.ThreadGroup()); pgid != 0 { @@ -205,10 +213,7 @@ func (d *tasksData) Generate(ctx context.Context, buf *bytes.Buffer) error { var pids []kernel.ThreadID - d.fs.tasksMu.RLock() - defer d.fs.tasksMu.RUnlock() - - for task := range d.ts { + for _, task := range d.tasks() { if pid := currPidns.IDOfTask(task); pid != 0 { pids = append(pids, pid) } diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go index 54050de3c..05d7eb4ce 100644 --- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go +++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go @@ -49,8 +49,9 @@ // // kernel.CgroupRegistry.mu // cgroupfs.filesystem.mu -// Task.mu -// cgroupfs.filesystem.tasksMu. +// kernel.TaskSet.mu +// kernel.Task.mu +// cgroupfs.filesystem.tasksMu. package cgroupfs import ( diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 91ec4a142..eb09d54c3 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -1194,11 +1194,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st // RenameAt implements vfs.FilesystemImpl.RenameAt. func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { - if opts.Flags != 0 { - // Requires 9P support. - return syserror.EINVAL - } - + // Resolve newParent first to verify that it's on this Mount. var ds *[]*dentry fs.renameMu.Lock() defer fs.renameMuUnlockAndCheckCaching(ctx, &ds) @@ -1206,8 +1202,21 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if err != nil { return err } + + if opts.Flags&^linux.RENAME_NOREPLACE != 0 { + return syserror.EINVAL + } + if fs.opts.interop == InteropModeShared && opts.Flags&linux.RENAME_NOREPLACE != 0 { + // Requires 9P support to synchronize with other remote filesystem + // users. + return syserror.EINVAL + } + newName := rp.Component() if newName == "." || newName == ".." { + if opts.Flags&linux.RENAME_NOREPLACE != 0 { + return syserror.EEXIST + } return syserror.EBUSY } mnt := rp.Mount() @@ -1280,6 +1289,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } var replacedVFSD *vfs.Dentry if replaced != nil { + if opts.Flags&linux.RENAME_NOREPLACE != 0 { + return syserror.EEXIST + } replacedVFSD = &replaced.vfsd if replaced.isDir() { if !renamed.isDir() { diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 21692d2ac..cf69e1b7a 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -1282,9 +1282,12 @@ func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) } func (d *dentry) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error { - // We only support xattrs prefixed with "user." (see b/148380782). Currently, - // there is no need to expose any other xattrs through a gofer. - if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + // Deny access to the "security" and "system" namespaces since applications + // may expect these to affect kernel behavior in unimplemented ways + // (b/148380782). Allow all other extended attributes to be passed through + // to the remote filesystem. This is inconsistent with Linux's 9p client, + // but consistent with other filesystems (e.g. FUSE). + if strings.HasPrefix(name, linux.XATTR_SECURITY_PREFIX) || strings.HasPrefix(name, linux.XATTR_SYSTEM_PREFIX) { return syserror.EOPNOTSUPP } mode := linux.FileMode(atomic.LoadUint32(&d.mode)) @@ -1684,7 +1687,7 @@ func (d *dentry) setDeleted() { } func (d *dentry) listXattr(ctx context.Context, creds *auth.Credentials, size uint64) ([]string, error) { - if d.file.isNil() || !d.userXattrSupported() { + if d.file.isNil() { return nil, nil } xattrMap, err := d.file.listXattr(ctx, size) @@ -1693,10 +1696,7 @@ func (d *dentry) listXattr(ctx context.Context, creds *auth.Credentials, size ui } xattrs := make([]string, 0, len(xattrMap)) for x := range xattrMap { - // We only support xattrs in the user.* namespace. - if strings.HasPrefix(x, linux.XATTR_USER_PREFIX) { - xattrs = append(xattrs, x) - } + xattrs = append(xattrs, x) } return xattrs, nil } @@ -1731,13 +1731,6 @@ func (d *dentry) removeXattr(ctx context.Context, creds *auth.Credentials, name return d.file.removeXattr(ctx, name) } -// Extended attributes in the user.* namespace are only supported for regular -// files and directories. -func (d *dentry) userXattrSupported() bool { - filetype := linux.FileMode(atomic.LoadUint32(&d.mode)).FileType() - return filetype == linux.ModeRegular || filetype == linux.ModeDirectory -} - // Preconditions: // * !d.isSynthetic(). // * d.isRegularFile() || d.isDir(). diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index f50b0fb08..8fac53c60 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -635,12 +635,6 @@ func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st // RenameAt implements vfs.FilesystemImpl.RenameAt. func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { - // Only RENAME_NOREPLACE is supported. - if opts.Flags&^linux.RENAME_NOREPLACE != 0 { - return syserror.EINVAL - } - noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0 - fs.mu.Lock() defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() @@ -651,6 +645,13 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if err != nil { return err } + + // Only RENAME_NOREPLACE is supported. + if opts.Flags&^linux.RENAME_NOREPLACE != 0 { + return syserror.EINVAL + } + noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0 + mnt := rp.Mount() if mnt != oldParentVD.Mount() { return syserror.EXDEV diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index 46c500427..6b6fa0bd5 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -1017,10 +1017,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st // RenameAt implements vfs.FilesystemImpl.RenameAt. func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { - if opts.Flags != 0 { - return syserror.EINVAL - } - + // Resolve newParent first to verify that it's on this Mount. var ds *[]*dentry fs.renameMu.Lock() defer fs.renameMuUnlockAndCheckDrop(ctx, &ds) @@ -1028,8 +1025,16 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if err != nil { return err } + + if opts.Flags&^linux.RENAME_NOREPLACE != 0 { + return syserror.EINVAL + } + newName := rp.Component() if newName == "." || newName == ".." { + if opts.Flags&linux.RENAME_NOREPLACE != 0 { + return syserror.EEXIST + } return syserror.EBUSY } mnt := rp.Mount() @@ -1093,6 +1098,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return err } if replaced != nil { + if opts.Flags&linux.RENAME_NOREPLACE != 0 { + return syserror.EEXIST + } replacedVFSD = &replaced.vfsd if replaced.isDir() { if !renamed.isDir() { diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index 88ab49048..2bc98a94f 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -55,6 +55,7 @@ func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k * }), }), "vm": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ + "max_map_count": fs.newInode(ctx, root, 0444, newStaticFile("2147483647\n")), "mmap_min_addr": fs.newInode(ctx, root, 0444, &mmapMinAddrData{k: k}), "overcommit_memory": fs.newInode(ctx, root, 0444, newStaticFile("0\n")), }), diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD index c766164c7..b3f9d1010 100644 --- a/pkg/sentry/fsimpl/testutil/BUILD +++ b/pkg/sentry/fsimpl/testutil/BUILD @@ -17,7 +17,6 @@ go_library( "//pkg/fspath", "//pkg/hostarch", "//pkg/memutil", - "//pkg/metric", "//pkg/sentry/fsbridge", "//pkg/sentry/fsimpl/tmpfs", "//pkg/sentry/kernel", diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go index 438840ae2..473b41cff 100644 --- a/pkg/sentry/fsimpl/testutil/kernel.go +++ b/pkg/sentry/fsimpl/testutil/kernel.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -63,8 +62,6 @@ func Boot() (*kernel.Kernel, error) { return nil, fmt.Errorf("creating platform: %v", err) } - metric.CreateSentryMetrics() - kernel.VFS2Enabled = true k := &kernel.Kernel{ Platform: plat, @@ -83,12 +80,8 @@ func Boot() (*kernel.Kernel, error) { } // Create timekeeper. - tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) - if err != nil { - return nil, fmt.Errorf("creating timekeeper: %v", err) - } + tk := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) tk.SetClocks(time.NewCalibratedClocks()) - k.SetTimekeeper(tk) creds := auth.NewRootCredentials(auth.NewRootUserNamespace()) @@ -97,6 +90,7 @@ func Boot() (*kernel.Kernel, error) { if err = k.Init(kernel.InitKernelArgs{ ApplicationCores: uint(runtime.GOMAXPROCS(-1)), FeatureSet: cpuid.HostFeatureSet(), + Timekeeper: tk, RootUserNamespace: creds.UserNamespace, Vdso: vdso, RootUTSNamespace: kernel.NewUTSNamespace("hostname", "domain", creds.UserNamespace), diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 766289e60..f0f4297ef 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -496,20 +496,24 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st // RenameAt implements vfs.FilesystemImpl.RenameAt. func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { - if opts.Flags != 0 { - // TODO(b/145974740): Support renameat2 flags. - return syserror.EINVAL - } - - // Resolve newParent first to verify that it's on this Mount. + // Resolve newParentDir first to verify that it's on this Mount. fs.mu.Lock() defer fs.mu.Unlock() newParentDir, err := walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry)) if err != nil { return err } + + if opts.Flags&^linux.RENAME_NOREPLACE != 0 { + // TODO(b/145974740): Support other renameat2 flags. + return syserror.EINVAL + } + newName := rp.Component() if newName == "." || newName == ".." { + if opts.Flags&linux.RENAME_NOREPLACE != 0 { + return syserror.EEXIST + } return syserror.EBUSY } mnt := rp.Mount() @@ -556,6 +560,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } replaced, ok := newParentDir.childMap[newName] if ok { + if opts.Flags&linux.RENAME_NOREPLACE != 0 { + return syserror.EEXIST + } replacedDir, ok := replaced.inode.impl.(*directory) if ok { if !renamed.inode.isDir() { @@ -815,7 +822,7 @@ func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si if err != nil { return nil, err } - return d.inode.listXattr(size) + return d.inode.listXattr(rp.Credentials(), size) } // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 9ae25ce9e..6b4367c42 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -717,44 +717,63 @@ func (i *inode) touchCMtimeLocked() { atomic.StoreInt64(&i.ctime, now) } -func (i *inode) listXattr(size uint64) ([]string, error) { - return i.xattrs.ListXattr(size) +func checkXattrName(name string) error { + // Linux's tmpfs supports "security" and "trusted" xattr namespaces, and + // (depending on build configuration) POSIX ACL xattr namespaces + // ("system.posix_acl_access" and "system.posix_acl_default"). We don't + // support POSIX ACLs or the "security" namespace (b/148380782). + if strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX) { + return nil + } + // We support the "user" namespace because we have tests that depend on + // this feature. + if strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return nil + } + return syserror.EOPNOTSUPP +} + +func (i *inode) listXattr(creds *auth.Credentials, size uint64) ([]string, error) { + return i.xattrs.ListXattr(creds, size) } func (i *inode) getXattr(creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) { - if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil { + if err := checkXattrName(opts.Name); err != nil { return "", err } - return i.xattrs.GetXattr(opts) + mode := linux.FileMode(atomic.LoadUint32(&i.mode)) + kuid := auth.KUID(atomic.LoadUint32(&i.uid)) + kgid := auth.KGID(atomic.LoadUint32(&i.gid)) + if err := vfs.GenericCheckPermissions(creds, vfs.MayRead, mode, kuid, kgid); err != nil { + return "", err + } + return i.xattrs.GetXattr(creds, mode, kuid, opts) } func (i *inode) setXattr(creds *auth.Credentials, opts *vfs.SetXattrOptions) error { - if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil { + if err := checkXattrName(opts.Name); err != nil { return err } - return i.xattrs.SetXattr(opts) -} - -func (i *inode) removeXattr(creds *auth.Credentials, name string) error { - if err := i.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil { + mode := linux.FileMode(atomic.LoadUint32(&i.mode)) + kuid := auth.KUID(atomic.LoadUint32(&i.uid)) + kgid := auth.KGID(atomic.LoadUint32(&i.gid)) + if err := vfs.GenericCheckPermissions(creds, vfs.MayWrite, mode, kuid, kgid); err != nil { return err } - return i.xattrs.RemoveXattr(name) + return i.xattrs.SetXattr(creds, mode, kuid, opts) } -func (i *inode) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error { - // We currently only support extended attributes in the user.* and - // trusted.* namespaces. See b/148380782. - if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) && !strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX) { - return syserror.EOPNOTSUPP +func (i *inode) removeXattr(creds *auth.Credentials, name string) error { + if err := checkXattrName(name); err != nil { + return err } mode := linux.FileMode(atomic.LoadUint32(&i.mode)) kuid := auth.KUID(atomic.LoadUint32(&i.uid)) kgid := auth.KGID(atomic.LoadUint32(&i.gid)) - if err := vfs.GenericCheckPermissions(creds, ats, mode, kuid, kgid); err != nil { + if err := vfs.GenericCheckPermissions(creds, vfs.MayWrite, mode, kuid, kgid); err != nil { return err } - return vfs.CheckXattrPermissions(creds, ats, mode, kuid, name) + return i.xattrs.RemoveXattr(creds, mode, kuid, name) } // fileDescription is embedded by tmpfs implementations of @@ -807,7 +826,7 @@ func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { // ListXattr implements vfs.FileDescriptionImpl.ListXattr. func (fd *fileDescription) ListXattr(ctx context.Context, size uint64) ([]string, error) { - return fd.inode().listXattr(size) + return fd.inode().listXattr(auth.CredentialsFromContext(ctx), size) } // GetXattr implements vfs.FileDescriptionImpl.GetXattr. |