summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Nicolas Lacasse <nlacasse@google.com> 2020-03-31 15:00:26 -0700
committer gVisor bot <gvisor-bot@google.com> 2020-03-31 15:07:26 -0700
commit e1c8eaca8f8413b17dab8f01b2e123e9d4b9ddbc (patch)
tree 1aed92ac42f776f3670b6fe688a7c207d0579211
parent 9de982ea790ffe56eca07b6535e9420b669b7c0c (diff)
Fix /proc/self/mounts and /proc/self/mountinfo in VFS2.
Some extra fields were added to the Mount type to expose necessary data to the proc filesystem.

PiperOrigin-RevId: 304053361
-rw-r--r-- pkg/sentry/fsimpl/proc/task_files.go 183
-rw-r--r-- pkg/sentry/vfs/mount.go 192
2 files changed, 219 insertions, 156 deletions
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 8c743df8d..df0d1bcc5 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -18,13 +18,10 @@ import (
"bytes"
"fmt"
"io"
- "sort"
- "strings"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
- "gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -634,51 +631,6 @@ func (s *exeSymlink) executable() (file fsbridge.File, err error) {
return
}
-// forEachMountSource runs f for the process root mount and each mount that is
-// a descendant of the root.
-func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) {
- var fsctx *kernel.FSContext
- t.WithMuLocked(func(t *kernel.Task) {
- fsctx = t.FSContext()
- })
- if fsctx == nil {
- // The task has been destroyed. Nothing to show here.
- return
- }
-
- // All mount points must be relative to the rootDir, and mounts outside
- // will be excluded.
- rootDir := fsctx.RootDirectory()
- if rootDir == nil {
- // The task has been destroyed. Nothing to show here.
- return
- }
- defer rootDir.DecRef()
-
- mnt := t.MountNamespace().FindMount(rootDir)
- if mnt == nil {
- // Has it just been unmounted?
- return
- }
- ms := t.MountNamespace().AllMountsUnder(mnt)
- sort.Slice(ms, func(i, j int) bool {
- return ms[i].ID < ms[j].ID
- })
- for _, m := range ms {
- mroot := m.Root()
- if mroot == nil {
- continue // No longer valid.
- }
- mountPath, desc := mroot.FullName(rootDir)
- mroot.DecRef()
- if !desc {
- // MountSources that are not descendants of the chroot jail are ignored.
- continue
- }
- fn(mountPath, m)
- }
-}
-
// mountInfoData is used to implement /proc/[pid]/mountinfo.
//
// +stateify savable
@@ -692,92 +644,22 @@ var _ dynamicInode = (*mountInfoData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- forEachMount(i.task, func(mountPath string, m *fs.Mount) {
- mroot := m.Root()
- if mroot == nil {
- return // No longer valid.
- }
- defer mroot.DecRef()
-
- // Format:
- // 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
- // (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
-
- // (1) MountSource ID.
- fmt.Fprintf(buf, "%d ", m.ID)
-
- // (2) Parent ID (or this ID if there is no parent).
- pID := m.ID
- if !m.IsRoot() && !m.IsUndo() {
- pID = m.ParentID
- }
- fmt.Fprintf(buf, "%d ", pID)
-
- // (3) Major:Minor device ID. We don't have a superblock, so we
- // just use the root inode device number.
- sa := mroot.Inode.StableAttr
- fmt.Fprintf(buf, "%d:%d ", sa.DeviceFileMajor, sa.DeviceFileMinor)
-
- // (4) Root: the pathname of the directory in the filesystem
- // which forms the root of this mount.
- //
- // NOTE(b/78135857): This will always be "/" until we implement
- // bind mounts.
- fmt.Fprintf(buf, "/ ")
-
- // (5) Mount point (relative to process root).
- fmt.Fprintf(buf, "%s ", mountPath)
-
- // (6) Mount options.
- flags := mroot.Inode.MountSource.Flags
- opts := "rw"
- if flags.ReadOnly {
- opts = "ro"
- }
- if flags.NoAtime {
- opts += ",noatime"
- }
- if flags.NoExec {
- opts += ",noexec"
- }
- fmt.Fprintf(buf, "%s ", opts)
-
- // (7) Optional fields: zero or more fields of the form "tag[:value]".
- // (8) Separator: the end of the optional fields is marked by a single hyphen.
- fmt.Fprintf(buf, "- ")
-
- // (9) Filesystem type.
- fmt.Fprintf(buf, "%s ", mroot.Inode.MountSource.FilesystemType)
-
- // (10) Mount source: filesystem-specific information or "none".
- fmt.Fprintf(buf, "none ")
-
- // (11) Superblock options, and final newline.
- fmt.Fprintf(buf, "%s\n", superBlockOpts(mountPath, mroot.Inode.MountSource))
+ var fsctx *kernel.FSContext
+ i.task.WithMuLocked(func(t *kernel.Task) {
+ fsctx = t.FSContext()
})
- return nil
-}
-
-func superBlockOpts(mountPath string, msrc *fs.MountSource) string {
- // gVisor doesn't (yet) have a concept of super block options, so we
- // use the ro/rw bit from the mount flag.
- opts := "rw"
- if msrc.Flags.ReadOnly {
- opts = "ro"
+ if fsctx == nil {
+ // The task has been destroyed. Nothing to show here.
+ return nil
}
-
- // NOTE(b/147673608): If the mount is a cgroup, we also need to include
- // the cgroup name in the options. For now we just read that from the
- // path.
- // TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, we
- // should get this value from the cgroup itself, and not rely on the
- // path.
- if msrc.FilesystemType == "cgroup" {
- splitPath := strings.Split(mountPath, "/")
- cgroupType := splitPath[len(splitPath)-1]
- opts += "," + cgroupType
+ rootDir := fsctx.RootDirectoryVFS2()
+ if !rootDir.Ok() {
+ // Root has been destroyed. Don't try to read mounts.
+ return nil
}
- return opts
+ defer rootDir.DecRef()
+ i.task.Kernel().VFS().GenerateProcMountInfo(ctx, rootDir, buf)
+ return nil
}
// mountsData is used to implement /proc/[pid]/mounts.
@@ -789,33 +671,24 @@ type mountsData struct {
task *kernel.Task
}
-var _ dynamicInode = (*mountInfoData)(nil)
+var _ dynamicInode = (*mountsData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- forEachMount(i.task, func(mountPath string, m *fs.Mount) {
- // Format:
- // <special device or remote filesystem> <mount point> <filesystem type> <mount options> <needs dump> <fsck order>
- //
- // We use the filesystem name as the first field, since there
- // is no real block device we can point to, and we also should
- // not expose anything about the remote filesystem.
- //
- // Only ro/rw option is supported for now.
- //
- // The "needs dump"and fsck flags are always 0, which is allowed.
- root := m.Root()
- if root == nil {
- return // No longer valid.
- }
- defer root.DecRef()
-
- flags := root.Inode.MountSource.Flags
- opts := "rw"
- if flags.ReadOnly {
- opts = "ro"
- }
- fmt.Fprintf(buf, "%s %s %s %s %d %d\n", "none", mountPath, root.Inode.MountSource.FilesystemType, opts, 0, 0)
+ var fsctx *kernel.FSContext
+ i.task.WithMuLocked(func(t *kernel.Task) {
+ fsctx = t.FSContext()
})
+ if fsctx == nil {
+ // The task has been destroyed. Nothing to show here.
+ return nil
+ }
+ rootDir := fsctx.RootDirectoryVFS2()
+ if !rootDir.Ok() {
+ // Root has been destroyed. Don't try to read mounts.
+ return nil
+ }
+ defer rootDir.DecRef()
+ i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf)
return nil
}
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 4b68cabda..7792eb1a0 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -15,7 +15,11 @@
package vfs
import (
+ "bytes"
+ "fmt"
"math"
+ "sort"
+ "strings"
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -44,7 +48,7 @@ var lastMountID uint64
//
// +stateify savable
type Mount struct {
- // vfs, fs, and root are immutable. References are held on fs and root.
+ // vfs, fs, root are immutable. References are held on fs and root.
//
// Invariant: root belongs to fs.
vfs *VirtualFilesystem
@@ -639,12 +643,28 @@ func (mnt *Mount) setReadOnlyLocked(ro bool) error {
return nil
}
+// readOnly reports whether mnt is read-only, i.e. whether the atomically
+// loaded value of mnt.writers is negative.
+func (mnt *Mount) readOnly() bool {
+	writers := atomic.LoadInt64(&mnt.writers)
+	return writers < 0
+}
+
// Filesystem returns the mounted Filesystem. It does not take a reference on
// the returned Filesystem.
func (mnt *Mount) Filesystem() *Filesystem {
return mnt.fs
}
+// submountsLocked returns a slice holding mnt itself followed by every Mount
+// that is a descendant of it, collected by recursing through mnt.children.
+//
+// Precondition: mnt.vfs.mountMu must be held.
+func (mnt *Mount) submountsLocked() []*Mount {
+	all := []*Mount{mnt}
+	for child := range mnt.children {
+		descendants := child.submountsLocked()
+		all = append(all, descendants...)
+	}
+	return all
+}
+
// Root returns mntns' root. A reference is taken on the returned
// VirtualDentry.
func (mntns *MountNamespace) Root() VirtualDentry {
@@ -655,3 +675,173 @@ func (mntns *MountNamespace) Root() VirtualDentry {
vd.IncRef()
return vd
}
+
+// GenerateProcMounts emits the contents of /proc/[pid]/mounts for vfs to buf.
+//
+// One line is written for taskRootDir's mount and for each of its submounts,
+// in ascending mount ID order. A mount is omitted when no pathname for its
+// root can be obtained relative to taskRootDir; the remaining mounts are
+// still emitted.
+//
+// Preconditions: taskRootDir.Ok().
+func (vfs *VirtualFilesystem) GenerateProcMounts(ctx context.Context, taskRootDir VirtualDentry, buf *bytes.Buffer) {
+	vfs.mountMu.Lock()
+	defer vfs.mountMu.Unlock()
+	rootMnt := taskRootDir.mount
+	mounts := rootMnt.submountsLocked()
+	sort.Slice(mounts, func(i, j int) bool { return mounts[i].ID < mounts[j].ID })
+	for _, mnt := range mounts {
+		// Get the path to this mount relative to task root.
+		mntRootVD := VirtualDentry{
+			mount:  mnt,
+			dentry: mnt.root,
+		}
+		path, err := vfs.PathnameReachable(ctx, taskRootDir, mntRootVD)
+		if err != nil {
+			// For some reason we didn't get a path. Log a warning
+			// and skip this mount only.
+			ctx.Warningf("Error getting pathname for mount root %+v: %v", mnt.root, err)
+			continue
+		}
+		if path == "" {
+			// The mount is not reachable from the task root. Skip
+			// it, but keep going: mounts with higher IDs may still
+			// be reachable.
+			continue
+		}
+
+		opts := "rw"
+		if mnt.readOnly() {
+			opts = "ro"
+		}
+		if mnt.flags.NoExec {
+			opts += ",noexec"
+		}
+
+		// Format:
+		// <special device or remote filesystem> <mount point> <filesystem type> <mount options> <needs dump> <fsck order>
+		//
+		// The mount point is escaped with manglePath so that
+		// whitespace in the path cannot be mistaken for a field
+		// separator.
+		//
+		// The "needs dump" and "fsck order" flags are always 0, which
+		// is allowed.
+		fmt.Fprintf(buf, "%s %s %s %s %d %d\n", "none", manglePath(path), mnt.fs.FilesystemType().Name(), opts, 0, 0)
+	}
+}
+
+// GenerateProcMountInfo emits the contents of /proc/[pid]/mountinfo for vfs to
+// buf.
+//
+// One line is written for taskRootDir's mount and for each of its submounts,
+// in ascending mount ID order. A mount is omitted when no pathname for its
+// root can be obtained relative to taskRootDir, or when its root cannot be
+// stat'ed; the remaining mounts are still emitted.
+//
+// Preconditions: taskRootDir.Ok().
+func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRootDir VirtualDentry, buf *bytes.Buffer) {
+	vfs.mountMu.Lock()
+	defer vfs.mountMu.Unlock()
+	rootMnt := taskRootDir.mount
+	mounts := rootMnt.submountsLocked()
+	sort.Slice(mounts, func(i, j int) bool { return mounts[i].ID < mounts[j].ID })
+	for _, mnt := range mounts {
+		// Get the path to this mount relative to task root.
+		mntRootVD := VirtualDentry{
+			mount:  mnt,
+			dentry: mnt.root,
+		}
+		path, err := vfs.PathnameReachable(ctx, taskRootDir, mntRootVD)
+		if err != nil {
+			// For some reason we didn't get a path. Log a warning
+			// and skip this mount only.
+			ctx.Warningf("Error getting pathname for mount root %+v: %v", mnt.root, err)
+			continue
+		}
+		if path == "" {
+			// The mount is not reachable from the task root. Skip
+			// it, but keep going: mounts with higher IDs may still
+			// be reachable.
+			continue
+		}
+		// Stat the mount root to get the major/minor device numbers.
+		pop := &PathOperation{
+			Root:  mntRootVD,
+			Start: mntRootVD,
+		}
+		statx, err := vfs.StatAt(ctx, auth.NewAnonymousCredentials(), pop, &StatOptions{})
+		if err != nil {
+			// Well that's not good. Ignore this mount, but still
+			// report the others.
+			ctx.Warningf("Error stat'ing mount root %+v: %v", mnt.root, err)
+			continue
+		}
+
+		// Format:
+		// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+		// (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
+
+		// (1) Mount ID.
+		fmt.Fprintf(buf, "%d ", mnt.ID)
+
+		// (2) Parent ID (or this ID if there is no parent).
+		pID := mnt.ID
+		if p := mnt.parent(); p != nil {
+			pID = p.ID
+		}
+		fmt.Fprintf(buf, "%d ", pID)
+
+		// (3) Major:Minor device ID. We don't have a superblock, so we
+		// just use the root inode device number.
+		fmt.Fprintf(buf, "%d:%d ", statx.DevMajor, statx.DevMinor)
+
+		// (4) Root: the pathname of the directory in the filesystem
+		// which forms the root of this mount.
+		//
+		// NOTE(b/78135857): This will always be "/" until we implement
+		// bind mounts.
+		fmt.Fprintf(buf, "/ ")
+
+		// (5) Mount point (relative to process root), escaped so that
+		// whitespace in the path cannot be mistaken for a field
+		// separator.
+		fmt.Fprintf(buf, "%s ", manglePath(path))
+
+		// (6) Mount options.
+		opts := "rw"
+		if mnt.readOnly() {
+			opts = "ro"
+		}
+		if mnt.flags.NoExec {
+			opts += ",noexec"
+		}
+		// TODO(gvisor.dev/issue/1193): Add "noatime" if MS_NOATIME is
+		// set.
+		fmt.Fprintf(buf, "%s ", opts)
+
+		// (7) Optional fields: zero or more fields of the form "tag[:value]".
+		// (8) Separator: the end of the optional fields is marked by a single hyphen.
+		fmt.Fprintf(buf, "- ")
+
+		// (9) Filesystem type.
+		fmt.Fprintf(buf, "%s ", mnt.fs.FilesystemType().Name())
+
+		// (10) Mount source: filesystem-specific information or "none".
+		fmt.Fprintf(buf, "none ")
+
+		// (11) Superblock options, and final newline.
+		fmt.Fprintf(buf, "%s\n", superBlockOpts(path, mnt))
+	}
+}
+
+// manglePath replaces ' ', '\t', '\n', and '\\' with their octal equivalents.
+// See Linux fs/seq_file.c:mangle_path.
+func manglePath(p string) string {
+ r := strings.NewReplacer(" ", "\\040", "\t", "\\011", "\n", "\\012", "\\", "\\134")
+ return r.Replace(p)
+}
+
+// superBlockOpts returns the super block options string for the mount at the
+// given path: "ro" or "rw" according to mnt.readOnly(), followed for cgroup
+// filesystems by a comma and the last "/"-separated component of mountPath.
+func superBlockOpts(mountPath string, mnt *Mount) string {
+	// gVisor doesn't (yet) have a concept of super block options, so we
+	// use the ro/rw bit from the mount flag.
+	opts := "rw"
+	if mnt.readOnly() {
+		opts = "ro"
+	}
+	if mnt.fs.FilesystemType().Name() != "cgroup" {
+		return opts
+	}
+
+	// NOTE(b/147673608): If the mount is a cgroup, we also need to include
+	// the cgroup name in the options. For now we just read that from the
+	// path.
+	// TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, we
+	// should get this value from the cgroup itself, and not rely on the
+	// path.
+	//
+	// LastIndex yields -1 when mountPath contains no "/", in which case the
+	// whole path is taken as the cgroup name.
+	cgroupType := mountPath[strings.LastIndex(mountPath, "/")+1:]
+	return opts + "," + cgroupType
+}