summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Fabricio Voznika <fvoznika@google.com> 2019-05-23 04:15:18 -0700
committer Shentubot <shentubot@google.com> 2019-05-23 04:16:10 -0700
commit 9006304dfecf3670ad03c9629f9a4ac3273c386a (patch)
tree 958b4c09c1118cd173675618002a2c1f32384071
parent 022bd0fd1091a29a41fa4c065ac35e45e3d6c576 (diff)
Initial support for bind mounts
Separate MountSource from Mount. This is needed to allow mounts to be shared by multiple containers within the same pod. PiperOrigin-RevId: 249617810 Change-Id: Id2944feb7e4194951f355cbe6d4944ae3c02e468
-rw-r--r-- pkg/sentry/fs/mock.go 1
-rw-r--r-- pkg/sentry/fs/mount.go 73
-rw-r--r-- pkg/sentry/fs/mount_test.go 167
-rw-r--r-- pkg/sentry/fs/mounts.go 258
-rw-r--r-- pkg/sentry/fs/proc/mounts.go 48
-rw-r--r-- runsc/boot/fs.go 16
6 files changed, 317 insertions, 246 deletions
diff --git a/pkg/sentry/fs/mock.go b/pkg/sentry/fs/mock.go
index 064943c5b..ff04e9b22 100644
--- a/pkg/sentry/fs/mock.go
+++ b/pkg/sentry/fs/mock.go
@@ -62,7 +62,6 @@ func NewMockMountSource(cache *DirentCache) *MountSource {
return &MountSource{
MountSourceOperations: &MockMountSourceOps{keep: keep},
fscache: cache,
- children: make(map[*MountSource]struct{}),
}
}
diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go
index 63fcf4380..41e0d285b 100644
--- a/pkg/sentry/fs/mount.go
+++ b/pkg/sentry/fs/mount.go
@@ -17,7 +17,6 @@ package fs
import (
"bytes"
"fmt"
- "sync"
"sync/atomic"
"gvisor.googlesource.com/gvisor/pkg/refs"
@@ -89,15 +88,7 @@ func (i InodeMappings) String() string {
// one mount source. Each file object may only be represented using one inode
// object in a sentry instance.
//
-// This is an amalgamation of structs super_block, vfsmount, and mount, while
-// MountSourceOperations is akin to struct super_operations.
-//
-// Hence, mount source also contains common mounted file system state, such as
-// mount flags, the root Dirent, and children mounts. For now, this
-// amalgamation implies that a mount source cannot be shared by multiple mounts
-// (e.g. cannot be mounted at different locations).
-//
-// TODO(b/63601033): Move mount-specific information out of MountSource.
+// TODO(b/63601033): Move Flags out of MountSource to Mount.
//
// +stateify savable
type MountSource struct {
@@ -128,22 +119,6 @@ type MountSource struct {
//
// direntRefs must be atomically changed.
direntRefs uint64
-
- // mu protects the fields below, which are set by the MountNamespace
- // during MountSource/Unmount.
- mu sync.Mutex `state:"nosave"`
-
- // id is a unique id for this mount.
- id uint64
-
- // root is the root Dirent of this mount.
- root *Dirent
-
- // parent is the parent MountSource, or nil if this MountSource is the root.
- parent *MountSource
-
- // children are the child MountSources of this MountSource.
- children map[*MountSource]struct{}
}
// DefaultDirentCacheSize is the number of Dirents that the VFS can hold an
@@ -162,53 +137,7 @@ func NewMountSource(mops MountSourceOperations, filesystem Filesystem, flags Mou
Flags: flags,
FilesystemType: fsType,
fscache: NewDirentCache(DefaultDirentCacheSize),
- children: make(map[*MountSource]struct{}),
- }
-}
-
-// Parent returns the parent mount, or nil if this mount is the root.
-func (msrc *MountSource) Parent() *MountSource {
- msrc.mu.Lock()
- defer msrc.mu.Unlock()
- return msrc.parent
-}
-
-// ID returns the ID of this mount.
-func (msrc *MountSource) ID() uint64 {
- msrc.mu.Lock()
- defer msrc.mu.Unlock()
- return msrc.id
-}
-
-// Children returns the (immediate) children of this MountSource.
-func (msrc *MountSource) Children() []*MountSource {
- msrc.mu.Lock()
- defer msrc.mu.Unlock()
-
- ms := make([]*MountSource, 0, len(msrc.children))
- for c := range msrc.children {
- ms = append(ms, c)
}
- return ms
-}
-
-// Submounts returns all mounts that are descendants of this mount.
-func (msrc *MountSource) Submounts() []*MountSource {
- var ms []*MountSource
- for _, c := range msrc.Children() {
- ms = append(ms, c)
- ms = append(ms, c.Submounts()...)
- }
- return ms
-}
-
-// Root returns the root dirent of this mount. Callers must call DecRef on the
-// returned dirent.
-func (msrc *MountSource) Root() *Dirent {
- msrc.mu.Lock()
- defer msrc.mu.Unlock()
- msrc.root.IncRef()
- return msrc.root
}
// DirentRefs returns the current mount direntRefs.
diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go
index 9f7fbeff2..2e2716643 100644
--- a/pkg/sentry/fs/mount_test.go
+++ b/pkg/sentry/fs/mount_test.go
@@ -32,6 +32,27 @@ func cacheReallyContains(cache *DirentCache, d *Dirent) bool {
return false
}
+func mountPathsAre(root *Dirent, got []*Mount, want ...string) error {
+ gotPaths := make(map[string]struct{}, len(got))
+ gotStr := make([]string, len(got))
+ for i, g := range got {
+ groot := g.Root()
+ name, _ := groot.FullName(root)
+ groot.DecRef()
+ gotStr[i] = name
+ gotPaths[name] = struct{}{}
+ }
+ if len(got) != len(want) {
+ return fmt.Errorf("mount paths are different, got: %q, want: %q", gotStr, want)
+ }
+ for _, w := range want {
+ if _, ok := gotPaths[w]; !ok {
+ return fmt.Errorf("no mount with path %q found", w)
+ }
+ }
+ return nil
+}
+
// TestMountSourceOnlyCachedOnce tests that a Dirent that is mounted over only ends
// up in a single Dirent Cache. NOTE(b/63848693): Having a dirent in multiple
// caches causes major consistency issues.
@@ -91,8 +112,7 @@ func TestMountSourceOnlyCachedOnce(t *testing.T) {
}
}
-// Test that mounts have proper parent/child relationships.
-func TestMountSourceParentChildRelationship(t *testing.T) {
+func TestAllMountsUnder(t *testing.T) {
ctx := contexttest.Context(t)
rootCache := NewDirentCache(100)
@@ -122,101 +142,130 @@ func TestMountSourceParentChildRelationship(t *testing.T) {
if err != nil {
t.Fatalf("could not find path %q in mount manager: %v", p, err)
}
+
submountInode := NewMockInode(ctx, NewMockMountSource(nil), StableAttr{
Type: Directory,
})
if err := mm.Mount(ctx, d, submountInode); err != nil {
t.Fatalf("could not mount at %q: %v", p, err)
}
+ d.DecRef()
}
- // mm root should contain all submounts (and does not include the root
- // mount).
- allMountSources := rootDirent.Inode.MountSource.Submounts()
- if err := mountPathsAre(rootDirent, allMountSources, paths...); err != nil {
+ // The mounts under the mm root should be all submounts plus the root mount itself.
+ rootMnt := mm.FindMount(rootDirent)
+ submounts := mm.AllMountsUnder(rootMnt)
+ allPaths := append(paths, "/")
+ if err := mountPathsAre(rootDirent, submounts, allPaths...); err != nil {
t.Error(err)
}
// Each mount should have a unique ID.
foundIDs := make(map[uint64]struct{})
- for _, m := range allMountSources {
- id := m.ID()
- if _, ok := foundIDs[id]; ok {
- t.Errorf("got multiple mounts with id %d", id)
+ for _, m := range submounts {
+ if _, ok := foundIDs[m.ID]; ok {
+ t.Errorf("got multiple mounts with id %d", m.ID)
}
- foundIDs[id] = struct{}{}
+ foundIDs[m.ID] = struct{}{}
}
// Root mount should have no parent.
- rootMountSource := mm.root.Inode.MountSource
- if p := rootMountSource.Parent(); p != nil {
+ if p := rootMnt.ParentID; p != invalidMountID {
t.Errorf("root.Parent got %v wanted nil", p)
}
- // Root mount should have 2 children: foo and waldo.
- rootChildren := rootMountSource.Children()
- if err := mountPathsAre(rootDirent, rootChildren, "/foo", "/waldo"); err != nil {
- t.Error(err)
- }
- // All root mount children should have root as parent.
- for _, c := range rootChildren {
- if p := c.Parent(); p != rootMountSource {
- t.Errorf("root mount child got parent %+v, wanted root mount", p)
- }
- }
-
- // "foo" mount should have two children: /foo/bar, and /foo/qux.
+ // Check that the "foo" mount has 3 submounts (the result also includes "/foo" itself).
maxTraversals = 0
d, err := mm.FindLink(ctx, rootDirent, nil, "/foo", &maxTraversals)
if err != nil {
t.Fatalf("could not find path %q in mount manager: %v", "/foo", err)
}
- fooMountSource := d.Inode.MountSource
- fooMountSourceChildren := fooMountSource.Children()
- if err := mountPathsAre(rootDirent, fooMountSourceChildren, "/foo/bar", "/foo/qux"); err != nil {
- t.Error(err)
- }
- // Each child should have fooMountSource as parent.
- for _, c := range fooMountSourceChildren {
- if p := c.Parent(); p != fooMountSource {
- t.Errorf("foo mount child got parent %+v, wanted foo mount", p)
- }
- }
- // Submounts of foo are /foo/bar, /foo/qux, and /foo/bar/baz.
- if err := mountPathsAre(rootDirent, fooMountSource.Submounts(), "/foo/bar", "/foo/qux", "/foo/bar/baz"); err != nil {
+ defer d.DecRef()
+ submounts = mm.AllMountsUnder(mm.FindMount(d))
+ if err := mountPathsAre(rootDirent, submounts, "/foo", "/foo/bar", "/foo/qux", "/foo/bar/baz"); err != nil {
t.Error(err)
}
- // "waldo" mount should have no submounts or children.
+ // "waldo" mount should have no children.
maxTraversals = 0
waldo, err := mm.FindLink(ctx, rootDirent, nil, "/waldo", &maxTraversals)
if err != nil {
t.Fatalf("could not find path %q in mount manager: %v", "/waldo", err)
}
- waldoMountSource := waldo.Inode.MountSource
- if got := len(waldoMountSource.Children()); got != 0 {
- t.Errorf("waldo got %d children, wanted 0", got)
- }
- if got := len(waldoMountSource.Submounts()); got != 0 {
- t.Errorf("waldo got %d children, wanted 0", got)
+ defer waldo.DecRef()
+ submounts = mm.AllMountsUnder(mm.FindMount(waldo))
+ if err := mountPathsAre(rootDirent, submounts, "/waldo"); err != nil {
+ t.Error(err)
}
}
-func mountPathsAre(root *Dirent, got []*MountSource, want ...string) error {
- if len(got) != len(want) {
- return fmt.Errorf("mount paths have different lengths: got %d want %d", len(got), len(want))
+func TestUnmount(t *testing.T) {
+ ctx := contexttest.Context(t)
+
+ rootCache := NewDirentCache(100)
+ rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{
+ Type: Directory,
+ })
+ mm, err := NewMountNamespace(ctx, rootInode)
+ if err != nil {
+ t.Fatalf("NewMountNamespace failed: %v", err)
}
- gotPaths := make(map[string]struct{}, len(got))
- for _, g := range got {
- groot := g.Root()
- n, _ := groot.FullName(root)
- groot.DecRef()
- gotPaths[n] = struct{}{}
+ rootDirent := mm.Root()
+ defer rootDirent.DecRef()
+
+ // Add mounts at the following paths:
+ paths := []string{
+ "/foo",
+ "/foo/bar",
+ "/foo/bar/goo",
+ "/foo/bar/goo/abc",
+ "/foo/abc",
+ "/foo/def",
+ "/waldo",
+ "/wally",
}
- for _, w := range want {
- if _, ok := gotPaths[w]; !ok {
- return fmt.Errorf("no mount with path %q found", w)
+
+ var maxTraversals uint
+ for _, p := range paths {
+ maxTraversals = 0
+ d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals)
+ if err != nil {
+ t.Fatalf("could not find path %q in mount manager: %v", p, err)
+ }
+
+ submountInode := NewMockInode(ctx, NewMockMountSource(nil), StableAttr{
+ Type: Directory,
+ })
+ if err := mm.Mount(ctx, d, submountInode); err != nil {
+ t.Fatalf("could not mount at %q: %v", p, err)
+ }
+ d.DecRef()
+ }
+
+ allPaths := make([]string, len(paths)+1)
+ allPaths[0] = "/"
+ copy(allPaths[1:], paths)
+
+ rootMnt := mm.FindMount(rootDirent)
+ for i := len(paths) - 1; i >= 0; i-- {
+ maxTraversals = 0
+ p := paths[i]
+ d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals)
+ if err != nil {
+ t.Fatalf("could not find path %q in mount manager: %v", p, err)
+ }
+
+ if err := mm.Unmount(ctx, d, false); err != nil {
+ t.Fatalf("could not unmount at %q: %v", p, err)
+ }
+ d.DecRef()
+
+ // Remove the path that has been unmounted and then check that the remaining
+ // mounts are still there.
+ allPaths = allPaths[:len(allPaths)-1]
+ submounts := mm.AllMountsUnder(rootMnt)
+ if err := mountPathsAre(rootDirent, submounts, allPaths...); err != nil {
+ t.Error(err)
}
}
- return nil
}
diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go
index 01eb4607e..a5c52d7ba 100644
--- a/pkg/sentry/fs/mounts.go
+++ b/pkg/sentry/fs/mounts.go
@@ -16,6 +16,7 @@ package fs
import (
"fmt"
+ "math"
"path"
"strings"
"sync"
@@ -35,6 +36,94 @@ import (
// sane.
const DefaultTraversalLimit = 10
+const invalidMountID = math.MaxUint64
+
+// Mount represents a mount in the file system. It holds the root dirent for the
+// mount. It also points back to the dirent or mount where it was mounted over,
+// so that it can be restored when unmounted. The chained mount can be either:
+// - Mount: when it's mounted on top of another mount point.
+// - Dirent: when it's mounted on top of a dirent. In this case the mount is
+// called an "undo" mount and only 'root' is set. All other fields are
+// either invalid or nil.
+//
+// +stateify savable
+type Mount struct {
+ // ID is a unique id for this mount. It may be invalidMountID if this is
+ // used to cache a dirent that was mounted over.
+ ID uint64
+
+ // ParentID is the parent's mount unique id. It may be invalidMountID if this
+ // is the root mount or if this is used to cache a dirent that was mounted
+ // over.
+ ParentID uint64
+
+ // root is the root Dirent of this mount. A reference on this Dirent must be
+ // held through the lifetime of the Mount which contains it.
+ root *Dirent
+
+ // previous is the existing dirent or mount that this object was mounted over.
+ // It's nil for the root mount and for the last entry in the chain (always an
+ // "undo" mount).
+ previous *Mount
+}
+
+// newMount creates a new mount, taking a reference on 'root'. Caller must
+// release the reference when it's done with the mount.
+func newMount(id, pid uint64, root *Dirent) *Mount {
+ root.IncRef()
+ return &Mount{
+ ID: id,
+ ParentID: pid,
+ root: root,
+ }
+}
+
+// newRootMount creates a new root mount (no parent), taking a reference on
+// 'root'. Caller must release the reference when it's done with the mount.
+func newRootMount(id uint64, root *Dirent) *Mount {
+ root.IncRef()
+ return &Mount{
+ ID: id,
+ ParentID: invalidMountID,
+ root: root,
+ }
+}
+
+// newUndoMount creates a new undo mount, taking a reference on 'd'. Caller must
+// release the reference when it's done with the mount.
+func newUndoMount(d *Dirent) *Mount {
+ d.IncRef()
+ return &Mount{
+ ID: invalidMountID,
+ ParentID: invalidMountID,
+ root: d,
+ }
+}
+
+// Root returns the root dirent of this mount. Callers must call DecRef on the
+// returned dirent.
+func (m *Mount) Root() *Dirent {
+ m.root.IncRef()
+ return m.root
+}
+
+// IsRoot returns true if the mount has no parent.
+func (m *Mount) IsRoot() bool {
+ return !m.IsUndo() && m.ParentID == invalidMountID
+}
+
+// IsUndo returns true if 'm' is an undo mount that should be used to restore
+// the original dirent during unmount only and it's not a valid mount.
+func (m *Mount) IsUndo() bool {
+ if m.ID == invalidMountID {
+ if m.ParentID != invalidMountID {
+ panic(fmt.Sprintf("Undo mount with valid parentID: %+v", m))
+ }
+ return true
+ }
+ return false
+}
+
// MountNamespace defines a collection of mounts.
//
// +stateify savable
@@ -55,13 +144,16 @@ type MountNamespace struct {
// mu protects mounts and mountID counter.
mu sync.Mutex `state:"nosave"`
- // mounts is a map of the last mounted Dirent -> stack of old Dirents
- // that were mounted over, with the oldest mounted Dirent first and
- // more recent mounted Dirents at the end of the slice.
- //
- // A reference to all Dirents in mounts (keys and values) must be held
- // to ensure the Dirents are recoverable when unmounting.
- mounts map[*Dirent][]*Dirent
+ // mounts is a map of mounted Dirent -> Mount object. There are three
+ // possible cases:
+ // - Dirent is mounted over a mount point: the stored Mount object will be
+ // the Mount for that mount point.
+ // - Dirent is mounted over a regular (non-mount point) Dirent: the stored
+ // Mount object will be an "undo" mount containing the mounted-over
+ // Dirent.
+ // - Dirent is the root mount: the stored Mount object will be a root mount
+ // containing the Dirent itself.
+ mounts map[*Dirent]*Mount
// mountID is the next mount id to assign.
mountID uint64
@@ -72,18 +164,18 @@ type MountNamespace struct {
func NewMountNamespace(ctx context.Context, root *Inode) (*MountNamespace, error) {
creds := auth.CredentialsFromContext(ctx)
- root.MountSource.mu.Lock()
- defer root.MountSource.mu.Unlock()
-
- // Set the root dirent and id on the root mount.
+ // Set the root dirent and id on the root mount. The reference returned from
+ // NewDirent will be donated to the MountNamespace constructed below.
d := NewDirent(root, "/")
- root.MountSource.root = d
- root.MountSource.id = 1
+
+ mnts := map[*Dirent]*Mount{
+ d: newRootMount(1, d),
+ }
return &MountNamespace{
userns: creds.UserNamespace,
root: d,
- mounts: make(map[*Dirent][]*Dirent),
+ mounts: mnts,
mountID: 2,
}, nil
}
@@ -110,10 +202,9 @@ func (mns *MountNamespace) FlushMountSourceRefs() {
func (mns *MountNamespace) flushMountSourceRefsLocked() {
// Flush mounts' MountSource references.
- for current, stack := range mns.mounts {
- current.Inode.MountSource.FlushDirentRefs()
- for _, prev := range stack {
- prev.Inode.MountSource.FlushDirentRefs()
+ for _, mp := range mns.mounts {
+ for ; mp != nil; mp = mp.previous {
+ mp.root.Inode.MountSource.FlushDirentRefs()
}
}
@@ -136,12 +227,11 @@ func (mns *MountNamespace) destroy() {
mns.flushMountSourceRefsLocked()
// Teardown mounts.
- for current, mp := range mns.mounts {
+ for _, mp := range mns.mounts {
// Drop the mount reference on all mounted dirents.
- for _, d := range mp {
- d.DecRef()
+ for ; mp != nil; mp = mp.previous {
+ mp.root.DecRef()
}
- current.DecRef()
}
mns.mounts = nil
@@ -208,46 +298,34 @@ func (mns *MountNamespace) withMountLocked(node *Dirent, fn func() error) error
}
// Mount mounts a `inode` over the subtree at `node`.
-func (mns *MountNamespace) Mount(ctx context.Context, node *Dirent, inode *Inode) error {
- return mns.withMountLocked(node, func() error {
- // replacement already has one reference taken; this is the mount
- // reference.
- replacement, err := node.mount(ctx, inode)
+func (mns *MountNamespace) Mount(ctx context.Context, mountPoint *Dirent, inode *Inode) error {
+ return mns.withMountLocked(mountPoint, func() error {
+ replacement, err := mountPoint.mount(ctx, inode)
if err != nil {
return err
}
-
- // Set child/parent dirent relationship.
- parentMountSource := node.Inode.MountSource
- childMountSource := inode.MountSource
- parentMountSource.mu.Lock()
- defer parentMountSource.mu.Unlock()
- childMountSource.mu.Lock()
- defer childMountSource.mu.Unlock()
-
- parentMountSource.children[childMountSource] = struct{}{}
- childMountSource.parent = parentMountSource
+ defer replacement.DecRef()
// Set the mount's root dirent and id.
- childMountSource.root = replacement
- childMountSource.id = mns.mountID
+ parentMnt := mns.findMountLocked(mountPoint)
+ childMnt := newMount(mns.mountID, parentMnt.ID, replacement)
mns.mountID++
- // Drop node from its dirent cache.
- node.dropExtendedReference()
+ // Drop mountPoint from its dirent cache.
+ mountPoint.dropExtendedReference()
- // If node is already a mount point, push node on the stack so it can
+ // If mountPoint is already a mount, chain its Mount as 'previous' so it can
// be recovered on unmount.
- if stack, ok := mns.mounts[node]; ok {
- mns.mounts[replacement] = append(stack, node)
- delete(mns.mounts, node)
+ if prev := mns.mounts[mountPoint]; prev != nil {
+ childMnt.previous = prev
+ mns.mounts[replacement] = childMnt
+ delete(mns.mounts, mountPoint)
return nil
}
// Was not already mounted, just add another mount point.
- // Take a reference on node so it can be recovered on unmount.
- node.IncRef()
- mns.mounts[replacement] = []*Dirent{node}
+ childMnt.previous = newUndoMount(mountPoint)
+ mns.mounts[replacement] = childMnt
return nil
})
}
@@ -268,13 +346,13 @@ func (mns *MountNamespace) Unmount(ctx context.Context, node *Dirent, detachOnly
// This takes locks to prevent further walks to Dirents in this mount
// under the assumption that `node` is the root of the mount.
return mns.withMountLocked(node, func() error {
- origs, ok := mns.mounts[node]
+ orig, ok := mns.mounts[node]
if !ok {
// node is not a mount point.
return syserror.EINVAL
}
- if len(origs) == 0 {
+ if orig.previous == nil {
panic("cannot unmount initial dirent")
}
@@ -298,44 +376,62 @@ func (mns *MountNamespace) Unmount(ctx context.Context, node *Dirent, detachOnly
}
}
- // Lock the parent MountSource first, if it exists. We are
- // holding mns.Lock, so the parent can not change out
- // from under us.
- parent := m.Parent()
- if parent != nil {
- parent.mu.Lock()
- defer parent.mu.Unlock()
+ prev := orig.previous
+ if err := node.unmount(ctx, prev.root); err != nil {
+ return err
}
- // Lock the mount that is being unmounted.
- m.mu.Lock()
- defer m.mu.Unlock()
-
- if m.parent != nil {
- // Sanity check.
- if _, ok := m.parent.children[m]; !ok {
- panic(fmt.Sprintf("mount %+v is not a child of parent %+v", m, m.parent))
+ if prev.previous == nil {
+ if !prev.IsUndo() {
+ panic(fmt.Sprintf("Last mount in the chain must be a undo mount: %+v", prev))
}
- delete(m.parent.children, m)
+ // Drop mount reference taken at the end of MountNamespace.Mount.
+ prev.root.DecRef()
+ } else {
+ mns.mounts[prev.root] = prev
}
+ delete(mns.mounts, node)
- original := origs[len(origs)-1]
- if err := node.unmount(ctx, original); err != nil {
- return err
- }
+ return nil
+ })
+}
+
+// FindMount returns the mount that 'd' belongs to. It walks the dirent back
+// until a mount is found. It may return nil if no mount was found.
+func (mns *MountNamespace) FindMount(d *Dirent) *Mount {
+ mns.mu.Lock()
+ defer mns.mu.Unlock()
+ renameMu.Lock()
+ defer renameMu.Unlock()
- switch {
- case len(origs) > 1:
- mns.mounts[original] = origs[:len(origs)-1]
- case len(origs) == 1:
- // Drop mount reference taken at the end of
- // MountNamespace.Mount.
- original.DecRef()
+ return mns.findMountLocked(d)
+}
+
+func (mns *MountNamespace) findMountLocked(d *Dirent) *Mount {
+ for {
+ if mnt := mns.mounts[d]; mnt != nil {
+ return mnt
+ }
+ if d.parent == nil {
+ return nil
}
+ d = d.parent
+ }
+}
- delete(mns.mounts, node)
- return nil
- })
+// AllMountsUnder returns a slice of all mounts under the parent, including
+// itself.
+func (mns *MountNamespace) AllMountsUnder(parent *Mount) []*Mount {
+ mns.mu.Lock()
+ defer mns.mu.Unlock()
+
+ var rv []*Mount
+ for _, mp := range mns.mounts {
+ if !mp.IsUndo() && mp.root.descendantOf(parent.root) {
+ rv = append(rv, mp)
+ }
+ }
+ return rv
}
// FindLink returns an Dirent from a given node, which may be a symlink.
diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go
index b5e01301f..1f7817947 100644
--- a/pkg/sentry/fs/proc/mounts.go
+++ b/pkg/sentry/fs/proc/mounts.go
@@ -27,7 +27,7 @@ import (
// forEachMountSource runs f for the process root mount and each mount that is a
// descendant of the root.
-func forEachMountSource(t *kernel.Task, fn func(string, *fs.MountSource)) {
+func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) {
var fsctx *kernel.FSContext
t.WithMuLocked(func(t *kernel.Task) {
fsctx = t.FSContext()
@@ -46,16 +46,14 @@ func forEachMountSource(t *kernel.Task, fn func(string, *fs.MountSource)) {
}
defer rootDir.DecRef()
- if rootDir.Inode == nil {
- panic(fmt.Sprintf("root dirent has nil inode: %+v", rootDir))
- }
- if rootDir.Inode.MountSource == nil {
- panic(fmt.Sprintf("root dirent has nil mount: %+v", rootDir))
+ mnt := t.MountNamespace().FindMount(rootDir)
+ if mnt == nil {
+ // Has it just been unmounted?
+ return
}
-
- ms := append(rootDir.Inode.MountSource.Submounts(), rootDir.Inode.MountSource)
+ ms := t.MountNamespace().AllMountsUnder(mnt)
sort.Slice(ms, func(i, j int) bool {
- return ms[i].ID() < ms[j].ID()
+ return ms[i].ID < ms[j].ID
})
for _, m := range ms {
mroot := m.Root()
@@ -89,26 +87,27 @@ func (mif *mountInfoFile) ReadSeqFileData(ctx context.Context, handle seqfile.Se
}
var buf bytes.Buffer
- forEachMountSource(mif.t, func(mountPath string, m *fs.MountSource) {
+ forEachMount(mif.t, func(mountPath string, m *fs.Mount) {
// Format:
// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
// (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
// (1) MountSource ID.
- fmt.Fprintf(&buf, "%d ", m.ID())
+ fmt.Fprintf(&buf, "%d ", m.ID)
// (2) Parent ID (or this ID if there is no parent).
- pID := m.ID()
- if p := m.Parent(); p != nil {
- pID = p.ID()
+ pID := m.ID
+ if !m.IsRoot() && !m.IsUndo() {
+ pID = m.ParentID
}
fmt.Fprintf(&buf, "%d ", pID)
// (3) Major:Minor device ID. We don't have a superblock, so we
// just use the root inode device number.
mroot := m.Root()
+ defer mroot.DecRef()
+
sa := mroot.Inode.StableAttr
- mroot.DecRef()
fmt.Fprintf(&buf, "%d:%d ", sa.DeviceFileMajor, sa.DeviceFileMinor)
// (4) Root: the pathname of the directory in the filesystem
@@ -122,14 +121,15 @@ func (mif *mountInfoFile) ReadSeqFileData(ctx context.Context, handle seqfile.Se
fmt.Fprintf(&buf, "%s ", mountPath)
// (6) Mount options.
+ flags := mroot.Inode.MountSource.Flags
opts := "rw"
- if m.Flags.ReadOnly {
+ if flags.ReadOnly {
opts = "ro"
}
- if m.Flags.NoAtime {
+ if flags.NoAtime {
opts += ",noatime"
}
- if m.Flags.NoExec {
+ if flags.NoExec {
opts += ",noexec"
}
fmt.Fprintf(&buf, "%s ", opts)
@@ -139,7 +139,7 @@ func (mif *mountInfoFile) ReadSeqFileData(ctx context.Context, handle seqfile.Se
fmt.Fprintf(&buf, "- ")
// (9) Filesystem type.
- fmt.Fprintf(&buf, "%s ", m.FilesystemType)
+ fmt.Fprintf(&buf, "%s ", mroot.Inode.MountSource.FilesystemType)
// (10) Mount source: filesystem-specific information or "none".
fmt.Fprintf(&buf, "none ")
@@ -171,7 +171,7 @@ func (mf *mountsFile) ReadSeqFileData(ctx context.Context, handle seqfile.SeqHan
}
var buf bytes.Buffer
- forEachMountSource(mf.t, func(mountPath string, m *fs.MountSource) {
+ forEachMount(mf.t, func(mountPath string, m *fs.Mount) {
// Format:
// <special device or remote filesystem> <mount point> <filesystem type> <mount options> <needs dump> <fsck order>
//
@@ -182,11 +182,15 @@ func (mf *mountsFile) ReadSeqFileData(ctx context.Context, handle seqfile.SeqHan
// Only ro/rw option is supported for now.
//
// The "needs dump"and fsck flags are always 0, which is allowed.
+ root := m.Root()
+ defer root.DecRef()
+
+ flags := root.Inode.MountSource.Flags
opts := "rw"
- if m.Flags.ReadOnly {
+ if flags.ReadOnly {
opts = "ro"
}
- fmt.Fprintf(&buf, "%s %s %s %s %d %d\n", "none", mountPath, m.FilesystemType, opts, 0, 0)
+ fmt.Fprintf(&buf, "%s %s %s %s %d %d\n", "none", mountPath, root.Inode.MountSource.FilesystemType, opts, 0, 0)
})
return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*mountsFile)(nil)}}, 0
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 1611dda2c..bc05b3491 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -685,27 +685,21 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error
// Iterate through all submounts and unmount them. We unmount lazily by
// setting detach=true, so we can unmount in any order.
- for _, m := range containerRootDirent.Inode.MountSource.Submounts() {
+ mnt := mns.FindMount(containerRootDirent)
+ for _, m := range mns.AllMountsUnder(mnt) {
root := m.Root()
defer root.DecRef()
// Do a best-effort unmount by flushing the refs and unmount
// with "detach only = true". Unmount returns EINVAL when the mount point
// doesn't exist, i.e. it has already been unmounted.
- log.Debugf("Unmounting container submount %q", root.BaseName())
- m.FlushDirentRefs()
+ log.Debugf("Unmounting container mount %q", root.BaseName())
+ root.Inode.MountSource.FlushDirentRefs()
if err := mns.Unmount(ctx, root, true /* detach only */); err != nil && err != syserror.EINVAL {
- return fmt.Errorf("unmounting container submount %q: %v", root.BaseName(), err)
+ return fmt.Errorf("unmounting container mount %q: %v", root.BaseName(), err)
}
}
- // Unmount the container root itself.
- log.Debugf("Unmounting container root %q", containerRoot)
- containerRootDirent.Inode.MountSource.FlushDirentRefs()
- if err := mns.Unmount(ctx, containerRootDirent, true /* detach only */); err != nil {
- return fmt.Errorf("unmounting container root mount %q: %v", containerRootDirent.BaseName(), err)
- }
-
// Get a reference to the parent directory and remove the root
// container directory.
maxTraversals = 0