summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl/overlay
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/overlay')
-rw-r--r--pkg/sentry/fsimpl/overlay/BUILD3
-rw-r--r--pkg/sentry/fsimpl/overlay/copy_up.go2
-rw-r--r--pkg/sentry/fsimpl/overlay/filesystem.go146
-rw-r--r--pkg/sentry/fsimpl/overlay/overlay.go242
-rw-r--r--pkg/sentry/fsimpl/overlay/save_restore.go27
5 files changed, 257 insertions, 163 deletions
diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD
index 1e11b0428..bf13bbbf4 100644
--- a/pkg/sentry/fsimpl/overlay/BUILD
+++ b/pkg/sentry/fsimpl/overlay/BUILD
@@ -23,6 +23,7 @@ go_library(
"fstree.go",
"overlay.go",
"regular_file.go",
+ "save_restore.go",
],
visibility = ["//pkg/sentry:internal"],
deps = [
@@ -30,6 +31,8 @@ go_library(
"//pkg/context",
"//pkg/fspath",
"//pkg/log",
+ "//pkg/refs",
+ "//pkg/refsvfs2",
"//pkg/sentry/arch",
"//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go
index 4506642ca..469f3a33d 100644
--- a/pkg/sentry/fsimpl/overlay/copy_up.go
+++ b/pkg/sentry/fsimpl/overlay/copy_up.go
@@ -409,7 +409,7 @@ func (d *dentry) copyUpDescendantsLocked(ctx context.Context, ds **[]*dentry) er
if dirent.Name == "." || dirent.Name == ".." {
continue
}
- child, err := d.fs.getChildLocked(ctx, d, dirent.Name, ds)
+ child, _, err := d.fs.getChildLocked(ctx, d, dirent.Name, ds)
if err != nil {
return err
}
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 78a01bbb7..bc07d72c0 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -121,63 +122,63 @@ func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*de
// * fs.renameMu must be locked.
// * d.dirMu must be locked.
// * !rp.Done().
-func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, error) {
+func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, lookupLayer, error) {
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, lookupLayerNone, syserror.ENOTDIR
}
if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
- return nil, err
+ return nil, lookupLayerNone, err
}
afterSymlink:
name := rp.Component()
if name == "." {
rp.Advance()
- return d, nil
+ return d, d.topLookupLayer(), nil
}
if name == ".." {
if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
- return nil, err
+ return nil, lookupLayerNone, err
} else if isRoot || d.parent == nil {
rp.Advance()
- return d, nil
+ return d, d.topLookupLayer(), nil
}
if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
- return nil, err
+ return nil, lookupLayerNone, err
}
rp.Advance()
- return d.parent, nil
+ return d.parent, d.parent.topLookupLayer(), nil
}
- child, err := fs.getChildLocked(ctx, d, name, ds)
+ child, topLookupLayer, err := fs.getChildLocked(ctx, d, name, ds)
if err != nil {
- return nil, err
+ return nil, topLookupLayer, err
}
if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
- return nil, err
+ return nil, lookupLayerNone, err
}
if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
target, err := child.readlink(ctx)
if err != nil {
- return nil, err
+ return nil, lookupLayerNone, err
}
if err := rp.HandleSymlink(target); err != nil {
- return nil, err
+ return nil, topLookupLayer, err
}
goto afterSymlink // don't check the current directory again
}
rp.Advance()
- return child, nil
+ return child, topLookupLayer, nil
}
// Preconditions:
// * fs.renameMu must be locked.
// * d.dirMu must be locked.
-func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
+func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, lookupLayer, error) {
if child, ok := parent.children[name]; ok {
- return child, nil
+ return child, child.topLookupLayer(), nil
}
- child, err := fs.lookupLocked(ctx, parent, name)
+ child, topLookupLayer, err := fs.lookupLocked(ctx, parent, name)
if err != nil {
- return nil, err
+ return nil, topLookupLayer, err
}
if parent.children == nil {
parent.children = make(map[string]*dentry)
@@ -185,16 +186,16 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s
parent.children[name] = child
// child's refcount is initially 0, so it may be dropped after traversal.
*ds = appendDentry(*ds, child)
- return child, nil
+ return child, topLookupLayer, nil
}
// Preconditions:
// * fs.renameMu must be locked.
// * parent.dirMu must be locked.
-func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name string) (*dentry, error) {
+func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name string) (*dentry, lookupLayer, error) {
childPath := fspath.Parse(name)
child := fs.newDentry()
- existsOnAnyLayer := false
+ topLookupLayer := lookupLayerNone
var lookupErr error
vfsObj := fs.vfsfs.VirtualFilesystem()
@@ -215,7 +216,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
defer childVD.DecRef(ctx)
mask := uint32(linux.STATX_TYPE)
- if !existsOnAnyLayer {
+ if topLookupLayer == lookupLayerNone {
// Mode, UID, GID, and (for non-directories) inode number come from
// the topmost layer on which the file exists.
mask |= linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
@@ -238,10 +239,13 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
if isWhiteout(&stat) {
// This is a whiteout, so it "doesn't exist" on this layer, and
// layers below this one are ignored.
+ if isUpper {
+ topLookupLayer = lookupLayerUpperWhiteout
+ }
return false
}
isDir := stat.Mode&linux.S_IFMT == linux.S_IFDIR
- if existsOnAnyLayer && !isDir {
+ if topLookupLayer != lookupLayerNone && !isDir {
// Directories are not merged with non-directory files from lower
// layers; instead, layers including and below the first
// non-directory file are ignored. (This file must be a directory
@@ -258,8 +262,12 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
} else {
child.lowerVDs = append(child.lowerVDs, childVD)
}
- if !existsOnAnyLayer {
- existsOnAnyLayer = true
+ if topLookupLayer == lookupLayerNone {
+ if isUpper {
+ topLookupLayer = lookupLayerUpper
+ } else {
+ topLookupLayer = lookupLayerLower
+ }
child.mode = uint32(stat.Mode)
child.uid = stat.UID
child.gid = stat.GID
@@ -288,11 +296,11 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
if lookupErr != nil {
child.destroyLocked(ctx)
- return nil, lookupErr
+ return nil, topLookupLayer, lookupErr
}
- if !existsOnAnyLayer {
+ if !topLookupLayer.existsInOverlay() {
child.destroyLocked(ctx)
- return nil, syserror.ENOENT
+ return nil, topLookupLayer, syserror.ENOENT
}
// Device and inode numbers were copied from the topmost layer above;
@@ -302,14 +310,20 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
child.devMinor = fs.dirDevMinor
child.ino = fs.newDirIno()
} else if !child.upperVD.Ok() {
+ childDevMinor, err := fs.getLowerDevMinor(child.devMajor, child.devMinor)
+ if err != nil {
+ ctx.Infof("overlay.filesystem.lookupLocked: failed to map lower layer device number (%d, %d) to an overlay-specific device number: %v", child.devMajor, child.devMinor, err)
+ child.destroyLocked(ctx)
+ return nil, topLookupLayer, err
+ }
child.devMajor = linux.UNNAMED_MAJOR
- child.devMinor = fs.lowerDevMinors[child.lowerVDs[0].Mount().Filesystem()]
+ child.devMinor = childDevMinor
}
parent.IncRef()
child.parent = parent
child.name = name
- return child, nil
+ return child, topLookupLayer, nil
}
// lookupLayerLocked is similar to lookupLocked, but only returns information
@@ -408,7 +422,7 @@ func (ll lookupLayer) existsInOverlay() bool {
func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
for !rp.Final() {
d.dirMu.Lock()
- next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
+ next, _, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
d.dirMu.Unlock()
if err != nil {
return nil, err
@@ -428,7 +442,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath,
d := rp.Start().Impl().(*dentry)
for !rp.Done() {
d.dirMu.Lock()
- next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
+ next, _, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
d.dirMu.Unlock()
if err != nil {
return nil, err
@@ -463,9 +477,6 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
if name == "." || name == ".." {
return syserror.EEXIST
}
- if !dir && rp.MustBeDir() {
- return syserror.ENOENT
- }
if parent.vfsd.IsDead() {
return syserror.ENOENT
}
@@ -489,6 +500,10 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
return syserror.EEXIST
}
+ if !dir && rp.MustBeDir() {
+ return syserror.ENOENT
+ }
+
// Ensure that the parent directory is copied-up so that we can create the
// new file in the upper layer.
if err := parent.copyUpLocked(ctx); err != nil {
@@ -791,9 +806,9 @@ afterTrailingSymlink:
}
// Determine whether or not we need to create a file.
parent.dirMu.Lock()
- child, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
+ child, topLookupLayer, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
if err == syserror.ENOENT && mayCreate {
- fd, err := fs.createAndOpenLocked(ctx, rp, parent, &opts, &ds)
+ fd, err := fs.createAndOpenLocked(ctx, rp, parent, &opts, &ds, topLookupLayer == lookupLayerUpperWhiteout)
parent.dirMu.Unlock()
return fd, err
}
@@ -893,7 +908,7 @@ func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *
// Preconditions:
// * parent.dirMu must be locked.
// * parent does not already contain a child named rp.Component().
-func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.ResolvingPath, parent *dentry, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
+func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.ResolvingPath, parent *dentry, opts *vfs.OpenOptions, ds **[]*dentry, haveUpperWhiteout bool) (*vfs.FileDescription, error) {
creds := rp.Credentials()
if err := parent.checkPermissions(creds, vfs.MayWrite); err != nil {
return nil, err
@@ -918,19 +933,12 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
Start: parent.upperVD,
Path: fspath.Parse(childName),
}
- // We don't know if a whiteout exists on the upper layer; speculatively
- // unlink it.
- //
- // TODO(gvisor.dev/issue/1199): Modify OpenAt => stepLocked so that we do
- // know whether a whiteout exists.
- var haveUpperWhiteout bool
- switch err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err {
- case nil:
- haveUpperWhiteout = true
- case syserror.ENOENT:
- haveUpperWhiteout = false
- default:
- return nil, err
+ // Unlink the whiteout if it exists.
+ if haveUpperWhiteout {
+ if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
+ log.Warningf("overlay.filesystem.createAndOpenLocked: failed to unlink whiteout: %v", err)
+ return nil, err
+ }
}
// Create the file on the upper layer, and get an FD representing it.
upperFD, err := vfsObj.OpenAt(ctx, fs.creds, &pop, &vfs.OpenOptions{
@@ -961,7 +969,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
}
// Re-lookup to get a dentry representing the new file, which is needed for
// the returned FD.
- child, err := fs.getChildLocked(ctx, parent, childName, ds)
+ child, _, err := fs.getChildLocked(ctx, parent, childName, ds)
if err != nil {
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) dentry lookup failure: %v", cleanupErr))
@@ -970,7 +978,10 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
}
return nil, err
}
- // Finally construct the overlay FD.
+ // Finally construct the overlay FD. Below this point, we don't perform
+ // cleanup (the file was created successfully even if we can no longer open
+ // it for some reason).
+ parent.dirents = nil
upperFlags := upperFD.StatusFlags()
fd := &regularFileFD{
copiedUp: true,
@@ -981,8 +992,6 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
upperFDOpts := upperFD.Options()
if err := fd.vfsfd.Init(fd, upperFlags, mnt, &child.vfsd, &upperFDOpts); err != nil {
upperFD.DecRef(ctx)
- // Don't bother with cleanup; the file was created successfully, we
- // just can't open it anymore for some reason.
return nil, err
}
parent.watches.Notify(ctx, childName, linux.IN_CREATE, 0 /* cookie */, vfs.PathEvent, false /* unlinked */)
@@ -1040,7 +1049,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
// directory, we need to check for write permission on it.
oldParent.dirMu.Lock()
defer oldParent.dirMu.Unlock()
- renamed, err := fs.getChildLocked(ctx, oldParent, oldName, &ds)
+ renamed, _, err := fs.getChildLocked(ctx, oldParent, oldName, &ds)
if err != nil {
return err
}
@@ -1072,20 +1081,17 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if newParent.vfsd.IsDead() {
return syserror.ENOENT
}
- replacedLayer, err := fs.lookupLayerLocked(ctx, newParent, newName)
- if err != nil {
- return err
- }
var (
- replaced *dentry
- replacedVFSD *vfs.Dentry
- whiteouts map[string]bool
+ replaced *dentry
+ replacedVFSD *vfs.Dentry
+ replacedLayer lookupLayer
+ whiteouts map[string]bool
)
- if replacedLayer.existsInOverlay() {
- replaced, err = fs.getChildLocked(ctx, newParent, newName, &ds)
- if err != nil {
- return err
- }
+ replaced, replacedLayer, err = fs.getChildLocked(ctx, newParent, newName, &ds)
+ if err != nil && err != syserror.ENOENT {
+ return err
+ }
+ if replaced != nil {
replacedVFSD = &replaced.vfsd
if replaced.isDir() {
if !renamed.isDir() {
@@ -1289,7 +1295,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
// Unlike UnlinkAt, we need a dentry representing the child directory being
// removed in order to verify that it's empty.
- child, err := fs.getChildLocked(ctx, parent, name, &ds)
+ child, _, err := fs.getChildLocked(ctx, parent, name, &ds)
if err != nil {
return err
}
@@ -1541,7 +1547,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
if parentMode&linux.S_ISVTX != 0 {
// If the parent's sticky bit is set, we need a child dentry to get
// its owner.
- child, err = fs.getChildLocked(ctx, parent, name, &ds)
+ child, _, err = fs.getChildLocked(ctx, parent, name, &ds)
if err != nil {
return err
}
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index 4c5de8d32..73130bc8d 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -22,6 +22,7 @@
// filesystem.renameMu
// dentry.dirMu
// dentry.copyMu
+// filesystem.devMu
// *** "memmap.Mappable locks" below this point
// dentry.mapsMu
// *** "memmap.Mappable locks taken by Translate" below this point
@@ -33,12 +34,14 @@
package overlay
import (
+ "fmt"
"strings"
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/refsvfs2"
fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
@@ -99,10 +102,15 @@ type filesystem struct {
// is immutable.
dirDevMinor uint32
- // lowerDevMinors maps lower layer filesystems to device minor numbers
- // assigned to non-directory files originating from that filesystem.
- // lowerDevMinors is immutable.
- lowerDevMinors map[*vfs.Filesystem]uint32
+ // lowerDevMinors maps device numbers from lower layer filesystems to
+ // device minor numbers assigned to non-directory files originating from
+ // that filesystem. (This remapping is necessary for lower layers because a
+ // file on a lower layer, and that same file on an overlay, are
+ // distinguishable because they will diverge after copy-up; this isn't true
+ // for non-directory files already on the upper layer.) lowerDevMinors is
+ // protected by devMu.
+ devMu sync.Mutex `state:"nosave"`
+ lowerDevMinors map[layerDevNumber]uint32
// renameMu synchronizes renaming with non-renaming operations in order to
// ensure consistent lock ordering between dentry.dirMu in different
@@ -114,78 +122,69 @@ type filesystem struct {
lastDirIno uint64
}
+// +stateify savable
+type layerDevNumber struct {
+ major uint32
+ minor uint32
+}
+
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
mopts := vfs.GenericParseMountOptions(opts.Data)
fsoptsRaw := opts.InternalData
- fsopts, haveFSOpts := fsoptsRaw.(FilesystemOptions)
- if fsoptsRaw != nil && !haveFSOpts {
+ fsopts, ok := fsoptsRaw.(FilesystemOptions)
+ if fsoptsRaw != nil && !ok {
ctx.Infof("overlay.FilesystemType.GetFilesystem: GetFilesystemOptions.InternalData has type %T, wanted overlay.FilesystemOptions or nil", fsoptsRaw)
return nil, nil, syserror.EINVAL
}
- if haveFSOpts {
- if len(fsopts.LowerRoots) == 0 {
- ctx.Infof("overlay.FilesystemType.GetFilesystem: LowerRoots must be non-empty")
+ vfsroot := vfs.RootFromContext(ctx)
+ if vfsroot.Ok() {
+ defer vfsroot.DecRef(ctx)
+ }
+
+ if upperPathname, ok := mopts["upperdir"]; ok {
+ if fsopts.UpperRoot.Ok() {
+ ctx.Infof("overlay.FilesystemType.GetFilesystem: both upperdir and FilesystemOptions.UpperRoot are specified")
return nil, nil, syserror.EINVAL
}
- if len(fsopts.LowerRoots) < 2 && !fsopts.UpperRoot.Ok() {
- ctx.Infof("overlay.FilesystemType.GetFilesystem: at least two LowerRoots are required when UpperRoot is unspecified")
+ delete(mopts, "upperdir")
+ // Linux overlayfs also requires a workdir when upperdir is
+ // specified; we don't, so silently ignore this option.
+ delete(mopts, "workdir")
+ upperPath := fspath.Parse(upperPathname)
+ if !upperPath.Absolute {
+ ctx.Infof("overlay.FilesystemType.GetFilesystem: upperdir %q must be absolute", upperPathname)
return nil, nil, syserror.EINVAL
}
- // We don't enforce a maximum number of lower layers when not
- // configured by applications; the sandbox owner can have an overlay
- // filesystem with any number of lower layers.
- } else {
- vfsroot := vfs.RootFromContext(ctx)
- defer vfsroot.DecRef(ctx)
- upperPathname, ok := mopts["upperdir"]
- if ok {
- delete(mopts, "upperdir")
- // Linux overlayfs also requires a workdir when upperdir is
- // specified; we don't, so silently ignore this option.
- delete(mopts, "workdir")
- upperPath := fspath.Parse(upperPathname)
- if !upperPath.Absolute {
- ctx.Infof("overlay.FilesystemType.GetFilesystem: upperdir %q must be absolute", upperPathname)
- return nil, nil, syserror.EINVAL
- }
- upperRoot, err := vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{
- Root: vfsroot,
- Start: vfsroot,
- Path: upperPath,
- FollowFinalSymlink: true,
- }, &vfs.GetDentryOptions{
- CheckSearchable: true,
- })
- if err != nil {
- ctx.Infof("overlay.FilesystemType.GetFilesystem: failed to resolve upperdir %q: %v", upperPathname, err)
- return nil, nil, err
- }
- defer upperRoot.DecRef(ctx)
- privateUpperRoot, err := clonePrivateMount(vfsObj, upperRoot, false /* forceReadOnly */)
- if err != nil {
- ctx.Infof("overlay.FilesystemType.GetFilesystem: failed to make private bind mount of upperdir %q: %v", upperPathname, err)
- return nil, nil, err
- }
- defer privateUpperRoot.DecRef(ctx)
- fsopts.UpperRoot = privateUpperRoot
+ upperRoot, err := vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{
+ Root: vfsroot,
+ Start: vfsroot,
+ Path: upperPath,
+ FollowFinalSymlink: true,
+ }, &vfs.GetDentryOptions{
+ CheckSearchable: true,
+ })
+ if err != nil {
+ ctx.Infof("overlay.FilesystemType.GetFilesystem: failed to resolve upperdir %q: %v", upperPathname, err)
+ return nil, nil, err
+ }
+ privateUpperRoot, err := clonePrivateMount(vfsObj, upperRoot, false /* forceReadOnly */)
+ upperRoot.DecRef(ctx)
+ if err != nil {
+ ctx.Infof("overlay.FilesystemType.GetFilesystem: failed to make private bind mount of upperdir %q: %v", upperPathname, err)
+ return nil, nil, err
}
- lowerPathnamesStr, ok := mopts["lowerdir"]
- if !ok {
- ctx.Infof("overlay.FilesystemType.GetFilesystem: missing required option lowerdir")
+ defer privateUpperRoot.DecRef(ctx)
+ fsopts.UpperRoot = privateUpperRoot
+ }
+
+ if lowerPathnamesStr, ok := mopts["lowerdir"]; ok {
+ if len(fsopts.LowerRoots) != 0 {
+ ctx.Infof("overlay.FilesystemType.GetFilesystem: both lowerdir and FilesystemOptions.LowerRoots are specified")
return nil, nil, syserror.EINVAL
}
delete(mopts, "lowerdir")
lowerPathnames := strings.Split(lowerPathnamesStr, ":")
- const maxLowerLayers = 500 // Linux: fs/overlay/super.c:OVL_MAX_STACK
- if len(lowerPathnames) < 2 && !fsopts.UpperRoot.Ok() {
- ctx.Infof("overlay.FilesystemType.GetFilesystem: at least two lowerdirs are required when upperdir is unspecified")
- return nil, nil, syserror.EINVAL
- }
- if len(lowerPathnames) > maxLowerLayers {
- ctx.Infof("overlay.FilesystemType.GetFilesystem: %d lowerdirs specified, maximum %d", len(lowerPathnames), maxLowerLayers)
- return nil, nil, syserror.EINVAL
- }
for _, lowerPathname := range lowerPathnames {
lowerPath := fspath.Parse(lowerPathname)
if !lowerPath.Absolute {
@@ -204,8 +203,8 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
ctx.Infof("overlay.FilesystemType.GetFilesystem: failed to resolve lowerdir %q: %v", lowerPathname, err)
return nil, nil, err
}
- defer lowerRoot.DecRef(ctx)
privateLowerRoot, err := clonePrivateMount(vfsObj, lowerRoot, true /* forceReadOnly */)
+ lowerRoot.DecRef(ctx)
if err != nil {
ctx.Infof("overlay.FilesystemType.GetFilesystem: failed to make private bind mount of lowerdir %q: %v", lowerPathname, err)
return nil, nil, err
@@ -214,31 +213,31 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fsopts.LowerRoots = append(fsopts.LowerRoots, privateLowerRoot)
}
}
+
if len(mopts) != 0 {
ctx.Infof("overlay.FilesystemType.GetFilesystem: unused options: %v", mopts)
return nil, nil, syserror.EINVAL
}
- // Allocate device numbers.
+ if len(fsopts.LowerRoots) == 0 {
+ ctx.Infof("overlay.FilesystemType.GetFilesystem: at least one lower layer is required")
+ return nil, nil, syserror.EINVAL
+ }
+ if len(fsopts.LowerRoots) < 2 && !fsopts.UpperRoot.Ok() {
+ ctx.Infof("overlay.FilesystemType.GetFilesystem: at least two lower layers are required when no upper layer is present")
+ return nil, nil, syserror.EINVAL
+ }
+ const maxLowerLayers = 500 // Linux: fs/overlay/super.c:OVL_MAX_STACK
+ if len(fsopts.LowerRoots) > maxLowerLayers {
+ ctx.Infof("overlay.FilesystemType.GetFilesystem: %d lower layers specified, maximum %d", len(fsopts.LowerRoots), maxLowerLayers)
+ return nil, nil, syserror.EINVAL
+ }
+
+ // Allocate dirDevMinor. lowerDevMinors are allocated dynamically.
dirDevMinor, err := vfsObj.GetAnonBlockDevMinor()
if err != nil {
return nil, nil, err
}
- lowerDevMinors := make(map[*vfs.Filesystem]uint32)
- for _, lowerRoot := range fsopts.LowerRoots {
- lowerFS := lowerRoot.Mount().Filesystem()
- if _, ok := lowerDevMinors[lowerFS]; !ok {
- devMinor, err := vfsObj.GetAnonBlockDevMinor()
- if err != nil {
- vfsObj.PutAnonBlockDevMinor(dirDevMinor)
- for _, lowerDevMinor := range lowerDevMinors {
- vfsObj.PutAnonBlockDevMinor(lowerDevMinor)
- }
- return nil, nil, err
- }
- lowerDevMinors[lowerFS] = devMinor
- }
- }
// Take extra references held by the filesystem.
if fsopts.UpperRoot.Ok() {
@@ -252,7 +251,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
opts: fsopts,
creds: creds.Fork(),
dirDevMinor: dirDevMinor,
- lowerDevMinors: lowerDevMinors,
+ lowerDevMinors: make(map[layerDevNumber]uint32),
}
fs.vfsfs.Init(vfsObj, &fstype, fs)
@@ -302,7 +301,14 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
root.ino = fs.newDirIno()
} else if !root.upperVD.Ok() {
root.devMajor = linux.UNNAMED_MAJOR
- root.devMinor = fs.lowerDevMinors[root.lowerVDs[0].Mount().Filesystem()]
+ rootDevMinor, err := fs.getLowerDevMinor(rootStat.DevMajor, rootStat.DevMinor)
+ if err != nil {
+ ctx.Infof("overlay.FilesystemType.GetFilesystem: failed to get device number for root: %v", err)
+ root.destroyLocked(ctx)
+ fs.vfsfs.DecRef(ctx)
+ return nil, nil, err
+ }
+ root.devMinor = rootDevMinor
root.ino = rootStat.Ino
} else {
root.devMajor = rootStat.DevMajor
@@ -375,6 +381,21 @@ func (fs *filesystem) newDirIno() uint64 {
return atomic.AddUint64(&fs.lastDirIno, 1)
}
+func (fs *filesystem) getLowerDevMinor(layerMajor, layerMinor uint32) (uint32, error) {
+ fs.devMu.Lock()
+ defer fs.devMu.Unlock()
+ orig := layerDevNumber{layerMajor, layerMinor}
+ if minor, ok := fs.lowerDevMinors[orig]; ok {
+ return minor, nil
+ }
+ minor, err := fs.vfsfs.VirtualFilesystem().GetAnonBlockDevMinor()
+ if err != nil {
+ return 0, err
+ }
+ fs.lowerDevMinors[orig] = minor
+ return minor, nil
+}
+
// dentry implements vfs.DentryImpl.
//
// +stateify savable
@@ -458,9 +479,9 @@ type dentry struct {
//
// - isMappable is non-zero iff wrappedMappable is non-nil. isMappable is
// accessed using atomic memory operations.
- mapsMu sync.Mutex
+ mapsMu sync.Mutex `state:"nosave"`
lowerMappings memmap.MappingSet
- dataMu sync.RWMutex
+ dataMu sync.RWMutex `state:"nosave"`
wrappedMappable memmap.Mappable
isMappable uint32
@@ -484,6 +505,7 @@ func (fs *filesystem) newDentry() *dentry {
}
d.lowerVDs = d.inlineLowerVDs[:0]
d.vfsd.Init(d)
+ refsvfs2.Register(d)
return d
}
@@ -491,17 +513,19 @@ func (fs *filesystem) newDentry() *dentry {
func (d *dentry) IncRef() {
// d.refs may be 0 if d.fs.renameMu is locked, which serializes against
// d.checkDropLocked().
- atomic.AddInt64(&d.refs, 1)
+ r := atomic.AddInt64(&d.refs, 1)
+ refsvfs2.LogIncRef(d, r)
}
// TryIncRef implements vfs.DentryImpl.TryIncRef.
func (d *dentry) TryIncRef() bool {
for {
- refs := atomic.LoadInt64(&d.refs)
- if refs <= 0 {
+ r := atomic.LoadInt64(&d.refs)
+ if r <= 0 {
return false
}
- if atomic.CompareAndSwapInt64(&d.refs, refs, refs+1) {
+ if atomic.CompareAndSwapInt64(&d.refs, r, r+1) {
+ refsvfs2.LogTryIncRef(d, r+1)
return true
}
}
@@ -509,15 +533,27 @@ func (d *dentry) TryIncRef() bool {
// DecRef implements vfs.DentryImpl.DecRef.
func (d *dentry) DecRef(ctx context.Context) {
- if refs := atomic.AddInt64(&d.refs, -1); refs == 0 {
+ r := atomic.AddInt64(&d.refs, -1)
+ refsvfs2.LogDecRef(d, r)
+ if r == 0 {
d.fs.renameMu.Lock()
d.checkDropLocked(ctx)
d.fs.renameMu.Unlock()
- } else if refs < 0 {
+ } else if r < 0 {
panic("overlay.dentry.DecRef() called without holding a reference")
}
}
+func (d *dentry) decRefLocked(ctx context.Context) {
+ r := atomic.AddInt64(&d.refs, -1)
+ refsvfs2.LogDecRef(d, r)
+ if r == 0 {
+ d.checkDropLocked(ctx)
+ } else if r < 0 {
+ panic("overlay.dentry.decRefLocked() called without holding a reference")
+ }
+}
+
// checkDropLocked should be called after d's reference count becomes 0 or it
// becomes deleted.
//
@@ -577,12 +613,27 @@ func (d *dentry) destroyLocked(ctx context.Context) {
d.parent.dirMu.Unlock()
// Drop the reference held by d on its parent without recursively
// locking d.fs.renameMu.
- if refs := atomic.AddInt64(&d.parent.refs, -1); refs == 0 {
- d.parent.checkDropLocked(ctx)
- } else if refs < 0 {
- panic("overlay.dentry.DecRef() called without holding a reference")
- }
+ d.parent.decRefLocked(ctx)
}
+ refsvfs2.Unregister(d)
+}
+
+// RefType implements refsvfs2.CheckedObject.Type.
+func (d *dentry) RefType() string {
+ return "overlay.dentry"
+}
+
+// LeakMessage implements refsvfs2.CheckedObject.LeakMessage.
+func (d *dentry) LeakMessage() string {
+ return fmt.Sprintf("[overlay.dentry %p] reference count of %d instead of -1", d, atomic.LoadInt64(&d.refs))
+}
+
+// LogRefs implements refsvfs2.CheckedObject.LogRefs.
+//
+// This should only be set to true for debugging purposes, as it can generate an
+// extremely large amount of output and drastically degrade performance.
+func (d *dentry) LogRefs() bool {
+ return false
}
// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
@@ -645,6 +696,13 @@ func (d *dentry) topLayer() vfs.VirtualDentry {
return vd
}
+func (d *dentry) topLookupLayer() lookupLayer {
+ if d.upperVD.Ok() {
+ return lookupLayerUpper
+ }
+ return lookupLayerLower
+}
+
func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid)))
}
diff --git a/pkg/sentry/fsimpl/overlay/save_restore.go b/pkg/sentry/fsimpl/overlay/save_restore.go
new file mode 100644
index 000000000..54809f16c
--- /dev/null
+++ b/pkg/sentry/fsimpl/overlay/save_restore.go
@@ -0,0 +1,27 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package overlay
+
+import (
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/refsvfs2"
+)
+
+func (d *dentry) afterLoad() {
+ if atomic.LoadInt64(&d.refs) != -1 {
+ refsvfs2.Register(d)
+ }
+}