From 0b3bfe2ea30d491a6533f8ee74eb6e3cea707f06 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Mon, 27 Aug 2018 14:25:21 -0700 Subject: fs: Fix remote-revalidate cache policy. When revalidating a Dirent, if the inode id is the same, then we don't need to throw away the entire Dirent. We can just update the unstable attributes in place. If the inode id has changed, then the remote file has been deleted or moved, and we have no choice but to throw away the dirent we have a look up another. In this case, we may still end up losing a mounted dirent that is a child of the revalidated dirent. However, that seems appropriate here because the entire mount point has been pulled out from underneath us. Because gVisor's overlay is at the Inode level rather than the Dirent level, we must pass the parent Inode and name along with the Inode that is being revalidated. PiperOrigin-RevId: 210431270 Change-Id: I705caef9c68900234972d5aac4ae3a78c61c7d42 --- pkg/sentry/fs/attr.go | 19 -------- pkg/sentry/fs/dirent.go | 2 +- pkg/sentry/fs/fsutil/inode_cached.go | 41 ++++++++++++++++ pkg/sentry/fs/gofer/cache_policy.go | 64 ++++++++++++++++++++---- pkg/sentry/fs/gofer/gofer_test.go | 95 ++++++++++++++++++++++++++---------- pkg/sentry/fs/gofer/session.go | 6 +-- pkg/sentry/fs/mock.go | 2 +- pkg/sentry/fs/mount.go | 13 +++-- pkg/sentry/fs/mount_overlay.go | 20 +++++--- pkg/sentry/fs/tty/fs.go | 2 +- runsc/container/container_test.go | 18 +++---- 11 files changed, 201 insertions(+), 81 deletions(-) diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go index 4178f18b2..091f4ac63 100644 --- a/pkg/sentry/fs/attr.go +++ b/pkg/sentry/fs/attr.go @@ -213,25 +213,6 @@ func (a AttrMask) Empty() bool { return a == AttrMask{} } -// Union returns an AttrMask containing the inclusive disjunction of fields in a and b. -func (a AttrMask) Union(b AttrMask) AttrMask { - return AttrMask{ - Type: a.Type || b.Type, - DeviceID: a.DeviceID || b.DeviceID, - InodeID: a.InodeID || b.InodeID, - BlockSize: a.BlockSize || b.BlockSize, - Size: a.Size || b.Size, - Usage: a.Usage || b.Usage, - Perms: a.Perms || b.Perms, - UID: a.UID || b.UID, - GID: a.GID || b.GID, - AccessTime: a.AccessTime || b.AccessTime, - ModificationTime: a.ModificationTime || b.ModificationTime, - StatusChangeTime: a.StatusChangeTime || b.StatusChangeTime, - Links: a.Links || b.Links, - } -} - // PermMask are file access permissions. // // +stateify savable diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index c1dfa0de7..5587582b5 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -499,7 +499,7 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl // // We never allow the file system to revalidate mounts, that could cause them // to unexpectedly drop out before umount. - if cd.mounted || !cd.Inode.MountSource.Revalidate(ctx, cd.Inode) { + if cd.mounted || !cd.Inode.MountSource.Revalidate(ctx, name, d.Inode, cd.Inode) { // Good to go. This is the fast-path. return cd, nil } diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 0a320e2d8..6777c8bf7 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -427,6 +427,47 @@ func (c *CachingInodeOperations) touchStatusChangeTimeLocked(ctx context.Context c.dirtyAttr.StatusChangeTime = true } +// UpdateUnstable updates the cached unstable attributes. Only non-dirty +// attributes are updated. +func (c *CachingInodeOperations) UpdateUnstable(attr fs.UnstableAttr) { + // All attributes are protected by attrMu. + c.attrMu.Lock() + + if !c.dirtyAttr.Usage { + c.attr.Usage = attr.Usage + } + if !c.dirtyAttr.Perms { + c.attr.Perms = attr.Perms + } + if !c.dirtyAttr.UID { + c.attr.Owner.UID = attr.Owner.UID + } + if !c.dirtyAttr.GID { + c.attr.Owner.GID = attr.Owner.GID + } + if !c.dirtyAttr.AccessTime { + c.attr.AccessTime = attr.AccessTime + } + if !c.dirtyAttr.ModificationTime { + c.attr.ModificationTime = attr.ModificationTime + } + if !c.dirtyAttr.StatusChangeTime { + c.attr.StatusChangeTime = attr.StatusChangeTime + } + if !c.dirtyAttr.Links { + c.attr.Links = attr.Links + } + + // Size requires holding attrMu and dataMu. + c.dataMu.Lock() + if !c.dirtyAttr.Size { + c.attr.Size = attr.Size + } + c.dataMu.Unlock() + + c.attrMu.Unlock() +} + // Read reads from frames and otherwise directly from the backing file // into dst starting at offset until dst is full, EOF is reached, or an // error is encountered. diff --git a/pkg/sentry/fs/gofer/cache_policy.go b/pkg/sentry/fs/gofer/cache_policy.go index fa8abf51c..98f43c578 100644 --- a/pkg/sentry/fs/gofer/cache_policy.go +++ b/pkg/sentry/fs/gofer/cache_policy.go @@ -17,6 +17,7 @@ package gofer import ( "fmt" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" ) @@ -108,25 +109,68 @@ func (cp cachePolicy) writeThrough(inode *fs.Inode) bool { return cp == cacheNone || cp == cacheAllWritethrough } -// revalidateDirent indicates that a dirent should be revalidated after a -// lookup, because the looked up version may be stale. -func (cp cachePolicy) revalidateDirent() bool { +// revalidate revalidates the child Inode if the cache policy allows it. +// +// Depending on the cache policy, revalidate will walk from the parent to the +// child inode, and if any unstable attributes have changed, will update the +// cached attributes on the child inode. If the walk fails, or the returned +// inode id is different from the one being revalidated, then the entire Dirent +// must be reloaded. +func (cp cachePolicy) revalidate(ctx context.Context, name string, parent, child *fs.Inode) bool { if cp == cacheAll || cp == cacheAllWritethrough { return false } - // TODO: The cacheRemoteRevalidating policy should only - // return true if the remote file's attributes have changed. - return true + if cp == cacheNone { + return true + } + + childIops, ok := child.InodeOperations.(*inodeOperations) + if !ok { + panic(fmt.Sprintf("revalidating inode operations of unknown type %T", child.InodeOperations)) + } + parentIops, ok := parent.InodeOperations.(*inodeOperations) + if !ok { + panic(fmt.Sprintf("revalidating inode operations with parent of unknown type %T", parent.InodeOperations)) + } + + // Walk from parent to child again. + // + // TODO: If we have a directory FD in the parent + // inodeOperations, then we can use fstatat(2) to get the inode + // attributes instead of making this RPC. + qids, _, mask, attr, err := parentIops.fileState.file.walkGetAttr(ctx, []string{name}) + if err != nil { + // Can't look up the name. Trigger reload. + return true + } + + // If the Path has changed, then we are not looking at the file file. + // We must reload. + if qids[0].Path != childIops.fileState.key.Inode { + return true + } + + // If we are not caching unstable attrs, then there is nothing to + // update on this inode. + if !cp.cacheUAttrs(child) { + return false + } + + // Update the inode's cached unstable attrs. + s := childIops.session() + childIops.cachingInodeOps.UpdateUnstable(unstable(ctx, mask, attr, s.mounter, s.client)) + + return false } -// keepDirent indicates that dirents should be kept pinned in the dirent tree -// even if there are no application references on the file. -func (cp cachePolicy) keepDirent(inode *fs.Inode) bool { +// keep indicates that dirents should be kept pinned in the dirent tree even if +// there are no application references on the file. +func (cp cachePolicy) keep(d *fs.Dirent) bool { if cp == cacheNone { return false } - sattr := inode.StableAttr + sattr := d.Inode.StableAttr // NOTE: Only cache files, directories, and symlinks. return fs.IsFile(sattr) || fs.IsDir(sattr) || fs.IsSymlink(sattr) } diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 45fdaacfd..c8d7bd773 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -151,41 +151,60 @@ func TestLookup(t *testing.T) { func TestRevalidation(t *testing.T) { tests := []struct { - cachePolicy cachePolicy - preModificationWantReval bool - postModificationWantReval bool + cachePolicy cachePolicy + + // Whether dirent should be reloaded before any modifications. + preModificationWantReload bool + + // Whether dirent should be reloaded after updating an unstable + // attribute on the remote fs. + postModificationWantReload bool + + // Whether dirent unstable attributes should be updated after + // updating an attribute on the remote fs. + postModificationWantUpdatedAttrs bool + + // Whether dirent should be reloaded after the remote has + // removed the file. + postRemovalWantReload bool }{ { // Policy cacheNone causes Revalidate to always return // true. - cachePolicy: cacheNone, - preModificationWantReval: true, - postModificationWantReval: true, + cachePolicy: cacheNone, + preModificationWantReload: true, + postModificationWantReload: true, + postModificationWantUpdatedAttrs: true, + postRemovalWantReload: true, }, { // Policy cacheAll causes Revalidate to always return // false. - cachePolicy: cacheAll, - preModificationWantReval: false, - postModificationWantReval: false, + cachePolicy: cacheAll, + preModificationWantReload: false, + postModificationWantReload: false, + postModificationWantUpdatedAttrs: false, + postRemovalWantReload: false, }, { // Policy cacheAllWritethrough causes Revalidate to // always return false. - cachePolicy: cacheAllWritethrough, - preModificationWantReval: false, - postModificationWantReval: false, + cachePolicy: cacheAllWritethrough, + preModificationWantReload: false, + postModificationWantReload: false, + postModificationWantUpdatedAttrs: false, + postRemovalWantReload: false, }, { // Policy cacheRemoteRevalidating causes Revalidate to - // always return true. - // - // TODO: The cacheRemoteRevalidating - // policy should only return true if the remote file's - // attributes have changed. - cachePolicy: cacheRemoteRevalidating, - preModificationWantReval: true, - postModificationWantReval: true, + // return update cached unstable attrs, and returns + // true only when the remote inode itself has been + // removed or replaced. + cachePolicy: cacheRemoteRevalidating, + preModificationWantReload: false, + postModificationWantReload: false, + postModificationWantUpdatedAttrs: true, + postRemovalWantReload: true, }, } @@ -227,15 +246,17 @@ func TestRevalidation(t *testing.T) { if err != nil { t.Fatalf("Lookup(%q) failed: %v", name, err) } - if test.preModificationWantReval && dirent == newDirent { + if test.preModificationWantReload && dirent == newDirent { t.Errorf("Lookup(%q) with cachePolicy=%s got old dirent %v, wanted a new dirent", name, test.cachePolicy, dirent) } - if !test.preModificationWantReval && dirent != newDirent { + if !test.preModificationWantReload && dirent != newDirent { t.Errorf("Lookup(%q) with cachePolicy=%s got new dirent %v, wanted old dirent %v", name, test.cachePolicy, newDirent, dirent) } // Modify the underlying mocked file's modification time. - file.GetAttrMock.Attr.MTimeSeconds = uint64(time.Now().Unix()) + nowSeconds := time.Now().Unix() + rootFile.WalkGetAttrMock.Attr.MTimeSeconds = uint64(nowSeconds) + file.GetAttrMock.Attr.MTimeSeconds = uint64(nowSeconds) // Walk again. Depending on the cache policy, we may get a new // dirent. @@ -243,12 +264,36 @@ func TestRevalidation(t *testing.T) { if err != nil { t.Fatalf("Lookup(%q) failed: %v", name, err) } - if test.postModificationWantReval && dirent == newDirent { + if test.postModificationWantReload && dirent == newDirent { t.Errorf("Lookup(%q) with cachePolicy=%s got old dirent %v, wanted a new dirent", name, test.cachePolicy, dirent) } - if !test.postModificationWantReval && dirent != newDirent { + if !test.postModificationWantReload && dirent != newDirent { t.Errorf("Lookup(%q) with cachePolicy=%s got new dirent %v, wanted old dirent %v", name, test.cachePolicy, newDirent, dirent) } + uattrs, err := newDirent.Inode.UnstableAttr(ctx) + if err != nil { + t.Fatalf("Error getting unstable attrs: %v", err) + } + gotModTimeSeconds := uattrs.ModificationTime.Seconds() + if test.postModificationWantUpdatedAttrs && gotModTimeSeconds != nowSeconds { + t.Fatalf("Lookup(%q) with cachePolicy=%s got new modification time %v, wanted %v", name, test.cachePolicy, gotModTimeSeconds, nowSeconds) + } + + // Make WalkGetAttr return ENOENT. This simulates + // removing the file from the remote fs. + rootFile.WalkGetAttrMock = p9test.WalkGetAttrMock{ + Err: syscall.ENOENT, + } + + // Walk again. Depending on the cache policy, we may + // get ENOENT. + newDirent, err = rootDir.Walk(ctx, rootDir, name) + if test.postRemovalWantReload && err == nil { + t.Errorf("Lookup(%q) with cachePolicy=%s got nil error, wanted ENOENT", name, test.cachePolicy) + } + if !test.postRemovalWantReload && (err != nil || dirent != newDirent) { + t.Errorf("Lookup(%q) with cachePolicy=%s got new dirent %v and error %v, wanted old dirent %v and nil error", name, test.cachePolicy, newDirent, err, dirent) + } }) } } diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index eeb9087e9..49d27ee88 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -146,13 +146,13 @@ func (s *session) Destroy() { } // Revalidate implements MountSource.Revalidate. -func (s *session) Revalidate(ctx context.Context, i *fs.Inode) bool { - return s.cachePolicy.revalidateDirent() +func (s *session) Revalidate(ctx context.Context, name string, parent, child *fs.Inode) bool { + return s.cachePolicy.revalidate(ctx, name, parent, child) } // Keep implements MountSource.Keep. func (s *session) Keep(d *fs.Dirent) bool { - return s.cachePolicy.keepDirent(d.Inode) + return s.cachePolicy.keep(d) } // ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings. diff --git a/pkg/sentry/fs/mock.go b/pkg/sentry/fs/mock.go index 89a0103ba..846b6e8bb 100644 --- a/pkg/sentry/fs/mock.go +++ b/pkg/sentry/fs/mock.go @@ -68,7 +68,7 @@ func NewMockMountSource(cache *DirentCache) *MountSource { } // Revalidate implements fs.MountSourceOperations.Revalidate. -func (n *MockMountSourceOps) Revalidate(context.Context, *Inode) bool { +func (n *MockMountSourceOps) Revalidate(context.Context, string, *Inode, *Inode) bool { return n.revalidate } diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go index 455f5b35c..8345876fc 100644 --- a/pkg/sentry/fs/mount.go +++ b/pkg/sentry/fs/mount.go @@ -27,10 +27,13 @@ import ( // DirentOperations provide file systems greater control over how long a Dirent stays pinned // in core. Implementations must not take Dirent.mu. type DirentOperations interface { - // Revalidate returns true if the Inode is stale and its - // InodeOperations needs to be reloaded. Revalidate will never be - // called on a Inode that is mounted. - Revalidate(ctx context.Context, inode *Inode) bool + // Revalidate is called during lookup each time we encounter a Dirent + // in the cache. Implementations may update stale properties of the + // child Inode. If Revalidate returns true, then the entire Inode will + // be reloaded. + // + // Revalidate will never be called on a Inode that is mounted. + Revalidate(ctx context.Context, name string, parent, child *Inode) bool // Keep returns true if the Dirent should be kept in memory for as long // as possible beyond any active references. @@ -281,7 +284,7 @@ type SimpleMountSourceOperations struct { } // Revalidate implements MountSourceOperations.Revalidate. -func (smo *SimpleMountSourceOperations) Revalidate(context.Context, *Inode) bool { +func (smo *SimpleMountSourceOperations) Revalidate(context.Context, string, *Inode, *Inode) bool { return smo.revalidate } diff --git a/pkg/sentry/fs/mount_overlay.go b/pkg/sentry/fs/mount_overlay.go index 9fa87c10f..dbc608c7e 100644 --- a/pkg/sentry/fs/mount_overlay.go +++ b/pkg/sentry/fs/mount_overlay.go @@ -41,23 +41,29 @@ func newOverlayMountSource(upper, lower *MountSource, flags MountSourceFlags) *M // delegating to the upper filesystem's Revalidate method. We cannot reload // files from the lower filesystem, so we panic if the lower filesystem's // Revalidate method returns true. -func (o *overlayMountSourceOperations) Revalidate(ctx context.Context, inode *Inode) bool { - if inode.overlay == nil { +func (o *overlayMountSourceOperations) Revalidate(ctx context.Context, name string, parent, child *Inode) bool { + if child.overlay == nil { panic("overlay cannot revalidate inode that is not an overlay") } - // Should we bother checking this, or just ignore? - if inode.overlay.lower != nil && o.lower.Revalidate(ctx, inode.overlay.lower) { + // Revalidate is never called on a mount point, so parent and child + // must be from the same mount, and thus must both be overlay inodes. + if parent.overlay == nil { + panic("trying to revalidate an overlay inode but the parent is not an overlay") + } + + // We can't revalidate from the lower filesystem. + if child.overlay.lower != nil && o.lower.Revalidate(ctx, name, parent.overlay.lower, child.overlay.lower) { panic("an overlay cannot revalidate file objects from the lower fs") } - if inode.overlay.upper == nil { - // Nothing to revalidate. + // Do we have anything to revalidate? + if child.overlay.upper == nil { return false } // Does the upper require revalidation? - return o.upper.Revalidate(ctx, inode.overlay.upper) + return o.upper.Revalidate(ctx, name, parent.overlay.upper, child.overlay.upper) } // Keep implements MountSourceOperations by delegating to the upper diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go index fe7da05b5..d9f8f02f3 100644 --- a/pkg/sentry/fs/tty/fs.go +++ b/pkg/sentry/fs/tty/fs.go @@ -82,7 +82,7 @@ type superOperations struct{} // Slave entries are dropped from dir when their master is closed, so an // existing slave Dirent in the tree is not sufficient to guarantee that it // still exists on the filesystem. -func (superOperations) Revalidate(context.Context, *fs.Inode) bool { +func (superOperations) Revalidate(context.Context, string, *fs.Inode, *fs.Inode) bool { return true } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 25aaf3f86..4ce3afc91 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -134,7 +134,13 @@ func waitForFile(f *os.File) error { } return nil } - return testutil.Poll(op, 5*time.Second) + + timeout := 5 * time.Second + if testutil.RaceEnabled { + // Race makes slow things even slow, so bump the timeout. + timeout = 3 * timeout + } + return testutil.Poll(op, timeout) } // readOutputNum reads a file at given filepath and returns the int at the @@ -213,10 +219,8 @@ const ( nonExclusiveFS ) -// TODO: nonExclusiveFS was removed because it causes timeout -// with --race. Put it back when bug is fixed. -var all = []configOption{overlay, kvm} -var noOverlay = []configOption{kvm} +var noOverlay = []configOption{kvm, nonExclusiveFS} +var all = append(noOverlay, overlay) // configs generates different configurations to run tests. func configs(opts ...configOption) []*boot.Config { @@ -1572,10 +1576,6 @@ func TestContainerVolumeContentsShared(t *testing.T) { // the filesystem. spec := testutil.NewSpecWithArgs("sleep", "1000") - // TODO: $TEST_TMPDIR mount is mistakenly marked as RO after - // revalidation. Remove when it's fixed. - spec.Root.Readonly = false - dir, err := ioutil.TempDir(testutil.TmpDir(), "root-fs-test") if err != nil { t.Fatalf("TempDir failed: %v", err) -- cgit v1.2.3