diff options
author | gVisor bot <gvisor-bot@google.com> | 2019-06-02 06:44:55 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2019-06-02 06:44:55 +0000 |
commit | ceb0d792f328d1fc0692197d8856a43c3936a571 (patch) | |
tree | 83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/sentry/fs/file_overlay.go | |
parent | deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff) | |
parent | 216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff) |
Merge 216da0b7 (automated)
Diffstat (limited to 'pkg/sentry/fs/file_overlay.go')
-rw-r--r-- | pkg/sentry/fs/file_overlay.go | 505 |
1 files changed, 505 insertions, 0 deletions
diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go new file mode 100644 index 000000000..273de1e14 --- /dev/null +++ b/pkg/sentry/fs/file_overlay.go @@ -0,0 +1,505 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "sync" + + "gvisor.googlesource.com/gvisor/pkg/refs" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// overlayFile gets a handle to a file from the upper or lower filesystem +// in an overlay. The caller is responsible for calling File.DecRef on +// the returned file. +func overlayFile(ctx context.Context, inode *Inode, flags FileFlags) (*File, error) { + // Do a song and dance to eventually get to: + // + // File -> single reference + // Dirent -> single reference + // Inode -> multiple references + // + // So that File.DecRef() -> File.destroy -> Dirent.DecRef -> Dirent.destroy, + // and both the transitory File and Dirent can be GC'ed but the Inode + // remains. + + // Take another reference on the Inode. + inode.IncRef() + + // Start with a single reference on the Dirent. It inherits the reference + // we just took on the Inode above. + dirent := NewTransientDirent(inode) + + // Get a File. This will take another reference on the Dirent. + f, err := inode.GetFile(ctx, dirent, flags) + + // Drop the extra reference on the Dirent. Now there's only one reference + // on the dirent, either owned by f (if non-nil), or the Dirent is about + // to be destroyed (if GetFile failed). + dirent.DecRef() + + return f, err +} + +// overlayFileOperations implements FileOperations for a file in an overlay. +// +// +stateify savable +type overlayFileOperations struct { + // upperMu protects upper below. In contrast lower is stable. + upperMu sync.Mutex `state:"nosave"` + + // We can't share Files in upper and lower filesystems between all Files + // in an overlay because some file systems expect to get distinct handles + // that are not consistent with each other on open(2). + // + // So we lazily acquire an upper File when the overlayEntry acquires an + // upper Inode (it might have one from the start). This synchronizes with + // copy up. + // + // If upper is non-nil and this is not a directory, then lower is ignored. + // + // For directories, upper and lower are ignored because it is always + // necessary to acquire new directory handles so that the directory cursors + // of the upper and lower Files are not exhausted. + upper *File + lower *File + + // dirCursor is a directory cursor for a directory in an overlay. It is + // protected by File.mu of the owning file, which is held during + // Readdir and Seek calls. + dirCursor string + + // dirCacheMu protects dirCache. + dirCacheMu sync.RWMutex `state:"nosave"` + + // dirCache is cache of DentAttrs from upper and lower Inodes. + dirCache *SortedDentryMap +} + +// Release implements FileOperations.Release. +func (f *overlayFileOperations) Release() { + if f.upper != nil { + f.upper.DecRef() + } + if f.lower != nil { + f.lower.DecRef() + } +} + +// EventRegister implements FileOperations.EventRegister. +func (f *overlayFileOperations) EventRegister(we *waiter.Entry, mask waiter.EventMask) { + f.upperMu.Lock() + defer f.upperMu.Unlock() + if f.upper != nil { + f.upper.EventRegister(we, mask) + return + } + f.lower.EventRegister(we, mask) +} + +// EventUnregister implements FileOperations.Unregister. +func (f *overlayFileOperations) EventUnregister(we *waiter.Entry) { + f.upperMu.Lock() + defer f.upperMu.Unlock() + if f.upper != nil { + f.upper.EventUnregister(we) + return + } + f.lower.EventUnregister(we) +} + +// Readiness implements FileOperations.Readiness. +func (f *overlayFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { + f.upperMu.Lock() + defer f.upperMu.Unlock() + if f.upper != nil { + return f.upper.Readiness(mask) + } + return f.lower.Readiness(mask) +} + +// Seek implements FileOperations.Seek. +func (f *overlayFileOperations) Seek(ctx context.Context, file *File, whence SeekWhence, offset int64) (int64, error) { + f.upperMu.Lock() + defer f.upperMu.Unlock() + + var seekDir bool + var n int64 + if f.upper != nil { + var err error + if n, err = f.upper.FileOperations.Seek(ctx, file, whence, offset); err != nil { + return n, err + } + seekDir = IsDir(f.upper.Dirent.Inode.StableAttr) + } else { + var err error + if n, err = f.lower.FileOperations.Seek(ctx, file, whence, offset); err != nil { + return n, err + } + seekDir = IsDir(f.lower.Dirent.Inode.StableAttr) + } + + // If this was a seek on a directory, we must update the cursor. + if seekDir && whence == SeekSet && offset == 0 { + // Currently only seeking to 0 on a directory is supported. + // FIXME(b/33075855): Lift directory seeking limitations. + f.dirCursor = "" + } + return n, nil +} + +// Readdir implements FileOperations.Readdir. +func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, serializer DentrySerializer) (int64, error) { + root := RootFromContext(ctx) + if root != nil { + defer root.DecRef() + } + dirCtx := &DirCtx{ + Serializer: serializer, + DirCursor: &f.dirCursor, + } + + // If the directory dirent is frozen, then DirentReaddir will calculate + // the children based off the frozen dirent tree. There is no need to + // call readdir on the upper/lower layers. + if file.Dirent.frozen { + return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset()) + } + + // Otherwise proceed with usual overlay readdir. + o := file.Dirent.Inode.overlay + + // readdirEntries holds o.copyUpMu to ensure that copy-up does not + // occur while calculating the readir results. + // + // However, it is possible for a copy-up to occur after the call to + // readdirEntries, but before setting f.dirCache. This is OK, since + // copy-up only does not change the children in a way that would affect + // the children returned in dirCache. Copy-up only moves + // files/directories between layers in the overlay. + // + // It is also possible for Readdir to race with a Create operation + // (which may trigger a copy-up during it's execution). Depending on + // whether the Create happens before or after the readdirEntries call, + // the newly created file may or may not appear in the readdir results. + // But this can only be caused by a real race between readdir and + // create syscalls, so it's also OK. + dirCache, err := readdirEntries(ctx, o) + if err != nil { + return file.Offset(), err + } + + f.dirCacheMu.Lock() + f.dirCache = dirCache + f.dirCacheMu.Unlock() + + return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset()) +} + +// IterateDir implements DirIterator.IterateDir. +func (f *overlayFileOperations) IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error) { + f.dirCacheMu.RLock() + n, err := GenericReaddir(dirCtx, f.dirCache) + f.dirCacheMu.RUnlock() + return offset + n, err +} + +// onTop performs the given operation on the top-most available layer. +func (f *overlayFileOperations) onTop(ctx context.Context, file *File, fn func(*File, FileOperations) error) error { + file.Dirent.Inode.overlay.copyMu.RLock() + defer file.Dirent.Inode.overlay.copyMu.RUnlock() + + // Only lower layer is available. + if file.Dirent.Inode.overlay.upper == nil { + return fn(f.lower, f.lower.FileOperations) + } + + f.upperMu.Lock() + if f.upper == nil { + upper, err := overlayFile(ctx, file.Dirent.Inode.overlay.upper, file.Flags()) + if err != nil { + // Something very wrong; return a generic filesystem + // error to avoid propagating internals. + f.upperMu.Unlock() + return syserror.EIO + } + + // Save upper file. + f.upper = upper + } + f.upperMu.Unlock() + + return fn(f.upper, f.upper.FileOperations) +} + +// Read implements FileOperations.Read. +func (f *overlayFileOperations) Read(ctx context.Context, file *File, dst usermem.IOSequence, offset int64) (n int64, err error) { + err = f.onTop(ctx, file, func(file *File, ops FileOperations) error { + n, err = ops.Read(ctx, file, dst, offset) + return err // Will overwrite itself. + }) + return +} + +// WriteTo implements FileOperations.WriteTo. +func (f *overlayFileOperations) WriteTo(ctx context.Context, file *File, dst *File, opts SpliceOpts) (n int64, err error) { + err = f.onTop(ctx, file, func(file *File, ops FileOperations) error { + n, err = ops.WriteTo(ctx, file, dst, opts) + return err // Will overwrite itself. + }) + return +} + +// Write implements FileOperations.Write. +func (f *overlayFileOperations) Write(ctx context.Context, file *File, src usermem.IOSequence, offset int64) (int64, error) { + // f.upper must be non-nil. See inode_overlay.go:overlayGetFile, where the + // file is copied up and opened in the upper filesystem if FileFlags.Write. + // Write cannot be called if !FileFlags.Write, see FileOperations.Write. + return f.upper.FileOperations.Write(ctx, f.upper, src, offset) +} + +// ReadFrom implements FileOperations.ReadFrom. +func (f *overlayFileOperations) ReadFrom(ctx context.Context, file *File, src *File, opts SpliceOpts) (n int64, err error) { + // See above; f.upper must be non-nil. + return f.upper.FileOperations.ReadFrom(ctx, f.upper, src, opts) +} + +// Fsync implements FileOperations.Fsync. +func (f *overlayFileOperations) Fsync(ctx context.Context, file *File, start, end int64, syncType SyncType) (err error) { + f.upperMu.Lock() + if f.upper != nil { + err = f.upper.FileOperations.Fsync(ctx, f.upper, start, end, syncType) + } + f.upperMu.Unlock() + if err == nil && f.lower != nil { + // N.B. Fsync on the lower filesystem can cause writes of file + // attributes (i.e. access time) despite the fact that we must + // treat the lower filesystem as read-only. + // + // This matches the semantics of fsync(2) in Linux overlayfs. + err = f.lower.FileOperations.Fsync(ctx, f.lower, start, end, syncType) + } + return err +} + +// Flush implements FileOperations.Flush. +func (f *overlayFileOperations) Flush(ctx context.Context, file *File) (err error) { + // Flush whatever handles we have. + f.upperMu.Lock() + if f.upper != nil { + err = f.upper.FileOperations.Flush(ctx, f.upper) + } + f.upperMu.Unlock() + if err == nil && f.lower != nil { + err = f.lower.FileOperations.Flush(ctx, f.lower) + } + return err +} + +// ConfigureMMap implements FileOperations.ConfigureMMap. +func (*overlayFileOperations) ConfigureMMap(ctx context.Context, file *File, opts *memmap.MMapOpts) error { + o := file.Dirent.Inode.overlay + + o.copyMu.RLock() + defer o.copyMu.RUnlock() + + // If there is no lower inode, the overlay will never need to do a + // copy-up, and thus will never need to invalidate any mappings. We can + // call ConfigureMMap directly on the upper file. + if o.lower == nil { + f := file.FileOperations.(*overlayFileOperations) + if err := f.upper.ConfigureMMap(ctx, opts); err != nil { + return err + } + + // ConfigureMMap will set the MappableIdentity to the upper + // file and take a reference on it, but we must also hold a + // reference to the overlay file during the lifetime of the + // Mappable. If we do not do this, the overlay file can be + // Released before the upper file is Released, and we will be + // unable to traverse to the upper file during Save, thus + // preventing us from saving a proper inode mapping for the + // file. + file.IncRef() + id := &overlayMappingIdentity{ + id: opts.MappingIdentity, + overlayFile: file, + } + + // Swap out the old MappingIdentity for the wrapped one. + opts.MappingIdentity = id + return nil + } + + if !o.isMappableLocked() { + return syserror.ENODEV + } + + // FIXME(jamieliu): This is a copy/paste of fsutil.GenericConfigureMMap, + // which we can't use because the overlay implementation is in package fs, + // so depending on fs/fsutil would create a circular dependency. Move + // overlay to fs/overlay. + opts.Mappable = o + opts.MappingIdentity = file + file.IncRef() + return nil +} + +// UnstableAttr implements fs.FileOperations.UnstableAttr. +func (f *overlayFileOperations) UnstableAttr(ctx context.Context, file *File) (UnstableAttr, error) { + // Hot path. Avoid defers. + f.upperMu.Lock() + if f.upper != nil { + attr, err := f.upper.UnstableAttr(ctx) + f.upperMu.Unlock() + return attr, err + } + f.upperMu.Unlock() + + // It's possible that copy-up has occurred, but we haven't opened a upper + // file yet. If this is the case, just use the upper inode's UnstableAttr + // rather than opening a file. + o := file.Dirent.Inode.overlay + o.copyMu.RLock() + if o.upper != nil { + attr, err := o.upper.UnstableAttr(ctx) + o.copyMu.RUnlock() + return attr, err + } + o.copyMu.RUnlock() + + return f.lower.UnstableAttr(ctx) +} + +// Ioctl implements fs.FileOperations.Ioctl and always returns ENOTTY. +func (*overlayFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + return 0, syserror.ENOTTY +} + +// readdirEntries returns a sorted map of directory entries from the +// upper and/or lower filesystem. +func readdirEntries(ctx context.Context, o *overlayEntry) (*SortedDentryMap, error) { + o.copyMu.RLock() + defer o.copyMu.RUnlock() + + // Assert that there is at least one upper or lower entry. + if o.upper == nil && o.lower == nil { + panic("invalid overlayEntry, needs at least one Inode") + } + entries := make(map[string]DentAttr) + + // Try the upper filesystem first. + if o.upper != nil { + var err error + entries, err = readdirOne(ctx, NewTransientDirent(o.upper)) + if err != nil { + return nil, err + } + } + + // Try the lower filesystem next. + if o.lower != nil { + lowerEntries, err := readdirOne(ctx, NewTransientDirent(o.lower)) + if err != nil { + return nil, err + } + for name, entry := range lowerEntries { + // Skip this name if it is a negative entry in the + // upper or there exists a whiteout for it. + if o.upper != nil { + if overlayHasWhiteout(o.upper, name) { + continue + } + } + // Prefer the entries from the upper filesystem + // when names overlap. + if _, ok := entries[name]; !ok { + entries[name] = entry + } + } + } + + // Sort and return the entries. + return NewSortedDentryMap(entries), nil +} + +// readdirOne reads all of the directory entries from d. +func readdirOne(ctx context.Context, d *Dirent) (map[string]DentAttr, error) { + dir, err := d.Inode.GetFile(ctx, d, FileFlags{Read: true}) + if err != nil { + return nil, err + } + defer dir.DecRef() + + // Use a stub serializer to read the entries into memory. + stubSerializer := &CollectEntriesSerializer{} + if err := dir.Readdir(ctx, stubSerializer); err != nil { + return nil, err + } + // The "." and ".." entries are from the overlay Inode's Dirent, not the stub. + delete(stubSerializer.Entries, ".") + delete(stubSerializer.Entries, "..") + return stubSerializer.Entries, nil +} + +// overlayMappingIdentity wraps a MappingIdentity, and also holds a reference +// on a file during its lifetime. +// +// +stateify savable +type overlayMappingIdentity struct { + refs.AtomicRefCount + id memmap.MappingIdentity + overlayFile *File +} + +// DecRef implements AtomicRefCount.DecRef. +func (omi *overlayMappingIdentity) DecRef() { + omi.AtomicRefCount.DecRefWithDestructor(func() { + omi.overlayFile.DecRef() + omi.id.DecRef() + }) +} + +// DeviceID implements MappingIdentity.DeviceID using the device id from the +// overlayFile. +func (omi *overlayMappingIdentity) DeviceID() uint64 { + return omi.overlayFile.Dirent.Inode.StableAttr.DeviceID +} + +// DeviceID implements MappingIdentity.InodeID using the inode id from the +// overlayFile. +func (omi *overlayMappingIdentity) InodeID() uint64 { + return omi.overlayFile.Dirent.Inode.StableAttr.InodeID +} + +// MappedName implements MappingIdentity.MappedName. +func (omi *overlayMappingIdentity) MappedName(ctx context.Context) string { + root := RootFromContext(ctx) + if root != nil { + defer root.DecRef() + } + name, _ := omi.overlayFile.Dirent.FullName(root) + return name +} + +// Msync implements MappingIdentity.Msync. +func (omi *overlayMappingIdentity) Msync(ctx context.Context, mr memmap.MappableRange) error { + return omi.id.Msync(ctx, mr) +} |