summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fs/file_overlay.go
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2019-06-02 06:44:55 +0000
committergVisor bot <gvisor-bot@google.com>2019-06-02 06:44:55 +0000
commitceb0d792f328d1fc0692197d8856a43c3936a571 (patch)
tree83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/sentry/fs/file_overlay.go
parentdeb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff)
parent216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff)
Merge 216da0b7 (automated)
Diffstat (limited to 'pkg/sentry/fs/file_overlay.go')
-rw-r--r--pkg/sentry/fs/file_overlay.go505
1 files changed, 505 insertions, 0 deletions
diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go
new file mode 100644
index 000000000..273de1e14
--- /dev/null
+++ b/pkg/sentry/fs/file_overlay.go
@@ -0,0 +1,505 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fs
+
+import (
+ "sync"
+
+ "gvisor.googlesource.com/gvisor/pkg/refs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// overlayFile gets a handle to a file from the upper or lower filesystem
+// in an overlay. The caller is responsible for calling File.DecRef on
+// the returned file.
+func overlayFile(ctx context.Context, inode *Inode, flags FileFlags) (*File, error) {
+ // Do a song and dance to eventually get to:
+ //
+ // File -> single reference
+ // Dirent -> single reference
+ // Inode -> multiple references
+ //
+ // So that File.DecRef() -> File.destroy -> Dirent.DecRef -> Dirent.destroy,
+ // and both the transitory File and Dirent can be GC'ed but the Inode
+ // remains.
+
+ // Take another reference on the Inode.
+ inode.IncRef()
+
+ // Start with a single reference on the Dirent. It inherits the reference
+ // we just took on the Inode above.
+ dirent := NewTransientDirent(inode)
+
+ // Get a File. This will take another reference on the Dirent.
+ f, err := inode.GetFile(ctx, dirent, flags)
+
+ // Drop the extra reference on the Dirent. Now there's only one reference
+ // on the dirent, either owned by f (if non-nil), or the Dirent is about
+ // to be destroyed (if GetFile failed).
+ dirent.DecRef()
+
+ return f, err
+}
+
+// overlayFileOperations implements FileOperations for a file in an overlay.
+//
+// +stateify savable
+type overlayFileOperations struct {
+ // upperMu protects upper below. In contrast lower is stable.
+ upperMu sync.Mutex `state:"nosave"`
+
+ // We can't share Files in upper and lower filesystems between all Files
+ // in an overlay because some file systems expect to get distinct handles
+ // that are not consistent with each other on open(2).
+ //
+ // So we lazily acquire an upper File when the overlayEntry acquires an
+ // upper Inode (it might have one from the start). This synchronizes with
+ // copy up.
+ //
+ // If upper is non-nil and this is not a directory, then lower is ignored.
+ //
+ // For directories, upper and lower are ignored because it is always
+ // necessary to acquire new directory handles so that the directory cursors
+ // of the upper and lower Files are not exhausted.
+ upper *File
+ lower *File
+
+ // dirCursor is a directory cursor for a directory in an overlay. It is
+ // protected by File.mu of the owning file, which is held during
+ // Readdir and Seek calls.
+ dirCursor string
+
+ // dirCacheMu protects dirCache.
+ dirCacheMu sync.RWMutex `state:"nosave"`
+
+ // dirCache is cache of DentAttrs from upper and lower Inodes.
+ dirCache *SortedDentryMap
+}
+
+// Release implements FileOperations.Release.
+func (f *overlayFileOperations) Release() {
+ if f.upper != nil {
+ f.upper.DecRef()
+ }
+ if f.lower != nil {
+ f.lower.DecRef()
+ }
+}
+
+// EventRegister implements FileOperations.EventRegister.
+func (f *overlayFileOperations) EventRegister(we *waiter.Entry, mask waiter.EventMask) {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+ if f.upper != nil {
+ f.upper.EventRegister(we, mask)
+ return
+ }
+ f.lower.EventRegister(we, mask)
+}
+
+// EventUnregister implements FileOperations.Unregister.
+func (f *overlayFileOperations) EventUnregister(we *waiter.Entry) {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+ if f.upper != nil {
+ f.upper.EventUnregister(we)
+ return
+ }
+ f.lower.EventUnregister(we)
+}
+
+// Readiness implements FileOperations.Readiness.
+func (f *overlayFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+ if f.upper != nil {
+ return f.upper.Readiness(mask)
+ }
+ return f.lower.Readiness(mask)
+}
+
+// Seek implements FileOperations.Seek.
+func (f *overlayFileOperations) Seek(ctx context.Context, file *File, whence SeekWhence, offset int64) (int64, error) {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+
+ var seekDir bool
+ var n int64
+ if f.upper != nil {
+ var err error
+ if n, err = f.upper.FileOperations.Seek(ctx, file, whence, offset); err != nil {
+ return n, err
+ }
+ seekDir = IsDir(f.upper.Dirent.Inode.StableAttr)
+ } else {
+ var err error
+ if n, err = f.lower.FileOperations.Seek(ctx, file, whence, offset); err != nil {
+ return n, err
+ }
+ seekDir = IsDir(f.lower.Dirent.Inode.StableAttr)
+ }
+
+ // If this was a seek on a directory, we must update the cursor.
+ if seekDir && whence == SeekSet && offset == 0 {
+ // Currently only seeking to 0 on a directory is supported.
+ // FIXME(b/33075855): Lift directory seeking limitations.
+ f.dirCursor = ""
+ }
+ return n, nil
+}
+
+// Readdir implements FileOperations.Readdir.
+func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, serializer DentrySerializer) (int64, error) {
+ root := RootFromContext(ctx)
+ if root != nil {
+ defer root.DecRef()
+ }
+ dirCtx := &DirCtx{
+ Serializer: serializer,
+ DirCursor: &f.dirCursor,
+ }
+
+ // If the directory dirent is frozen, then DirentReaddir will calculate
+ // the children based off the frozen dirent tree. There is no need to
+ // call readdir on the upper/lower layers.
+ if file.Dirent.frozen {
+ return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
+ }
+
+ // Otherwise proceed with usual overlay readdir.
+ o := file.Dirent.Inode.overlay
+
+ // readdirEntries holds o.copyUpMu to ensure that copy-up does not
+ // occur while calculating the readir results.
+ //
+ // However, it is possible for a copy-up to occur after the call to
+ // readdirEntries, but before setting f.dirCache. This is OK, since
+ // copy-up only does not change the children in a way that would affect
+ // the children returned in dirCache. Copy-up only moves
+ // files/directories between layers in the overlay.
+ //
+ // It is also possible for Readdir to race with a Create operation
+ // (which may trigger a copy-up during it's execution). Depending on
+ // whether the Create happens before or after the readdirEntries call,
+ // the newly created file may or may not appear in the readdir results.
+ // But this can only be caused by a real race between readdir and
+ // create syscalls, so it's also OK.
+ dirCache, err := readdirEntries(ctx, o)
+ if err != nil {
+ return file.Offset(), err
+ }
+
+ f.dirCacheMu.Lock()
+ f.dirCache = dirCache
+ f.dirCacheMu.Unlock()
+
+ return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
+}
+
+// IterateDir implements DirIterator.IterateDir.
+func (f *overlayFileOperations) IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error) {
+ f.dirCacheMu.RLock()
+ n, err := GenericReaddir(dirCtx, f.dirCache)
+ f.dirCacheMu.RUnlock()
+ return offset + n, err
+}
+
+// onTop performs the given operation on the top-most available layer.
+func (f *overlayFileOperations) onTop(ctx context.Context, file *File, fn func(*File, FileOperations) error) error {
+ file.Dirent.Inode.overlay.copyMu.RLock()
+ defer file.Dirent.Inode.overlay.copyMu.RUnlock()
+
+ // Only lower layer is available.
+ if file.Dirent.Inode.overlay.upper == nil {
+ return fn(f.lower, f.lower.FileOperations)
+ }
+
+ f.upperMu.Lock()
+ if f.upper == nil {
+ upper, err := overlayFile(ctx, file.Dirent.Inode.overlay.upper, file.Flags())
+ if err != nil {
+ // Something very wrong; return a generic filesystem
+ // error to avoid propagating internals.
+ f.upperMu.Unlock()
+ return syserror.EIO
+ }
+
+ // Save upper file.
+ f.upper = upper
+ }
+ f.upperMu.Unlock()
+
+ return fn(f.upper, f.upper.FileOperations)
+}
+
+// Read implements FileOperations.Read.
+func (f *overlayFileOperations) Read(ctx context.Context, file *File, dst usermem.IOSequence, offset int64) (n int64, err error) {
+ err = f.onTop(ctx, file, func(file *File, ops FileOperations) error {
+ n, err = ops.Read(ctx, file, dst, offset)
+ return err // Will overwrite itself.
+ })
+ return
+}
+
+// WriteTo implements FileOperations.WriteTo.
+func (f *overlayFileOperations) WriteTo(ctx context.Context, file *File, dst *File, opts SpliceOpts) (n int64, err error) {
+ err = f.onTop(ctx, file, func(file *File, ops FileOperations) error {
+ n, err = ops.WriteTo(ctx, file, dst, opts)
+ return err // Will overwrite itself.
+ })
+ return
+}
+
+// Write implements FileOperations.Write.
+func (f *overlayFileOperations) Write(ctx context.Context, file *File, src usermem.IOSequence, offset int64) (int64, error) {
+ // f.upper must be non-nil. See inode_overlay.go:overlayGetFile, where the
+ // file is copied up and opened in the upper filesystem if FileFlags.Write.
+ // Write cannot be called if !FileFlags.Write, see FileOperations.Write.
+ return f.upper.FileOperations.Write(ctx, f.upper, src, offset)
+}
+
+// ReadFrom implements FileOperations.ReadFrom.
+func (f *overlayFileOperations) ReadFrom(ctx context.Context, file *File, src *File, opts SpliceOpts) (n int64, err error) {
+ // See above; f.upper must be non-nil.
+ return f.upper.FileOperations.ReadFrom(ctx, f.upper, src, opts)
+}
+
+// Fsync implements FileOperations.Fsync.
+func (f *overlayFileOperations) Fsync(ctx context.Context, file *File, start, end int64, syncType SyncType) (err error) {
+ f.upperMu.Lock()
+ if f.upper != nil {
+ err = f.upper.FileOperations.Fsync(ctx, f.upper, start, end, syncType)
+ }
+ f.upperMu.Unlock()
+ if err == nil && f.lower != nil {
+ // N.B. Fsync on the lower filesystem can cause writes of file
+ // attributes (i.e. access time) despite the fact that we must
+ // treat the lower filesystem as read-only.
+ //
+ // This matches the semantics of fsync(2) in Linux overlayfs.
+ err = f.lower.FileOperations.Fsync(ctx, f.lower, start, end, syncType)
+ }
+ return err
+}
+
+// Flush implements FileOperations.Flush.
+func (f *overlayFileOperations) Flush(ctx context.Context, file *File) (err error) {
+ // Flush whatever handles we have.
+ f.upperMu.Lock()
+ if f.upper != nil {
+ err = f.upper.FileOperations.Flush(ctx, f.upper)
+ }
+ f.upperMu.Unlock()
+ if err == nil && f.lower != nil {
+ err = f.lower.FileOperations.Flush(ctx, f.lower)
+ }
+ return err
+}
+
+// ConfigureMMap implements FileOperations.ConfigureMMap.
+func (*overlayFileOperations) ConfigureMMap(ctx context.Context, file *File, opts *memmap.MMapOpts) error {
+ o := file.Dirent.Inode.overlay
+
+ o.copyMu.RLock()
+ defer o.copyMu.RUnlock()
+
+ // If there is no lower inode, the overlay will never need to do a
+ // copy-up, and thus will never need to invalidate any mappings. We can
+ // call ConfigureMMap directly on the upper file.
+ if o.lower == nil {
+ f := file.FileOperations.(*overlayFileOperations)
+ if err := f.upper.ConfigureMMap(ctx, opts); err != nil {
+ return err
+ }
+
+ // ConfigureMMap will set the MappableIdentity to the upper
+ // file and take a reference on it, but we must also hold a
+ // reference to the overlay file during the lifetime of the
+ // Mappable. If we do not do this, the overlay file can be
+ // Released before the upper file is Released, and we will be
+ // unable to traverse to the upper file during Save, thus
+ // preventing us from saving a proper inode mapping for the
+ // file.
+ file.IncRef()
+ id := &overlayMappingIdentity{
+ id: opts.MappingIdentity,
+ overlayFile: file,
+ }
+
+ // Swap out the old MappingIdentity for the wrapped one.
+ opts.MappingIdentity = id
+ return nil
+ }
+
+ if !o.isMappableLocked() {
+ return syserror.ENODEV
+ }
+
+ // FIXME(jamieliu): This is a copy/paste of fsutil.GenericConfigureMMap,
+ // which we can't use because the overlay implementation is in package fs,
+ // so depending on fs/fsutil would create a circular dependency. Move
+ // overlay to fs/overlay.
+ opts.Mappable = o
+ opts.MappingIdentity = file
+ file.IncRef()
+ return nil
+}
+
+// UnstableAttr implements fs.FileOperations.UnstableAttr.
+func (f *overlayFileOperations) UnstableAttr(ctx context.Context, file *File) (UnstableAttr, error) {
+ // Hot path. Avoid defers.
+ f.upperMu.Lock()
+ if f.upper != nil {
+ attr, err := f.upper.UnstableAttr(ctx)
+ f.upperMu.Unlock()
+ return attr, err
+ }
+ f.upperMu.Unlock()
+
+ // It's possible that copy-up has occurred, but we haven't opened a upper
+ // file yet. If this is the case, just use the upper inode's UnstableAttr
+ // rather than opening a file.
+ o := file.Dirent.Inode.overlay
+ o.copyMu.RLock()
+ if o.upper != nil {
+ attr, err := o.upper.UnstableAttr(ctx)
+ o.copyMu.RUnlock()
+ return attr, err
+ }
+ o.copyMu.RUnlock()
+
+ return f.lower.UnstableAttr(ctx)
+}
+
+// Ioctl implements fs.FileOperations.Ioctl and always returns ENOTTY.
+func (*overlayFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ return 0, syserror.ENOTTY
+}
+
+// readdirEntries returns a sorted map of directory entries from the
+// upper and/or lower filesystem.
+func readdirEntries(ctx context.Context, o *overlayEntry) (*SortedDentryMap, error) {
+ o.copyMu.RLock()
+ defer o.copyMu.RUnlock()
+
+ // Assert that there is at least one upper or lower entry.
+ if o.upper == nil && o.lower == nil {
+ panic("invalid overlayEntry, needs at least one Inode")
+ }
+ entries := make(map[string]DentAttr)
+
+ // Try the upper filesystem first.
+ if o.upper != nil {
+ var err error
+ entries, err = readdirOne(ctx, NewTransientDirent(o.upper))
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ // Try the lower filesystem next.
+ if o.lower != nil {
+ lowerEntries, err := readdirOne(ctx, NewTransientDirent(o.lower))
+ if err != nil {
+ return nil, err
+ }
+ for name, entry := range lowerEntries {
+ // Skip this name if it is a negative entry in the
+ // upper or there exists a whiteout for it.
+ if o.upper != nil {
+ if overlayHasWhiteout(o.upper, name) {
+ continue
+ }
+ }
+ // Prefer the entries from the upper filesystem
+ // when names overlap.
+ if _, ok := entries[name]; !ok {
+ entries[name] = entry
+ }
+ }
+ }
+
+ // Sort and return the entries.
+ return NewSortedDentryMap(entries), nil
+}
+
+// readdirOne reads all of the directory entries from d.
+func readdirOne(ctx context.Context, d *Dirent) (map[string]DentAttr, error) {
+ dir, err := d.Inode.GetFile(ctx, d, FileFlags{Read: true})
+ if err != nil {
+ return nil, err
+ }
+ defer dir.DecRef()
+
+ // Use a stub serializer to read the entries into memory.
+ stubSerializer := &CollectEntriesSerializer{}
+ if err := dir.Readdir(ctx, stubSerializer); err != nil {
+ return nil, err
+ }
+ // The "." and ".." entries are from the overlay Inode's Dirent, not the stub.
+ delete(stubSerializer.Entries, ".")
+ delete(stubSerializer.Entries, "..")
+ return stubSerializer.Entries, nil
+}
+
+// overlayMappingIdentity wraps a MappingIdentity, and also holds a reference
+// on a file during its lifetime.
+//
+// +stateify savable
+type overlayMappingIdentity struct {
+ refs.AtomicRefCount
+ id memmap.MappingIdentity
+ overlayFile *File
+}
+
+// DecRef implements AtomicRefCount.DecRef.
+func (omi *overlayMappingIdentity) DecRef() {
+ omi.AtomicRefCount.DecRefWithDestructor(func() {
+ omi.overlayFile.DecRef()
+ omi.id.DecRef()
+ })
+}
+
+// DeviceID implements MappingIdentity.DeviceID using the device id from the
+// overlayFile.
+func (omi *overlayMappingIdentity) DeviceID() uint64 {
+ return omi.overlayFile.Dirent.Inode.StableAttr.DeviceID
+}
+
+// DeviceID implements MappingIdentity.InodeID using the inode id from the
+// overlayFile.
+func (omi *overlayMappingIdentity) InodeID() uint64 {
+ return omi.overlayFile.Dirent.Inode.StableAttr.InodeID
+}
+
+// MappedName implements MappingIdentity.MappedName.
+func (omi *overlayMappingIdentity) MappedName(ctx context.Context) string {
+ root := RootFromContext(ctx)
+ if root != nil {
+ defer root.DecRef()
+ }
+ name, _ := omi.overlayFile.Dirent.FullName(root)
+ return name
+}
+
+// Msync implements MappingIdentity.Msync.
+func (omi *overlayMappingIdentity) Msync(ctx context.Context, mr memmap.MappableRange) error {
+ return omi.id.Msync(ctx, mr)
+}