summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fs/file_overlay.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fs/file_overlay.go')
-rw-r--r--pkg/sentry/fs/file_overlay.go345
1 files changed, 345 insertions, 0 deletions
diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go
new file mode 100644
index 000000000..0c6e622b9
--- /dev/null
+++ b/pkg/sentry/fs/file_overlay.go
@@ -0,0 +1,345 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fs
+
+import (
+ "sync"
+
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// overlayFile gets a handle to a file from the upper or lower filesystem
+// in an overlay. The caller is responsible for calling File.DecRef on
+// the returned file.
+func overlayFile(ctx context.Context, inode *Inode, flags FileFlags) (*File, error) {
+ // Do a song and dance to eventually get to:
+ //
+ // File -> single reference
+ // Dirent -> single reference
+ // Inode -> multiple references
+ //
+ // So that File.DecRef() -> File.destroy -> Dirent.DecRef -> Dirent.destroy,
+ // and both the transitory File and Dirent can be GC'ed but the Inode
+ // remains.
+
+ // Take another reference on the Inode.
+ inode.IncRef()
+
+ // Start with a single reference on the Dirent. It inherits the reference
+ // we just took on the Inode above.
+ dirent := NewTransientDirent(inode)
+
+ // Get a File. This will take another reference on the Dirent.
+ f, err := inode.GetFile(ctx, dirent, flags)
+
+ // Drop the extra reference on the Dirent. Now there's only one reference
+ // on the dirent, either owned by f (if non-nil), or the Dirent is about
+ // to be destroyed (if GetFile failed).
+ dirent.DecRef()
+
+ return f, err
+}
+
+// overlayFileOperations implements FileOperations for a file in an overlay.
+type overlayFileOperations struct {
+ // upperMu protects upper below. In contrast lower is stable.
+ upperMu sync.Mutex `state:"nosave"`
+
+ // We can't share Files in upper and lower filesystems between all Files
+ // in an overlay because some file systems expect to get distinct handles
+ // that are not consistent with each other on open(2).
+ //
+ // So we lazily acquire an upper File when the overlayEntry acquires an
+ // upper Inode (it might have one from the start). This synchronizes with
+ // copy up.
+ //
+ // If upper is non-nil and this is not a directory, then lower is ignored.
+ //
+ // For directories, upper and lower are ignored because it is always
+ // necessary to acquire new directory handles so that the directory cursors
+ // of the upper and lower Files are not exhausted.
+ upper *File
+ lower *File
+
+ // dirCursor is a directory cursor for a directory in an overlay.
+ dirCursor string
+
+ // dirCache is cache of DentAttrs from upper and lower Inodes.
+ dirCache *SortedDentryMap
+}
+
+// Release implements FileOperations.Release.
+func (f *overlayFileOperations) Release() {
+ if f.upper != nil {
+ f.upper.DecRef()
+ }
+ if f.lower != nil {
+ f.lower.DecRef()
+ }
+}
+
+// EventRegister implements FileOperations.EventRegister.
+func (f *overlayFileOperations) EventRegister(we *waiter.Entry, mask waiter.EventMask) {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+ if f.upper != nil {
+ f.upper.EventRegister(we, mask)
+ return
+ }
+ f.lower.EventRegister(we, mask)
+}
+
+// EventUnregister implements FileOperations.Unregister.
+func (f *overlayFileOperations) EventUnregister(we *waiter.Entry) {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+ if f.upper != nil {
+ f.upper.EventUnregister(we)
+ return
+ }
+ f.lower.EventUnregister(we)
+}
+
+// Readiness implements FileOperations.Readiness.
+func (f *overlayFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+ if f.upper != nil {
+ return f.upper.Readiness(mask)
+ }
+ return f.lower.Readiness(mask)
+}
+
+// Seek implements FileOperations.Seek.
+func (f *overlayFileOperations) Seek(ctx context.Context, file *File, whence SeekWhence, offset int64) (int64, error) {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+
+ var seekDir bool
+ var n int64
+ if f.upper != nil {
+ var err error
+ if n, err = f.upper.FileOperations.Seek(ctx, file, whence, offset); err != nil {
+ return n, err
+ }
+ seekDir = IsDir(f.upper.Dirent.Inode.StableAttr)
+ } else {
+ var err error
+ if n, err = f.lower.FileOperations.Seek(ctx, file, whence, offset); err != nil {
+ return n, err
+ }
+ seekDir = IsDir(f.lower.Dirent.Inode.StableAttr)
+ }
+
+ // If this was a seek on a directory, we must update the cursor.
+ if seekDir && whence == SeekSet && offset == 0 {
+ // Currenly only seeking to 0 on a directory is supported.
+ // FIXME: Lift directory seeking limitations.
+ f.dirCursor = ""
+ }
+ return n, nil
+}
+
+// Readdir implements FileOperations.Readdir.
+func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, serializer DentrySerializer) (int64, error) {
+ o := file.Dirent.Inode.overlay
+
+ o.copyMu.RLock()
+ defer o.copyMu.RUnlock()
+
+ var err error
+ f.dirCache, err = readdirEntries(ctx, o)
+ if err != nil {
+ return file.Offset(), err
+ }
+
+ root := RootFromContext(ctx)
+ defer root.DecRef()
+
+ dirCtx := &DirCtx{
+ Serializer: serializer,
+ DirCursor: &f.dirCursor,
+ }
+ return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
+}
+
+// IterateDir implements DirIterator.IterateDir.
+func (f *overlayFileOperations) IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error) {
+ n, err := GenericReaddir(dirCtx, f.dirCache)
+ return offset + n, err
+}
+
+// Read implements FileOperations.Read.
+func (f *overlayFileOperations) Read(ctx context.Context, file *File, dst usermem.IOSequence, offset int64) (int64, error) {
+ o := file.Dirent.Inode.overlay
+
+ o.copyMu.RLock()
+ defer o.copyMu.RUnlock()
+
+ if o.upper != nil {
+ // We may need to acquire an open file handle to read from if
+ // copy up has occurred. Otherwise we risk reading from the
+ // wrong source.
+ f.upperMu.Lock()
+ if f.upper == nil {
+ var err error
+ f.upper, err = overlayFile(ctx, o.upper, file.Flags())
+ if err != nil {
+ f.upperMu.Unlock()
+ log.Warningf("failed to acquire handle with flags %v: %v", file.Flags(), err)
+ return 0, syserror.EIO
+ }
+ }
+ f.upperMu.Unlock()
+ return f.upper.FileOperations.Read(ctx, f.upper, dst, offset)
+ }
+ return f.lower.FileOperations.Read(ctx, f.lower, dst, offset)
+}
+
+// Write implements FileOperations.Write.
+func (f *overlayFileOperations) Write(ctx context.Context, file *File, src usermem.IOSequence, offset int64) (int64, error) {
+ // f.upper must be non-nil. See inode_overlay.go:overlayGetFile, where the
+ // file is copied up and opened in the upper filesystem if FileFlags.Write.
+ // Write cannot be called if !FileFlags.Write, see FileOperations.Write.
+ return f.upper.FileOperations.Write(ctx, f.upper, src, offset)
+}
+
+// Fsync implements FileOperations.Fsync.
+func (f *overlayFileOperations) Fsync(ctx context.Context, file *File, start, end int64, syncType SyncType) error {
+ var err error
+ f.upperMu.Lock()
+ if f.upper != nil {
+ err = f.upper.FileOperations.Fsync(ctx, f.upper, start, end, syncType)
+ }
+ f.upperMu.Unlock()
+ if f.lower != nil {
+ // N.B. Fsync on the lower filesystem can cause writes of file
+ // attributes (i.e. access time) despite the fact that we must
+ // treat the lower filesystem as read-only.
+ //
+ // This matches the semantics of fsync(2) in Linux overlayfs.
+ err = f.lower.FileOperations.Fsync(ctx, f.lower, start, end, syncType)
+ }
+ return err
+}
+
+// Flush implements FileOperations.Flush.
+func (f *overlayFileOperations) Flush(ctx context.Context, file *File) error {
+ // Flush whatever handles we have.
+ var err error
+ f.upperMu.Lock()
+ if f.upper != nil {
+ err = f.upper.FileOperations.Flush(ctx, f.upper)
+ }
+ f.upperMu.Unlock()
+ if f.lower != nil {
+ err = f.lower.FileOperations.Flush(ctx, f.lower)
+ }
+ return err
+}
+
+// ConfigureMMap implements FileOperations.ConfigureMMap.
+func (*overlayFileOperations) ConfigureMMap(ctx context.Context, file *File, opts *memmap.MMapOpts) error {
+ o := file.Dirent.Inode.overlay
+
+ o.copyMu.RLock()
+ defer o.copyMu.RUnlock()
+
+ if !o.isMappableLocked() {
+ return syserror.ENODEV
+ }
+ // FIXME: This is a copy/paste of fsutil.GenericConfigureMMap,
+ // which we can't use because the overlay implementation is in package fs,
+ // so depending on fs/fsutil would create a circular dependency. Move
+ // overlay to fs/overlay.
+ opts.Mappable = o
+ opts.MappingIdentity = file
+ file.IncRef()
+ return nil
+}
+
+// Ioctl implements fs.FileOperations.Ioctl and always returns ENOTTY.
+func (*overlayFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ return 0, syserror.ENOTTY
+}
+
+// readdirEntries returns a sorted map of directory entries from the
+// upper and/or lower filesystem.
+func readdirEntries(ctx context.Context, o *overlayEntry) (*SortedDentryMap, error) {
+ // Assert that there is at least one upper or lower entry.
+ if o.upper == nil && o.lower == nil {
+ panic("invalid overlayEntry, needs at least one Inode")
+ }
+ entries := make(map[string]DentAttr)
+
+ // Try the upper filesystem first.
+ if o.upper != nil {
+ var err error
+ entries, err = readdirOne(ctx, NewTransientDirent(o.upper))
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ // Try the lower filesystem next.
+ if o.lower != nil {
+ lowerEntries, err := readdirOne(ctx, NewTransientDirent(o.lower))
+ if err != nil {
+ return nil, err
+ }
+ for name, entry := range lowerEntries {
+ // Skip this name if it is a negative entry in the
+ // upper or there exists a whiteout for it.
+ if o.upper != nil {
+ if overlayHasWhiteout(o.upper, name) {
+ continue
+ }
+ }
+ // Prefer the entries from the upper filesystem
+ // when names overlap.
+ if _, ok := entries[name]; !ok {
+ entries[name] = entry
+ }
+ }
+ }
+
+ // Sort and return the entries.
+ return NewSortedDentryMap(entries), nil
+}
+
+// readdirOne reads all of the directory entries from d.
+func readdirOne(ctx context.Context, d *Dirent) (map[string]DentAttr, error) {
+ dir, err := d.Inode.GetFile(ctx, d, FileFlags{Read: true})
+ if err != nil {
+ return nil, err
+ }
+ defer dir.DecRef()
+
+ // Use a stub serializer to read the entries into memory.
+ stubSerializer := &CollectEntriesSerializer{}
+ if err := dir.Readdir(ctx, stubSerializer); err != nil {
+ return nil, err
+ }
+ // The "." and ".." entries are from the overlay Inode's Dirent, not the stub.
+ delete(stubSerializer.Entries, ".")
+ delete(stubSerializer.Entries, "..")
+ return stubSerializer.Entries, nil
+}