diff options
Diffstat (limited to 'pkg')
35 files changed, 314 insertions, 106 deletions
diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go index 572e581ab..e5c7d1ac6 100644 --- a/pkg/abi/linux/linux_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go @@ -155,12 +155,12 @@ func (s *Statx) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (s *Statx) Packed() bool { - return s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() + return s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. func (s *Statx) MarshalUnsafe(dst []byte) { - if s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() { + if s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() { safecopy.CopyIn(dst, unsafe.Pointer(s)) } else { // Type Statx doesn't have a packed layout in memory, fallback to MarshalBytes. @@ -170,7 +170,7 @@ func (s *Statx) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (s *Statx) UnmarshalUnsafe(src []byte) { - if s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() { + if s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() { safecopy.CopyOut(unsafe.Pointer(s), src) } else { // Type Statx doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -181,7 +181,7 @@ func (s *Statx) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (s *Statx) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) { - if !s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() { + if !s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() { // Type Statx doesn't have a packed layout in memory, fall back to MarshalBytes. buf := task.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. s.MarshalBytes(buf) // escapes: fallback. @@ -211,7 +211,7 @@ func (s *Statx) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) { // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (s *Statx) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { - if !s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() { + if !s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() { // Type Statx doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := task.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. length, err := task.CopyInBytes(addr, buf) // escapes: okay. @@ -2023,7 +2023,7 @@ func (i *IPTEntry) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (i *IPTEntry) Packed() bool { - return i.IP.Packed() && i.Counters.Packed() + return i.Counters.Packed() && i.IP.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. @@ -2038,7 +2038,7 @@ func (i *IPTEntry) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (i *IPTEntry) UnmarshalUnsafe(src []byte) { - if i.IP.Packed() && i.Counters.Packed() { + if i.Counters.Packed() && i.IP.Packed() { safecopy.CopyOut(unsafe.Pointer(i), src) } else { // Type IPTEntry doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -3002,7 +3002,7 @@ func (i *IP6TEntry) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (i *IP6TEntry) Packed() bool { - return i.IPv6.Packed() && i.Counters.Packed() + return i.Counters.Packed() && i.IPv6.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. @@ -3028,7 +3028,7 @@ func (i *IP6TEntry) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (i *IP6TEntry) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) { - if !i.Counters.Packed() && i.IPv6.Packed() { + if !i.IPv6.Packed() && i.Counters.Packed() { // Type IP6TEntry doesn't have a packed layout in memory, fall back to MarshalBytes. buf := task.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. i.MarshalBytes(buf) // escapes: fallback. @@ -3058,7 +3058,7 @@ func (i *IP6TEntry) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) { // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (i *IP6TEntry) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { - if !i.IPv6.Packed() && i.Counters.Packed() { + if !i.Counters.Packed() && i.IPv6.Packed() { // Type IP6TEntry doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := task.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. length, err := task.CopyInBytes(addr, buf) // escapes: okay. @@ -3214,7 +3214,7 @@ func (i *IP6TIP) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (i *IP6TIP) UnmarshalUnsafe(src []byte) { - if i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() { + if i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { safecopy.CopyOut(unsafe.Pointer(i), src) } else { // Type IP6TIP doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -3225,7 +3225,7 @@ func (i *IP6TIP) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (i *IP6TIP) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) { - if !i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() { + if !i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() { // Type IP6TIP doesn't have a packed layout in memory, fall back to MarshalBytes. buf := task.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. i.MarshalBytes(buf) // escapes: fallback. @@ -3255,7 +3255,7 @@ func (i *IP6TIP) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) { // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (i *IP6TIP) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { - if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { + if !i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() { // Type IP6TIP doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := task.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. length, err := task.CopyInBytes(addr, buf) // escapes: okay. diff --git a/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go b/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go index 4e38c1859..22bd46f7f 100644 --- a/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go @@ -293,7 +293,7 @@ func (s *Stat) Packed() bool { // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. func (s *Stat) MarshalUnsafe(dst []byte) { - if s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() { + if s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() { safecopy.CopyIn(dst, unsafe.Pointer(s)) } else { // Type Stat doesn't have a packed layout in memory, fallback to MarshalBytes. @@ -370,7 +370,7 @@ func (s *Stat) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { // WriteTo implements io.WriterTo.WriteTo. func (s *Stat) WriteTo(writer io.Writer) (int64, error) { - if !s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() { + if !s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() { // Type Stat doesn't have a packed layout in memory, fall back to MarshalBytes. buf := make([]byte, s.SizeBytes()) s.MarshalBytes(buf) diff --git a/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go b/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go index 4b9b793d0..7a5f4e86c 100644 --- a/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go @@ -295,12 +295,12 @@ func (s *Stat) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (s *Stat) Packed() bool { - return s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed() + return s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. func (s *Stat) MarshalUnsafe(dst []byte) { - if s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() { + if s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() { safecopy.CopyIn(dst, unsafe.Pointer(s)) } else { // Type Stat doesn't have a packed layout in memory, fallback to MarshalBytes. @@ -351,7 +351,7 @@ func (s *Stat) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) { // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (s *Stat) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { - if !s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() { + if !s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() { // Type Stat doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := task.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. length, err := task.CopyInBytes(addr, buf) // escapes: okay. diff --git a/pkg/sentry/fsimpl/devpts/root_inode_refs.go b/pkg/sentry/fsimpl/devpts/root_inode_refs.go index b860819f0..1b7090229 100644 --- a/pkg/sentry/fsimpl/devpts/root_inode_refs.go +++ b/pkg/sentry/fsimpl/devpts/root_inode_refs.go @@ -1,12 +1,12 @@ package devpts import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/fuse/inode_refs.go b/pkg/sentry/fsimpl/fuse/inode_refs.go index 646042846..12e7d6e6c 100644 --- a/pkg/sentry/fsimpl/fuse/inode_refs.go +++ b/pkg/sentry/fsimpl/fuse/inode_refs.go @@ -1,12 +1,12 @@ package fuse import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go index 670166c76..7fa5a516d 100644 --- a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go +++ b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go @@ -1,12 +1,12 @@ package host import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/host/inode_refs.go b/pkg/sentry/fsimpl/host/inode_refs.go index 49c57c113..c294b8b80 100644 --- a/pkg/sentry/fsimpl/host/inode_refs.go +++ b/pkg/sentry/fsimpl/host/inode_refs.go @@ -1,12 +1,12 @@ package host import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/kernfs/dentry_refs.go b/pkg/sentry/fsimpl/kernfs/dentry_refs.go index 0269851c9..dd5325635 100644 --- a/pkg/sentry/fsimpl/kernfs/dentry_refs.go +++ b/pkg/sentry/fsimpl/kernfs/dentry_refs.go @@ -1,12 +1,12 @@ package kernfs import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/kernfs/static_directory_refs.go b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go index 866620ef9..80513f6aa 100644 --- a/pkg/sentry/fsimpl/kernfs/static_directory_refs.go +++ b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go @@ -1,12 +1,12 @@ package kernfs import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go index c589b4746..360b77ef6 100644 --- a/pkg/sentry/fsimpl/overlay/copy_up.go +++ b/pkg/sentry/fsimpl/overlay/copy_up.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -81,6 +82,8 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { Start: d.parent.upperVD, Path: fspath.Parse(d.name), } + // Used during copy-up of memory-mapped regular files. + var mmapOpts *memmap.MMapOpts cleanupUndoCopyUp := func() { var err error if ftype == linux.S_IFDIR { @@ -136,6 +139,25 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { break } } + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + if d.wrappedMappable != nil { + // We may have memory mappings of the file on the lower layer. + // Switch to mapping the file on the upper layer instead. + mmapOpts = &memmap.MMapOpts{ + Perms: usermem.ReadWrite, + MaxPerms: usermem.ReadWrite, + } + if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil { + cleanupUndoCopyUp() + return err + } + if mmapOpts.MappingIdentity != nil { + mmapOpts.MappingIdentity.DecRef(ctx) + } + // Don't actually switch Mappables until the end of copy-up; see + // below for why. + } if err := newFD.SetStat(ctx, vfs.SetStatOptions{ Stat: linux.Statx{ Mask: linux.STATX_UID | linux.STATX_GID, @@ -265,6 +287,62 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { atomic.StoreUint64(&d.ino, upperStat.Ino) } + if mmapOpts != nil && mmapOpts.Mappable != nil { + // Note that if mmapOpts != nil, then d.mapsMu is locked for writing + // (from the S_IFREG path above). + + // Propagate mappings of d to the new Mappable. Remember which mappings + // we added so we can remove them on failure. + upperMappable := mmapOpts.Mappable + allAdded := make(map[memmap.MappableRange]memmap.MappingsOfRange) + for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + added := make(memmap.MappingsOfRange) + for m := range seg.Value() { + if err := upperMappable.AddMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable); err != nil { + for m := range added { + upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable) + } + for mr, mappings := range allAdded { + for m := range mappings { + upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, mr.Start, m.Writable) + } + } + return err + } + added[m] = struct{}{} + } + allAdded[seg.Range()] = added + } + + // Switch to the new Mappable. We do this at the end of copy-up + // because: + // + // - We need to switch Mappables (by changing d.wrappedMappable) before + // invalidating Translations from the old Mappable (to pick up + // Translations from the new one). + // + // - We need to lock d.dataMu while changing d.wrappedMappable, but + // must invalidate Translations with d.dataMu unlocked (due to lock + // ordering). + // + // - Consequently, once we unlock d.dataMu, other threads may + // immediately observe the new (copied-up) Mappable, which we want to + // delay until copy-up is guaranteed to succeed. + d.dataMu.Lock() + lowerMappable := d.wrappedMappable + d.wrappedMappable = upperMappable + d.dataMu.Unlock() + d.lowerMappings.InvalidateAll(memmap.InvalidateOpts{}) + + // Remove mappings from the old Mappable. + for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + for m := range seg.Value() { + lowerMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable) + } + } + d.lowerMappings.RemoveAll() + } + atomic.StoreUint32(&d.copiedUp, 1) return nil } diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/non_directory.go index 268b32537..74cfd3799 100644 --- a/pkg/sentry/fsimpl/overlay/non_directory.go +++ b/pkg/sentry/fsimpl/overlay/non_directory.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -256,10 +257,105 @@ func (fd *nonDirectoryFD) Sync(ctx context.Context) error { // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. func (fd *nonDirectoryFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - wrappedFD, err := fd.getCurrentFD(ctx) + if err := fd.ensureMappable(ctx, opts); err != nil { + return err + } + return vfs.GenericConfigureMMap(&fd.vfsfd, fd.dentry(), opts) +} + +// ensureMappable ensures that fd.dentry().wrappedMappable is not nil. +func (fd *nonDirectoryFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error { + d := fd.dentry() + + // Fast path if we already have a Mappable for the current top layer. + if atomic.LoadUint32(&d.isMappable) != 0 { + return nil + } + + // Only permit mmap of regular files, since other file types may have + // unpredictable behavior when mmapped (e.g. /dev/zero). + if atomic.LoadUint32(&d.mode)&linux.S_IFMT != linux.S_IFREG { + return syserror.ENODEV + } + + // Get a Mappable for the current top layer. + fd.mu.Lock() + defer fd.mu.Unlock() + d.copyMu.RLock() + defer d.copyMu.RUnlock() + if atomic.LoadUint32(&d.isMappable) != 0 { + return nil + } + wrappedFD, err := fd.currentFDLocked(ctx) if err != nil { return err } - defer wrappedFD.DecRef(ctx) - return wrappedFD.ConfigureMMap(ctx, opts) + if err := wrappedFD.ConfigureMMap(ctx, opts); err != nil { + return err + } + if opts.MappingIdentity != nil { + opts.MappingIdentity.DecRef(ctx) + opts.MappingIdentity = nil + } + // Use this Mappable for all mappings of this layer (unless we raced with + // another call to ensureMappable). + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.dataMu.Lock() + defer d.dataMu.Unlock() + if d.wrappedMappable == nil { + d.wrappedMappable = opts.Mappable + atomic.StoreUint32(&d.isMappable, 1) + } + return nil +} + +// AddMapping implements memmap.Mappable.AddMapping. +func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil { + return err + } + if !d.isCopiedUp() { + d.lowerMappings.AddMapping(ms, ar, offset, writable) + } + return nil +} + +// RemoveMapping implements memmap.Mappable.RemoveMapping. +func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable) + if !d.isCopiedUp() { + d.lowerMappings.RemoveMapping(ms, ar, offset, writable) + } +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil { + return err + } + if !d.isCopiedUp() { + d.lowerMappings.AddMapping(ms, dstAR, offset, writable) + } + return nil +} + +// Translate implements memmap.Mappable.Translate. +func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { + d.dataMu.RLock() + defer d.dataMu.RUnlock() + return d.wrappedMappable.Translate(ctx, required, optional, at) +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +func (d *dentry) InvalidateUnsavable(ctx context.Context) error { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + return d.wrappedMappable.InvalidateUnsavable(ctx) } diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go index 9a8f7010e..b2efe5f80 100644 --- a/pkg/sentry/fsimpl/overlay/overlay.go +++ b/pkg/sentry/fsimpl/overlay/overlay.go @@ -22,6 +22,10 @@ // filesystem.renameMu // dentry.dirMu // dentry.copyMu +// *** "memmap.Mappable locks" below this point +// dentry.mapsMu +// *** "memmap.Mappable locks taken by Translate" below this point +// dentry.dataMu // // Locking dentry.dirMu in multiple dentries requires that parent dentries are // locked before child dentries, and that filesystem.renameMu is locked to @@ -37,6 +41,7 @@ import ( "gvisor.dev/gvisor/pkg/fspath" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -419,6 +424,35 @@ type dentry struct { devMinor uint32 ino uint64 + // If this dentry represents a regular file, then: + // + // - mapsMu is used to synchronize between copy-up and memmap.Mappable + // methods on dentry preceding mm.MemoryManager.activeMu in the lock order. + // + // - dataMu is used to synchronize between copy-up and + // dentry.(memmap.Mappable).Translate. + // + // - lowerMappings tracks memory mappings of the file. lowerMappings is + // used to invalidate mappings of the lower layer when the file is copied + // up to ensure that they remain coherent with subsequent writes to the + // file. (Note that, as of this writing, Linux overlayfs does not do this; + // this feature is a gVisor extension.) lowerMappings is protected by + // mapsMu. + // + // - If this dentry is copied-up, then wrappedMappable is the Mappable + // obtained from a call to the current top layer's + // FileDescription.ConfigureMMap(). Once wrappedMappable becomes non-nil + // (from a call to nonDirectoryFD.ensureMappable()), it cannot become nil. + // wrappedMappable is protected by mapsMu and dataMu. + // + // - isMappable is non-zero iff wrappedMappable is non-nil. isMappable is + // accessed using atomic memory operations. + mapsMu sync.Mutex + lowerMappings memmap.MappingSet + dataMu sync.RWMutex + wrappedMappable memmap.Mappable + isMappable uint32 + locks vfs.FileLocks } diff --git a/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go index 84c03e6cd..8ed286c46 100644 --- a/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go @@ -1,12 +1,12 @@ package proc import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go index 5b3c7cd1c..957c6a6dd 100644 --- a/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go @@ -1,12 +1,12 @@ package proc import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go index 3091deb89..a80ec9e0a 100644 --- a/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go @@ -1,12 +1,12 @@ package proc import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/proc/task_inode_refs.go b/pkg/sentry/fsimpl/proc/task_inode_refs.go index 5833527c5..c4835cbca 100644 --- a/pkg/sentry/fsimpl/proc/task_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/task_inode_refs.go @@ -1,12 +1,12 @@ package proc import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/proc/tasks_inode_refs.go b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go index 0961dd629..5dfb34238 100644 --- a/pkg/sentry/fsimpl/proc/tasks_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go @@ -1,12 +1,12 @@ package proc import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/sys/dir_refs.go b/pkg/sentry/fsimpl/sys/dir_refs.go index 718d542eb..c05154e2b 100644 --- a/pkg/sentry/fsimpl/sys/dir_refs.go +++ b/pkg/sentry/fsimpl/sys/dir_refs.go @@ -1,12 +1,12 @@ package sys import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/tmpfs/inode_refs.go b/pkg/sentry/fsimpl/tmpfs/inode_refs.go index 5ec208d9e..38cc30981 100644 --- a/pkg/sentry/fsimpl/tmpfs/inode_refs.go +++ b/pkg/sentry/fsimpl/tmpfs/inode_refs.go @@ -1,12 +1,12 @@ package tmpfs import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/kernel/fd_table_refs.go b/pkg/sentry/kernel/fd_table_refs.go index 9330e37e4..8973cac8a 100644 --- a/pkg/sentry/kernel/fd_table_refs.go +++ b/pkg/sentry/kernel/fd_table_refs.go @@ -1,12 +1,12 @@ package kernel import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/kernel/fs_context_refs.go b/pkg/sentry/kernel/fs_context_refs.go index e2280f400..c0e01dbb3 100644 --- a/pkg/sentry/kernel/fs_context_refs.go +++ b/pkg/sentry/kernel/fs_context_refs.go @@ -1,12 +1,12 @@ package kernel import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/kernel/process_group_refs.go b/pkg/sentry/kernel/process_group_refs.go index 832a99d66..5ff83ab96 100644 --- a/pkg/sentry/kernel/process_group_refs.go +++ b/pkg/sentry/kernel/process_group_refs.go @@ -1,12 +1,12 @@ package kernel import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go index 8a2418c41..90148bbb2 100644 --- a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go +++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go @@ -1,12 +1,12 @@ package kernel import ( + "fmt" + "reflect" "strings" "unsafe" - "fmt" "gvisor.dev/gvisor/pkg/sync" - "reflect" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race diff --git a/pkg/sentry/kernel/session_refs.go b/pkg/sentry/kernel/session_refs.go index 850f56d3a..9c51eda6c 100644 --- a/pkg/sentry/kernel/session_refs.go +++ b/pkg/sentry/kernel/session_refs.go @@ -1,12 +1,12 @@ package kernel import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/kernel/shm/shm_refs.go b/pkg/sentry/kernel/shm/shm_refs.go index 35f74eb6c..0da23fa5f 100644 --- a/pkg/sentry/kernel/shm/shm_refs.go +++ b/pkg/sentry/kernel/shm/shm_refs.go @@ -1,12 +1,12 @@ package shm import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/mm/aio_mappable_refs.go b/pkg/sentry/mm/aio_mappable_refs.go index d4f278400..c80c56d94 100644 --- a/pkg/sentry/mm/aio_mappable_refs.go +++ b/pkg/sentry/mm/aio_mappable_refs.go @@ -1,12 +1,12 @@ package mm import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/mm/special_mappable_refs.go b/pkg/sentry/mm/special_mappable_refs.go index 2a348bacf..5eb7e69e0 100644 --- a/pkg/sentry/mm/special_mappable_refs.go +++ b/pkg/sentry/mm/special_mappable_refs.go @@ -1,12 +1,12 @@ package mm import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/platform/ring0/defs_impl_arm64.go b/pkg/sentry/platform/ring0/defs_impl_arm64.go index eba2eac30..8ebfbfdb6 100644 --- a/pkg/sentry/platform/ring0/defs_impl_arm64.go +++ b/pkg/sentry/platform/ring0/defs_impl_arm64.go @@ -1,13 +1,13 @@ package ring0 import ( - "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "io" - "reflect" "fmt" + "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/usermem" + "io" + "reflect" ) // Useful bits. diff --git a/pkg/sentry/socket/unix/socket_refs.go b/pkg/sentry/socket/unix/socket_refs.go index a0e5d1393..680ea06cd 100644 --- a/pkg/sentry/socket/unix/socket_refs.go +++ b/pkg/sentry/socket/unix/socket_refs.go @@ -1,12 +1,12 @@ package unix import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/socket/unix/transport/queue_refs.go b/pkg/sentry/socket/unix/transport/queue_refs.go index 21d43fc24..17951eb2f 100644 --- a/pkg/sentry/socket/unix/transport/queue_refs.go +++ b/pkg/sentry/socket/unix/transport/queue_refs.go @@ -1,12 +1,12 @@ package transport import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/time/seqatomic_parameters_unsafe.go b/pkg/sentry/time/seqatomic_parameters_unsafe.go index 14978ed91..2cb001080 100644 --- a/pkg/sentry/time/seqatomic_parameters_unsafe.go +++ b/pkg/sentry/time/seqatomic_parameters_unsafe.go @@ -1,12 +1,12 @@ package time import ( + "fmt" + "reflect" "strings" "unsafe" - "fmt" "gvisor.dev/gvisor/pkg/sync" - "reflect" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race diff --git a/pkg/sentry/vfs/file_description_refs.go b/pkg/sentry/vfs/file_description_refs.go index ebc089b4c..5e612d7f0 100644 --- a/pkg/sentry/vfs/file_description_refs.go +++ b/pkg/sentry/vfs/file_description_refs.go @@ -1,12 +1,12 @@ package vfs import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/vfs/filesystem_refs.go b/pkg/sentry/vfs/filesystem_refs.go index 32f21c044..aaa753c43 100644 --- a/pkg/sentry/vfs/filesystem_refs.go +++ b/pkg/sentry/vfs/filesystem_refs.go @@ -1,12 +1,12 @@ package vfs import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/vfs/mount_namespace_refs.go b/pkg/sentry/vfs/mount_namespace_refs.go index 2c920ac41..56b8d9903 100644 --- a/pkg/sentry/vfs/mount_namespace_refs.go +++ b/pkg/sentry/vfs/mount_namespace_refs.go @@ -1,12 +1,12 @@ package vfs import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/tcpip/link/tun/tun_endpoint_refs.go b/pkg/tcpip/link/tun/tun_endpoint_refs.go index 06d11f0e1..7f80b1fa3 100644 --- a/pkg/tcpip/link/tun/tun_endpoint_refs.go +++ b/pkg/tcpip/link/tun/tun_endpoint_refs.go @@ -1,12 +1,12 @@ package tun import ( - "sync/atomic" - "fmt" - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so |