diff options
Diffstat (limited to 'pkg/sentry')
35 files changed, 150 insertions, 85 deletions
diff --git a/pkg/sentry/devices/memdev/zero.go b/pkg/sentry/devices/memdev/zero.go index 2e631a252..60cfea888 100644 --- a/pkg/sentry/devices/memdev/zero.go +++ b/pkg/sentry/devices/memdev/zero.go @@ -16,9 +16,10 @@ package memdev import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/usermem" ) @@ -79,11 +80,22 @@ func (fd *zeroFD) Seek(ctx context.Context, offset int64, whence int32) (int64, // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. func (fd *zeroFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - m, err := mm.NewSharedAnonMappable(opts.Length, pgalloc.MemoryFileProviderFromContext(ctx)) + if opts.Private || !opts.MaxPerms.Write { + // This mapping will never permit writing to the "underlying file" (in + // Linux terms, it isn't VM_SHARED), so implement it as an anonymous + // mapping, but back it with fd; this is what Linux does, and is + // actually application-visible because the resulting VMA will show up + // in /proc/[pid]/maps with fd.vfsfd.VirtualDentry()'s path rather than + // "/dev/zero (deleted)". + opts.Offset = 0 + opts.MappingIdentity = &fd.vfsfd + opts.MappingIdentity.IncRef() + return nil + } + tmpfsFD, err := tmpfs.NewZeroFile(ctx, auth.CredentialsFromContext(ctx), kernel.KernelFromContext(ctx).ShmMount(), opts.Length) if err != nil { return err } - opts.MappingIdentity = m - opts.Mappable = m - return nil + defer tmpfsFD.DecRef(ctx) + return tmpfsFD.ConfigureMMap(ctx, opts) } diff --git a/pkg/sentry/fsimpl/devpts/root_inode_refs.go b/pkg/sentry/fsimpl/devpts/root_inode_refs.go index 1b7090229..051801202 100644 --- a/pkg/sentry/fsimpl/devpts/root_inode_refs.go +++ b/pkg/sentry/fsimpl/devpts/root_inode_refs.go @@ -2,11 +2,11 @@ package devpts import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/fuse/inode_refs.go b/pkg/sentry/fsimpl/fuse/inode_refs.go index 12e7d6e6c..6b9456e1d 100644 --- a/pkg/sentry/fsimpl/fuse/inode_refs.go +++ b/pkg/sentry/fsimpl/fuse/inode_refs.go @@ -2,11 +2,11 @@ package fuse import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go index 7fa5a516d..babb3f664 100644 --- a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go +++ b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go @@ -2,11 +2,11 @@ package host import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/host/inode_refs.go b/pkg/sentry/fsimpl/host/inode_refs.go index c294b8b80..17f90ce4a 100644 --- a/pkg/sentry/fsimpl/host/inode_refs.go +++ b/pkg/sentry/fsimpl/host/inode_refs.go @@ -2,11 +2,11 @@ package host import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/kernfs/dentry_refs.go b/pkg/sentry/fsimpl/kernfs/dentry_refs.go index dd5325635..79863b3bc 100644 --- a/pkg/sentry/fsimpl/kernfs/dentry_refs.go +++ b/pkg/sentry/fsimpl/kernfs/dentry_refs.go @@ -2,11 +2,11 @@ package kernfs import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/kernfs/static_directory_refs.go b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go index 80513f6aa..478b04bdd 100644 --- a/pkg/sentry/fsimpl/kernfs/static_directory_refs.go +++ b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go @@ -2,11 +2,11 @@ package kernfs import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go index 8ed286c46..9431c1506 100644 --- a/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go @@ -2,11 +2,11 @@ package proc import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go index 957c6a6dd..872b20eb0 100644 --- a/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go @@ -2,11 +2,11 @@ package proc import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go index a80ec9e0a..c6d9b3522 100644 --- a/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go @@ -2,11 +2,11 @@ package proc import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/proc/task_inode_refs.go b/pkg/sentry/fsimpl/proc/task_inode_refs.go index c4835cbca..714488450 100644 --- a/pkg/sentry/fsimpl/proc/task_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/task_inode_refs.go @@ -2,11 +2,11 @@ package proc import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/proc/tasks_inode_refs.go b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go index 5dfb34238..22d9cc488 100644 --- a/pkg/sentry/fsimpl/proc/tasks_inode_refs.go +++ b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go @@ -2,11 +2,11 @@ package proc import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/sys/dir_refs.go b/pkg/sentry/fsimpl/sys/dir_refs.go index c05154e2b..89609b198 100644 --- a/pkg/sentry/fsimpl/sys/dir_refs.go +++ b/pkg/sentry/fsimpl/sys/dir_refs.go @@ -2,11 +2,11 @@ package sys import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 99277093e..1362c1602 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -865,8 +865,16 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe } if d.parent == nil { if d.name != "" { - // This must be an anonymous memfd file. + // This file must have been created by + // newUnlinkedRegularFileDescription(). In Linux, + // mm/shmem.c:__shmem_file_setup() => + // fs/file_table.c:alloc_file_pseudo() sets the created + // dentry's dentry_operations to anon_ops, for which d_dname == + // simple_dname. fs/d_path.c:simple_dname() defines the + // dentry's pathname to be its name, prefixed with "/" and + // suffixed with " (deleted)". b.PrependComponent("/" + d.name) + b.AppendString(" (deleted)") return vfs.PrependPathSyntheticError{} } return vfs.PrependPathAtNonMountRootError{} diff --git a/pkg/sentry/fsimpl/tmpfs/inode_refs.go b/pkg/sentry/fsimpl/tmpfs/inode_refs.go index 38cc30981..dbf0b2766 100644 --- a/pkg/sentry/fsimpl/tmpfs/inode_refs.go +++ b/pkg/sentry/fsimpl/tmpfs/inode_refs.go @@ -2,11 +2,11 @@ package tmpfs import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 0710b65db..b8699d064 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -42,6 +42,10 @@ type regularFile struct { // memFile is a platform.File used to allocate pages to this regularFile. memFile *pgalloc.MemoryFile + // memoryUsageKind is the memory accounting category under which pages backing + // this regularFile's contents are accounted. + memoryUsageKind usage.MemoryKind + // mapsMu protects mappings. mapsMu sync.Mutex `state:"nosave"` @@ -86,14 +90,75 @@ type regularFile struct { func (fs *filesystem) newRegularFile(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode) *inode { file := ®ularFile{ - memFile: fs.memFile, - seals: linux.F_SEAL_SEAL, + memFile: fs.memFile, + memoryUsageKind: usage.Tmpfs, + seals: linux.F_SEAL_SEAL, } file.inode.init(file, fs, kuid, kgid, linux.S_IFREG|mode) file.inode.nlink = 1 // from parent directory return &file.inode } +// newUnlinkedRegularFileDescription creates a regular file on the tmpfs +// filesystem represented by mount and returns an FD representing that file. +// The new file is not reachable by path traversal from any other file. +// +// newUnlinkedRegularFileDescription is analogous to Linux's +// mm/shmem.c:__shmem_file_setup(). +// +// Preconditions: mount must be a tmpfs mount. +func newUnlinkedRegularFileDescription(ctx context.Context, creds *auth.Credentials, mount *vfs.Mount, name string) (*regularFileFD, error) { + fs, ok := mount.Filesystem().Impl().(*filesystem) + if !ok { + panic("tmpfs.newUnlinkedRegularFileDescription() called with non-tmpfs mount") + } + + inode := fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, 0777) + d := fs.newDentry(inode) + defer d.DecRef(ctx) + d.name = name + + fd := ®ularFileFD{} + fd.Init(&inode.locks) + flags := uint32(linux.O_RDWR) + if err := fd.vfsfd.Init(fd, flags, mount, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } + return fd, nil +} + +// NewZeroFile creates a new regular file and file description as for +// mmap(MAP_SHARED | MAP_ANONYMOUS). The file has the given size and is +// initially (implicitly) filled with zeroes. +// +// Preconditions: mount must be a tmpfs mount. +func NewZeroFile(ctx context.Context, creds *auth.Credentials, mount *vfs.Mount, size uint64) (*vfs.FileDescription, error) { + // Compare mm/shmem.c:shmem_zero_setup(). + fd, err := newUnlinkedRegularFileDescription(ctx, creds, mount, "dev/zero") + if err != nil { + return nil, err + } + rf := fd.inode().impl.(*regularFile) + rf.memoryUsageKind = usage.Anonymous + rf.size = size + return &fd.vfsfd, err +} + +// NewMemfd creates a new regular file and file description as for +// memfd_create. +// +// Preconditions: mount must be a tmpfs mount. +func NewMemfd(ctx context.Context, creds *auth.Credentials, mount *vfs.Mount, allowSeals bool, name string) (*vfs.FileDescription, error) { + fd, err := newUnlinkedRegularFileDescription(ctx, creds, mount, name) + if err != nil { + return nil, err + } + if allowSeals { + fd.inode().impl.(*regularFile).seals = 0 + } + return &fd.vfsfd, nil +} + // truncate grows or shrinks the file to the given size. It returns true if the // file size was updated. func (rf *regularFile) truncate(newSize uint64) (bool, error) { @@ -226,7 +291,7 @@ func (rf *regularFile) Translate(ctx context.Context, required, optional memmap. optional.End = pgend } - cerr := rf.data.Fill(ctx, required, optional, rf.memFile, usage.Tmpfs, func(_ context.Context, dsts safemem.BlockSeq, _ uint64) (uint64, error) { + cerr := rf.data.Fill(ctx, required, optional, rf.memFile, rf.memoryUsageKind, func(_ context.Context, dsts safemem.BlockSeq, _ uint64) (uint64, error) { // Newly-allocated pages are zeroed, so we don't need to do anything. return dsts.NumBytes(), nil }) @@ -575,7 +640,7 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, case gap.Ok(): // Allocate memory for the write. gapMR := gap.Range().Intersect(pgMR) - fr, err := rw.file.memFile.Allocate(gapMR.Length(), usage.Tmpfs) + fr, err := rw.file.memFile.Allocate(gapMR.Length(), rw.file.memoryUsageKind) if err != nil { retErr = err goto exitLoop diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 4871e55d3..4658e1533 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -746,37 +746,6 @@ func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error { return nil } -// NewMemfd creates a new tmpfs regular file and file description that can back -// an anonymous fd created by memfd_create. -func NewMemfd(ctx context.Context, creds *auth.Credentials, mount *vfs.Mount, allowSeals bool, name string) (*vfs.FileDescription, error) { - fs, ok := mount.Filesystem().Impl().(*filesystem) - if !ok { - panic("NewMemfd() called with non-tmpfs mount") - } - - // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd inodes are set up with - // S_IRWXUGO. - inode := fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, 0777) - rf := inode.impl.(*regularFile) - if allowSeals { - rf.seals = 0 - } - - d := fs.newDentry(inode) - defer d.DecRef(ctx) - d.name = name - - // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with - // FMODE_READ | FMODE_WRITE. - var fd regularFileFD - fd.Init(&inode.locks) - flags := uint32(linux.O_RDWR) - if err := fd.vfsfd.Init(&fd, flags, mount, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { - return nil, err - } - return &fd.vfsfd, nil -} - // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) diff --git a/pkg/sentry/kernel/fd_table_refs.go b/pkg/sentry/kernel/fd_table_refs.go index 8973cac8a..ecba138ac 100644 --- a/pkg/sentry/kernel/fd_table_refs.go +++ b/pkg/sentry/kernel/fd_table_refs.go @@ -2,11 +2,11 @@ package kernel import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/kernel/fs_context_refs.go b/pkg/sentry/kernel/fs_context_refs.go index c0e01dbb3..fb2fde971 100644 --- a/pkg/sentry/kernel/fs_context_refs.go +++ b/pkg/sentry/kernel/fs_context_refs.go @@ -2,11 +2,11 @@ package kernel import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/kernel/process_group_refs.go b/pkg/sentry/kernel/process_group_refs.go index 5ff83ab96..4ed6e6458 100644 --- a/pkg/sentry/kernel/process_group_refs.go +++ b/pkg/sentry/kernel/process_group_refs.go @@ -2,11 +2,11 @@ package kernel import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/kernel/session_refs.go b/pkg/sentry/kernel/session_refs.go index 9c51eda6c..f2e1bb797 100644 --- a/pkg/sentry/kernel/session_refs.go +++ b/pkg/sentry/kernel/session_refs.go @@ -2,11 +2,11 @@ package kernel import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/kernel/shm/shm_refs.go b/pkg/sentry/kernel/shm/shm_refs.go index 0da23fa5f..51e07d0b3 100644 --- a/pkg/sentry/kernel/shm/shm_refs.go +++ b/pkg/sentry/kernel/shm/shm_refs.go @@ -2,11 +2,11 @@ package shm import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/mm/aio_mappable_refs.go b/pkg/sentry/mm/aio_mappable_refs.go index c80c56d94..b99909f07 100644 --- a/pkg/sentry/mm/aio_mappable_refs.go +++ b/pkg/sentry/mm/aio_mappable_refs.go @@ -2,11 +2,11 @@ package mm import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index 3e85964e4..8c9f11cce 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -242,7 +242,7 @@ type MemoryManager struct { // +stateify savable type vma struct { // mappable is the virtual memory object mapped by this vma. If mappable is - // nil, the vma represents a private anonymous mapping. + // nil, the vma represents an anonymous mapping. mappable memmap.Mappable // off is the offset into mappable at which this vma begins. If mappable is diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index f4c93baeb..2dbe5b751 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -136,9 +136,12 @@ func (m *SpecialMappable) Length() uint64 { // NewSharedAnonMappable returns a SpecialMappable that implements the // semantics of mmap(MAP_SHARED|MAP_ANONYMOUS) and mappings of /dev/zero. // -// TODO(jamieliu): The use of SpecialMappable is a lazy code reuse hack. Linux -// uses an ephemeral file created by mm/shmem.c:shmem_zero_setup(); we should -// do the same to get non-zero device and inode IDs. +// TODO(gvisor.dev/issue/1624): Linux uses an ephemeral file created by +// mm/shmem.c:shmem_zero_setup(), and VFS2 does something analogous. VFS1 uses +// a SpecialMappable instead, incorrectly getting device and inode IDs of zero +// and causing memory for shared anonymous mappings to be allocated up-front +// instead of on first touch; this is to avoid exacerbating the fs.MountSource +// leak (b/143656263). Delete this function along with VFS1. func NewSharedAnonMappable(length uint64, mfp pgalloc.MemoryFileProvider) (*SpecialMappable, error) { if length == 0 { return nil, syserror.EINVAL diff --git a/pkg/sentry/mm/special_mappable_refs.go b/pkg/sentry/mm/special_mappable_refs.go index 5eb7e69e0..035bbe690 100644 --- a/pkg/sentry/mm/special_mappable_refs.go +++ b/pkg/sentry/mm/special_mappable_refs.go @@ -2,11 +2,11 @@ package mm import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index 4c9a575e7..a2555ba1a 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -93,18 +92,6 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme } } else { opts.Offset = 0 - if !opts.Private { - if opts.MappingIdentity != nil { - return 0, syserror.EINVAL - } - m, err := NewSharedAnonMappable(opts.Length, pgalloc.MemoryFileProviderFromContext(ctx)) - if err != nil { - return 0, err - } - defer m.DecRef(ctx) - opts.MappingIdentity = m - opts.Mappable = m - } } if opts.Addr.RoundDown() != opts.Addr { diff --git a/pkg/sentry/platform/ring0/defs_impl_arm64.go b/pkg/sentry/platform/ring0/defs_impl_arm64.go index 78547069f..6b06983ba 100644 --- a/pkg/sentry/platform/ring0/defs_impl_arm64.go +++ b/pkg/sentry/platform/ring0/defs_impl_arm64.go @@ -3,11 +3,11 @@ package ring0 import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "reflect" "fmt" "gvisor.dev/gvisor/pkg/usermem" "io" - "reflect" ) // Useful bits. diff --git a/pkg/sentry/socket/unix/socket_refs.go b/pkg/sentry/socket/unix/socket_refs.go index 680ea06cd..dababb85f 100644 --- a/pkg/sentry/socket/unix/socket_refs.go +++ b/pkg/sentry/socket/unix/socket_refs.go @@ -2,11 +2,11 @@ package unix import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/socket/unix/transport/queue_refs.go b/pkg/sentry/socket/unix/transport/queue_refs.go index 17951eb2f..0d4e34988 100644 --- a/pkg/sentry/socket/unix/transport/queue_refs.go +++ b/pkg/sentry/socket/unix/transport/queue_refs.go @@ -2,11 +2,11 @@ package transport import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go index 8ab062bca..cd8dfdfa4 100644 --- a/pkg/sentry/syscalls/linux/sys_mmap.go +++ b/pkg/sentry/syscalls/linux/sys_mmap.go @@ -100,6 +100,15 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if err := file.ConfigureMMap(t, &opts); err != nil { return 0, nil, err } + } else if shared { + // Back shared anonymous mappings with a special mappable. + opts.Offset = 0 + m, err := mm.NewSharedAnonMappable(opts.Length, t.Kernel()) + if err != nil { + return 0, nil, err + } + opts.MappingIdentity = m // transfers ownership of m to opts + opts.Mappable = m } rv, err := t.MemoryManager().MMap(t, opts) diff --git a/pkg/sentry/syscalls/linux/vfs2/mmap.go b/pkg/sentry/syscalls/linux/vfs2/mmap.go index dc05c2994..9d9dbf775 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mmap.go +++ b/pkg/sentry/syscalls/linux/vfs2/mmap.go @@ -17,6 +17,7 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/syserror" @@ -85,6 +86,17 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if err := file.ConfigureMMap(t, &opts); err != nil { return 0, nil, err } + } else if shared { + // Back shared anonymous mappings with an anonymous tmpfs file. + opts.Offset = 0 + file, err := tmpfs.NewZeroFile(t, t.Credentials(), t.Kernel().ShmMount(), opts.Length) + if err != nil { + return 0, nil, err + } + defer file.DecRef(t) + if err := file.ConfigureMMap(t, &opts); err != nil { + return 0, nil, err + } } rv, err := t.MemoryManager().MMap(t, opts) diff --git a/pkg/sentry/vfs/file_description_refs.go b/pkg/sentry/vfs/file_description_refs.go index 5e612d7f0..bdd7e6554 100644 --- a/pkg/sentry/vfs/file_description_refs.go +++ b/pkg/sentry/vfs/file_description_refs.go @@ -2,11 +2,11 @@ package vfs import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/vfs/filesystem_refs.go b/pkg/sentry/vfs/filesystem_refs.go index aaa753c43..38a9a986f 100644 --- a/pkg/sentry/vfs/filesystem_refs.go +++ b/pkg/sentry/vfs/filesystem_refs.go @@ -2,11 +2,11 @@ package vfs import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/vfs/mount_namespace_refs.go b/pkg/sentry/vfs/mount_namespace_refs.go index 56b8d9903..63285fb8e 100644 --- a/pkg/sentry/vfs/mount_namespace_refs.go +++ b/pkg/sentry/vfs/mount_namespace_refs.go @@ -2,11 +2,11 @@ package vfs import ( "fmt" - refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" ) // ownerType is used to customize logging. Note that we use a pointer to T so |