Diffstat (limited to 'pkg/sentry/mm')
pkg/sentry/mm/BUILD               |   4
pkg/sentry/mm/aio_context.go      |  40
pkg/sentry/mm/io.go               |  26
pkg/sentry/mm/mm.go               |   1
pkg/sentry/mm/mm_test.go          |   8
pkg/sentry/mm/pma.go              |  47
pkg/sentry/mm/shm.go              |   6
pkg/sentry/mm/special_mappable.go |   8
pkg/sentry/mm/syscalls.go         | 137
pkg/sentry/mm/vma.go              |  20
10 files changed, 158 insertions, 139 deletions
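Most of this diff is a mechanical migration of pkg/sentry/mm from //pkg/syserror to //pkg/errors/linuxerr: direct returns swap packages, and comparisons (see the mm_test.go hunks below) move from == on sentinel errors to linuxerr.Equals. A minimal sketch of the before/after pattern; the helper names here are illustrative and not taken from the patch:

    package example

    import (
        "gvisor.dev/gvisor/pkg/errors/linuxerr"
        "gvisor.dev/gvisor/pkg/hostarch"
    )

    // checkAligned returns EINVAL for an unaligned address using the new
    // linuxerr sentinel; the old form was "return syserror.EINVAL".
    func checkAligned(addr hostarch.Addr) error {
        if addr != addr.RoundDown() {
            return linuxerr.EINVAL
        }
        return nil
    }

    // isFault replaces the old sentinel comparison "err == syserror.EFAULT".
    func isFault(err error) bool {
        return linuxerr.Equals(linuxerr.EFAULT, err)
    }

A few hunks go beyond the substitution: MMap now returns EOVERFLOW (rather than ENOMEM) when offset+length overflows, the MUnmap of the AIO ring buffer moves from destroyAIOContextLocked to DestroyAIOContext so it no longer runs under aioManager.mu, and pma.go gains a copy-on-write heuristic for stacks (see the sketch at the end of the diff).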
diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index b417c2da7..b7d782b7f 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -125,6 +125,7 @@ go_library( "//pkg/abi/linux", "//pkg/atomicbitops", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/refs", @@ -143,7 +144,6 @@ go_library( "//pkg/sentry/platform", "//pkg/sentry/usage", "//pkg/sync", - "//pkg/syserror", "//pkg/tcpip/buffer", "//pkg/usermem", ], @@ -156,6 +156,7 @@ go_test( library = ":mm", deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sentry/arch", "//pkg/sentry/contexttest", @@ -163,7 +164,6 @@ go_test( "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 346866d3c..d71d64580 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -17,12 +17,12 @@ package mm import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -77,15 +77,6 @@ func (mm *MemoryManager) destroyAIOContextLocked(ctx context.Context, id uint64) return nil } - // Only unmaps after it assured that the address is a valid aio context to - // prevent random memory from been unmapped. - // - // Note: It's possible to unmap this address and map something else into - // the same address. Then it would be unmapping memory that it doesn't own. - // This is, however, the way Linux implements AIO. Keeps the same [weird] - // semantics in case anyone relies on it. - mm.MUnmap(ctx, hostarch.Addr(id), aioRingBufferSize) - delete(mm.aioManager.contexts, id) aioCtx.destroy() return aioCtx @@ -158,11 +149,11 @@ func (ctx *AIOContext) Prepare() error { defer ctx.mu.Unlock() if ctx.dead { // Context died after the caller looked it up. - return syserror.EINVAL + return linuxerr.EINVAL } if ctx.outstanding >= ctx.maxOutstanding { // Context is busy. - return syserror.EAGAIN + return linuxerr.EAGAIN } ctx.outstanding++ return nil @@ -297,7 +288,7 @@ func (m *aioMappable) InodeID() uint64 { // Msync implements memmap.MappingIdentity.Msync. func (m *aioMappable) Msync(ctx context.Context, mr memmap.MappableRange) error { // Linux: aio_ring_fops.fsync == NULL - return syserror.EINVAL + return linuxerr.EINVAL } // AddMapping implements memmap.Mappable.AddMapping. @@ -305,7 +296,7 @@ func (m *aioMappable) AddMapping(_ context.Context, _ memmap.MappingSpace, ar ho // Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap() // sets VM_DONTEXPAND). if offset != 0 || uint64(ar.Length()) != aioRingBufferSize { - return syserror.EFAULT + return linuxerr.EFAULT } return nil } @@ -319,13 +310,13 @@ func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, s // Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap() // sets VM_DONTEXPAND). if offset != 0 || uint64(dstAR.Length()) != aioRingBufferSize { - return syserror.EFAULT + return linuxerr.EFAULT } // Require that the mapping correspond to a live AIOContext. Compare // Linux's fs/aio.c:aio_ring_mremap(). 
mm, ok := ms.(*MemoryManager) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } am := &mm.aioManager am.mu.Lock() @@ -333,12 +324,12 @@ func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, s oldID := uint64(srcAR.Start) aioCtx, ok := am.contexts[oldID] if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } aioCtx.mu.Lock() defer aioCtx.mu.Unlock() if aioCtx.dead { - return syserror.EINVAL + return linuxerr.EINVAL } // Use the new ID for the AIOContext. am.contexts[uint64(dstAR.Start)] = aioCtx @@ -350,7 +341,7 @@ func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, s func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { var err error if required.End > m.fr.Length() { - err = &memmap.BusError{syserror.EFAULT} + err = &memmap.BusError{linuxerr.EFAULT} } if source := optional.Intersect(memmap.MappableRange{0, m.fr.Length()}); source.Length() != 0 { return []memmap.Translation{ @@ -399,7 +390,7 @@ func (mm *MemoryManager) NewAIOContext(ctx context.Context, events uint32) (uint id := uint64(addr) if !mm.aioManager.newAIOContext(events, id) { mm.MUnmap(ctx, addr, aioRingBufferSize) - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } return id, nil } @@ -411,6 +402,15 @@ func (mm *MemoryManager) DestroyAIOContext(ctx context.Context, id uint64) *AIOC return nil } + // Only unmaps after it assured that the address is a valid aio context to + // prevent random memory from been unmapped. + // + // Note: It's possible to unmap this address and map something else into + // the same address. Then it would be unmapping memory that it doesn't own. + // This is, however, the way Linux implements AIO. Keeps the same [weird] + // semantics in case anyone relies on it. + mm.MUnmap(ctx, hostarch.Addr(id), aioRingBufferSize) + mm.aioManager.mu.Lock() defer mm.aioManager.mu.Unlock() return mm.destroyAIOContextLocked(ctx, id) diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go index 16f318ab3..5fcfeb473 100644 --- a/pkg/sentry/mm/io.go +++ b/pkg/sentry/mm/io.go @@ -16,10 +16,10 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -97,14 +97,14 @@ func translateIOError(ctx context.Context, err error) error { if logIOErrors { ctx.Debugf("MM I/O error: %v", err) } - return syserror.EFAULT + return linuxerr.EFAULT } // CopyOut implements usermem.IO.CopyOut. 
func (mm *MemoryManager) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) { ar, ok := mm.CheckIORange(addr, int64(len(src))) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if len(src) == 0 { @@ -147,7 +147,7 @@ func (mm *MemoryManager) asCopyOut(ctx context.Context, addr hostarch.Addr, src func (mm *MemoryManager) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) { ar, ok := mm.CheckIORange(addr, int64(len(dst))) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if len(dst) == 0 { @@ -190,7 +190,7 @@ func (mm *MemoryManager) asCopyIn(ctx context.Context, addr hostarch.Addr, dst [ func (mm *MemoryManager) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { ar, ok := mm.CheckIORange(addr, toZero) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if toZero == 0 { @@ -231,7 +231,7 @@ func (mm *MemoryManager) asZeroOut(ctx context.Context, addr hostarch.Addr, toZe // CopyOutFrom implements usermem.IO.CopyOutFrom. func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { if !mm.checkIOVec(ars) { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if ars.NumBytes() == 0 { @@ -276,7 +276,7 @@ func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRange // CopyInTo implements usermem.IO.CopyInTo. func (mm *MemoryManager) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { if !mm.checkIOVec(ars) { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if ars.NumBytes() == 0 { @@ -314,7 +314,7 @@ func (mm *MemoryManager) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { ar, ok := mm.CheckIORange(addr, 4) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Do AddressSpace IO if applicable. @@ -339,7 +339,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new _, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { // Atomicity is unachievable across mappings. - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } im := ims.Head() var err error @@ -357,7 +357,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { ar, ok := mm.CheckIORange(addr, 4) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Do AddressSpace IO if applicable. @@ -382,7 +382,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch _, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { // Atomicity is unachievable across mappings. 
- return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } im := ims.Head() var err error @@ -400,7 +400,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) { ar, ok := mm.CheckIORange(addr, 4) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Do AddressSpace IO if applicable. @@ -425,7 +425,7 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opt _, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { // Atomicity is unachievable across mappings. - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } im := ims.Head() var err error diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index 57969b26c..0fca59b64 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -28,6 +28,7 @@ // memmap.File locks // mm.aioManager.mu // mm.AIOContext.mu +// kernel.TaskSet.mu // // Only mm.MemoryManager.Fork is permitted to lock mm.MemoryManager.activeMu in // multiple mm.MemoryManagers, as it does so in a well-defined order (forked diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go index 1304b0a2f..84cb8158d 100644 --- a/pkg/sentry/mm/mm_test.go +++ b/pkg/sentry/mm/mm_test.go @@ -18,6 +18,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/contexttest" @@ -25,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -171,7 +171,7 @@ func TestIOAfterUnmap(t *testing.T) { } n, err = mm.CopyIn(ctx, addr, b, usermem.IOOpts{}) - if err != syserror.EFAULT { + if !linuxerr.Equals(linuxerr.EFAULT, err) { t.Errorf("CopyIn got err %v want EFAULT", err) } if n != 0 { @@ -212,7 +212,7 @@ func TestIOAfterMProtect(t *testing.T) { // Without IgnorePermissions, CopyOut should no longer succeed. n, err = mm.CopyOut(ctx, addr, b, usermem.IOOpts{}) - if err != syserror.EFAULT { + if !linuxerr.Equals(linuxerr.EFAULT, err) { t.Errorf("CopyOut got err %v want EFAULT", err) } if n != 0 { @@ -249,7 +249,7 @@ func TestAIOPrepareAfterDestroy(t *testing.T) { mm.DestroyAIOContext(ctx, id) // Prepare should fail because aioCtx should be destroyed. 
- if err := aioCtx.Prepare(); err != syserror.EINVAL { + if err := aioCtx.Prepare(); !linuxerr.Equals(linuxerr.EINVAL, err) { t.Errorf("aioCtx.Prepare got err %v want nil", err) } else if err == nil { aioCtx.CancelPendingRequest() diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go index 5583f62b2..05cdcd8ae 100644 --- a/pkg/sentry/mm/pma.go +++ b/pkg/sentry/mm/pma.go @@ -18,12 +18,12 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safecopy" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // existingPMAsLocked checks that pmas exist for all addresses in ar, and @@ -116,7 +116,7 @@ func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar var alignerr error if !ok { end = ar.End.RoundDown() - alignerr = syserror.EFAULT + alignerr = linuxerr.EFAULT } ar = hostarch.AddrRange{ar.Start.RoundDown(), end} @@ -162,7 +162,7 @@ func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars hostarch.Addr var alignerr error if !ok { end = ar.End.RoundDown() - alignerr = syserror.EFAULT + alignerr = linuxerr.EFAULT } ar = hostarch.AddrRange{ar.Start.RoundDown(), end} @@ -324,20 +324,37 @@ func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIter panic(fmt.Sprintf("pma %v needs to be copied for writing, but is not readable: %v", pseg.Range(), oldpma)) } } - // The majority of copy-on-write breaks on executable pages - // come from: - // - // - The ELF loader, which must zero out bytes on the last - // page of each segment after the end of the segment. - // - // - gdb's use of ptrace to insert breakpoints. - // - // Neither of these cases has enough spatial locality to - // benefit from copying nearby pages, so if the vma is - // executable, only copy the pages required. var copyAR hostarch.AddrRange - if vseg.ValuePtr().effectivePerms.Execute { + if vma := vseg.ValuePtr(); vma.effectivePerms.Execute { + // The majority of copy-on-write breaks on executable + // pages come from: + // + // - The ELF loader, which must zero out bytes on the + // last page of each segment after the end of the + // segment. + // + // - gdb's use of ptrace to insert breakpoints. + // + // Neither of these cases has enough spatial locality + // to benefit from copying nearby pages, so if the vma + // is executable, only copy the pages required. copyAR = pseg.Range().Intersect(ar) + } else if vma.growsDown { + // In most cases, the new process will not use most of + // its stack before exiting or invoking execve(); it is + // especially unlikely to return very far down its call + // stack, since async-signal-safety concerns in + // multithreaded programs prevent the new process from + // being able to do much. So only copy up to one page + // before and after the pages required. 
+ stackMaskAR := ar + if newStart := stackMaskAR.Start - hostarch.PageSize; newStart < stackMaskAR.Start { + stackMaskAR.Start = newStart + } + if newEnd := stackMaskAR.End + hostarch.PageSize; newEnd > stackMaskAR.End { + stackMaskAR.End = newEnd + } + copyAR = pseg.Range().Intersect(stackMaskAR) } else { copyAR = pseg.Range().Intersect(maskAR) } diff --git a/pkg/sentry/mm/shm.go b/pkg/sentry/mm/shm.go index 3130be80c..94d5112a1 100644 --- a/pkg/sentry/mm/shm.go +++ b/pkg/sentry/mm/shm.go @@ -16,16 +16,16 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" - "gvisor.dev/gvisor/pkg/syserror" ) // DetachShm unmaps a sysv shared memory segment. func (mm *MemoryManager) DetachShm(ctx context.Context, addr hostarch.Addr) error { if addr != addr.RoundDown() { // "... shmaddr is not aligned on a page boundary." - man shmdt(2) - return syserror.EINVAL + return linuxerr.EINVAL } var detached *shm.Shm @@ -48,7 +48,7 @@ func (mm *MemoryManager) DetachShm(ctx context.Context, addr hostarch.Addr) erro if detached == nil { // There is no shared memory segment attached at addr. - return syserror.EINVAL + return linuxerr.EINVAL } // Remove all vmas that could have been created by the same attach. diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index e748b7ff8..69c6e77a7 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -16,11 +16,11 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // SpecialMappable implements memmap.MappingIdentity and memmap.Mappable with @@ -94,7 +94,7 @@ func (*SpecialMappable) CopyMapping(context.Context, memmap.MappingSpace, hostar func (m *SpecialMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { var err error if required.End > m.fr.Length() { - err = &memmap.BusError{syserror.EFAULT} + err = &memmap.BusError{linuxerr.EFAULT} } if source := optional.Intersect(memmap.MappableRange{0, m.fr.Length()}); source.Length() != 0 { return []memmap.Translation{ @@ -144,11 +144,11 @@ func (m *SpecialMappable) Length() uint64 { // leak (b/143656263). Delete this function along with VFS1. func NewSharedAnonMappable(length uint64, mfp pgalloc.MemoryFileProvider) (*SpecialMappable, error) { if length == 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } alignedLen, ok := hostarch.Addr(length).RoundUp() if !ok { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } fr, err := mfp.MemoryFile().Allocate(uint64(alignedLen), usage.Anonymous) if err != nil { diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index 7ad6b7c21..dc12ad357 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -21,12 +21,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" ) // HandleUserFault handles an application page fault. 
sp is the faulting @@ -36,7 +36,7 @@ import ( func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr hostarch.Addr, at hostarch.AccessType, sp hostarch.Addr) error { ar, ok := addr.RoundDown().ToRange(hostarch.PageSize) if !ok { - return syserror.EFAULT + return linuxerr.EFAULT } // Don't bother trying existingPMAsLocked; in most cases, if we did have @@ -74,22 +74,22 @@ func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr hostarch.Addr // MMap establishes a memory mapping. func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostarch.Addr, error) { if opts.Length == 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } length, ok := hostarch.Addr(opts.Length).RoundUp() if !ok { - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } opts.Length = uint64(length) if opts.Mappable != nil { // Offset must be aligned. if hostarch.Addr(opts.Offset).RoundDown() != hostarch.Addr(opts.Offset) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Offset + length must not overflow. if end := opts.Offset + opts.Length; end < opts.Offset { - return 0, syserror.ENOMEM + return 0, linuxerr.EOVERFLOW } } else { opts.Offset = 0 @@ -99,19 +99,19 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostar // MAP_FIXED requires addr to be page-aligned; non-fixed mappings // don't. if opts.Fixed { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } opts.Addr = opts.Addr.RoundDown() } if !opts.MaxPerms.SupersetOf(opts.Perms) { - return 0, syserror.EACCES + return 0, linuxerr.EACCES } if opts.Unmap && !opts.Fixed { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if opts.GrowsDown && opts.Mappable != nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Get the new vma. @@ -203,6 +203,7 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar h // * vseg.Range().IsSupersetOf(ar). // // Postconditions: mm.mappingMu will be unlocked. +// +checklocksrelease:mm.mappingMu func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) { // See populateVMA above for commentary. if !vseg.ValuePtr().effectivePerms.Any() { @@ -251,7 +252,7 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, erro ctx.Warningf("Capping stack size from RLIMIT_STACK of %v down to %v.", sz, maxStackSize) sz = maxStackSize } else if sz == 0 { - return hostarch.AddrRange{}, syserror.ENOMEM + return hostarch.AddrRange{}, linuxerr.ENOMEM } szaddr := hostarch.Addr(sz) ctx.Debugf("Allocating stack with size of %v bytes", sz) @@ -260,7 +261,7 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, erro // randomization can't be disabled. stackEnd := mm.layout.MaxAddr - hostarch.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown() if stackEnd < szaddr { - return hostarch.AddrRange{}, syserror.ENOMEM + return hostarch.AddrRange{}, linuxerr.ENOMEM } stackStart := stackEnd - szaddr mm.mappingMu.Lock() @@ -281,18 +282,18 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, erro // MUnmap implements the semantics of Linux's munmap(2). 
func (mm *MemoryManager) MUnmap(ctx context.Context, addr hostarch.Addr, length uint64) error { if addr != addr.RoundDown() { - return syserror.EINVAL + return linuxerr.EINVAL } if length == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } la, ok := hostarch.Addr(length).RoundUp() if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } ar, ok := addr.ToRange(uint64(la)) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.Lock() @@ -331,7 +332,7 @@ const ( func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldSize uint64, newSize uint64, opts MRemapOpts) (hostarch.Addr, error) { // "Note that old_address has to be page aligned." - mremap(2) if oldAddr.RoundDown() != oldAddr { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Linux treats an old_size that rounds up to 0 as 0, which is otherwise a @@ -340,13 +341,13 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS oldSize = uint64(oldSizeAddr) newSizeAddr, ok := hostarch.Addr(newSize).RoundUp() if !ok || newSizeAddr == 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } newSize = uint64(newSizeAddr) oldEnd, ok := oldAddr.AddLength(oldSize) if !ok { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } mm.mappingMu.Lock() @@ -355,7 +356,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS // All cases require that a vma exists at oldAddr. vseg := mm.vmas.FindSegment(oldAddr) if !vseg.Ok() { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Behavior matrix: @@ -379,7 +380,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { if newLockedAS := mm.lockedAS - oldSize + newSize; newLockedAS > mlockLimit { - return 0, syserror.EAGAIN + return 0, linuxerr.EAGAIN } } } @@ -402,7 +403,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS // Check that oldEnd maps to the same vma as oldAddr. if vseg.End() < oldEnd { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // "Grow" the existing vma by creating a new mergeable one. vma := vseg.ValuePtr() @@ -450,15 +451,15 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS case MRemapMustMove: newAddr := opts.NewAddr if newAddr.RoundDown() != newAddr { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } var ok bool newAR, ok = newAddr.ToRange(newSize) if !ok { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if (hostarch.AddrRange{oldAddr, oldEnd}).Overlaps(newAR) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Check that the new region is valid. @@ -492,19 +493,19 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS // Check that oldEnd maps to the same vma as oldAddr. if vseg.End() < oldEnd { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Check against RLIMIT_AS. newUsageAS := mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length()) if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS { - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } if vma := vseg.ValuePtr(); vma.mappable != nil { // Check that offset+length does not overflow. 
if vma.off+uint64(newAR.Length()) < vma.off { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Inform the Mappable, if any, of the new mapping. if err := vma.mappable.CopyMapping(ctx, mm, oldAR, newAR, vseg.mappableOffsetAt(oldAR.Start), vma.canWriteMappableLocked()); err != nil { @@ -590,18 +591,18 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS // MProtect implements the semantics of Linux's mprotect(2). func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms hostarch.AccessType, growsDown bool) error { if addr.RoundDown() != addr { - return syserror.EINVAL + return linuxerr.EINVAL } if length == 0 { return nil } rlength, ok := hostarch.Addr(length).RoundUp() if !ok { - return syserror.ENOMEM + return linuxerr.ENOMEM } ar, ok := addr.ToRange(uint64(rlength)) if !ok { - return syserror.ENOMEM + return linuxerr.ENOMEM } effectivePerms := realPerms.Effective() @@ -614,19 +615,19 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h // the non-growsDown case. vseg := mm.vmas.LowerBoundSegment(ar.Start) if !vseg.Ok() { - return syserror.ENOMEM + return linuxerr.ENOMEM } if growsDown { if !vseg.ValuePtr().growsDown { - return syserror.EINVAL + return linuxerr.EINVAL } if ar.End <= vseg.Start() { - return syserror.ENOMEM + return linuxerr.ENOMEM } ar.Start = vseg.Start() } else { if ar.Start < vseg.Start() { - return syserror.ENOMEM + return linuxerr.ENOMEM } } @@ -644,7 +645,7 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h // Check for permission validity before splitting vmas, for consistency // with Linux. if !vseg.ValuePtr().maxPerms.SupersetOf(effectivePerms) { - return syserror.EACCES + return linuxerr.EACCES } vseg = mm.vmas.Isolate(vseg, ar) @@ -686,7 +687,7 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h } vseg, _ = vseg.NextNonEmpty() if !vseg.Ok() { - return syserror.ENOMEM + return linuxerr.ENOMEM } } } @@ -711,7 +712,7 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch. if addr < mm.brk.Start { addr = mm.brk.End mm.mappingMu.Unlock() - return addr, syserror.EINVAL + return addr, linuxerr.EINVAL } // TODO(gvisor.dev/issue/156): This enforces RLIMIT_DATA, but is @@ -722,7 +723,7 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch. if uint64(addr-mm.brk.Start) > limits.FromContext(ctx).Get(limits.Data).Cur { addr = mm.brk.End mm.mappingMu.Unlock() - return addr, syserror.ENOMEM + return addr, linuxerr.ENOMEM } oldbrkpg, _ := mm.brk.End.RoundUp() @@ -730,7 +731,7 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch. 
if !ok { addr = mm.brk.End mm.mappingMu.Unlock() - return addr, syserror.EFAULT + return addr, linuxerr.EFAULT } switch { @@ -780,7 +781,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u la, _ := hostarch.Addr(length + addr.PageOffset()).RoundUp() ar, ok := addr.RoundDown().ToRange(uint64(la)) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.Lock() @@ -792,11 +793,11 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur if mlockLimit == 0 { mm.mappingMu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } if newLockedAS := mm.lockedAS + uint64(ar.Length()) - mm.mlockedBytesRangeLocked(ar); newLockedAS > mlockLimit { mm.mappingMu.Unlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } } } @@ -833,7 +834,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u mm.vmas.MergeAdjacent(ar) if unmapped { mm.mappingMu.Unlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } if mode == memmap.MLockEager { @@ -848,18 +849,18 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u // case, which is converted to ENOMEM by mlock. mm.activeMu.Unlock() mm.mappingMu.RUnlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } _, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), hostarch.NoAccess) if err != nil { mm.activeMu.Unlock() mm.mappingMu.RUnlock() // Linux: mm/mlock.c:__mlock_posix_error_return() - if err == syserror.EFAULT { - return syserror.ENOMEM + if linuxerr.Equals(linuxerr.EFAULT, err) { + return linuxerr.ENOMEM } - if err == syserror.ENOMEM { - return syserror.EAGAIN + if linuxerr.Equals(linuxerr.ENOMEM, err) { + return linuxerr.EAGAIN } return err } @@ -898,7 +899,7 @@ type MLockAllOpts struct { // depending on opts. func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error { if !opts.Current && !opts.Future { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.Lock() @@ -911,11 +912,11 @@ func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur if mlockLimit == 0 { mm.mappingMu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } if uint64(mm.vmas.Span()) > mlockLimit { mm.mappingMu.Unlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } } } @@ -970,7 +971,7 @@ func (mm *MemoryManager) NumaPolicy(addr hostarch.Addr) (linux.NumaPolicy, uint6 defer mm.mappingMu.RUnlock() vseg := mm.vmas.FindSegment(addr) if !vseg.Ok() { - return 0, 0, syserror.EFAULT + return 0, 0, linuxerr.EFAULT } vma := vseg.ValuePtr() return vma.numaPolicy, vma.numaNodemask, nil @@ -979,13 +980,13 @@ func (mm *MemoryManager) NumaPolicy(addr hostarch.Addr) (linux.NumaPolicy, uint6 // SetNumaPolicy implements the semantics of Linux's mbind(). func (mm *MemoryManager) SetNumaPolicy(addr hostarch.Addr, length uint64, policy linux.NumaPolicy, nodemask uint64) error { if !addr.IsPageAligned() { - return syserror.EINVAL + return linuxerr.EINVAL } // Linux allows this to overflow. la, _ := hostarch.Addr(length).RoundUp() ar, ok := addr.ToRange(uint64(la)) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } if ar.Length() == 0 { return nil @@ -1003,7 +1004,7 @@ func (mm *MemoryManager) SetNumaPolicy(addr hostarch.Addr, length uint64, policy if !vseg.Ok() || lastEnd < vseg.Start() { // "EFAULT: ... 
there was an unmapped hole in the specified memory // range specified [sic] by addr and len." - mbind(2) - return syserror.EFAULT + return linuxerr.EFAULT } vseg = mm.vmas.Isolate(vseg, ar) vma := vseg.ValuePtr() @@ -1021,7 +1022,7 @@ func (mm *MemoryManager) SetNumaPolicy(addr hostarch.Addr, length uint64, policy func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork bool) error { ar, ok := addr.ToRange(length) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.Lock() @@ -1038,7 +1039,7 @@ func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork } if mm.vmas.SpanRange(ar) != ar.Length() { - return syserror.ENOMEM + return linuxerr.ENOMEM } return nil } @@ -1047,7 +1048,7 @@ func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error { ar, ok := addr.ToRange(length) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.RLock() @@ -1063,7 +1064,7 @@ func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error { for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() { vma := vseg.ValuePtr() if vma.mlockMode != memmap.MLockNone { - return syserror.EINVAL + return linuxerr.EINVAL } vsegAR := vseg.Range().Intersect(ar) // pseg should already correspond to either this vma or a later one, @@ -1097,7 +1098,7 @@ func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error { // to the rest (but returns ENOMEM from the system call, as it should)." - // madvise(2) if mm.vmas.SpanRange(ar) != ar.Length() { - return syserror.ENOMEM + return linuxerr.ENOMEM } return nil } @@ -1114,18 +1115,18 @@ type MSyncOpts struct { // MSync implements the semantics of Linux's msync(). func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length uint64, opts MSyncOpts) error { if addr != addr.RoundDown() { - return syserror.EINVAL + return linuxerr.EINVAL } if length == 0 { return nil } la, ok := hostarch.Addr(length).RoundUp() if !ok { - return syserror.ENOMEM + return linuxerr.ENOMEM } ar, ok := addr.ToRange(uint64(la)) if !ok { - return syserror.ENOMEM + return linuxerr.ENOMEM } mm.mappingMu.RLock() @@ -1133,7 +1134,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u vseg := mm.vmas.LowerBoundSegment(ar.Start) if !vseg.Ok() { mm.mappingMu.RUnlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } var unmapped bool lastEnd := ar.Start @@ -1150,7 +1151,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u vma := vseg.ValuePtr() if opts.Invalidate && vma.mlockMode != memmap.MLockNone { mm.mappingMu.RUnlock() - return syserror.EBUSY + return linuxerr.EBUSY } // It's only possible to have dirtied the Mappable through a shared // mapping. Don't check if the mapping is writable, because mprotect @@ -1182,7 +1183,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u } if unmapped { - return syserror.ENOMEM + return linuxerr.ENOMEM } return nil } @@ -1191,7 +1192,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr hostarch.Addr) (futex.Key, error) { ar, ok := addr.ToRange(4) // sizeof(int32). 
if !ok { - return futex.Key{}, syserror.EFAULT + return futex.Key{}, linuxerr.EFAULT } mm.mappingMu.RLock() diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go index 0d019e41d..e34b7a2f7 100644 --- a/pkg/sentry/mm/vma.go +++ b/pkg/sentry/mm/vma.go @@ -19,12 +19,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" ) // Preconditions: @@ -58,7 +58,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp newUsageAS -= uint64(mm.vmas.SpanRange(ar)) } if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS { - return vmaIterator{}, hostarch.AddrRange{}, syserror.ENOMEM + return vmaIterator{}, hostarch.AddrRange{}, linuxerr.ENOMEM } if opts.MLockMode != memmap.MLockNone { @@ -66,14 +66,14 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur if mlockLimit == 0 { - return vmaIterator{}, hostarch.AddrRange{}, syserror.EPERM + return vmaIterator{}, hostarch.AddrRange{}, linuxerr.EPERM } newLockedAS := mm.lockedAS + opts.Length if opts.Unmap { newLockedAS -= mm.mlockedBytesRangeLocked(ar) } if newLockedAS > mlockLimit { - return vmaIterator{}, hostarch.AddrRange{}, syserror.EAGAIN + return vmaIterator{}, hostarch.AddrRange{}, linuxerr.EAGAIN } } } @@ -177,7 +177,7 @@ func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOp // Fixed mappings accept only the requested address. if opts.Fixed { - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } // Prefer hugepage alignment if a hugepage or more is requested. @@ -215,7 +215,7 @@ func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bou return gr.Start, nil } } - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } // Preconditions: mm.mappingMu must be locked. @@ -235,7 +235,7 @@ func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bo return start, nil } } - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } // Preconditions: mm.mappingMu must be locked. @@ -288,7 +288,7 @@ func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar hostarch.AddrRang vma := vseg.ValuePtr() if addr < vseg.Start() { // TODO(jamieliu): Implement vma.growsDown here. - return vbegin, vgap, syserror.EFAULT + return vbegin, vgap, linuxerr.EFAULT } perms := vma.effectivePerms @@ -296,7 +296,7 @@ func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar hostarch.AddrRang perms = vma.maxPerms } if !perms.SupersetOf(at) { - return vbegin, vgap, syserror.EPERM + return vbegin, vgap, linuxerr.EPERM } addr = vseg.End() @@ -308,7 +308,7 @@ func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar hostarch.AddrRang } // Ran out of vmas before ar.End. - return vbegin, vgap, syserror.EFAULT + return vbegin, vgap, linuxerr.EFAULT } // getVecVMAsLocked ensures that vmas exist for all addresses in ars, and |
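Besides the error migration, the most substantive change above is the pma.go copy-on-write heuristic for growsDown (stack) vmas: since a newly forked process rarely uses much of its stack before exiting or calling execve(), only up to one page before and after the faulting range is copied rather than the whole mask range. A standalone sketch of that range expansion, assuming only the hostarch package; the function name is hypothetical:

    package example

    import "gvisor.dev/gvisor/pkg/hostarch"

    // expandStackCopyRange widens ar by one page on each side, skipping either
    // extension if the arithmetic would wrap around the address space. It
    // mirrors the stackMaskAR computation added to getPMAsInternalLocked.
    func expandStackCopyRange(ar hostarch.AddrRange) hostarch.AddrRange {
        if newStart := ar.Start - hostarch.PageSize; newStart < ar.Start {
            // No underflow: extend one page down.
            ar.Start = newStart
        }
        if newEnd := ar.End + hostarch.PageSize; newEnd > ar.End {
            // No overflow: extend one page up.
            ar.End = newEnd
        }
        return ar
    }

As in the patch, the expanded range is then intersected with the pma's own range (copyAR = pseg.Range().Intersect(stackMaskAR)), so at most the neighboring resident pages are copied.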