diff options
-rw-r--r-- | pkg/sentry/fs/fsutil/README.md | 17 | ||||
-rw-r--r-- | pkg/sentry/fs/fsutil/host_mappable.go | 10 | ||||
-rw-r--r-- | pkg/sentry/fs/fsutil/inode_cached.go | 28 | ||||
-rw-r--r-- | pkg/sentry/fs/tmpfs/inode_file.go | 16 | ||||
-rw-r--r-- | pkg/sentry/memmap/memmap.go | 14 | ||||
-rw-r--r-- | pkg/sentry/mm/address_space.go | 2 | ||||
-rw-r--r-- | pkg/sentry/mm/mm.go | 3 | ||||
-rw-r--r-- | pkg/sentry/platform/filemem/filemem.go | 65 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/BUILD | 24 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/address_space.go | 104 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/host_map.go | 184 | ||||
-rw-r--r-- | pkg/sentry/platform/platform.go | 95 | ||||
-rw-r--r-- | pkg/sentry/platform/ptrace/subprocess.go | 4 |
13 files changed, 139 insertions, 427 deletions
diff --git a/pkg/sentry/fs/fsutil/README.md b/pkg/sentry/fs/fsutil/README.md index d3780e9fa..6e677890c 100644 --- a/pkg/sentry/fs/fsutil/README.md +++ b/pkg/sentry/fs/fsutil/README.md @@ -108,9 +108,9 @@ The host then sends a `SIGSEGV` to the sentry because the address range [`A`, `A`+8) is not mapped on the host. The `SIGSEGV` indicates that the memory was accessed writable. The sentry looks up the vma associated with [`A`, `A`+8), finds the file that was mapped and its `CachingInodeOperations`. It then calls -`CachingInodeOperations.MapInto` which allocates memory to back [`A`, `A`+8). It -may choose to allocate more memory (i.e. do "readahead") to minimize subsequent -faults. +`CachingInodeOperations.Translate` which allocates memory to back [`A`, `A`+8). +It may choose to allocate more memory (i.e. do "readahead") to minimize +subsequent faults. Memory that is allocated comes from a host tmpfs file (see `filemem.FileMem`). The host tmpfs file memory is brought up to date with the contents of the mapped @@ -138,12 +138,11 @@ memcpy(A, buffer, 4); ``` Since the first process has already mapped and accessed the same region of the -file writable, `CachingInodeOperations.MapInto` is called but re-maps the memory -that has already been allocated (because the host mapping can be invalidated at -any time) rather than allocating new memory. The address range [`A`, `A`+0x1000) -reflects the same cached view of the file as the first process sees. For -example, reading 8 bytes from the file from either process via read(2) starting -at offset 0 returns a consistent "bbbbaaaa". +file writable, `CachingInodeOperations.Translate` is called but returns the +memory that has already been allocated rather than allocating new memory. The +address range [`A`, `A`+0x1000) reflects the same cached view of the file as the +first process sees. For example, reading 8 bytes from the file from either +process via read(2) starting at offset 0 returns a consistent "bbbbaaaa". When this process no longer needs the shared memory, it may do: diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go index 340f8d288..1bb5c6b6e 100644 --- a/pkg/sentry/fs/fsutil/host_mappable.go +++ b/pkg/sentry/fs/fsutil/host_mappable.go @@ -106,16 +106,16 @@ func (h *HostMappable) InvalidateUnsavable(ctx context.Context) error { return nil } -// MapInto implements platform.File.MapInto. -func (h *HostMappable) MapInto(as platform.AddressSpace, addr usermem.Addr, fr platform.FileRange, at usermem.AccessType, precommit bool) error { - return as.MapFile(addr, h.backingFile.FD(), fr, at, precommit) -} - // MapInternal implements platform.File.MapInternal. func (h *HostMappable) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { return h.hostFileMapper.MapInternal(fr, h.backingFile.FD(), at.Write) } +// FD implements platform.File.FD. +func (h *HostMappable) FD() int { + return h.backingFile.FD() +} + // IncRef implements platform.File.IncRef. func (h *HostMappable) IncRef(fr platform.FileRange) { mr := memmap.MappableRange{Start: fr.Start, End: fr.End} diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index e3b52e943..ef11676b8 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -835,20 +835,6 @@ func (c *CachingInodeOperations) InvalidateUnsavable(ctx context.Context) error return nil } -// MapInto implements platform.File.MapInto. This is used when we directly map -// an underlying host fd and CachingInodeOperations is used as the platform.File -// during translation. -func (c *CachingInodeOperations) MapInto(as platform.AddressSpace, addr usermem.Addr, fr platform.FileRange, at usermem.AccessType, precommit bool) error { - return as.MapFile(addr, c.backingFile.FD(), fr, at, precommit) -} - -// MapInternal implements platform.File.MapInternal. This is used when we -// directly map an underlying host fd and CachingInodeOperations is used as the -// platform.File during translation. -func (c *CachingInodeOperations) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { - return c.hostFileMapper.MapInternal(fr, c.backingFile.FD(), at.Write) -} - // IncRef implements platform.File.IncRef. This is used when we directly map an // underlying host fd and CachingInodeOperations is used as the platform.File // during translation. @@ -900,3 +886,17 @@ func (c *CachingInodeOperations) DecRef(fr platform.FileRange) { c.dataMu.Unlock() } + +// MapInternal implements platform.File.MapInternal. This is used when we +// directly map an underlying host fd and CachingInodeOperations is used as the +// platform.File during translation. +func (c *CachingInodeOperations) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { + return c.hostFileMapper.MapInternal(fr, c.backingFile.FD(), at.Write) +} + +// FD implements platform.File.FD. This is used when we directly map an +// underlying host fd and CachingInodeOperations is used as the platform.File +// during translation. +func (c *CachingInodeOperations) FD() int { + return c.backingFile.FD() +} diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index 5648ff8f4..13d06684d 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -39,20 +39,8 @@ var ( ) // fileInodeOperations implements fs.InodeOperations for a regular tmpfs file. -// These files are backed by FrameRegions allocated from a platform.Memory, -// and may be directly mapped. -// -// The tmpfs file memory is backed by FrameRegions, each of which is reference -// counted. frames maintains a single reference on each of the FrameRegions. -// Since these contain the contents of the file, the reference may only be -// decremented once this file is both deleted and all handles to the file have -// been closed. -// -// Mappable users may also call IncRefOn/DecRefOn, generally to indicate that -// they plan to use MapInto to map the file into an AddressSpace. These calls -// include an InvalidatorRegion associated with that reference. When the -// referenced portion of the file is removed (with Truncate), the associated -// InvalidatorRegion is invalidated. +// These files are backed by pages allocated from a platform.Memory, and may be +// directly mapped. // // +stateify savable type fileInodeOperations struct { diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go index cf20b11e3..70cdf428b 100644 --- a/pkg/sentry/memmap/memmap.go +++ b/pkg/sentry/memmap/memmap.go @@ -70,11 +70,13 @@ type Mappable interface { // of offsets specified by required, and at most the range of offsets // specified by optional. at is the set of access types that may be // performed using the returned Translations. If not all required offsets - // are translated, it returns a non-nil error explaining why. Returned - // translations, and any mappings returned by platform.File.MapInternal for - // translated platform.Files, are valid until invalidated by a call back to + // are translated, it returns a non-nil error explaining why. + // + // Translations are valid until invalidated by a callback to // MappingSpace.Invalidate or until the caller removes its mapping of the - // translated range. + // translated range. Mappable implementations must ensure that at least one + // reference is held on all pages in a platform.File that may be the result + // of a valid Translation. // // Preconditions: required.Length() > 0. optional.IsSupersetOf(required). // required and optional must be page-aligned. The caller must have @@ -98,9 +100,7 @@ type Translation struct { // Source is the translated range in the Mappable. Source MappableRange - // File is the mapped file. When the Translation is invalidated, pages - // mapped by File.MapInto must be unmapped, and pages mapped by - // File.MapInternal become invalid. + // File is the mapped file. File platform.File // Offset is the offset into File at which this Translation begins. diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go index e7aa24c69..90cfef746 100644 --- a/pkg/sentry/mm/address_space.go +++ b/pkg/sentry/mm/address_space.go @@ -183,7 +183,7 @@ func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, pre if pma.needCOW { perms.Write = false } - if err := pma.file.MapInto(mm.as, pmaMapAR.Start, pseg.fileRangeOf(pmaMapAR), perms, precommit); err != nil { + if err := mm.as.MapFile(pmaMapAR.Start, pma.file, pseg.fileRangeOf(pmaMapAR), perms, precommit); err != nil { return err } pseg = pseg.NextSegment() diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index a69b8c7be..e2c636f38 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -24,7 +24,8 @@ // mm.MemoryManager.activeMu // Locks taken by memmap.Mappable.Translate // mm.privateRefs.mu -// platform.File locks +// platform.AddressSpace locks +// platform.File locks // mm.aioManager.mu // mm.AIOContext.mu // diff --git a/pkg/sentry/platform/filemem/filemem.go b/pkg/sentry/platform/filemem/filemem.go index 97da31e70..f41c70ba5 100644 --- a/pkg/sentry/platform/filemem/filemem.go +++ b/pkg/sentry/platform/filemem/filemem.go @@ -32,7 +32,6 @@ import ( "time" "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/memutil" "gvisor.googlesource.com/gvisor/pkg/sentry/platform" "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" @@ -504,39 +503,6 @@ func (f *FileMem) markReclaimed(fr platform.FileRange) { } } -// MapInto implements platform.File.MapInto. -func (f *FileMem) MapInto(as platform.AddressSpace, addr usermem.Addr, fr platform.FileRange, at usermem.AccessType, precommit bool) error { - if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 { - panic(fmt.Sprintf("invalid range: %v", fr)) - } - return as.MapFile(addr, int(f.file.Fd()), fr, at, precommit) -} - -// MapInternal implements platform.File.MapInternal. -func (f *FileMem) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { - if !fr.WellFormed() || fr.Length() == 0 { - panic(fmt.Sprintf("invalid range: %v", fr)) - } - if at.Execute { - return safemem.BlockSeq{}, syserror.EACCES - } - - chunks := ((fr.End + chunkMask) >> chunkShift) - (fr.Start >> chunkShift) - if chunks == 1 { - // Avoid an unnecessary slice allocation. - var seq safemem.BlockSeq - err := f.forEachMappingSlice(fr, func(bs []byte) { - seq = safemem.BlockSeqOf(safemem.BlockFromSafeSlice(bs)) - }) - return seq, err - } - blocks := make([]safemem.Block, 0, chunks) - err := f.forEachMappingSlice(fr, func(bs []byte) { - blocks = append(blocks, safemem.BlockFromSafeSlice(bs)) - }) - return safemem.BlockSeqFromSlice(blocks), err -} - // IncRef implements platform.File.IncRef. func (f *FileMem) IncRef(fr platform.FileRange) { if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 { @@ -596,9 +562,29 @@ func (f *FileMem) DecRef(fr platform.FileRange) { } } -// Flush implements platform.Mappable.Flush. -func (f *FileMem) Flush(ctx context.Context) error { - return nil +// MapInternal implements platform.File.MapInternal. +func (f *FileMem) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { + if !fr.WellFormed() || fr.Length() == 0 { + panic(fmt.Sprintf("invalid range: %v", fr)) + } + if at.Execute { + return safemem.BlockSeq{}, syserror.EACCES + } + + chunks := ((fr.End + chunkMask) >> chunkShift) - (fr.Start >> chunkShift) + if chunks == 1 { + // Avoid an unnecessary slice allocation. + var seq safemem.BlockSeq + err := f.forEachMappingSlice(fr, func(bs []byte) { + seq = safemem.BlockSeqOf(safemem.BlockFromSafeSlice(bs)) + }) + return seq, err + } + blocks := make([]safemem.Block, 0, chunks) + err := f.forEachMappingSlice(fr, func(bs []byte) { + blocks = append(blocks, safemem.BlockFromSafeSlice(bs)) + }) + return safemem.BlockSeqFromSlice(blocks), err } // forEachMappingSlice invokes fn on a sequence of byte slices that @@ -653,6 +639,11 @@ func (f *FileMem) getChunkMapping(chunk int) ([]uintptr, uintptr, error) { return mappings, m, nil } +// FD implements platform.File.FD. +func (f *FileMem) FD() int { + return int(f.file.Fd()) +} + // UpdateUsage implements platform.Memory.UpdateUsage. func (f *FileMem) UpdateUsage() error { f.mu.Lock() diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 6e40b3177..b7bf88249 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -2,28 +2,6 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) -load("//tools/go_generics:defs.bzl", "go_template_instance") - -go_template_instance( - name = "host_map_set", - out = "host_map_set.go", - consts = { - "minDegree": "15", - }, - imports = { - "usermem": "gvisor.googlesource.com/gvisor/pkg/sentry/usermem", - }, - package = "kvm", - prefix = "hostMap", - template = "//pkg/segment:generic_set", - types = { - "Key": "usermem.Addr", - "Range": "usermem.AddrRange", - "Value": "uintptr", - "Functions": "hostMapSetFunctions", - }, -) - go_library( name = "kvm", srcs = [ @@ -36,8 +14,6 @@ go_library( "bluepill_fault.go", "bluepill_unsafe.go", "context.go", - "host_map.go", - "host_map_set.go", "kvm.go", "kvm_amd64.go", "kvm_amd64_unsafe.go", diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index 72e897a9a..6d8d8e65b 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -15,7 +15,6 @@ package kvm import ( - "reflect" "sync" "sync/atomic" @@ -88,11 +87,6 @@ type addressSpace struct { // dirtySet is the set of dirty vCPUs. dirtySet *dirtySet - - // files contains files mapped in the host address space. - // - // See host_map.go for more information. - files hostMap } // invalidate is the implementation for Invalidate. @@ -118,6 +112,11 @@ func (as *addressSpace) Touch(c *vCPU) bool { return as.dirtySet.mark(c) } +type hostMapEntry struct { + addr uintptr + length uintptr +} + func (as *addressSpace) mapHost(addr usermem.Addr, m hostMapEntry, at usermem.AccessType) (inv bool) { for m.length > 0 { physical, length, ok := translateToPhysical(m.addr) @@ -158,100 +157,57 @@ func (as *addressSpace) mapHost(addr usermem.Addr, m hostMapEntry, at usermem.Ac return inv } -func (as *addressSpace) mapHostFile(addr usermem.Addr, fd int, fr platform.FileRange, at usermem.AccessType) error { - // Create custom host mappings. - ms, err := as.files.CreateMappings(usermem.AddrRange{ - Start: addr, - End: addr + usermem.Addr(fr.End-fr.Start), - }, at, fd, fr.Start) - if err != nil { - return err - } - - inv := false - for _, m := range ms { - // The host mapped slices are guaranteed to be aligned. - prev := as.mapHost(addr, m, at) - inv = inv || prev - addr += usermem.Addr(m.length) - } - if inv { - as.invalidate() - } - - return nil -} +// MapFile implements platform.AddressSpace.MapFile. +func (as *addressSpace) MapFile(addr usermem.Addr, f platform.File, fr platform.FileRange, at usermem.AccessType, precommit bool) error { + as.mu.Lock() + defer as.mu.Unlock() -func (as *addressSpace) mapFilemem(addr usermem.Addr, fr platform.FileRange, at usermem.AccessType, precommit bool) error { - // TODO: Lock order at the platform level is not sufficiently - // well-defined to guarantee that the caller (FileMem.MapInto) is not - // holding any locks that FileMem.MapInternal may take. - - // Retrieve mappings for the underlying filemem. Note that the - // permissions here are largely irrelevant, since it corresponds to - // physical memory for the guest. We enforce the given access type - // below, in the guest page tables. - bs, err := as.filemem.MapInternal(fr, usermem.AccessType{ - Read: true, - Write: true, + // Get mappings in the sentry's address space, which are guaranteed to be + // valid as long as a reference is held on the mapped pages (which is in + // turn required by AddressSpace.MapFile precondition). + // + // If precommit is true, we will touch mappings to commit them, so ensure + // that mappings are readable from sentry context. + // + // We don't execute from application file-mapped memory, and guest page + // tables don't care if we have execute permission (but they do need pages + // to be readable). + bs, err := f.MapInternal(fr, usermem.AccessType{ + Read: at.Read || at.Execute || precommit, + Write: at.Write, }) if err != nil { return err } - // Save the original range for invalidation. - orig := usermem.AddrRange{ - Start: addr, - End: addr + usermem.Addr(fr.End-fr.Start), - } - + // Map the mappings in the sentry's address space (guest physical memory) + // into the application's address space (guest virtual memory). inv := false for !bs.IsEmpty() { b := bs.Head() bs = bs.Tail() // Since fr was page-aligned, b should also be page-aligned. We do the // lookup in our host page tables for this translation. - s := b.ToSlice() if precommit { + s := b.ToSlice() for i := 0; i < len(s); i += usermem.PageSize { _ = s[i] // Touch to commit. } } prev := as.mapHost(addr, hostMapEntry{ - addr: reflect.ValueOf(&s[0]).Pointer(), - length: uintptr(len(s)), + addr: b.Addr(), + length: uintptr(b.Len()), }, at) inv = inv || prev - addr += usermem.Addr(len(s)) + addr += usermem.Addr(b.Len()) } if inv { as.invalidate() - as.files.DeleteMapping(orig) } return nil } -// MapFile implements platform.AddressSpace.MapFile. -func (as *addressSpace) MapFile(addr usermem.Addr, fd int, fr platform.FileRange, at usermem.AccessType, precommit bool) error { - as.mu.Lock() - defer as.mu.Unlock() - - // Create an appropriate mapping. If this is filemem, we don't create - // custom mappings for each in-application mapping. For files however, - // we create distinct mappings for each address space. Unfortunately, - // there's not a better way to manage this here. The file underlying - // this fd can change at any time, so we can't actually index the file - // and share between address space. Oh well. It's all referring to the - // same physical pages, hopefully we don't run out of address space. - if fd != int(as.filemem.File().Fd()) { - // N.B. precommit is ignored for host files. - return as.mapHostFile(addr, fd, fr, at) - } - - return as.mapFilemem(addr, fr, at, precommit) -} - // Unmap unmaps the given range by calling pagetables.PageTables.Unmap. func (as *addressSpace) Unmap(addr usermem.Addr, length uint64) { as.mu.Lock() @@ -264,10 +220,6 @@ func (as *addressSpace) Unmap(addr usermem.Addr, length uint64) { }) if prev { as.invalidate() - as.files.DeleteMapping(usermem.AddrRange{ - Start: addr, - End: addr + usermem.Addr(length), - }) // Recycle any freed intermediate pages. as.pageTables.Allocator.Recycle() diff --git a/pkg/sentry/platform/kvm/host_map.go b/pkg/sentry/platform/kvm/host_map.go deleted file mode 100644 index ee6a1a42d..000000000 --- a/pkg/sentry/platform/kvm/host_map.go +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2018 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kvm - -import ( - "fmt" - "sync" - "syscall" - - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" -) - -type hostMap struct { - // mu protects below. - mu sync.RWMutex - - // set contains host mappings. - set hostMapSet -} - -type hostMapEntry struct { - addr uintptr - length uintptr -} - -// forEach iterates over all mappings in the given range. -// -// Precondition: segFn and gapFn must be non-nil. -func (hm *hostMap) forEach( - r usermem.AddrRange, - segFn func(offset uint64, m hostMapEntry), - gapFn func(offset uint64, length uintptr) (uintptr, bool)) { - - seg, gap := hm.set.Find(r.Start) - for { - if seg.Ok() && seg.Start() < r.End { - // A valid segment: pass information. - overlap := seg.Range().Intersect(r) - segOffset := uintptr(overlap.Start - seg.Start()) - mapOffset := uint64(overlap.Start - r.Start) - segFn(mapOffset, hostMapEntry{ - addr: seg.Value() + segOffset, - length: uintptr(overlap.Length()), - }) - seg, gap = seg.NextNonEmpty() - } else if gap.Ok() && gap.Start() < r.End { - // A gap: pass gap information. - overlap := gap.Range().Intersect(r) - mapOffset := uint64(overlap.Start - r.Start) - addr, ok := gapFn(mapOffset, uintptr(overlap.Length())) - if ok { - seg = hm.set.Insert(gap, overlap, addr) - seg, gap = seg.NextNonEmpty() - } else { - seg = gap.NextSegment() - gap = hostMapGapIterator{} // Invalid. - } - } else { - // Terminal. - break - } - } -} - -func (hm *hostMap) createMappings(r usermem.AddrRange, at usermem.AccessType, fd int, offset uint64) (ms []hostMapEntry, err error) { - hm.forEach(r, func(mapOffset uint64, m hostMapEntry) { - // Replace any existing mappings. - _, _, errno := syscall.RawSyscall6( - syscall.SYS_MMAP, - m.addr, - m.length, - uintptr(at.Prot()), - syscall.MAP_FIXED|syscall.MAP_SHARED, - uintptr(fd), - uintptr(offset+mapOffset)) - if errno != 0 && err == nil { - err = errno - } - }, func(mapOffset uint64, length uintptr) (uintptr, bool) { - // Create a new mapping. - addr, _, errno := syscall.RawSyscall6( - syscall.SYS_MMAP, - 0, - length, - uintptr(at.Prot()), - syscall.MAP_SHARED, - uintptr(fd), - uintptr(offset+mapOffset)) - if errno != 0 { - err = errno - return 0, false - } - return addr, true - }) - if err != nil { - return nil, err - } - - // Collect all entries. - // - // We do this after the first iteration because some segments may have - // been merged in the above, and we'll return the simplest form. This - // also provides a basic sanity check in the form of no gaps. - hm.forEach(r, func(_ uint64, m hostMapEntry) { - ms = append(ms, m) - }, func(uint64, uintptr) (uintptr, bool) { - // Should not happen: we just mapped this above. - panic("unexpected gap") - }) - - return ms, nil -} - -// CreateMappings creates a new set of host mapping entries. -func (hm *hostMap) CreateMappings(r usermem.AddrRange, at usermem.AccessType, fd int, offset uint64) (ms []hostMapEntry, err error) { - hm.mu.Lock() - ms, err = hm.createMappings(r, at, fd, offset) - hm.mu.Unlock() - return -} - -func (hm *hostMap) deleteMapping(r usermem.AddrRange) { - // Remove all the existing mappings. - hm.forEach(r, func(_ uint64, m hostMapEntry) { - _, _, errno := syscall.RawSyscall( - syscall.SYS_MUNMAP, - m.addr, - m.length, - 0) - if errno != 0 { - // Should never happen. - panic(fmt.Sprintf("unmap error: %v", errno)) - } - }, func(uint64, uintptr) (uintptr, bool) { - // Sometimes deleteMapping will be called on a larger range - // than physical mappings are defined. That's okay. - return 0, false - }) - - // Knock the entire range out. - hm.set.RemoveRange(r) -} - -// DeleteMapping deletes the given range. -func (hm *hostMap) DeleteMapping(r usermem.AddrRange) { - hm.mu.Lock() - hm.deleteMapping(r) - hm.mu.Unlock() -} - -// hostMapSetFunctions is used in the implementation of mapSet. -type hostMapSetFunctions struct{} - -func (hostMapSetFunctions) MinKey() usermem.Addr { return 0 } -func (hostMapSetFunctions) MaxKey() usermem.Addr { return ^usermem.Addr(0) } -func (hostMapSetFunctions) ClearValue(val *uintptr) { *val = 0 } - -func (hostMapSetFunctions) Merge(r1 usermem.AddrRange, addr1 uintptr, r2 usermem.AddrRange, addr2 uintptr) (uintptr, bool) { - if addr1+uintptr(r1.Length()) != addr2 { - return 0, false - } - - // Since the two regions are contiguous in both the key space and the - // value space, we can just store a single segment with the first host - // virtual address; the logic above operates based on the size of the - // segments. - return addr1, true -} - -func (hostMapSetFunctions) Split(r usermem.AddrRange, hostAddr uintptr, split usermem.Addr) (uintptr, uintptr) { - return hostAddr, hostAddr + uintptr(split-r.Start) -} diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go index a9e76bd45..b2ce851da 100644 --- a/pkg/sentry/platform/platform.go +++ b/pkg/sentry/platform/platform.go @@ -52,11 +52,11 @@ type Platform interface { DetectsCPUPreemption() bool // MapUnit returns the alignment used for optional mappings into this - // platform's AddressSpaces. Higher values indicate lower per-page - // costs for AddressSpace.MapInto. As a special case, a MapUnit of 0 - // indicates that the cost of AddressSpace.MapInto is effectively - // independent of the number of pages mapped. If MapUnit is non-zero, - // it must be a power-of-2 multiple of usermem.PageSize. + // platform's AddressSpaces. Higher values indicate lower per-page costs + // for AddressSpace.MapFile. As a special case, a MapUnit of 0 indicates + // that the cost of AddressSpace.MapFile is effectively independent of the + // number of pages mapped. If MapUnit is non-zero, it must be a power-of-2 + // multiple of usermem.PageSize. MapUnit() uint64 // MinUserAddress returns the minimum mappable address on this @@ -194,17 +194,17 @@ const SignalInterrupt = linux.SIGCHLD // AddressSpace represents a virtual address space in which a Context can // execute. type AddressSpace interface { - // MapFile creates a shared mapping of offsets in fr, from the file - // with file descriptor fd, at address addr. Any existing overlapping - // mappings are silently replaced. + // MapFile creates a shared mapping of offsets fr from f at address addr. + // Any existing overlapping mappings are silently replaced. // - // If precommit is true, host memory should be committed to the mapping - // when MapFile returns when possible. The precommit flag is advisory - // and implementations may choose to ignore it. + // If precommit is true, the platform should eagerly commit resources (e.g. + // physical memory) to the mapping. The precommit flag is advisory and + // implementations may choose to ignore it. // - // Preconditions: addr and fr must be page-aligned. length > 0. - // at.Any() == true. - MapFile(addr usermem.Addr, fd int, fr FileRange, at usermem.AccessType, precommit bool) error + // Preconditions: addr and fr must be page-aligned. fr.Length() > 0. + // at.Any() == true. At least one reference must be held on all pages in + // fr, and must continue to be held as long as pages are mapped. + MapFile(addr usermem.Addr, f File, fr FileRange, at usermem.AccessType, precommit bool) error // Unmap unmaps the given range. // @@ -309,44 +309,39 @@ func (f SegmentationFault) Error() string { // File represents a host file that may be mapped into an AddressSpace. type File interface { - // MapInto maps fr into as, starting at addr, for accesses of type at. - // - // If precommit is true, the platform should eagerly commit resources (e.g. - // physical memory) to the mapping. The precommit flag is advisory and - // implementations may choose to ignore it. + // All pages in a File are reference-counted. + + // IncRef increments the reference count on all pages in fr. // - // Note that there is no File.Unmap; clients should use as.Unmap directly. + // Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() > + // 0. At least one reference must be held on all pages in fr. (The File + // interface does not provide a way to acquire an initial reference; + // implementors may define mechanisms for doing so.) + IncRef(fr FileRange) + + // DecRef decrements the reference count on all pages in fr. // - // Preconditions: fr.Start and fr.End must be page-aligned. - // fr.Length() > 0. at.Any() == true. Implementors may define - // additional requirements. - MapInto(as AddressSpace, addr usermem.Addr, fr FileRange, at usermem.AccessType, precommit bool) error + // Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() > + // 0. At least one reference must be held on all pages in fr. + DecRef(fr FileRange) // MapInternal returns a mapping of the given file offsets in the invoking - // process' address space for reading and writing. The returned mapping is - // valid as long as a reference is held on the mapped range. + // process' address space for reading and writing. // // Note that fr.Start and fr.End need not be page-aligned. // - // Preconditions: fr.Length() > 0. Implementors may define additional - // requirements. - MapInternal(fr FileRange, at usermem.AccessType) (safemem.BlockSeq, error) - - // IncRef signals that a region in the file is actively referenced through a - // memory map. Implementors must ensure that the contents of a referenced - // region remain consistent. Specifically, mappings returned by MapInternal - // must refer to the same underlying contents. If the implementor also - // implements the Memory interface, the file range must not be reused in a - // different allocation while it has active references. + // Preconditions: fr.Length() > 0. At least one reference must be held on + // all pages in fr. // - // Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() > 0. - IncRef(fr FileRange) + // Postconditions: The returned mapping is valid as long as at least one + // reference is held on the mapped pages. + MapInternal(fr FileRange, at usermem.AccessType) (safemem.BlockSeq, error) - // DecRef reduces the frame ref count on the range specified by fr. + // FD returns the file descriptor represented by the File. // - // Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() > - // 0. DecRef()s on a region must match earlier IncRef()s. - DecRef(fr FileRange) + // The only permitted operation on the returned file descriptor is to map + // pages from it consistent with the requirements of AddressSpace.MapFile. + FD() int } // FileRange represents a range of uint64 offsets into a File. @@ -361,19 +356,13 @@ func (fr FileRange) String() string { // Memory represents an allocatable File that may be mapped into any // AddressSpace associated with the same Platform. type Memory interface { - // Memory implements File methods with the following properties: - // - // - Pages mapped by MapInto must be allocated, and must be unmapped from - // all AddressSpaces before they are freed. - // - // - Pages mapped by MapInternal must be allocated. Returned mappings are - // guaranteed to be valid until the mapped pages are freed. File - // Allocate returns a range of pages of the given length, owned by the - // caller and with the given accounting kind. Allocated memory initially has - // a single reference and will automatically be freed when no references to - // them remain. See File.IncRef and File.DecRef. + // Allocate returns a range of initially-zeroed pages of the given length + // with the given accounting kind and a single reference held by the + // caller. When the last reference on an allocated page is released, + // ownership of the page is returned to the Memory, allowing it to be + // returned by a future call to Allocate. // // Preconditions: length must be page-aligned and non-zero. Allocate(length uint64, kind usage.MemoryKind) (FileRange, error) diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go index a9d083f5a..82f125073 100644 --- a/pkg/sentry/platform/ptrace/subprocess.go +++ b/pkg/sentry/platform/ptrace/subprocess.go @@ -563,7 +563,7 @@ func (s *subprocess) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintp } // MapFile implements platform.AddressSpace.MapFile. -func (s *subprocess) MapFile(addr usermem.Addr, fd int, fr platform.FileRange, at usermem.AccessType, precommit bool) error { +func (s *subprocess) MapFile(addr usermem.Addr, f platform.File, fr platform.FileRange, at usermem.AccessType, precommit bool) error { var flags int if precommit { flags |= syscall.MAP_POPULATE @@ -574,7 +574,7 @@ func (s *subprocess) MapFile(addr usermem.Addr, fd int, fr platform.FileRange, a arch.SyscallArgument{Value: uintptr(fr.Length())}, arch.SyscallArgument{Value: uintptr(at.Prot())}, arch.SyscallArgument{Value: uintptr(flags | syscall.MAP_SHARED | syscall.MAP_FIXED)}, - arch.SyscallArgument{Value: uintptr(fd)}, + arch.SyscallArgument{Value: uintptr(f.FD())}, arch.SyscallArgument{Value: uintptr(fr.Start)}) return err } |