summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/mm
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/mm')
-rw-r--r--pkg/sentry/mm/BUILD2
-rw-r--r--pkg/sentry/mm/lifecycle.go2
-rw-r--r--pkg/sentry/mm/mm.go4
-rw-r--r--pkg/sentry/mm/proc_pid_maps.go121
-rw-r--r--pkg/sentry/mm/procfs.go289
-rw-r--r--pkg/sentry/mm/syscalls.go4
-rw-r--r--pkg/sentry/mm/vma.go17
7 files changed, 309 insertions, 130 deletions
diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD
index 5a9185e5d..0997ec0a7 100644
--- a/pkg/sentry/mm/BUILD
+++ b/pkg/sentry/mm/BUILD
@@ -87,7 +87,7 @@ go_library(
"mm.go",
"pma.go",
"pma_set.go",
- "proc_pid_maps.go",
+ "procfs.go",
"save_restore.go",
"shm.go",
"special_mappable.go",
diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go
index a42e32b43..1ee8ae74e 100644
--- a/pkg/sentry/mm/lifecycle.go
+++ b/pkg/sentry/mm/lifecycle.go
@@ -86,7 +86,7 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
vmaAR := srcvseg.Range()
// Inform the Mappable, if any, of the new mapping.
if vma.mappable != nil {
- if err := vma.mappable.AddMapping(ctx, mm2, vmaAR, vma.off, vma.isMappableAsWritable()); err != nil {
+ if err := vma.mappable.AddMapping(ctx, mm2, vmaAR, vma.off, vma.canWriteMappableLocked()); err != nil {
mm2.removeVMAsLocked(ctx, mm2.applicationAddrRange())
return nil, err
}
diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go
index c0632d232..2154e7918 100644
--- a/pkg/sentry/mm/mm.go
+++ b/pkg/sentry/mm/mm.go
@@ -363,10 +363,6 @@ func (v *vma) loadRealPerms(b int) {
}
}
-func (v *vma) isMappableAsWritable() bool {
- return !v.private && v.maxPerms.Write
-}
-
// pma represents a platform mapping area.
//
// +stateify savable
diff --git a/pkg/sentry/mm/proc_pid_maps.go b/pkg/sentry/mm/proc_pid_maps.go
deleted file mode 100644
index 247ee45ef..000000000
--- a/pkg/sentry/mm/proc_pid_maps.go
+++ /dev/null
@@ -1,121 +0,0 @@
-// Copyright 2018 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package mm
-
-import (
- "bytes"
- "fmt"
- "strings"
-
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
-)
-
-const (
- // devMinorBits is the number of minor bits in a device number. Linux:
- // include/linux/kdev_t.h:MINORBITS
- devMinorBits = 20
-)
-
-// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
-func (mm *MemoryManager) NeedsUpdate(generation int64) bool {
- return true
-}
-
-// ReadSeqFileData is called by fs/proc.mapsData.ReadSeqFileData.
-func (mm *MemoryManager) ReadSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
- mm.mappingMu.RLock()
- defer mm.mappingMu.RUnlock()
- var data []seqfile.SeqData
- var start usermem.Addr
- if handle != nil {
- start = *handle.(*usermem.Addr)
- }
- for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
- // FIXME: If we use a usermem.Addr for the handle, we get
- // "panic: autosave error: type usermem.Addr is not registered".
- vmaAddr := vseg.End()
- data = append(data, seqfile.SeqData{
- Buf: mm.vmaMapsEntryLocked(ctx, vseg),
- Handle: &vmaAddr,
- })
- }
-
- // We always emulate vsyscall, so advertise it here. Everything about a
- // vsyscall region is static, so just hard code the maps entry since we
- // don't have a real vma backing it. The vsyscall region is at the end of
- // the virtual address space so nothing should be mapped after it (if
- // something is really mapped in the tiny ~10 MiB segment afterwards, we'll
- // get the sorting on the maps file wrong at worst; but that's not possible
- // on any current platform).
- //
- // Artifically adjust the seqfile handle so we only output vsyscall entry once.
- if vsyscallEnd := usermem.Addr(0xffffffffff601000); start != vsyscallEnd {
- data = append(data, seqfile.SeqData{
- Buf: []byte("ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"),
- Handle: &vsyscallEnd,
- })
- }
- return data, 1
-}
-
-// vmaMapsEntryLocked returns a /proc/[pid]/maps entry for the vma iterated by
-// vseg, including the trailing newline.
-//
-// Preconditions: mm.mappingMu must be locked.
-func (mm *MemoryManager) vmaMapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
- vma := vseg.ValuePtr()
- private := "p"
- if !vma.private {
- private = "s"
- }
-
- var dev, ino uint64
- if vma.id != nil {
- dev = vma.id.DeviceID()
- ino = vma.id.InodeID()
- }
- devMajor := uint32(dev >> devMinorBits)
- devMinor := uint32(dev & ((1 << devMinorBits) - 1))
-
- var b bytes.Buffer
- // Do not include the guard page: fs/proc/task_mmu.c:show_map_vma() =>
- // stack_guard_page_start().
- fmt.Fprintf(&b, "%08x-%08x %s%s %08x %02x:%02x %d ",
- vseg.Start(), vseg.End(), vma.realPerms, private, vma.off, devMajor, devMinor, ino)
-
- // Figure out our filename or hint.
- var s string
- if vma.hint != "" {
- s = vma.hint
- } else if vma.id != nil {
- // FIXME: We are holding mm.mappingMu here, which is
- // consistent with Linux's holding mmap_sem in
- // fs/proc/task_mmu.c:show_map_vma() => fs/seq_file.c:seq_file_path().
- // However, it's not clear that fs.File.MappedName() is actually
- // consistent with this lock order.
- s = vma.id.MappedName(ctx)
- }
- if s != "" {
- // Per linux, we pad until the 74th character.
- if pad := 73 - b.Len(); pad > 0 {
- b.WriteString(strings.Repeat(" ", pad))
- }
- b.WriteString(s)
- }
- b.WriteString("\n")
- return b.Bytes()
-}
diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go
new file mode 100644
index 000000000..0c4b8895d
--- /dev/null
+++ b/pkg/sentry/mm/procfs.go
@@ -0,0 +1,289 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mm
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+const (
+ // devMinorBits is the number of minor bits in a device number. Linux:
+ // include/linux/kdev_t.h:MINORBITS
+ devMinorBits = 20
+
+ vsyscallEnd = usermem.Addr(0xffffffffff601000)
+ vsyscallMapsEntry = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"
+ vsyscallSmapsEntry = vsyscallMapsEntry +
+ "Size: 4 kB\n" +
+ "Rss: 0 kB\n" +
+ "Pss: 0 kB\n" +
+ "Shared_Clean: 0 kB\n" +
+ "Shared_Dirty: 0 kB\n" +
+ "Private_Clean: 0 kB\n" +
+ "Private_Dirty: 0 kB\n" +
+ "Referenced: 0 kB\n" +
+ "Anonymous: 0 kB\n" +
+ "AnonHugePages: 0 kB\n" +
+ "Shared_Hugetlb: 0 kB\n" +
+ "Private_Hugetlb: 0 kB\n" +
+ "Swap: 0 kB\n" +
+ "SwapPss: 0 kB\n" +
+ "KernelPageSize: 4 kB\n" +
+ "MMUPageSize: 4 kB\n" +
+ "Locked: 0 kB\n" +
+ "VmFlags: rd ex \n"
+)
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
+func (mm *MemoryManager) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadMapsSeqFileData is called by fs/proc.mapsData.ReadSeqFileData to
+// implement /proc/[pid]/maps.
+func (mm *MemoryManager) ReadMapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ mm.mappingMu.RLock()
+ defer mm.mappingMu.RUnlock()
+ var data []seqfile.SeqData
+ var start usermem.Addr
+ if handle != nil {
+ start = *handle.(*usermem.Addr)
+ }
+ for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+ // FIXME: If we use a usermem.Addr for the handle, we get
+ // "panic: autosave error: type usermem.Addr is not registered".
+ vmaAddr := vseg.End()
+ data = append(data, seqfile.SeqData{
+ Buf: mm.vmaMapsEntryLocked(ctx, vseg),
+ Handle: &vmaAddr,
+ })
+ }
+
+ // We always emulate vsyscall, so advertise it here. Everything about a
+ // vsyscall region is static, so just hard code the maps entry since we
+ // don't have a real vma backing it. The vsyscall region is at the end of
+ // the virtual address space so nothing should be mapped after it (if
+ // something is really mapped in the tiny ~10 MiB segment afterwards, we'll
+ // get the sorting on the maps file wrong at worst; but that's not possible
+ // on any current platform).
+ //
+ // Artifically adjust the seqfile handle so we only output vsyscall entry once.
+ if start != vsyscallEnd {
+ // FIXME: Can't get a pointer to constant vsyscallEnd.
+ vmaAddr := vsyscallEnd
+ data = append(data, seqfile.SeqData{
+ Buf: []byte(vsyscallMapsEntry),
+ Handle: &vmaAddr,
+ })
+ }
+ return data, 1
+}
+
+// vmaMapsEntryLocked returns a /proc/[pid]/maps entry for the vma iterated by
+// vseg, including the trailing newline.
+//
+// Preconditions: mm.mappingMu must be locked.
+func (mm *MemoryManager) vmaMapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
+ var b bytes.Buffer
+ mm.appendVMAMapsEntryLocked(ctx, vseg, &b)
+ return b.Bytes()
+}
+
+// Preconditions: mm.mappingMu must be locked.
+func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaIterator, b *bytes.Buffer) {
+ vma := vseg.ValuePtr()
+ private := "p"
+ if !vma.private {
+ private = "s"
+ }
+
+ var dev, ino uint64
+ if vma.id != nil {
+ dev = vma.id.DeviceID()
+ ino = vma.id.InodeID()
+ }
+ devMajor := uint32(dev >> devMinorBits)
+ devMinor := uint32(dev & ((1 << devMinorBits) - 1))
+
+ // Do not include the guard page: fs/proc/task_mmu.c:show_map_vma() =>
+ // stack_guard_page_start().
+ fmt.Fprintf(b, "%08x-%08x %s%s %08x %02x:%02x %d ",
+ vseg.Start(), vseg.End(), vma.realPerms, private, vma.off, devMajor, devMinor, ino)
+
+ // Figure out our filename or hint.
+ var s string
+ if vma.hint != "" {
+ s = vma.hint
+ } else if vma.id != nil {
+ // FIXME: We are holding mm.mappingMu here, which is
+ // consistent with Linux's holding mmap_sem in
+ // fs/proc/task_mmu.c:show_map_vma() => fs/seq_file.c:seq_file_path().
+ // However, it's not clear that fs.File.MappedName() is actually
+ // consistent with this lock order.
+ s = vma.id.MappedName(ctx)
+ }
+ if s != "" {
+ // Per linux, we pad until the 74th character.
+ if pad := 73 - b.Len(); pad > 0 {
+ b.WriteString(strings.Repeat(" ", pad))
+ }
+ b.WriteString(s)
+ }
+ b.WriteString("\n")
+}
+
+// ReadSmapsSeqFileData is called by fs/proc.smapsData.ReadSeqFileData to
+// implement /proc/[pid]/smaps.
+func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ mm.mappingMu.RLock()
+ defer mm.mappingMu.RUnlock()
+ var data []seqfile.SeqData
+ var start usermem.Addr
+ if handle != nil {
+ start = *handle.(*usermem.Addr)
+ }
+ for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+ // FIXME: If we use a usermem.Addr for the handle, we get
+ // "panic: autosave error: type usermem.Addr is not registered".
+ vmaAddr := vseg.End()
+ data = append(data, seqfile.SeqData{
+ Buf: mm.vmaSmapsEntryLocked(ctx, vseg),
+ Handle: &vmaAddr,
+ })
+ }
+
+ // We always emulate vsyscall, so advertise it here. See
+ // ReadMapsSeqFileData for additional commentary.
+ if start != vsyscallEnd {
+ // FIXME: Can't get a pointer to constant vsyscallEnd.
+ vmaAddr := vsyscallEnd
+ data = append(data, seqfile.SeqData{
+ Buf: []byte(vsyscallSmapsEntry),
+ Handle: &vmaAddr,
+ })
+ }
+ return data, 1
+}
+
+// vmaSmapsEntryLocked returns a /proc/[pid]/smaps entry for the vma iterated
+// by vseg, including the trailing newline.
+//
+// Preconditions: mm.mappingMu must be locked.
+func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
+ var b bytes.Buffer
+ mm.appendVMAMapsEntryLocked(ctx, vseg, &b)
+ vma := vseg.ValuePtr()
+
+ // We take mm.activeMu here in each call to vmaSmapsEntryLocked, instead of
+ // requiring it to be locked as a precondition, to reduce the latency
+ // impact of reading /proc/[pid]/smaps on concurrent performance-sensitive
+ // operations requiring activeMu for writing like faults.
+ mm.activeMu.RLock()
+ var rss uint64
+ var anon uint64
+ vsegAR := vseg.Range()
+ for pseg := mm.pmas.LowerBoundSegment(vsegAR.Start); pseg.Ok() && pseg.Start() < vsegAR.End; pseg = pseg.NextSegment() {
+ psegAR := pseg.Range().Intersect(vsegAR)
+ size := uint64(psegAR.Length())
+ rss += size
+ if pseg.ValuePtr().private {
+ anon += size
+ }
+ }
+ mm.activeMu.RUnlock()
+
+ fmt.Fprintf(&b, "Size: %8d kB\n", vseg.Range().Length()/1024)
+ fmt.Fprintf(&b, "Rss: %8d kB\n", rss/1024)
+ // Currently we report PSS = RSS, i.e. we pretend each page mapped by a pma
+ // is only mapped by that pma. This avoids having to query memmap.Mappables
+ // for reference count information on each page. As a corollary, all pages
+ // are accounted as "private" whether or not the vma is private; compare
+ // Linux's fs/proc/task_mmu.c:smaps_account().
+ fmt.Fprintf(&b, "Pss: %8d kB\n", rss/1024)
+ fmt.Fprintf(&b, "Shared_Clean: %8d kB\n", 0)
+ fmt.Fprintf(&b, "Shared_Dirty: %8d kB\n", 0)
+ // Pretend that all pages are dirty if the vma is writable, and clean otherwise.
+ clean := rss
+ if vma.effectivePerms.Write {
+ clean = 0
+ }
+ fmt.Fprintf(&b, "Private_Clean: %8d kB\n", clean/1024)
+ fmt.Fprintf(&b, "Private_Dirty: %8d kB\n", (rss-clean)/1024)
+ // Pretend that all pages are "referenced" (recently touched).
+ fmt.Fprintf(&b, "Referenced: %8d kB\n", rss/1024)
+ fmt.Fprintf(&b, "Anonymous: %8d kB\n", anon/1024)
+ // Hugepages (hugetlb and THP) are not implemented.
+ fmt.Fprintf(&b, "AnonHugePages: %8d kB\n", 0)
+ fmt.Fprintf(&b, "Shared_Hugetlb: %8d kB\n", 0)
+ fmt.Fprintf(&b, "Private_Hugetlb: %7d kB\n", 0)
+ // Swap is not implemented.
+ fmt.Fprintf(&b, "Swap: %8d kB\n", 0)
+ fmt.Fprintf(&b, "SwapPss: %8d kB\n", 0)
+ fmt.Fprintf(&b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024)
+ fmt.Fprintf(&b, "MMUPageSize: %8d kB\n", usermem.PageSize/1024)
+ locked := rss
+ if vma.mlockMode == memmap.MLockNone {
+ locked = 0
+ }
+ fmt.Fprintf(&b, "Locked: %8d kB\n", locked/1024)
+
+ b.WriteString("VmFlags: ")
+ if vma.realPerms.Read {
+ b.WriteString("rd ")
+ }
+ if vma.realPerms.Write {
+ b.WriteString("wr ")
+ }
+ if vma.realPerms.Execute {
+ b.WriteString("ex ")
+ }
+ if vma.canWriteMappableLocked() { // VM_SHARED
+ b.WriteString("sh ")
+ }
+ if vma.maxPerms.Read {
+ b.WriteString("mr ")
+ }
+ if vma.maxPerms.Write {
+ b.WriteString("mw ")
+ }
+ if vma.maxPerms.Execute {
+ b.WriteString("me ")
+ }
+ if !vma.private { // VM_MAYSHARE
+ b.WriteString("ms ")
+ }
+ if vma.growsDown {
+ b.WriteString("gd ")
+ }
+ if vma.mlockMode != memmap.MLockNone { // VM_LOCKED
+ b.WriteString("lo ")
+ }
+ if vma.mlockMode == memmap.MLockLazy { // VM_LOCKONFAULT
+ b.WriteString("?? ") // no explicit encoding in fs/proc/task_mmu.c:show_smap_vma_flags()
+ }
+ if vma.private && vma.effectivePerms.Write { // VM_ACCOUNT
+ b.WriteString("ac ")
+ }
+ b.WriteString("\n")
+
+ return b.Bytes()
+}
diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go
index 383703ec3..fd6929e08 100644
--- a/pkg/sentry/mm/syscalls.go
+++ b/pkg/sentry/mm/syscalls.go
@@ -507,7 +507,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
return 0, syserror.EINVAL
}
// Inform the Mappable, if any, of the new mapping.
- if err := vma.mappable.CopyMapping(ctx, mm, oldAR, newAR, vseg.mappableOffsetAt(oldAR.Start), vma.isMappableAsWritable()); err != nil {
+ if err := vma.mappable.CopyMapping(ctx, mm, oldAR, newAR, vseg.mappableOffsetAt(oldAR.Start), vma.canWriteMappableLocked()); err != nil {
return 0, err
}
}
@@ -571,7 +571,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
// Now that pmas have been moved to newAR, we can notify vma.mappable that
// oldAR is no longer mapped.
if vma.mappable != nil {
- vma.mappable.RemoveMapping(ctx, mm, oldAR, vma.off, vma.isMappableAsWritable())
+ vma.mappable.RemoveMapping(ctx, mm, oldAR, vma.off, vma.canWriteMappableLocked())
}
if vma.mlockMode == memmap.MLockEager {
diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go
index 28ba9f2f5..e9c9a80ea 100644
--- a/pkg/sentry/mm/vma.go
+++ b/pkg/sentry/mm/vma.go
@@ -84,6 +84,8 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
// Inform the Mappable, if any, of the new mapping.
if opts.Mappable != nil {
+ // The expression for writable is vma.canWriteMappableLocked(), but we
+ // don't yet have a vma.
if err := opts.Mappable.AddMapping(ctx, mm, ar, opts.Offset, !opts.Private && opts.MaxPerms.Write); err != nil {
return vmaIterator{}, usermem.AddrRange{}, err
}
@@ -366,7 +368,7 @@ func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRa
vmaAR := vseg.Range()
vma := vseg.ValuePtr()
if vma.mappable != nil {
- vma.mappable.RemoveMapping(ctx, mm, vmaAR, vma.off, vma.isMappableAsWritable())
+ vma.mappable.RemoveMapping(ctx, mm, vmaAR, vma.off, vma.canWriteMappableLocked())
}
if vma.id != nil {
vma.id.DecRef()
@@ -381,6 +383,19 @@ func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRa
return vgap
}
+// canWriteMappableLocked returns true if it is possible for vma.mappable to be
+// written to via this vma, i.e. if it is possible that
+// vma.mappable.Translate(at.Write=true) may be called as a result of this vma.
+// This includes via I/O with usermem.IOOpts.IgnorePermissions = true, such as
+// PTRACE_POKEDATA.
+//
+// canWriteMappableLocked is equivalent to Linux's VM_SHARED.
+//
+// Preconditions: mm.mappingMu must be locked.
+func (vma *vma) canWriteMappableLocked() bool {
+ return !vma.private && vma.maxPerms.Write
+}
+
// vmaSetFunctions implements segment.Functions for vmaSet.
type vmaSetFunctions struct{}