summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAyush Ranjan <ayushranjan@google.com>2020-10-19 09:00:08 -0700
committergVisor bot <gvisor-bot@google.com>2020-10-19 09:02:19 -0700
commitc206fcbfc2b951a49798a1b84e9cd3c6097ffdca (patch)
treebf074df09147f84ef88ce7ec86ddd153fad83fd6
parent9a3d8973c4fcd1475b3748c10eb3e255d44e8a20 (diff)
pgalloc: Do not hold MemoryFile.mu while calling mincore.
This change makes the following changes: - Unlocks MemoryFile.mu while calling mincore (checkCommitted) because mincore can take a really long time. Accordingly looks up the segment in the tree tree again and handles changes to the segment. - MemoryFile.UpdateUsage() can now only be called at frequency at most 100Hz. 100 Hz = linux.CLOCKS_PER_SEC. Co-authored-by: Jamie Liu <jamieliu@google.com> PiperOrigin-RevId: 337865250
-rw-r--r--pkg/sentry/pgalloc/BUILD1
-rw-r--r--pkg/sentry/pgalloc/pgalloc.go122
-rw-r--r--pkg/sentry/syscalls/linux/sys_sysinfo.go2
-rw-r--r--pkg/sentry/usage/memory.go2
4 files changed, 67 insertions, 60 deletions
diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD
index 7a3311a70..5b09b9feb 100644
--- a/pkg/sentry/pgalloc/BUILD
+++ b/pkg/sentry/pgalloc/BUILD
@@ -83,6 +83,7 @@ go_library(
],
visibility = ["//pkg/sentry:internal"],
deps = [
+ "//pkg/abi/linux",
"//pkg/context",
"//pkg/log",
"//pkg/memutil",
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go
index 626d1eaa4..7c297fb9e 100644
--- a/pkg/sentry/pgalloc/pgalloc.go
+++ b/pkg/sentry/pgalloc/pgalloc.go
@@ -29,6 +29,7 @@ import (
"syscall"
"time"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/safemem"
@@ -224,6 +225,18 @@ type usageInfo struct {
refs uint64
}
+// canCommit returns true if the tracked region can be committed.
+func (u *usageInfo) canCommit() bool {
+ // refs must be greater than 0 because we assume that reclaimable pages
+ // (that aren't already known to be committed) are not committed. This
+ // isn't necessarily true, even after the reclaimer does Decommit(),
+ // because the kernel may subsequently back the hugepage-sized region
+ // containing the decommitted page with a hugepage. However, it's
+ // consistent with our treatment of unallocated pages, which have the same
+ // property.
+ return !u.knownCommitted && u.refs != 0
+}
+
// An EvictableMemoryUser represents a user of MemoryFile-allocated memory that
// may be asked to deallocate that memory in the presence of memory pressure.
type EvictableMemoryUser interface {
@@ -828,6 +841,11 @@ func (f *MemoryFile) UpdateUsage() error {
log.Debugf("UpdateUsage: skipped with usageSwapped!=0.")
return nil
}
+ // Linux updates usage values at CONFIG_HZ.
+ if scanningAfter := time.Now().Sub(f.usageLast).Milliseconds(); scanningAfter < time.Second.Milliseconds()/linux.CLOCKS_PER_SEC {
+ log.Debugf("UpdateUsage: skipped because previous scan happened %d ms back", scanningAfter)
+ return nil
+ }
f.usageLast = time.Now()
err = f.updateUsageLocked(currentUsage, mincore)
@@ -841,7 +859,7 @@ func (f *MemoryFile) UpdateUsage() error {
// pages by invoking checkCommitted, which is a function that, for each page i
// in bs, sets committed[i] to 1 if the page is committed and 0 otherwise.
//
-// Precondition: f.mu must be held.
+// Precondition: f.mu must be held; it may be unlocked and reacquired.
func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(bs []byte, committed []byte) error) error {
// Track if anything changed to elide the merge. In the common case, we
// expect all segments to be committed and no merge to occur.
@@ -868,7 +886,7 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(
} else if f.usageSwapped != 0 {
// We have more usage accounted for than the file itself.
// That's fine, we probably caught a race where pages were
- // being committed while the above loop was running. Just
+ // being committed while the below loop was running. Just
// report the higher number that we found and ignore swap.
usage.MemoryAccounting.Dec(f.usageSwapped, usage.System)
f.usageSwapped = 0
@@ -880,21 +898,9 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(
// Iterate over all usage data. There will only be usage segments
// present when there is an associated reference.
- for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
- val := seg.Value()
-
- // Already known to be committed; ignore.
- if val.knownCommitted {
- continue
- }
-
- // Assume that reclaimable pages (that aren't already known to be
- // committed) are not committed. This isn't necessarily true, even
- // after the reclaimer does Decommit(), because the kernel may
- // subsequently back the hugepage-sized region containing the
- // decommitted page with a hugepage. However, it's consistent with our
- // treatment of unallocated pages, which have the same property.
- if val.refs == 0 {
+ for seg := f.usage.FirstSegment(); seg.Ok(); {
+ if !seg.ValuePtr().canCommit() {
+ seg = seg.NextSegment()
continue
}
@@ -917,56 +923,53 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(
}
// Query for new pages in core.
- if err := checkCommitted(s, buf); err != nil {
+ // NOTE(b/165896008): mincore (which is passed as checkCommitted)
+ // by f.UpdateUsage() might take a really long time. So unlock f.mu
+ // while checkCommitted runs.
+ f.mu.Unlock()
+ err := checkCommitted(s, buf)
+ f.mu.Lock()
+ if err != nil {
checkErr = err
return
}
// Scan each page and switch out segments.
- populatedRun := false
- populatedRunStart := 0
- for i := 0; i <= bufLen; i++ {
- // We run past the end of the slice here to
- // simplify the logic and only set populated if
- // we're still looking at elements.
- populated := false
- if i < bufLen {
- populated = buf[i]&0x1 != 0
- }
-
- switch {
- case populated == populatedRun:
- // Keep the run going.
- continue
- case populated && !populatedRun:
- // Begin the run.
- populatedRun = true
- populatedRunStart = i
- // Keep going.
+ seg := f.usage.LowerBoundSegment(r.Start)
+ for i := 0; i < bufLen; {
+ if buf[i]&0x1 == 0 {
+ i++
continue
- case !populated && populatedRun:
- // Finish the run by changing this segment.
- runRange := memmap.FileRange{
- Start: r.Start + uint64(populatedRunStart*usermem.PageSize),
- End: r.Start + uint64(i*usermem.PageSize),
+ }
+ // Scan to the end of this committed range.
+ j := i + 1
+ for ; j < bufLen; j++ {
+ if buf[j]&0x1 == 0 {
+ break
}
- seg = f.usage.Isolate(seg, runRange)
- seg.ValuePtr().knownCommitted = true
- // Advance the segment only if we still
- // have work to do in the context of
- // the original segment from the for
- // loop. Otherwise, the for loop itself
- // will advance the segment
- // appropriately.
- if runRange.End != r.End {
- seg = seg.NextSegment()
+ }
+ committedFR := memmap.FileRange{
+ Start: r.Start + uint64(i*usermem.PageSize),
+ End: r.Start + uint64(j*usermem.PageSize),
+ }
+ // Advance seg to committedFR.Start.
+ for seg.Ok() && seg.End() < committedFR.Start {
+ seg = seg.NextSegment()
+ }
+ // Mark pages overlapping committedFR as committed.
+ for seg.Ok() && seg.Start() < committedFR.End {
+ if seg.ValuePtr().canCommit() {
+ seg = f.usage.Isolate(seg, committedFR)
+ seg.ValuePtr().knownCommitted = true
+ amount := seg.Range().Length()
+ usage.MemoryAccounting.Inc(amount, seg.ValuePtr().kind)
+ f.usageExpected += amount
+ changedAny = true
}
- amount := runRange.Length()
- usage.MemoryAccounting.Inc(amount, val.kind)
- f.usageExpected += amount
- changedAny = true
- populatedRun = false
+ seg = seg.NextSegment()
}
+ // Continue scanning for committed pages.
+ i = j + 1
}
// Advance r.Start.
@@ -978,6 +981,9 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(
if err != nil {
return err
}
+
+ // Continue with the first segment after r.End.
+ seg = f.usage.LowerBoundSegment(r.End)
}
return nil
diff --git a/pkg/sentry/syscalls/linux/sys_sysinfo.go b/pkg/sentry/syscalls/linux/sys_sysinfo.go
index 6320593f0..db3d924d9 100644
--- a/pkg/sentry/syscalls/linux/sys_sysinfo.go
+++ b/pkg/sentry/syscalls/linux/sys_sysinfo.go
@@ -21,7 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/usage"
)
-// Sysinfo implements the sysinfo syscall as described in man 2 sysinfo.
+// Sysinfo implements Linux syscall sysinfo(2).
func Sysinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
addr := args[0].Pointer()
diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go
index ab1d140d2..5ed6726ab 100644
--- a/pkg/sentry/usage/memory.go
+++ b/pkg/sentry/usage/memory.go
@@ -278,7 +278,7 @@ func TotalMemory(memSize, used uint64) uint64 {
}
if memSize < used {
memSize = used
- // Bump totalSize to the next largest power of 2, if one exists, so
+ // Bump memSize to the next largest power of 2, if one exists, so
// that MemFree isn't 0.
if msb := bits.MostSignificantOne64(memSize); msb < 63 {
memSize = uint64(1) << (uint(msb) + 1)