 pkg/sentry/fs/fsutil/inode_cached.go   | 77 ++++++++++++++++++++----------
 pkg/sentry/pgalloc/pgalloc.go          | 13 ++++++++++++-
 pkg/sentry/platform/ring0/defs_impl.go |  2 +-
 3 files changed, 68 insertions(+), 24 deletions(-)
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index ed62049a9..e70bc28fb 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -568,21 +568,30 @@ type inodeReadWriter struct {
// ReadToBlocks implements safemem.Reader.ReadToBlocks.
func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+ mem := rw.c.mfp.MemoryFile()
+ fillCache := !rw.c.useHostPageCache() && mem.ShouldCacheEvictable()
+
// Hot path. Avoid defers.
- rw.c.dataMu.RLock()
+ var unlock func()
+ if fillCache {
+ rw.c.dataMu.Lock()
+ unlock = rw.c.dataMu.Unlock
+ } else {
+ rw.c.dataMu.RLock()
+ unlock = rw.c.dataMu.RUnlock
+ }
// Compute the range to read.
if rw.offset >= rw.c.attr.Size {
- rw.c.dataMu.RUnlock()
+ unlock()
return 0, io.EOF
}
end := fs.ReadEndOffset(rw.offset, int64(dsts.NumBytes()), rw.c.attr.Size)
if end == rw.offset { // dsts.NumBytes() == 0?
- rw.c.dataMu.RUnlock()
+ unlock()
return 0, nil
}
- mem := rw.c.mfp.MemoryFile()
var done uint64
seg, gap := rw.c.cache.Find(uint64(rw.offset))
for rw.offset < end {
@@ -592,7 +601,7 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
// Get internal mappings from the cache.
ims, err := mem.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
if err != nil {
- rw.c.dataMu.RUnlock()
+ unlock()
return done, err
}
@@ -602,7 +611,7 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
rw.offset += int64(n)
dsts = dsts.DropFirst64(n)
if err != nil {
- rw.c.dataMu.RUnlock()
+ unlock()
return done, err
}
@@ -610,27 +619,48 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
seg, gap = seg.NextNonEmpty()
case gap.Ok():
- // Read directly from the backing file.
- gapmr := gap.Range().Intersect(mr)
- dst := dsts.TakeFirst64(gapmr.Length())
- n, err := rw.c.backingFile.ReadToBlocksAt(rw.ctx, dst, gapmr.Start)
- done += n
- rw.offset += int64(n)
- dsts = dsts.DropFirst64(n)
- // Partial reads are fine. But we must stop reading.
- if n != dst.NumBytes() || err != nil {
- rw.c.dataMu.RUnlock()
- return done, err
+ gapMR := gap.Range().Intersect(mr)
+ if fillCache {
+ // Read into the cache, then re-enter the loop to read from the
+ // cache.
+ reqMR := memmap.MappableRange{
+ Start: uint64(usermem.Addr(gapMR.Start).RoundDown()),
+ End: fs.OffsetPageEnd(int64(gapMR.End)),
+ }
+ optMR := gap.Range()
+ err := rw.c.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mem, usage.PageCache, rw.c.backingFile.ReadToBlocksAt)
+ mem.MarkEvictable(rw.c, pgalloc.EvictableRange{optMR.Start, optMR.End})
+ seg, gap = rw.c.cache.Find(uint64(rw.offset))
+ if !seg.Ok() {
+ unlock()
+ return done, err
+ }
+ // err might have occurred in part of gap.Range() outside
+ // gapMR. Forget about it for now; if the error matters and
+ // persists, we'll run into it again in a later iteration of
+ // this loop.
+ } else {
+ // Read directly from the backing file.
+ dst := dsts.TakeFirst64(gapMR.Length())
+ n, err := rw.c.backingFile.ReadToBlocksAt(rw.ctx, dst, gapMR.Start)
+ done += n
+ rw.offset += int64(n)
+ dsts = dsts.DropFirst64(n)
+ // Partial reads are fine. But we must stop reading.
+ if n != dst.NumBytes() || err != nil {
+ unlock()
+ return done, err
+ }
+
+ // Continue.
+ seg, gap = gap.NextSegment(), FileRangeGapIterator{}
}
- // Continue.
- seg, gap = gap.NextSegment(), FileRangeGapIterator{}
-
default:
break
}
}
- rw.c.dataMu.RUnlock()
+ unlock()
return done, nil
}
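
The fill path above calls maxFillRange(reqMR, optMR), whose definition is not shown in this hunk. Below is a plausible sketch of such a helper, assuming it extends the required range into the optional range up to a fixed readahead cap; the 64 KB constant and the exact clamping rules are illustrative assumptions, not taken from this change.

package fsutil

import "gvisor.dev/gvisor/pkg/sentry/memmap"

// maxFillRange returns a range to fill that covers all of required and as
// much of optional as fits under an assumed readahead cap. Sketch only; the
// real helper in inode_cached.go may differ.
func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange {
	const maxReadahead = 64 << 10 // illustrative cap

	if required.Length() >= maxReadahead {
		return required
	}
	if optional.Length() <= maxReadahead {
		return optional
	}
	optional.Start = required.Start
	if optional.Length() <= maxReadahead {
		return optional
	}
	optional.End = optional.Start + maxReadahead
	return optional
}
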
@@ -700,7 +730,10 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
seg, gap = seg.NextNonEmpty()
case gap.Ok() && gap.Start() < mr.End:
- // Write directly to the backing file.
+ // Write directly to the backing file. At present, we never fill
+ // the cache when writing, since doing so can convert small writes
+ // into inefficient read-modify-write cycles, and we have no
+ // mechanism for detecting or avoiding this.
gapmr := gap.Range().Intersect(mr)
src := srcs.TakeFirst64(gapmr.Length())
n, err := rw.c.backingFile.WriteFromBlocksAt(rw.ctx, src, gapmr.Start)
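
The new comment on the write path captures the asymmetry with the read path: filling the cache on a write would force whole pages to be read back from the backing file first. A toy calculation of that cost, assuming 4 KiB pages (the numbers are illustrative, not from this change):

package main

import "fmt"

const pageSize = 4096 // assumed page size for illustration

func main() {
	// A 10-byte write at offset 100 dirties part of one page.
	writeOffset, writeLen := int64(100), int64(10)

	pageStart := writeOffset &^ (pageSize - 1)
	pageEnd := (writeOffset + writeLen + pageSize - 1) &^ (pageSize - 1)

	// Caching this write would mean first reading the untouched remainder
	// of the page from the backing file: a read-modify-write cycle.
	fmt.Printf("caching a %d-byte write would require reading %d extra bytes\n",
		writeLen, (pageEnd-pageStart)-writeLen)
}
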
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go
index 8bd3e885d..f7f7298c4 100644
--- a/pkg/sentry/pgalloc/pgalloc.go
+++ b/pkg/sentry/pgalloc/pgalloc.go
@@ -285,7 +285,10 @@ func NewMemoryFile(file *os.File, opts MemoryFileOpts) (*MemoryFile, error) {
switch opts.DelayedEviction {
case DelayedEvictionDefault:
opts.DelayedEviction = DelayedEvictionEnabled
- case DelayedEvictionDisabled, DelayedEvictionEnabled, DelayedEvictionManual:
+ case DelayedEvictionDisabled, DelayedEvictionManual:
+ opts.UseHostMemcgPressure = false
+ case DelayedEvictionEnabled:
+ // ok
default:
return nil, fmt.Errorf("invalid MemoryFileOpts.DelayedEviction: %v", opts.DelayedEviction)
}
@@ -777,6 +780,14 @@ func (f *MemoryFile) MarkAllUnevictable(user EvictableMemoryUser) {
}
}
+// ShouldCacheEvictable returns true if f is meaningfully delaying evictions of
+// evictable memory, such that it may be advantageous to cache data in
+// evictable memory. The value returned by ShouldCacheEvictable may change
+// between calls.
+func (f *MemoryFile) ShouldCacheEvictable() bool {
+ return f.opts.DelayedEviction == DelayedEvictionManual || f.opts.UseHostMemcgPressure
+}
+
// UpdateUsage ensures that the memory usage statistics in
// usage.MemoryAccounting are up to date.
func (f *MemoryFile) UpdateUsage() error {
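
ShouldCacheEvictable only reports true when eviction is actually delayed, e.g. under DelayedEvictionManual or when host memcg pressure notifications are in use. A minimal sketch of wiring this up, assuming a throwaway temp file as backing storage; real sandboxes configure the MemoryFile from runsc, and the file setup here is purely illustrative.

package main

import (
	"io/ioutil"
	"log"
	"os"

	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
)

func main() {
	// Illustrative backing file; gVisor normally uses a memfd.
	f, err := ioutil.TempFile("", "memory-file")
	if err != nil {
		log.Fatal(err)
	}
	defer os.Remove(f.Name())

	mf, err := pgalloc.NewMemoryFile(f, pgalloc.MemoryFileOpts{
		// Manual delayed eviction keeps evictable allocations resident until
		// evictions are explicitly started, so ShouldCacheEvictable is true.
		DelayedEviction: pgalloc.DelayedEvictionManual,
	})
	if err != nil {
		log.Fatal(err)
	}
	defer mf.Destroy()

	if mf.ShouldCacheEvictable() {
		// CachingInodeOperations readers will now populate the page cache
		// (and mark it evictable) instead of reading straight through to
		// the backing file.
	}
}
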
diff --git a/pkg/sentry/platform/ring0/defs_impl.go b/pkg/sentry/platform/ring0/defs_impl.go
index acae012dc..a30a9dd4a 100755
--- a/pkg/sentry/platform/ring0/defs_impl.go
+++ b/pkg/sentry/platform/ring0/defs_impl.go
@@ -1,12 +1,12 @@
package ring0
import (
- "fmt"
"gvisor.dev/gvisor/pkg/cpuid"
"io"
"reflect"
"syscall"
+ "fmt"
"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/usermem"
)