From c8cee7108f1a1b37e89961c6dd69ccab97952c86 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Wed, 17 Apr 2019 12:56:23 -0700
Subject: Use FD limit and file size limit from host

FD limit and file size limit is read from the host, instead
of using hard-coded defaults, given that they effect the sandbox
process. Also limit the direct cache to use no more than half
if the available FDs.

PiperOrigin-RevId: 244050323
Change-Id: I787ad0fdf07c49d589e51aebfeae477324fe26e6
---
 pkg/sentry/fs/BUILD                   |  1 +
 pkg/sentry/fs/context.go              | 12 +++++
 pkg/sentry/fs/dirent_cache.go         | 43 ++++++++++++++---
 pkg/sentry/fs/dirent_cache_limiter.go | 55 +++++++++++++++++++++
 pkg/sentry/fs/dirent_cache_test.go    | 90 +++++++++++++++++++++++++++++++++++
 pkg/sentry/fs/gofer/session.go        |  9 ++++
 pkg/sentry/fs/mount.go                | 20 ++++++--
 pkg/sentry/fs/mount_overlay.go        | 11 ++++-
 pkg/sentry/kernel/kernel.go           |  9 ++++
 pkg/sentry/kernel/task.go             |  2 +
 10 files changed, 241 insertions(+), 11 deletions(-)
 create mode 100644 pkg/sentry/fs/dirent_cache_limiter.go

(limited to 'pkg/sentry')

diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD
index 1742d3a65..1fd9e30f6 100644
--- a/pkg/sentry/fs/BUILD
+++ b/pkg/sentry/fs/BUILD
@@ -12,6 +12,7 @@ go_library(
         "dentry.go",
         "dirent.go",
         "dirent_cache.go",
+        "dirent_cache_limiter.go",
         "dirent_list.go",
         "dirent_state.go",
         "event_list.go",
diff --git a/pkg/sentry/fs/context.go b/pkg/sentry/fs/context.go
index c0e6075e4..4869428a8 100644
--- a/pkg/sentry/fs/context.go
+++ b/pkg/sentry/fs/context.go
@@ -26,6 +26,9 @@ type contextID int
 const (
 	// CtxRoot is a Context.Value key for a Dirent.
 	CtxRoot contextID = iota
+
+	// CtxDirentCacheLimiter is a Context.Value key for DirentCacheLimiter.
+	CtxDirentCacheLimiter
 )
 
 // ContextCanAccessFile determines whether `file` can be accessed in the requested way
@@ -100,3 +103,12 @@ func RootFromContext(ctx context.Context) *Dirent {
 	}
 	return nil
 }
+
+// DirentCacheLimiterFromContext returns the DirentCacheLimiter used by ctx, or
+// nil if ctx does not have a dirent cache limiter.
+func DirentCacheLimiterFromContext(ctx context.Context) *DirentCacheLimiter {
+	if v := ctx.Value(CtxDirentCacheLimiter); v != nil {
+		return v.(*DirentCacheLimiter)
+	}
+	return nil
+}
diff --git a/pkg/sentry/fs/dirent_cache.go b/pkg/sentry/fs/dirent_cache.go
index 502b0a09b..d26a06971 100644
--- a/pkg/sentry/fs/dirent_cache.go
+++ b/pkg/sentry/fs/dirent_cache.go
@@ -32,6 +32,10 @@ type DirentCache struct {
 	// when cache is nil.
 	maxSize uint64
 
+	// limit restricts the number of entries in the cache amoung multiple caches.
+	// It may be nil if there are no global limit for this cache.
+	limit *DirentCacheLimiter
+
 	// mu protects currentSize and direntList.
 	mu sync.Mutex `state:"nosave"`
 
@@ -45,8 +49,7 @@ type DirentCache struct {
 	list direntList `state:"zerovalue"`
 }
 
-// NewDirentCache returns a new DirentCache with the given maxSize. If maxSize
-// is 0, nil is returned.
+// NewDirentCache returns a new DirentCache with the given maxSize.
 func NewDirentCache(maxSize uint64) *DirentCache {
 	return &DirentCache{
 		maxSize: maxSize,
@@ -71,15 +74,24 @@ func (c *DirentCache) Add(d *Dirent) {
 		return
 	}
 
+	// First check against the global limit.
+	for c.limit != nil && !c.limit.tryInc() {
+		if c.currentSize == 0 {
+			// If the global limit is reached, but there is nothing more to drop from
+			// this cache, there is not much else to do.
+			c.mu.Unlock()
+			return
+		}
+		c.remove(c.list.Back())
+	}
+
 	// d is not in cache. Add it and take a reference.
 	c.list.PushFront(d)
 	d.IncRef()
 	c.currentSize++
 
-	// Remove the oldest until we are under the size limit.
-	for c.maxSize > 0 && c.currentSize > c.maxSize {
-		c.remove(c.list.Back())
-	}
+	c.maybeShrink()
+
 	c.mu.Unlock()
 }
 
@@ -92,6 +104,9 @@ func (c *DirentCache) remove(d *Dirent) {
 	d.SetNext(nil)
 	d.DecRef()
 	c.currentSize--
+	if c.limit != nil {
+		c.limit.dec()
+	}
 }
 
 // Remove removes the element from the cache and decrements its refCount. It
@@ -142,3 +157,19 @@ func (c *DirentCache) Invalidate() {
 	}
 	c.mu.Unlock()
 }
+
+// setMaxSize sets cache max size. If current size is larger than max size, the
+// cache shrinks to acommodate the new max.
+func (c *DirentCache) setMaxSize(max uint64) {
+	c.mu.Lock()
+	c.maxSize = max
+	c.maybeShrink()
+	c.mu.Unlock()
+}
+
+// shrink removes the oldest element until the list is under the size limit.
+func (c *DirentCache) maybeShrink() {
+	for c.maxSize > 0 && c.currentSize > c.maxSize {
+		c.remove(c.list.Back())
+	}
+}
diff --git a/pkg/sentry/fs/dirent_cache_limiter.go b/pkg/sentry/fs/dirent_cache_limiter.go
new file mode 100644
index 000000000..024c7b2d5
--- /dev/null
+++ b/pkg/sentry/fs/dirent_cache_limiter.go
@@ -0,0 +1,55 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fs
+
+import (
+	"fmt"
+	"sync"
+)
+
+// DirentCacheLimiter acts as a global limit for all dirent caches in the
+// process.
+//
+// +stateify savable
+type DirentCacheLimiter struct {
+	mu    sync.Mutex `state:"nosave"`
+	max   uint64
+	count uint64 `state:"zerovalue"`
+}
+
+// NewDirentCacheLimiter creates a new DirentCacheLimiter.
+func NewDirentCacheLimiter(max uint64) *DirentCacheLimiter {
+	return &DirentCacheLimiter{max: max}
+}
+
+func (d *DirentCacheLimiter) tryInc() bool {
+	d.mu.Lock()
+	if d.count >= d.max {
+		d.mu.Unlock()
+		return false
+	}
+	d.count++
+	d.mu.Unlock()
+	return true
+}
+
+func (d *DirentCacheLimiter) dec() {
+	d.mu.Lock()
+	if d.count == 0 {
+		panic(fmt.Sprintf("underflowing DirentCacheLimiter count: %+v", d))
+	}
+	d.count--
+	d.mu.Unlock()
+}
diff --git a/pkg/sentry/fs/dirent_cache_test.go b/pkg/sentry/fs/dirent_cache_test.go
index 5d0e9d91c..93e8d415f 100644
--- a/pkg/sentry/fs/dirent_cache_test.go
+++ b/pkg/sentry/fs/dirent_cache_test.go
@@ -120,6 +120,96 @@ func TestDirentCache(t *testing.T) {
 	}
 }
 
+func TestDirentCacheLimiter(t *testing.T) {
+	const (
+		globalMaxSize = 5
+		maxSize       = 3
+	)
+
+	limit := NewDirentCacheLimiter(globalMaxSize)
+	c1 := NewDirentCache(maxSize)
+	c1.limit = limit
+	c2 := NewDirentCache(maxSize)
+	c2.limit = limit
+
+	// Create a Dirent d.
+	d := NewNegativeDirent("")
+
+	// Add d to the cache.
+	c1.Add(d)
+	if got, want := c1.Size(), uint64(1); got != want {
+		t.Errorf("c1.Size() got %v, want %v", got, want)
+	}
+
+	// Add maxSize-1 more elements. d should be oldest element.
+	for i := 0; i < maxSize-1; i++ {
+		c1.Add(NewNegativeDirent(""))
+	}
+	if got, want := c1.Size(), uint64(maxSize); got != want {
+		t.Errorf("c1.Size() got %v, want %v", got, want)
+	}
+
+	// Check that d is still there.
+	if got, want := c1.contains(d), true; got != want {
+		t.Errorf("c1.contains(d) got %v want %v", got, want)
+	}
+
+	// Fill up the other cache, it will start dropping old entries from the cache
+	// when the global limit is reached.
+	for i := 0; i < maxSize; i++ {
+		c2.Add(NewNegativeDirent(""))
+	}
+
+	// Check is what's remaining from global max.
+	if got, want := c2.Size(), globalMaxSize-maxSize; int(got) != want {
+		t.Errorf("c2.Size() got %v, want %v", got, want)
+	}
+
+	// Check that d was not dropped.
+	if got, want := c1.contains(d), true; got != want {
+		t.Errorf("c1.contains(d) got %v want %v", got, want)
+	}
+
+	// Add an entry that will eventually be dropped. Check is done later...
+	drop := NewNegativeDirent("")
+	c1.Add(drop)
+
+	// Check that d is bumped to front even when global limit is reached.
+	c1.Add(d)
+	if got, want := c1.contains(d), true; got != want {
+		t.Errorf("c1.contains(d) got %v want %v", got, want)
+	}
+
+	// Add 2 more element and check that:
+	//   - d is still in the list: to verify that d was bumped
+	//   - d2/d3 are in the list: older entries are dropped when global limit is
+	//     reached.
+	//   - drop is not in the list: indeed older elements are dropped.
+	d2 := NewNegativeDirent("")
+	c1.Add(d2)
+	d3 := NewNegativeDirent("")
+	c1.Add(d3)
+	if got, want := c1.contains(d), true; got != want {
+		t.Errorf("c1.contains(d) got %v want %v", got, want)
+	}
+	if got, want := c1.contains(d2), true; got != want {
+		t.Errorf("c1.contains(d2) got %v want %v", got, want)
+	}
+	if got, want := c1.contains(d3), true; got != want {
+		t.Errorf("c1.contains(d3) got %v want %v", got, want)
+	}
+	if got, want := c1.contains(drop), false; got != want {
+		t.Errorf("c1.contains(drop) got %v want %v", got, want)
+	}
+
+	// Drop all entries from one cache. The other will be allowed to grow.
+	c1.Invalidate()
+	c2.Add(NewNegativeDirent(""))
+	if got, want := c2.Size(), uint64(maxSize); got != want {
+		t.Errorf("c2.Size() got %v, want %v", got, want)
+	}
+}
+
 // TestNilDirentCache tests that a nil cache supports all cache operations, but
 // treats them as noop.
 func TestNilDirentCache(t *testing.T) {
diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go
index d626b86f5..ed5147c65 100644
--- a/pkg/sentry/fs/gofer/session.go
+++ b/pkg/sentry/fs/gofer/session.go
@@ -28,6 +28,10 @@ import (
 	"gvisor.googlesource.com/gvisor/pkg/unet"
 )
 
+// DefaultDirentCacheSize is the default dirent cache size for 9P mounts. It can
+// be adjusted independentely from the other dirent caches.
+var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize
+
 // +stateify savable
 type endpointMaps struct {
 	// mu protexts the direntMap, the keyMap, and the pathMap below.
@@ -249,6 +253,11 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
 	// Construct the MountSource with the session and superBlockFlags.
 	m := fs.NewMountSource(s, filesystem, superBlockFlags)
 
+	// Given that gofer files can consume host FDs, restrict the number
+	// of files that can be held by the cache.
+	m.SetDirentCacheMaxSize(DefaultDirentCacheSize)
+	m.SetDirentCacheLimiter(fs.DirentCacheLimiterFromContext(ctx))
+
 	// Send the Tversion request.
 	s.client, err = p9.NewClient(conn, s.msize, s.version)
 	if err != nil {
diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go
index 5cc777bef..1e245ae5f 100644
--- a/pkg/sentry/fs/mount.go
+++ b/pkg/sentry/fs/mount.go
@@ -151,9 +151,9 @@ type MountSource struct {
 	children map[*MountSource]struct{}
 }
 
-// defaultDirentCacheSize is the number of Dirents that the VFS can hold an extra
-// reference on.
-const defaultDirentCacheSize uint64 = 1000
+// DefaultDirentCacheSize is the number of Dirents that the VFS can hold an
+// extra reference on.
+const DefaultDirentCacheSize uint64 = 1000
 
 // NewMountSource returns a new MountSource. Filesystem may be nil if there is no
 // filesystem backing the mount.
@@ -162,7 +162,7 @@ func NewMountSource(mops MountSourceOperations, filesystem Filesystem, flags Mou
 		MountSourceOperations: mops,
 		Flags:                 flags,
 		Filesystem:            filesystem,
-		fscache:               NewDirentCache(defaultDirentCacheSize),
+		fscache:               NewDirentCache(DefaultDirentCacheSize),
 		children:              make(map[*MountSource]struct{}),
 	}
 }
@@ -246,6 +246,18 @@ func (msrc *MountSource) FlushDirentRefs() {
 	msrc.fscache.Invalidate()
 }
 
+// SetDirentCacheMaxSize sets the max size to the dirent cache associated with
+// this mount source.
+func (msrc *MountSource) SetDirentCacheMaxSize(max uint64) {
+	msrc.fscache.setMaxSize(max)
+}
+
+// SetDirentCacheLimiter sets the limiter objcet to the dirent cache associated
+// with this mount source.
+func (msrc *MountSource) SetDirentCacheLimiter(l *DirentCacheLimiter) {
+	msrc.fscache.limit = l
+}
+
 // NewCachingMountSource returns a generic mount that will cache dirents
 // aggressively.
 func NewCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource {
diff --git a/pkg/sentry/fs/mount_overlay.go b/pkg/sentry/fs/mount_overlay.go
index 4c89673b5..fb60a1aec 100644
--- a/pkg/sentry/fs/mount_overlay.go
+++ b/pkg/sentry/fs/mount_overlay.go
@@ -31,10 +31,19 @@ type overlayMountSourceOperations struct {
 func newOverlayMountSource(upper, lower *MountSource, flags MountSourceFlags) *MountSource {
 	upper.IncRef()
 	lower.IncRef()
-	return NewMountSource(&overlayMountSourceOperations{
+	msrc := NewMountSource(&overlayMountSourceOperations{
 		upper: upper,
 		lower: lower,
 	}, &overlayFilesystem{}, flags)
+
+	// Use the minimum number to keep resource usage under limits.
+	size := lower.fscache.maxSize
+	if size > upper.fscache.maxSize {
+		size = upper.fscache.maxSize
+	}
+	msrc.fscache.setMaxSize(size)
+
+	return msrc
 }
 
 // Revalidate implements MountSourceOperations.Revalidate for an overlay by
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index b8953657c..290c4a53c 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -188,6 +188,11 @@ type Kernel struct {
 
 	// deviceRegistry is used to save/restore device.SimpleDevices.
 	deviceRegistry struct{} `state:".(*device.Registry)"`
+
+	// DirentCacheLimiter controls the number of total dirent entries can be in
+	// caches. Not all caches use it, only the caches that use host resources use
+	// the limiter. It may be nil if disabled.
+	DirentCacheLimiter *fs.DirentCacheLimiter
 }
 
 // InitKernelArgs holds arguments to Init.
@@ -626,6 +631,8 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
 			return ctx.k.mounts.Root()
 		}
 		return nil
+	case fs.CtxDirentCacheLimiter:
+		return ctx.k.DirentCacheLimiter
 	case ktime.CtxRealtimeClock:
 		return ctx.k.RealtimeClock()
 	case limits.CtxLimits:
@@ -1170,6 +1177,8 @@ func (ctx supervisorContext) Value(key interface{}) interface{} {
 		return auth.NewRootCredentials(ctx.k.rootUserNamespace)
 	case fs.CtxRoot:
 		return ctx.k.mounts.Root()
+	case fs.CtxDirentCacheLimiter:
+		return ctx.k.DirentCacheLimiter
 	case ktime.CtxRealtimeClock:
 		return ctx.k.RealtimeClock()
 	case limits.CtxLimits:
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 9c365e781..ed2175c37 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -601,6 +601,8 @@ func (t *Task) Value(key interface{}) interface{} {
 		return int32(t.ThreadGroup().ID())
 	case fs.CtxRoot:
 		return t.fsc.RootDirectory()
+	case fs.CtxDirentCacheLimiter:
+		return t.k.DirentCacheLimiter
 	case inet.CtxStack:
 		return t.NetworkContext()
 	case ktime.CtxRealtimeClock:
-- 
cgit v1.2.3