From c8cee7108f1a1b37e89961c6dd69ccab97952c86 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 17 Apr 2019 12:56:23 -0700 Subject: Use FD limit and file size limit from host FD limit and file size limit is read from the host, instead of using hard-coded defaults, given that they effect the sandbox process. Also limit the direct cache to use no more than half if the available FDs. PiperOrigin-RevId: 244050323 Change-Id: I787ad0fdf07c49d589e51aebfeae477324fe26e6 --- pkg/sentry/fs/BUILD | 1 + pkg/sentry/fs/context.go | 12 +++++ pkg/sentry/fs/dirent_cache.go | 43 ++++++++++++++--- pkg/sentry/fs/dirent_cache_limiter.go | 55 +++++++++++++++++++++ pkg/sentry/fs/dirent_cache_test.go | 90 +++++++++++++++++++++++++++++++++++ pkg/sentry/fs/gofer/session.go | 9 ++++ pkg/sentry/fs/mount.go | 20 ++++++-- pkg/sentry/fs/mount_overlay.go | 11 ++++- pkg/sentry/kernel/kernel.go | 9 ++++ pkg/sentry/kernel/task.go | 2 + 10 files changed, 241 insertions(+), 11 deletions(-) create mode 100644 pkg/sentry/fs/dirent_cache_limiter.go (limited to 'pkg/sentry') diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 1742d3a65..1fd9e30f6 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -12,6 +12,7 @@ go_library( "dentry.go", "dirent.go", "dirent_cache.go", + "dirent_cache_limiter.go", "dirent_list.go", "dirent_state.go", "event_list.go", diff --git a/pkg/sentry/fs/context.go b/pkg/sentry/fs/context.go index c0e6075e4..4869428a8 100644 --- a/pkg/sentry/fs/context.go +++ b/pkg/sentry/fs/context.go @@ -26,6 +26,9 @@ type contextID int const ( // CtxRoot is a Context.Value key for a Dirent. CtxRoot contextID = iota + + // CtxDirentCacheLimiter is a Context.Value key for DirentCacheLimiter. + CtxDirentCacheLimiter ) // ContextCanAccessFile determines whether `file` can be accessed in the requested way @@ -100,3 +103,12 @@ func RootFromContext(ctx context.Context) *Dirent { } return nil } + +// DirentCacheLimiterFromContext returns the DirentCacheLimiter used by ctx, or +// nil if ctx does not have a dirent cache limiter. +func DirentCacheLimiterFromContext(ctx context.Context) *DirentCacheLimiter { + if v := ctx.Value(CtxDirentCacheLimiter); v != nil { + return v.(*DirentCacheLimiter) + } + return nil +} diff --git a/pkg/sentry/fs/dirent_cache.go b/pkg/sentry/fs/dirent_cache.go index 502b0a09b..d26a06971 100644 --- a/pkg/sentry/fs/dirent_cache.go +++ b/pkg/sentry/fs/dirent_cache.go @@ -32,6 +32,10 @@ type DirentCache struct { // when cache is nil. maxSize uint64 + // limit restricts the number of entries in the cache amoung multiple caches. + // It may be nil if there are no global limit for this cache. + limit *DirentCacheLimiter + // mu protects currentSize and direntList. mu sync.Mutex `state:"nosave"` @@ -45,8 +49,7 @@ type DirentCache struct { list direntList `state:"zerovalue"` } -// NewDirentCache returns a new DirentCache with the given maxSize. If maxSize -// is 0, nil is returned. +// NewDirentCache returns a new DirentCache with the given maxSize. func NewDirentCache(maxSize uint64) *DirentCache { return &DirentCache{ maxSize: maxSize, @@ -71,15 +74,24 @@ func (c *DirentCache) Add(d *Dirent) { return } + // First check against the global limit. + for c.limit != nil && !c.limit.tryInc() { + if c.currentSize == 0 { + // If the global limit is reached, but there is nothing more to drop from + // this cache, there is not much else to do. + c.mu.Unlock() + return + } + c.remove(c.list.Back()) + } + // d is not in cache. Add it and take a reference. c.list.PushFront(d) d.IncRef() c.currentSize++ - // Remove the oldest until we are under the size limit. - for c.maxSize > 0 && c.currentSize > c.maxSize { - c.remove(c.list.Back()) - } + c.maybeShrink() + c.mu.Unlock() } @@ -92,6 +104,9 @@ func (c *DirentCache) remove(d *Dirent) { d.SetNext(nil) d.DecRef() c.currentSize-- + if c.limit != nil { + c.limit.dec() + } } // Remove removes the element from the cache and decrements its refCount. It @@ -142,3 +157,19 @@ func (c *DirentCache) Invalidate() { } c.mu.Unlock() } + +// setMaxSize sets cache max size. If current size is larger than max size, the +// cache shrinks to acommodate the new max. +func (c *DirentCache) setMaxSize(max uint64) { + c.mu.Lock() + c.maxSize = max + c.maybeShrink() + c.mu.Unlock() +} + +// shrink removes the oldest element until the list is under the size limit. +func (c *DirentCache) maybeShrink() { + for c.maxSize > 0 && c.currentSize > c.maxSize { + c.remove(c.list.Back()) + } +} diff --git a/pkg/sentry/fs/dirent_cache_limiter.go b/pkg/sentry/fs/dirent_cache_limiter.go new file mode 100644 index 000000000..024c7b2d5 --- /dev/null +++ b/pkg/sentry/fs/dirent_cache_limiter.go @@ -0,0 +1,55 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "fmt" + "sync" +) + +// DirentCacheLimiter acts as a global limit for all dirent caches in the +// process. +// +// +stateify savable +type DirentCacheLimiter struct { + mu sync.Mutex `state:"nosave"` + max uint64 + count uint64 `state:"zerovalue"` +} + +// NewDirentCacheLimiter creates a new DirentCacheLimiter. +func NewDirentCacheLimiter(max uint64) *DirentCacheLimiter { + return &DirentCacheLimiter{max: max} +} + +func (d *DirentCacheLimiter) tryInc() bool { + d.mu.Lock() + if d.count >= d.max { + d.mu.Unlock() + return false + } + d.count++ + d.mu.Unlock() + return true +} + +func (d *DirentCacheLimiter) dec() { + d.mu.Lock() + if d.count == 0 { + panic(fmt.Sprintf("underflowing DirentCacheLimiter count: %+v", d)) + } + d.count-- + d.mu.Unlock() +} diff --git a/pkg/sentry/fs/dirent_cache_test.go b/pkg/sentry/fs/dirent_cache_test.go index 5d0e9d91c..93e8d415f 100644 --- a/pkg/sentry/fs/dirent_cache_test.go +++ b/pkg/sentry/fs/dirent_cache_test.go @@ -120,6 +120,96 @@ func TestDirentCache(t *testing.T) { } } +func TestDirentCacheLimiter(t *testing.T) { + const ( + globalMaxSize = 5 + maxSize = 3 + ) + + limit := NewDirentCacheLimiter(globalMaxSize) + c1 := NewDirentCache(maxSize) + c1.limit = limit + c2 := NewDirentCache(maxSize) + c2.limit = limit + + // Create a Dirent d. + d := NewNegativeDirent("") + + // Add d to the cache. + c1.Add(d) + if got, want := c1.Size(), uint64(1); got != want { + t.Errorf("c1.Size() got %v, want %v", got, want) + } + + // Add maxSize-1 more elements. d should be oldest element. + for i := 0; i < maxSize-1; i++ { + c1.Add(NewNegativeDirent("")) + } + if got, want := c1.Size(), uint64(maxSize); got != want { + t.Errorf("c1.Size() got %v, want %v", got, want) + } + + // Check that d is still there. + if got, want := c1.contains(d), true; got != want { + t.Errorf("c1.contains(d) got %v want %v", got, want) + } + + // Fill up the other cache, it will start dropping old entries from the cache + // when the global limit is reached. + for i := 0; i < maxSize; i++ { + c2.Add(NewNegativeDirent("")) + } + + // Check is what's remaining from global max. + if got, want := c2.Size(), globalMaxSize-maxSize; int(got) != want { + t.Errorf("c2.Size() got %v, want %v", got, want) + } + + // Check that d was not dropped. + if got, want := c1.contains(d), true; got != want { + t.Errorf("c1.contains(d) got %v want %v", got, want) + } + + // Add an entry that will eventually be dropped. Check is done later... + drop := NewNegativeDirent("") + c1.Add(drop) + + // Check that d is bumped to front even when global limit is reached. + c1.Add(d) + if got, want := c1.contains(d), true; got != want { + t.Errorf("c1.contains(d) got %v want %v", got, want) + } + + // Add 2 more element and check that: + // - d is still in the list: to verify that d was bumped + // - d2/d3 are in the list: older entries are dropped when global limit is + // reached. + // - drop is not in the list: indeed older elements are dropped. + d2 := NewNegativeDirent("") + c1.Add(d2) + d3 := NewNegativeDirent("") + c1.Add(d3) + if got, want := c1.contains(d), true; got != want { + t.Errorf("c1.contains(d) got %v want %v", got, want) + } + if got, want := c1.contains(d2), true; got != want { + t.Errorf("c1.contains(d2) got %v want %v", got, want) + } + if got, want := c1.contains(d3), true; got != want { + t.Errorf("c1.contains(d3) got %v want %v", got, want) + } + if got, want := c1.contains(drop), false; got != want { + t.Errorf("c1.contains(drop) got %v want %v", got, want) + } + + // Drop all entries from one cache. The other will be allowed to grow. + c1.Invalidate() + c2.Add(NewNegativeDirent("")) + if got, want := c2.Size(), uint64(maxSize); got != want { + t.Errorf("c2.Size() got %v, want %v", got, want) + } +} + // TestNilDirentCache tests that a nil cache supports all cache operations, but // treats them as noop. func TestNilDirentCache(t *testing.T) { diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index d626b86f5..ed5147c65 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -28,6 +28,10 @@ import ( "gvisor.googlesource.com/gvisor/pkg/unet" ) +// DefaultDirentCacheSize is the default dirent cache size for 9P mounts. It can +// be adjusted independentely from the other dirent caches. +var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize + // +stateify savable type endpointMaps struct { // mu protexts the direntMap, the keyMap, and the pathMap below. @@ -249,6 +253,11 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF // Construct the MountSource with the session and superBlockFlags. m := fs.NewMountSource(s, filesystem, superBlockFlags) + // Given that gofer files can consume host FDs, restrict the number + // of files that can be held by the cache. + m.SetDirentCacheMaxSize(DefaultDirentCacheSize) + m.SetDirentCacheLimiter(fs.DirentCacheLimiterFromContext(ctx)) + // Send the Tversion request. s.client, err = p9.NewClient(conn, s.msize, s.version) if err != nil { diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go index 5cc777bef..1e245ae5f 100644 --- a/pkg/sentry/fs/mount.go +++ b/pkg/sentry/fs/mount.go @@ -151,9 +151,9 @@ type MountSource struct { children map[*MountSource]struct{} } -// defaultDirentCacheSize is the number of Dirents that the VFS can hold an extra -// reference on. -const defaultDirentCacheSize uint64 = 1000 +// DefaultDirentCacheSize is the number of Dirents that the VFS can hold an +// extra reference on. +const DefaultDirentCacheSize uint64 = 1000 // NewMountSource returns a new MountSource. Filesystem may be nil if there is no // filesystem backing the mount. @@ -162,7 +162,7 @@ func NewMountSource(mops MountSourceOperations, filesystem Filesystem, flags Mou MountSourceOperations: mops, Flags: flags, Filesystem: filesystem, - fscache: NewDirentCache(defaultDirentCacheSize), + fscache: NewDirentCache(DefaultDirentCacheSize), children: make(map[*MountSource]struct{}), } } @@ -246,6 +246,18 @@ func (msrc *MountSource) FlushDirentRefs() { msrc.fscache.Invalidate() } +// SetDirentCacheMaxSize sets the max size to the dirent cache associated with +// this mount source. +func (msrc *MountSource) SetDirentCacheMaxSize(max uint64) { + msrc.fscache.setMaxSize(max) +} + +// SetDirentCacheLimiter sets the limiter objcet to the dirent cache associated +// with this mount source. +func (msrc *MountSource) SetDirentCacheLimiter(l *DirentCacheLimiter) { + msrc.fscache.limit = l +} + // NewCachingMountSource returns a generic mount that will cache dirents // aggressively. func NewCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource { diff --git a/pkg/sentry/fs/mount_overlay.go b/pkg/sentry/fs/mount_overlay.go index 4c89673b5..fb60a1aec 100644 --- a/pkg/sentry/fs/mount_overlay.go +++ b/pkg/sentry/fs/mount_overlay.go @@ -31,10 +31,19 @@ type overlayMountSourceOperations struct { func newOverlayMountSource(upper, lower *MountSource, flags MountSourceFlags) *MountSource { upper.IncRef() lower.IncRef() - return NewMountSource(&overlayMountSourceOperations{ + msrc := NewMountSource(&overlayMountSourceOperations{ upper: upper, lower: lower, }, &overlayFilesystem{}, flags) + + // Use the minimum number to keep resource usage under limits. + size := lower.fscache.maxSize + if size > upper.fscache.maxSize { + size = upper.fscache.maxSize + } + msrc.fscache.setMaxSize(size) + + return msrc } // Revalidate implements MountSourceOperations.Revalidate for an overlay by diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index b8953657c..290c4a53c 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -188,6 +188,11 @@ type Kernel struct { // deviceRegistry is used to save/restore device.SimpleDevices. deviceRegistry struct{} `state:".(*device.Registry)"` + + // DirentCacheLimiter controls the number of total dirent entries can be in + // caches. Not all caches use it, only the caches that use host resources use + // the limiter. It may be nil if disabled. + DirentCacheLimiter *fs.DirentCacheLimiter } // InitKernelArgs holds arguments to Init. @@ -626,6 +631,8 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { return ctx.k.mounts.Root() } return nil + case fs.CtxDirentCacheLimiter: + return ctx.k.DirentCacheLimiter case ktime.CtxRealtimeClock: return ctx.k.RealtimeClock() case limits.CtxLimits: @@ -1170,6 +1177,8 @@ func (ctx supervisorContext) Value(key interface{}) interface{} { return auth.NewRootCredentials(ctx.k.rootUserNamespace) case fs.CtxRoot: return ctx.k.mounts.Root() + case fs.CtxDirentCacheLimiter: + return ctx.k.DirentCacheLimiter case ktime.CtxRealtimeClock: return ctx.k.RealtimeClock() case limits.CtxLimits: diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 9c365e781..ed2175c37 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -601,6 +601,8 @@ func (t *Task) Value(key interface{}) interface{} { return int32(t.ThreadGroup().ID()) case fs.CtxRoot: return t.fsc.RootDirectory() + case fs.CtxDirentCacheLimiter: + return t.k.DirentCacheLimiter case inet.CtxStack: return t.NetworkContext() case ktime.CtxRealtimeClock: -- cgit v1.2.3