From c8d252466fbe9709f51766cd9862fd9958b00798 Mon Sep 17 00:00:00 2001 From: Howard Zhang Date: Fri, 21 May 2021 10:36:54 +0000 Subject: apply bitmap for fd_table Apply bitmap in fd_table to record open file fd. It can accelerate the speed of allocating or removing fd from fdtable. Signed-off-by: Howard Zhang --- pkg/sentry/kernel/BUILD | 1 + pkg/sentry/kernel/fd_table.go | 214 +++++++++++++++++++---------------- pkg/sentry/kernel/fd_table_unsafe.go | 11 +- 3 files changed, 118 insertions(+), 108 deletions(-) (limited to 'pkg/sentry') diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 9a4b08469..10563e3d7 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -220,6 +220,7 @@ go_library( "//pkg/abi/linux", "//pkg/abi/linux/errno", "//pkg/amutex", + "//pkg/bitmap", "//pkg/bits", "//pkg/bpf", "//pkg/cleanup", diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 8786a70b5..5648be11b 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -18,10 +18,10 @@ import ( "fmt" "math" "strings" - "sync/atomic" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bitmap" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -84,13 +84,8 @@ type FDTable struct { // mu protects below. mu sync.Mutex `state:"nosave"` - // next is start position to find fd. - next int32 - - // used contains the number of non-nil entries. It must be accessed - // atomically. It may be read atomically without holding mu (but not - // written). - used int32 + // fdBitmap shows which fds are already in use. + fdBitmap bitmap.Bitmap `state:"nosave"` // descriptorTable holds descriptors. descriptorTable `state:".(map[int32]descriptor)"` @@ -98,6 +93,8 @@ type FDTable struct { func (f *FDTable) saveDescriptorTable() map[int32]descriptor { m := make(map[int32]descriptor) + f.mu.Lock() + defer f.mu.Unlock() f.forEach(context.Background(), func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { m[fd] = descriptor{ file: file, @@ -111,12 +108,16 @@ func (f *FDTable) saveDescriptorTable() map[int32]descriptor { func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) { ctx := context.Background() f.initNoLeakCheck() // Initialize table. - f.used = 0 + f.fdBitmap = bitmap.BitmapWithSize(uint32(math.MaxUint16)) for fd, d := range m { + if fd < 0 { + panic(fmt.Sprintf("FD is not supposed to be negative. FD: %d", fd)) + } + if file, fileVFS2 := f.setAll(ctx, fd, d.file, d.fileVFS2, d.flags); file != nil || fileVFS2 != nil { panic("VFS1 or VFS2 files set") } - + f.fdBitmap.Add(uint32(fd)) // Note that we do _not_ need to acquire a extra table reference here. The // table reference will already be accounted for in the file, so we drop the // reference taken by set above. @@ -189,8 +190,10 @@ func (f *FDTable) DecRef(ctx context.Context) { func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags)) { // retries tracks the number of failed TryIncRef attempts for the same FD. retries := 0 - fd := int32(0) - for { + fds := f.fdBitmap.ToSlice() + // Iterate through the fdBitmap. + for _, ufd := range fds { + fd := int32(ufd) file, fileVFS2, flags, ok := f.getAll(fd) if !ok { break @@ -218,7 +221,6 @@ func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File, fileVFS2.DecRef(ctx) } retries = 0 - fd++ } } @@ -226,6 +228,8 @@ func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File, func (f *FDTable) String() string { var buf strings.Builder ctx := context.Background() + f.mu.Lock() + defer f.mu.Unlock() f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { switch { case file != nil: @@ -250,10 +254,10 @@ func (f *FDTable) String() string { } // NewFDs allocates new FDs guaranteed to be the lowest number available -// greater than or equal to the fd parameter. All files will share the set +// greater than or equal to the minFD parameter. All files will share the set // flags. Success is guaranteed to be all or none. -func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags FDFlags) (fds []int32, err error) { - if fd < 0 { +func (f *FDTable) NewFDs(ctx context.Context, minFD int32, files []*fs.File, flags FDFlags) (fds []int32, err error) { + if minFD < 0 { // Don't accept negative FDs. return nil, unix.EINVAL } @@ -267,24 +271,49 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags if lim.Cur != limits.Infinity { end = int32(lim.Cur) } - if fd >= end { + if minFD >= end { return nil, unix.EMFILE } } f.mu.Lock() - // From f.next to find available fd. - if fd < f.next { - fd = f.next + // max is used as the largest number in fdBitmap + 1. + max := int32(0) + // flipFdBitmap is flip of fdBitmap which is used to find free fd in fdBitmap. + flipFdBitmap := f.fdBitmap.Clone() + + if !f.fdBitmap.IsEmpty() { + max = int32(f.fdBitmap.Maximum()) + max++ + flipFdBitmap.FlipRange(uint32(0), uint32(max)) + // Clear 0 to (minFD-1) bit in flipFdBitmap, thus it will not use fd that is less than minFD. + if minFD > 0 { + flipFdBitmap.ClearRange(uint32(0), uint32(minFD)) + } } + // Adjust max in case it is less than minFD. + if max < minFD { + max = minFD + } // Install all entries. - for i := fd; i < end && len(fds) < len(files); i++ { - if d, _, _ := f.get(i); d == nil { - // Set the descriptor. - f.set(ctx, i, files[len(fds)], flags) - fds = append(fds, i) // Record the file descriptor. + for len(fds) < len(files) { + // Try to use free bit in fdBitmap. + // If all bits in fdBitmap are used, expand fd to the max. + if !flipFdBitmap.IsEmpty() { + fd := flipFdBitmap.Minimum() + f.fdBitmap.Add(fd) + flipFdBitmap.Remove(fd) + f.set(ctx, int32(fd), files[len(fds)], flags) + fds = append(fds, int32(fd)) + } else if max < end { + f.fdBitmap.Add(uint32(max)) + f.set(ctx, max, files[len(fds)], flags) + fds = append(fds, max) + max++ + } else { + break } } @@ -292,6 +321,7 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags if len(fds) < len(files) { for _, i := range fds { f.set(ctx, i, nil, FDFlags{}) + f.fdBitmap.Remove(uint32(i)) } f.mu.Unlock() @@ -305,20 +335,15 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags return nil, unix.EMFILE } - if fd == f.next { - // Update next search start position. - f.next = fds[len(fds)-1] + 1 - } - f.mu.Unlock() return fds, nil } // NewFDsVFS2 allocates new FDs guaranteed to be the lowest number available -// greater than or equal to the fd parameter. All files will share the set +// greater than or equal to the minFD parameter. All files will share the set // flags. Success is guaranteed to be all or none. -func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDescription, flags FDFlags) (fds []int32, err error) { - if fd < 0 { +func (f *FDTable) NewFDsVFS2(ctx context.Context, minFD int32, files []*vfs.FileDescription, flags FDFlags) (fds []int32, err error) { + if minFD < 0 { // Don't accept negative FDs. return nil, unix.EINVAL } @@ -332,31 +357,56 @@ func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDes if lim.Cur != limits.Infinity { end = int32(lim.Cur) } - if fd >= end { + if minFD >= end { return nil, unix.EMFILE } } f.mu.Lock() - // From f.next to find available fd. - if fd < f.next { - fd = f.next + // max is used as the largest number in fdBitmap + 1. + max := int32(0) + // flipFdBitmap is flip of fdBitmap which is used to find free fd in fdBitmap. + flipFdBitmap := f.fdBitmap.Clone() + + if !f.fdBitmap.IsEmpty() { + max = int32(f.fdBitmap.Maximum()) + max++ + flipFdBitmap.FlipRange(uint32(0), uint32(max)) + // Clear 0 to (minFD-1) bit in flipFdBitmap, thus it will not use fd that is less than minFD. + if minFD > 0 { + flipFdBitmap.ClearRange(uint32(0), uint32(minFD)) + } } - // Install all entries. - for i := fd; i < end && len(fds) < len(files); i++ { - if d, _, _ := f.getVFS2(i); d == nil { - // Set the descriptor. - f.setVFS2(ctx, i, files[len(fds)], flags) - fds = append(fds, i) // Record the file descriptor. + // Adjust max in case it is less than minFD. + if max < minFD { + max = minFD + } + + for len(fds) < len(files) { + // Try to use free bit in fdBitmap. + // If all bits in fdBitmap are used, expand fd to the max. + if !flipFdBitmap.IsEmpty() { + fd := flipFdBitmap.Minimum() + f.fdBitmap.Add(fd) + flipFdBitmap.Remove(fd) + f.setVFS2(ctx, int32(fd), files[len(fds)], flags) + fds = append(fds, int32(fd)) + } else if max < end { + f.fdBitmap.Add(uint32(max)) + f.setVFS2(ctx, max, files[len(fds)], flags) + fds = append(fds, max) + max++ + } else { + break } } - // Failure? Unwind existing FDs. if len(fds) < len(files) { for _, i := range fds { f.setVFS2(ctx, i, nil, FDFlags{}) + f.fdBitmap.Remove(uint32(i)) } f.mu.Unlock() @@ -370,57 +420,19 @@ func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDes return nil, unix.EMFILE } - if fd == f.next { - // Update next search start position. - f.next = fds[len(fds)-1] + 1 - } - f.mu.Unlock() return fds, nil } -// NewFDVFS2 allocates a file descriptor greater than or equal to minfd for +// NewFDVFS2 allocates a file descriptor greater than or equal to minFD for // the given file description. If it succeeds, it takes a reference on file. -func (f *FDTable) NewFDVFS2(ctx context.Context, minfd int32, file *vfs.FileDescription, flags FDFlags) (int32, error) { - if minfd < 0 { - // Don't accept negative FDs. - return -1, unix.EINVAL - } - - // Default limit. - end := int32(math.MaxInt32) - - // Ensure we don't get past the provided limit. - if limitSet := limits.FromContext(ctx); limitSet != nil { - lim := limitSet.Get(limits.NumberOfFiles) - if lim.Cur != limits.Infinity { - end = int32(lim.Cur) - } - if minfd >= end { - return -1, unix.EMFILE - } - } - - f.mu.Lock() - defer f.mu.Unlock() - - // From f.next to find available fd. - fd := minfd - if fd < f.next { - fd = f.next - } - for fd < end { - if d, _, _ := f.getVFS2(fd); d == nil { - f.setVFS2(ctx, fd, file, flags) - if fd == f.next { - // Update next search start position. - f.next = fd + 1 - } - return fd, nil - } - fd++ +func (f *FDTable) NewFDVFS2(ctx context.Context, minFD int32, file *vfs.FileDescription, flags FDFlags) (int32, error) { + files := []*vfs.FileDescription{file} + fileSlice, error := f.NewFDsVFS2(ctx, minFD, files, flags) + if error != nil { + return -1, error } - return -1, unix.EMFILE + return fileSlice[0], nil } // NewFDAt sets the file reference for the given FD. If there is an active @@ -469,6 +481,11 @@ func (f *FDTable) newFDAt(ctx context.Context, fd int32, file *fs.File, fileVFS2 defer f.mu.Unlock() df, dfVFS2 := f.setAll(ctx, fd, file, fileVFS2, flags) + // Add fd to fdBitmap. + if file != nil || fileVFS2 != nil { + f.fdBitmap.Add(uint32(fd)) + } + return df, dfVFS2, nil } @@ -573,7 +590,9 @@ func (f *FDTable) GetVFS2(fd int32) (*vfs.FileDescription, FDFlags) { // Precondition: The caller must be running on the task goroutine, or Task.mu // must be locked. func (f *FDTable) GetFDs(ctx context.Context) []int32 { - fds := make([]int32, 0, int(atomic.LoadInt32(&f.used))) + f.mu.Lock() + defer f.mu.Unlock() + fds := make([]int32, 0, int(f.fdBitmap.GetNumOnes())) f.forEach(ctx, func(fd int32, _ *fs.File, _ *vfs.FileDescription, _ FDFlags) { fds = append(fds, fd) }) @@ -583,13 +602,15 @@ func (f *FDTable) GetFDs(ctx context.Context) []int32 { // Fork returns an independent FDTable. func (f *FDTable) Fork(ctx context.Context) *FDTable { clone := f.k.NewFDTable() - + f.mu.Lock() + defer f.mu.Unlock() f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { // The set function here will acquire an appropriate table // reference for the clone. We don't need anything else. if df, dfVFS2 := clone.setAll(ctx, fd, file, fileVFS2, flags); df != nil || dfVFS2 != nil { panic("VFS1 or VFS2 files set") } + clone.fdBitmap.Add(uint32(fd)) }) return clone } @@ -604,11 +625,6 @@ func (f *FDTable) Remove(ctx context.Context, fd int32) (*fs.File, *vfs.FileDesc f.mu.Lock() - // Update current available position. - if fd < f.next { - f.next = fd - } - orig, orig2, _, _ := f.getAll(fd) // Add reference for caller. @@ -621,6 +637,7 @@ func (f *FDTable) Remove(ctx context.Context, fd int32) (*fs.File, *vfs.FileDesc if orig != nil || orig2 != nil { orig, orig2 = f.setAll(ctx, fd, nil, nil, FDFlags{}) // Zap entry. + f.fdBitmap.Remove(uint32(fd)) } f.mu.Unlock() @@ -644,16 +661,13 @@ func (f *FDTable) RemoveIf(ctx context.Context, cond func(*fs.File, *vfs.FileDes f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { if cond(file, fileVFS2, flags) { df, dfVFS2 := f.setAll(ctx, fd, nil, nil, FDFlags{}) // Clear from table. + f.fdBitmap.Remove(uint32(fd)) if df != nil { files = append(files, df) } if dfVFS2 != nil { filesVFS2 = append(filesVFS2, dfVFS2) } - // Update current available position. - if fd < f.next { - f.next = fd - } } }) f.mu.Unlock() diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go index f17f9c59c..c4cac6b99 100644 --- a/pkg/sentry/kernel/fd_table_unsafe.go +++ b/pkg/sentry/kernel/fd_table_unsafe.go @@ -15,9 +15,11 @@ package kernel import ( + "math" "sync/atomic" "unsafe" + "gvisor.dev/gvisor/pkg/bitmap" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -44,6 +46,7 @@ func (f *FDTable) initNoLeakCheck() { func (f *FDTable) init() { f.initNoLeakCheck() f.InitRefs() + f.fdBitmap = bitmap.BitmapWithSize(uint32(math.MaxUint16)) } // get gets a file entry. @@ -162,14 +165,6 @@ func (f *FDTable) setAll(ctx context.Context, fd int32, file *fs.File, fileVFS2 } } - // Adjust used. - switch { - case orig == nil && desc != nil: - atomic.AddInt32(&f.used, 1) - case orig != nil && desc == nil: - atomic.AddInt32(&f.used, -1) - } - if orig != nil { switch { case orig.file != nil: -- cgit v1.2.3 From 68cf8cc9a244041f859dc484abe551b8e018ad05 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 24 Jul 2021 17:09:33 -0700 Subject: Don't create an extra fd bitmap to allocate a new fd. --- pkg/bitmap/BUILD | 2 -- pkg/bitmap/bitmap.go | 34 +++++++++++++++---- pkg/bitmap/bitmap_test.go | 70 +++++++++++++++++++++++----------------- pkg/sentry/kernel/fd_table.go | 56 +++++++++++--------------------- test/perf/BUILD | 7 ++++ test/perf/linux/BUILD | 16 +++++++++ test/perf/linux/dup_benchmark.cc | 57 ++++++++++++++++++++++++++++++++ test/syscalls/linux/BUILD | 1 + test/syscalls/linux/dup.cc | 38 ++++++++++++++++++++++ 9 files changed, 207 insertions(+), 74 deletions(-) create mode 100644 test/perf/linux/dup_benchmark.cc (limited to 'pkg/sentry') diff --git a/pkg/bitmap/BUILD b/pkg/bitmap/BUILD index 1af60002a..0f1e7006d 100644 --- a/pkg/bitmap/BUILD +++ b/pkg/bitmap/BUILD @@ -6,7 +6,6 @@ go_library( name = "bitmap", srcs = ["bitmap.go"], visibility = ["//:sandbox"], - deps = ["@org_golang_x_sys//unix:go_default_library"], ) go_test( @@ -14,5 +13,4 @@ go_test( size = "small", srcs = ["bitmap_test.go"], library = ":bitmap", - deps = ["@org_golang_x_sys//unix:go_default_library"], ) diff --git a/pkg/bitmap/bitmap.go b/pkg/bitmap/bitmap.go index 40cf88103..803b7b3c7 100644 --- a/pkg/bitmap/bitmap.go +++ b/pkg/bitmap/bitmap.go @@ -25,7 +25,7 @@ import ( // +stateify savable type Bitmap struct { // numOnes is the number of ones in the bitmap. - numOnes uint32 + numOnes uint32 // bitBlock holds the bits. The type of bitBlock is uint64 which means // each number in bitBlock contains 64 entries. @@ -56,6 +56,28 @@ func (b *Bitmap) Minimum() uint32 { return math.MaxInt32 } +// FirstZero returns the first unset bit from the range [start, ). +func (b *Bitmap) FirstZero(start uint32) uint32 { + i, nbit := int(start/64), start%64 + n := len(b.bitBlock) + if i >= n { + return math.MaxInt32 + } + w := b.bitBlock[i] | ((1 << nbit) - 1) + for { + if w != ^uint64(0) { + r := bits.TrailingZeros64(^w) + return uint32(r + i*64) + } + i++ + if i == n { + break + } + w = b.bitBlock[i] + } + return math.MaxInt32 +} + // Maximum return the largest value in the Bitmap. func (b *Bitmap) Maximum() uint32 { for i := len(b.bitBlock) - 1; i >= 0; i-- { @@ -127,13 +149,13 @@ func (b *Bitmap) flipRange(begin, end uint32) { beginBlock := begin / 64 endBlock := end / 64 if beginBlock == endBlock { - b.bitBlock[endBlock] ^= ((^uint64(0) << uint(begin%64)) & ((uint64(1) << (uint(end) % 64 + 1))-1)) + b.bitBlock[endBlock] ^= ((^uint64(0) << uint(begin%64)) & ((uint64(1) << (uint(end)%64 + 1)) - 1)) } else { b.bitBlock[beginBlock] ^= ^(^uint64(0) << uint(begin%64)) for i := beginBlock; i < endBlock; i++ { b.bitBlock[i] = ^b.bitBlock[i] } - b.bitBlock[endBlock] ^= ((uint64(1) << (uint(end) % 64 + 1))-1) + b.bitBlock[endBlock] ^= ((uint64(1) << (uint(end)%64 + 1)) - 1) } } @@ -143,13 +165,13 @@ func (b *Bitmap) clearRange(begin, end uint32) { beginBlock := begin / 64 endBlock := end / 64 if beginBlock == endBlock { - b.bitBlock[beginBlock] &= (((uint64(1) << uint(begin%64)) - 1) | ^((uint64(1) << (uint(end) % 64 + 1)) - 1)) + b.bitBlock[beginBlock] &= (((uint64(1) << uint(begin%64)) - 1) | ^((uint64(1) << (uint(end)%64 + 1)) - 1)) } else { b.bitBlock[beginBlock] &= ((uint64(1) << uint(begin%64)) - 1) for i := beginBlock + 1; i < endBlock; i++ { b.bitBlock[i] &= ^b.bitBlock[i] } - b.bitBlock[endBlock] &= ^((uint64(1) << (uint(end) % 64 + 1)) - 1) + b.bitBlock[endBlock] &= ^((uint64(1) << (uint(end)%64 + 1)) - 1) } } @@ -198,7 +220,7 @@ func (b *Bitmap) ToSlice() []uint32 { // Extract the lowest set 1 bit. j := bitBlock & -bitBlock // Interpret the bit as the in32 number it represents and add it to result. - bitmapSlice = append(bitmapSlice, uint32((base + int(bits.OnesCount64(j - 1))))) + bitmapSlice = append(bitmapSlice, uint32((base + int(bits.OnesCount64(j-1))))) bitBlock ^= j } base += 64 diff --git a/pkg/bitmap/bitmap_test.go b/pkg/bitmap/bitmap_test.go index a7c337b55..37f068438 100644 --- a/pkg/bitmap/bitmap_test.go +++ b/pkg/bitmap/bitmap_test.go @@ -15,6 +15,7 @@ package bitmap import ( + "math" "reflect" "testing" ) @@ -29,9 +30,9 @@ func generateFilledSlice(min, max, length int) []uint32 { } randSlice := make([]uint32, length) if length != 0 { - rangeNum := uint32((max - min)/length) + rangeNum := uint32((max - min) / length) randSlice[0], randSlice[length-1] = uint32(min), uint32(max) - for i := 1 ; i < length - 1; i++ { + for i := 1; i < length-1; i++ { randSlice[i] = randSlice[i-1] + rangeNum } } @@ -50,9 +51,9 @@ func generateFilledBitmap(min, max, fillNum int) ([]uint32, Bitmap) { } func TestNewBitmap(t *testing.T) { - tests := []struct { - name string - size int + tests := []struct { + name string + size int expectSize int }{ {"length 1", 1, 1}, @@ -62,7 +63,7 @@ func TestNewBitmap(t *testing.T) { for _, tt := range tests { tt := tt - t.Run(tt.name, func(t *testing.T){ + t.Run(tt.name, func(t *testing.T) { if bitmap := BitmapWithSize(uint32(tt.size)); len(bitmap.bitBlock) != tt.expectSize { t.Errorf("BitmapWithSize created bitmap with %v, bitBlock size: %d, wanted: %d", tt.name, len(bitmap.bitBlock), tt.expectSize) } @@ -72,14 +73,14 @@ func TestNewBitmap(t *testing.T) { func TestAdd(t *testing.T) { tests := []struct { - name string + name string bitmapSize int - addNum int + addNum int }{ {"Add with null bitmap.bitBlock", 0, 10}, - {"Add without extending bitBlock",64, 10}, + {"Add without extending bitBlock", 64, 10}, {"Add without extending bitblock with margin number", 63, 64}, - {"Add with extended one block",1024, 1025}, + {"Add with extended one block", 1024, 1025}, {"Add with extended more then one block", 1024, 2048}, } @@ -110,14 +111,14 @@ func TestRemove(t *testing.T) { } bitmapSlice := bitmap.ToSlice() if !reflect.DeepEqual(bitmapSlice, secondSlice) { - t.Errorf("After Remove() firstSlice, remained slice: %v, wanted: %v", bitmapSlice, secondSlice) + t.Errorf("After Remove() firstSlice, remained slice: %v, wanted: %v", bitmapSlice, secondSlice) } for i := 0; i < 50; i++ { bitmap.Remove(secondSlice[i]) } bitmapSlice = bitmap.ToSlice() - emptySlice := make([]uint32,0) + emptySlice := make([]uint32, 0) if !reflect.DeepEqual(bitmapSlice, emptySlice) { t.Errorf("After Remove secondSlice, remained slice: %v, wanted: %v", bitmapSlice, emptySlice) } @@ -127,9 +128,9 @@ func TestRemove(t *testing.T) { // Verify flip bits within one bitBlock, one bit and bits cross multi bitBlocks. func TestFlipRange(t *testing.T) { tests := []struct { - name string - flipRangeMin int - flipRangeMax int + name string + flipRangeMin int + flipRangeMax int filledSliceLen int }{ {"Flip one number in bitmap", 77, 77, 1}, @@ -141,7 +142,7 @@ func TestFlipRange(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { fillSlice, bitmap := generateFilledBitmap(tt.flipRangeMin, tt.flipRangeMax, tt.filledSliceLen) - flipFillSlice := make([]uint32,0) + flipFillSlice := make([]uint32, 0) for i, j := tt.flipRangeMin, 0; i <= tt.flipRangeMax; i++ { if uint32(i) != fillSlice[j] { flipFillSlice = append(flipFillSlice, uint32(i)) @@ -150,7 +151,7 @@ func TestFlipRange(t *testing.T) { } } - bitmap.FlipRange(uint32(tt.flipRangeMin), uint32(tt.flipRangeMax + 1)) + bitmap.FlipRange(uint32(tt.flipRangeMin), uint32(tt.flipRangeMax+1)) flipBitmapSlice := bitmap.ToSlice() if !reflect.DeepEqual(flipFillSlice, flipBitmapSlice) { t.Errorf("%v, flipped slice: %v, wanted: %v", tt.name, flipBitmapSlice, flipFillSlice) @@ -162,10 +163,10 @@ func TestFlipRange(t *testing.T) { // Verify clear bits within one bitBlock, one bit and bits cross multi bitBlocks. func TestClearRange(t *testing.T) { tests := []struct { - name string + name string clearRangeMin int clearRangeMax int - bitmapSize int + bitmapSize int }{ {"ClearRange clear one number", 5, 5, 64}, {"ClearRange clear numbers within one bitBlock", 4, 61, 64}, @@ -177,9 +178,9 @@ func TestClearRange(t *testing.T) { t.Run(tt.name, func(t *testing.T) { bitmap := BitmapWithSize(uint32(tt.bitmapSize)) bitmap.FlipRange(uint32(0), uint32(tt.bitmapSize)) - bitmap.ClearRange(uint32(tt.clearRangeMin), uint32(tt.clearRangeMax + 1)) + bitmap.ClearRange(uint32(tt.clearRangeMin), uint32(tt.clearRangeMax+1)) clearedBitmapSlice := bitmap.ToSlice() - clearedSlice := make([]uint32,0) + clearedSlice := make([]uint32, 0) for i := 0; i < tt.bitmapSize; i++ { if i < tt.clearRangeMin || i > tt.clearRangeMax { clearedSlice = append(clearedSlice, uint32(i)) @@ -218,22 +219,22 @@ func TestMinimum(t *testing.T) { func TestMaximum(t *testing.T) { randSlice, bitmap := generateFilledBitmap(0, 1024, 200) max := bitmap.Maximum() - if max != randSlice[len(randSlice) - 1] { - t.Errorf("Maximum() returns: %v, wanted: %v", max, randSlice[len(randSlice) - 1]) + if max != randSlice[len(randSlice)-1] { + t.Errorf("Maximum() returns: %v, wanted: %v", max, randSlice[len(randSlice)-1]) } bitmap.ClearRange(uint32(1000), uint32(1025)) max = bitmap.Maximum() bitmapSlice := bitmap.ToSlice() - if max != bitmapSlice[len(bitmapSlice) - 1] { - t.Errorf("After ClearRange, Maximum() returns: %v, wanted: %v", max, bitmapSlice[len(bitmapSlice) - 1]) + if max != bitmapSlice[len(bitmapSlice)-1] { + t.Errorf("After ClearRange, Maximum() returns: %v, wanted: %v", max, bitmapSlice[len(bitmapSlice)-1]) } bitmap.FlipRange(uint32(1001), uint32(1021)) max = bitmap.Maximum() bitmapSlice = bitmap.ToSlice() - if max != bitmapSlice[len(bitmapSlice) - 1] { - t.Errorf("After Flip, Maximum() returns: %v, wanted: %v", max, bitmapSlice[len(bitmapSlice) - 1]) + if max != bitmapSlice[len(bitmapSlice)-1] { + t.Errorf("After Flip, Maximum() returns: %v, wanted: %v", max, bitmapSlice[len(bitmapSlice)-1]) } } @@ -261,7 +262,7 @@ func TestBitmapNumOnes(t *testing.T) { } // Add 10 number. - for i := 1080; i < 1090; i++{ + for i := 1080; i < 1090; i++ { bitmap.Add(uint32(i)) } bitmapOnes = bitmap.GetNumOnes() @@ -270,7 +271,7 @@ func TestBitmapNumOnes(t *testing.T) { } // Add the 10 number again, the length supposed not change. - for i := 1080; i < 1090; i++{ + for i := 1080; i < 1090; i++ { bitmap.Add(uint32(i)) } bitmapOnes = bitmap.GetNumOnes() @@ -292,3 +293,14 @@ func TestBitmapNumOnes(t *testing.T) { t.Errorf("After ClearRange, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 20) } } + +func TestFirstZero(t *testing.T) { + bitmap := BitmapWithSize(uint32(1000)) + bitmap.FlipRange(200, 400) + for i, j := range map[uint32]uint32{0: 0, 201: 400, 200: 400, 199: 199, 400: 400, 10000: math.MaxInt32} { + v := bitmap.FirstZero(i) + if v != j { + t.Errorf("Minimum() returns: %v, wanted: %v", v, j) + } + } +} diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 5648be11b..9f7702fcc 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -271,7 +271,7 @@ func (f *FDTable) NewFDs(ctx context.Context, minFD int32, files []*fs.File, fla if lim.Cur != limits.Infinity { end = int32(lim.Cur) } - if minFD >= end { + if minFD+int32(len(files)) > end { return nil, unix.EMFILE } } @@ -280,17 +280,10 @@ func (f *FDTable) NewFDs(ctx context.Context, minFD int32, files []*fs.File, fla // max is used as the largest number in fdBitmap + 1. max := int32(0) - // flipFdBitmap is flip of fdBitmap which is used to find free fd in fdBitmap. - flipFdBitmap := f.fdBitmap.Clone() if !f.fdBitmap.IsEmpty() { max = int32(f.fdBitmap.Maximum()) max++ - flipFdBitmap.FlipRange(uint32(0), uint32(max)) - // Clear 0 to (minFD-1) bit in flipFdBitmap, thus it will not use fd that is less than minFD. - if minFD > 0 { - flipFdBitmap.ClearRange(uint32(0), uint32(minFD)) - } } // Adjust max in case it is less than minFD. @@ -301,20 +294,18 @@ func (f *FDTable) NewFDs(ctx context.Context, minFD int32, files []*fs.File, fla for len(fds) < len(files) { // Try to use free bit in fdBitmap. // If all bits in fdBitmap are used, expand fd to the max. - if !flipFdBitmap.IsEmpty() { - fd := flipFdBitmap.Minimum() - f.fdBitmap.Add(fd) - flipFdBitmap.Remove(fd) - f.set(ctx, int32(fd), files[len(fds)], flags) - fds = append(fds, int32(fd)) - } else if max < end { - f.fdBitmap.Add(uint32(max)) - f.set(ctx, max, files[len(fds)], flags) - fds = append(fds, max) + fd := f.fdBitmap.FirstZero(uint32(minFD)) + if fd == math.MaxInt32 { + fd = uint32(max) max++ - } else { + } + if fd >= uint32(end) { break } + f.fdBitmap.Add(fd) + f.set(ctx, int32(fd), files[len(fds)], flags) + fds = append(fds, int32(fd)) + minFD = int32(fd) } // Failure? Unwind existing FDs. @@ -366,17 +357,10 @@ func (f *FDTable) NewFDsVFS2(ctx context.Context, minFD int32, files []*vfs.File // max is used as the largest number in fdBitmap + 1. max := int32(0) - // flipFdBitmap is flip of fdBitmap which is used to find free fd in fdBitmap. - flipFdBitmap := f.fdBitmap.Clone() if !f.fdBitmap.IsEmpty() { max = int32(f.fdBitmap.Maximum()) max++ - flipFdBitmap.FlipRange(uint32(0), uint32(max)) - // Clear 0 to (minFD-1) bit in flipFdBitmap, thus it will not use fd that is less than minFD. - if minFD > 0 { - flipFdBitmap.ClearRange(uint32(0), uint32(minFD)) - } } // Adjust max in case it is less than minFD. @@ -387,20 +371,18 @@ func (f *FDTable) NewFDsVFS2(ctx context.Context, minFD int32, files []*vfs.File for len(fds) < len(files) { // Try to use free bit in fdBitmap. // If all bits in fdBitmap are used, expand fd to the max. - if !flipFdBitmap.IsEmpty() { - fd := flipFdBitmap.Minimum() - f.fdBitmap.Add(fd) - flipFdBitmap.Remove(fd) - f.setVFS2(ctx, int32(fd), files[len(fds)], flags) - fds = append(fds, int32(fd)) - } else if max < end { - f.fdBitmap.Add(uint32(max)) - f.setVFS2(ctx, max, files[len(fds)], flags) - fds = append(fds, max) + fd := f.fdBitmap.FirstZero(uint32(minFD)) + if fd == math.MaxInt32 { + fd = uint32(max) max++ - } else { + } + if fd >= uint32(end) { break } + f.fdBitmap.Add(fd) + f.setVFS2(ctx, int32(fd), files[len(fds)], flags) + fds = append(fds, int32(fd)) + minFD = int32(fd) } // Failure? Unwind existing FDs. if len(fds) < len(files) { diff --git a/test/perf/BUILD b/test/perf/BUILD index 75b5003e2..08e04d8ad 100644 --- a/test/perf/BUILD +++ b/test/perf/BUILD @@ -69,6 +69,13 @@ syscall_test( test = "//test/perf/linux:open_benchmark", ) +syscall_test( + size = "large", + add_overlay = True, + debug = False, + test = "//test/perf/linux:dup_benchmark", +) + syscall_test( debug = False, test = "//test/perf/linux:pipe_benchmark", diff --git a/test/perf/linux/BUILD b/test/perf/linux/BUILD index dd1d2438c..4511611ad 100644 --- a/test/perf/linux/BUILD +++ b/test/perf/linux/BUILD @@ -108,6 +108,22 @@ cc_binary( ], ) +cc_binary( + name = "dup_benchmark", + testonly = 1, + srcs = [ + "dup_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:fs_util", + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + ], +) + cc_binary( name = "read_benchmark", testonly = 1, diff --git a/test/perf/linux/dup_benchmark.cc b/test/perf/linux/dup_benchmark.cc new file mode 100644 index 000000000..fcf14368b --- /dev/null +++ b/test/perf/linux/dup_benchmark.cc @@ -0,0 +1,57 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/fs_util.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_Dup(benchmark::State& state) { + const int size = state.range(0); + + for (auto _ : state) { + std::vector v; + for (int i = 0; i < size; i++) { + int fd = dup(2); + TEST_CHECK(fd != -1); + v.push_back(fd); + } + for (int i = 0; i < size; i++) { + int fd = v[i]; + close(fd); + } + } + state.SetItemsProcessed(state.iterations() * size); +} + +BENCHMARK(BM_Dup)->Range(1, 1 << 15)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 2bf685524..1bbcd8abb 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -583,6 +583,7 @@ cc_binary( deps = [ "//test/util:eventfd_util", "//test/util:file_descriptor", + "@com_google_absl//absl/memory", gtest, "//test/util:fs_util", "//test/util:posix_error", diff --git a/test/syscalls/linux/dup.cc b/test/syscalls/linux/dup.cc index ba4e13fb9..fca0880a6 100644 --- a/test/syscalls/linux/dup.cc +++ b/test/syscalls/linux/dup.cc @@ -13,9 +13,11 @@ // limitations under the License. #include +#include #include #include "gtest/gtest.h" +#include "absl/memory/memory.h" #include "test/util/eventfd_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" @@ -98,6 +100,42 @@ TEST(DupTest, Dup2) { ASSERT_NO_ERRNO(CheckSameFile(fd, nfd2)); } +TEST(DupTest, Rlimit) { + constexpr int kFDLimit = 101; + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + struct rlimit rl = {}; + EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + + // Lower the rlimit first, as it may be equal to /proc/sys/fs/nr_open, in + // which case even users with CAP_SYS_RESOURCE can't raise it. + rl.rlim_cur = kFDLimit * 2; + ASSERT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + + FileDescriptor aboveLimitFD = + ASSERT_NO_ERRNO_AND_VALUE(Dup2(fd, kFDLimit * 2 - 1)); + + rl.rlim_cur = kFDLimit; + ASSERT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + ASSERT_THAT(dup3(fd.get(), kFDLimit, 0), SyscallFails()); + + std::vector> fds; + int prev = fd.get(); + for (int i = 0; i < kFDLimit; i++) { + int d = dup(fd.get()); + if (d == -1) { + break; + } + std::unique_ptr f = absl::make_unique(d); + EXPECT_LT(d, kFDLimit); + EXPECT_GT(d, prev); + prev = d; + fds.push_back(std::move(f)); + } + EXPECT_EQ(fds.size(), kFDLimit - fd.get() - 1); +} + TEST(DupTest, Dup2SameFD) { auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); -- cgit v1.2.3