summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/bitmap/BUILD16
-rw-r--r--pkg/bitmap/bitmap.go234
-rw-r--r--pkg/bitmap/bitmap_test.go306
-rw-r--r--pkg/sentry/kernel/BUILD1
-rw-r--r--pkg/sentry/kernel/fd_table.go196
-rw-r--r--pkg/sentry/kernel/fd_table_unsafe.go11
-rw-r--r--test/perf/BUILD7
-rw-r--r--test/perf/linux/BUILD16
-rw-r--r--test/perf/linux/dup_benchmark.cc55
-rw-r--r--test/syscalls/linux/BUILD1
-rw-r--r--test/syscalls/linux/dup.cc41
11 files changed, 776 insertions, 108 deletions
diff --git a/pkg/bitmap/BUILD b/pkg/bitmap/BUILD
new file mode 100644
index 000000000..0f1e7006d
--- /dev/null
+++ b/pkg/bitmap/BUILD
@@ -0,0 +1,16 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "bitmap",
+ srcs = ["bitmap.go"],
+ visibility = ["//:sandbox"],
+)
+
+go_test(
+ name = "bitmap_test",
+ size = "small",
+ srcs = ["bitmap_test.go"],
+ library = ":bitmap",
+)
diff --git a/pkg/bitmap/bitmap.go b/pkg/bitmap/bitmap.go
new file mode 100644
index 000000000..12d2fc2b8
--- /dev/null
+++ b/pkg/bitmap/bitmap.go
@@ -0,0 +1,234 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package bitmap provides the implementation of bitmap.
+package bitmap
+
+import (
+ "math"
+ "math/bits"
+)
+
+// Bitmap implements an efficient bitmap.
+//
+// +stateify savable
+type Bitmap struct {
+ // numOnes is the number of ones in the bitmap.
+ numOnes uint32
+
+ // bitBlock holds the bits. The type of bitBlock is uint64 which means
+ // each number in bitBlock contains 64 entries.
+ bitBlock []uint64
+}
+
+// New create a new empty Bitmap.
+func New(size uint32) Bitmap {
+ b := Bitmap{}
+ bSize := (size + 63) / 64
+ b.bitBlock = make([]uint64, bSize)
+ return b
+}
+
+// IsEmpty verifies whether the Bitmap is empty.
+func (b *Bitmap) IsEmpty() bool {
+ return b.numOnes == 0
+}
+
+// Minimum return the smallest value in the Bitmap.
+func (b *Bitmap) Minimum() uint32 {
+ for i := 0; i < len(b.bitBlock); i++ {
+ if w := b.bitBlock[i]; w != 0 {
+ r := bits.TrailingZeros64(w)
+ return uint32(r + i*64)
+ }
+ }
+ return math.MaxInt32
+}
+
+// FirstZero returns the first unset bit from the range [start, ).
+func (b *Bitmap) FirstZero(start uint32) uint32 {
+ i, nbit := int(start/64), start%64
+ n := len(b.bitBlock)
+ if i >= n {
+ return math.MaxInt32
+ }
+ w := b.bitBlock[i] | ((1 << nbit) - 1)
+ for {
+ if w != ^uint64(0) {
+ r := bits.TrailingZeros64(^w)
+ return uint32(r + i*64)
+ }
+ i++
+ if i == n {
+ break
+ }
+ w = b.bitBlock[i]
+ }
+ return math.MaxInt32
+}
+
+// Maximum return the largest value in the Bitmap.
+func (b *Bitmap) Maximum() uint32 {
+ for i := len(b.bitBlock) - 1; i >= 0; i-- {
+ if w := b.bitBlock[i]; w != 0 {
+ r := bits.LeadingZeros64(w)
+ return uint32(i*64 + 63 - r)
+ }
+ }
+ return uint32(0)
+}
+
+// Add add i to the Bitmap.
+func (b *Bitmap) Add(i uint32) {
+ blockNum, mask := i/64, uint64(1)<<(i%64)
+ // if blockNum is out of range, extend b.bitBlock
+ if x, y := int(blockNum), len(b.bitBlock); x >= y {
+ b.bitBlock = append(b.bitBlock, make([]uint64, x-y+1)...)
+ }
+ oldBlock := b.bitBlock[blockNum]
+ newBlock := oldBlock | mask
+ if oldBlock != newBlock {
+ b.bitBlock[blockNum] = newBlock
+ b.numOnes++
+ }
+}
+
+// Remove i from the Bitmap.
+func (b *Bitmap) Remove(i uint32) {
+ blockNum, mask := i/64, uint64(1)<<(i%64)
+ oldBlock := b.bitBlock[blockNum]
+ newBlock := oldBlock &^ mask
+ if oldBlock != newBlock {
+ b.bitBlock[blockNum] = newBlock
+ b.numOnes--
+ }
+}
+
+// Clone the Bitmap.
+func (b *Bitmap) Clone() Bitmap {
+ bitmap := Bitmap{b.numOnes, make([]uint64, len(b.bitBlock))}
+ copy(bitmap.bitBlock, b.bitBlock[:])
+ return bitmap
+}
+
+// countOnesForBlocks count all 1 bits within b.bitBlock of begin and that of end.
+// The begin block and end block are inclusive.
+func (b *Bitmap) countOnesForBlocks(begin, end uint32) uint64 {
+ ones := uint64(0)
+ beginBlock := begin / 64
+ endBlock := end / 64
+ for i := beginBlock; i <= endBlock; i++ {
+ ones += uint64(bits.OnesCount64(b.bitBlock[i]))
+ }
+ return ones
+}
+
+// countOnesForAllBlocks count all 1 bits in b.bitBlock.
+func (b *Bitmap) countOnesForAllBlocks() uint64 {
+ ones := uint64(0)
+ for i := 0; i < len(b.bitBlock); i++ {
+ ones += uint64(bits.OnesCount64(b.bitBlock[i]))
+ }
+ return ones
+}
+
+// flipRange flip the bits within range (begin and end). begin is inclusive and end is exclusive.
+func (b *Bitmap) flipRange(begin, end uint32) {
+ end--
+ beginBlock := begin / 64
+ endBlock := end / 64
+ if beginBlock == endBlock {
+ b.bitBlock[endBlock] ^= ((^uint64(0) << uint(begin%64)) & ((uint64(1) << (uint(end)%64 + 1)) - 1))
+ } else {
+ b.bitBlock[beginBlock] ^= ^(^uint64(0) << uint(begin%64))
+ for i := beginBlock; i < endBlock; i++ {
+ b.bitBlock[i] = ^b.bitBlock[i]
+ }
+ b.bitBlock[endBlock] ^= ((uint64(1) << (uint(end)%64 + 1)) - 1)
+ }
+}
+
+// clearRange clear the bits within range (begin and end). begin is inclusive and end is exclusive.
+func (b *Bitmap) clearRange(begin, end uint32) {
+ end--
+ beginBlock := begin / 64
+ endBlock := end / 64
+ if beginBlock == endBlock {
+ b.bitBlock[beginBlock] &= (((uint64(1) << uint(begin%64)) - 1) | ^((uint64(1) << (uint(end)%64 + 1)) - 1))
+ } else {
+ b.bitBlock[beginBlock] &= ((uint64(1) << uint(begin%64)) - 1)
+ for i := beginBlock + 1; i < endBlock; i++ {
+ b.bitBlock[i] &= ^b.bitBlock[i]
+ }
+ b.bitBlock[endBlock] &= ^((uint64(1) << (uint(end)%64 + 1)) - 1)
+ }
+}
+
+// ClearRange clear bits within range (begin and end) for the Bitmap. begin is inclusive and end is exclusive.
+func (b *Bitmap) ClearRange(begin, end uint32) {
+ blockRange := end/64 - begin/64
+ // When the number of cleared blocks is larger than half of the length of b.bitBlock,
+ // counting 1s for the entire bitmap has better performance.
+ if blockRange > uint32(len(b.bitBlock)/2) {
+ b.clearRange(begin, end)
+ b.numOnes = uint32(b.countOnesForAllBlocks())
+ } else {
+ oldRangeOnes := b.countOnesForBlocks(begin, end)
+ b.clearRange(begin, end)
+ newRangeOnes := b.countOnesForBlocks(begin, end)
+ b.numOnes += uint32(newRangeOnes - oldRangeOnes)
+ }
+}
+
+// FlipRange flip bits within range (begin and end) for the Bitmap. begin is inclusive and end is exclusive.
+func (b *Bitmap) FlipRange(begin, end uint32) {
+ blockRange := end/64 - begin/64
+ // When the number of flipped blocks is larger than half of the length of b.bitBlock,
+ // counting 1s for the entire bitmap has better performance.
+ if blockRange > uint32(len(b.bitBlock)/2) {
+ b.flipRange(begin, end)
+ b.numOnes = uint32(b.countOnesForAllBlocks())
+ } else {
+ oldRangeOnes := b.countOnesForBlocks(begin, end)
+ b.flipRange(begin, end)
+ newRangeOnes := b.countOnesForBlocks(begin, end)
+ b.numOnes += uint32(newRangeOnes - oldRangeOnes)
+ }
+}
+
+// ToSlice transform the Bitmap into slice. For example, a bitmap of [0, 1, 0, 1]
+// will return the slice [1, 3].
+func (b *Bitmap) ToSlice() []uint32 {
+ bitmapSlice := make([]uint32, 0, b.numOnes)
+ // base is the start number of a bitBlock
+ base := 0
+ for i := 0; i < len(b.bitBlock); i++ {
+ bitBlock := b.bitBlock[i]
+ // Iterate through all the numbers held by this bit block.
+ for bitBlock != 0 {
+ // Extract the lowest set 1 bit.
+ j := bitBlock & -bitBlock
+ // Interpret the bit as the in32 number it represents and add it to result.
+ bitmapSlice = append(bitmapSlice, uint32((base + int(bits.OnesCount64(j-1)))))
+ bitBlock ^= j
+ }
+ base += 64
+ }
+ return bitmapSlice
+}
+
+// GetNumOnes return the the number of ones in the Bitmap.
+func (b *Bitmap) GetNumOnes() uint32 {
+ return b.numOnes
+}
diff --git a/pkg/bitmap/bitmap_test.go b/pkg/bitmap/bitmap_test.go
new file mode 100644
index 000000000..76ebd779f
--- /dev/null
+++ b/pkg/bitmap/bitmap_test.go
@@ -0,0 +1,306 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package bitmap
+
+import (
+ "math"
+ "reflect"
+ "testing"
+)
+
+// generateFilledSlice generates a slice fill with numbers.
+func generateFilledSlice(min, max, length int) []uint32 {
+ if max == min {
+ return []uint32{uint32(min)}
+ }
+ if length > (max - min) {
+ return nil
+ }
+ randSlice := make([]uint32, length)
+ if length != 0 {
+ rangeNum := uint32((max - min) / length)
+ randSlice[0], randSlice[length-1] = uint32(min), uint32(max)
+ for i := 1; i < length-1; i++ {
+ randSlice[i] = randSlice[i-1] + rangeNum
+ }
+ }
+ return randSlice
+}
+
+// generateFilledBitmap generates a Bitmap filled with fillNum of numbers,
+// and returns the slice and bitmap.
+func generateFilledBitmap(min, max, fillNum int) ([]uint32, Bitmap) {
+ bitmap := New(uint32(max))
+ randSlice := generateFilledSlice(min, max, fillNum)
+ for i := 0; i < fillNum; i++ {
+ bitmap.Add(randSlice[i])
+ }
+ return randSlice, bitmap
+}
+
+func TestNewBitmap(t *testing.T) {
+ tests := []struct {
+ name string
+ size int
+ expectSize int
+ }{
+ {"length 1", 1, 1},
+ {"length 1024", 1024, 16},
+ {"length 1025", 1025, 17},
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ if bitmap := New(uint32(tt.size)); len(bitmap.bitBlock) != tt.expectSize {
+ t.Errorf("New created bitmap with %v, bitBlock size: %d, wanted: %d", tt.name, len(bitmap.bitBlock), tt.expectSize)
+ }
+ })
+ }
+}
+
+func TestAdd(t *testing.T) {
+ tests := []struct {
+ name string
+ bitmapSize int
+ addNum int
+ }{
+ {"Add with null bitmap.bitBlock", 0, 10},
+ {"Add without extending bitBlock", 64, 10},
+ {"Add without extending bitblock with margin number", 63, 64},
+ {"Add with extended one block", 1024, 1025},
+ {"Add with extended more then one block", 1024, 2048},
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ bitmap := New(uint32(tt.bitmapSize))
+ bitmap.Add(uint32(tt.addNum))
+ bitmapSlice := bitmap.ToSlice()
+ if bitmapSlice[0] != uint32(tt.addNum) {
+ t.Errorf("%v, get number: %d, wanted: %d.", tt.name, bitmapSlice[0], tt.addNum)
+ }
+ })
+ }
+}
+
+func TestRemove(t *testing.T) {
+ bitmap := New(uint32(1024))
+ firstSlice := generateFilledSlice(0, 511, 50)
+ secondSlice := generateFilledSlice(512, 1024, 50)
+ for i := 0; i < 50; i++ {
+ bitmap.Add(firstSlice[i])
+ bitmap.Add(secondSlice[i])
+ }
+
+ for i := 0; i < 50; i++ {
+ bitmap.Remove(firstSlice[i])
+ }
+ bitmapSlice := bitmap.ToSlice()
+ if !reflect.DeepEqual(bitmapSlice, secondSlice) {
+ t.Errorf("After Remove() firstSlice, remained slice: %v, wanted: %v", bitmapSlice, secondSlice)
+ }
+
+ for i := 0; i < 50; i++ {
+ bitmap.Remove(secondSlice[i])
+ }
+ bitmapSlice = bitmap.ToSlice()
+ emptySlice := make([]uint32, 0)
+ if !reflect.DeepEqual(bitmapSlice, emptySlice) {
+ t.Errorf("After Remove secondSlice, remained slice: %v, wanted: %v", bitmapSlice, emptySlice)
+ }
+
+}
+
+// Verify flip bits within one bitBlock, one bit and bits cross multi bitBlocks.
+func TestFlipRange(t *testing.T) {
+ tests := []struct {
+ name string
+ flipRangeMin int
+ flipRangeMax int
+ filledSliceLen int
+ }{
+ {"Flip one number in bitmap", 77, 77, 1},
+ {"Flip numbers within one bitBlocks", 5, 60, 20},
+ {"Flip numbers that cross multi bitBlocks", 20, 1000, 300},
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ fillSlice, bitmap := generateFilledBitmap(tt.flipRangeMin, tt.flipRangeMax, tt.filledSliceLen)
+ flipFillSlice := make([]uint32, 0)
+ for i, j := tt.flipRangeMin, 0; i <= tt.flipRangeMax; i++ {
+ if uint32(i) != fillSlice[j] {
+ flipFillSlice = append(flipFillSlice, uint32(i))
+ } else {
+ j++
+ }
+ }
+
+ bitmap.FlipRange(uint32(tt.flipRangeMin), uint32(tt.flipRangeMax+1))
+ flipBitmapSlice := bitmap.ToSlice()
+ if !reflect.DeepEqual(flipFillSlice, flipBitmapSlice) {
+ t.Errorf("%v, flipped slice: %v, wanted: %v", tt.name, flipBitmapSlice, flipFillSlice)
+ }
+ })
+ }
+}
+
+// Verify clear bits within one bitBlock, one bit and bits cross multi bitBlocks.
+func TestClearRange(t *testing.T) {
+ tests := []struct {
+ name string
+ clearRangeMin int
+ clearRangeMax int
+ bitmapSize int
+ }{
+ {"ClearRange clear one number", 5, 5, 64},
+ {"ClearRange clear numbers within one bitBlock", 4, 61, 64},
+ {"ClearRange clear numbers cross multi bitBlocks", 20, 254, 512},
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ bitmap := New(uint32(tt.bitmapSize))
+ bitmap.FlipRange(uint32(0), uint32(tt.bitmapSize))
+ bitmap.ClearRange(uint32(tt.clearRangeMin), uint32(tt.clearRangeMax+1))
+ clearedBitmapSlice := bitmap.ToSlice()
+ clearedSlice := make([]uint32, 0)
+ for i := 0; i < tt.bitmapSize; i++ {
+ if i < tt.clearRangeMin || i > tt.clearRangeMax {
+ clearedSlice = append(clearedSlice, uint32(i))
+ }
+ }
+ if !reflect.DeepEqual(clearedSlice, clearedBitmapSlice) {
+ t.Errorf("%v, cleared slice: %v, wanted: %v", tt.name, clearedBitmapSlice, clearedSlice)
+ }
+ })
+
+ }
+}
+
+func TestMinimum(t *testing.T) {
+ randSlice, bitmap := generateFilledBitmap(0, 1024, 200)
+ min := bitmap.Minimum()
+ if min != randSlice[0] {
+ t.Errorf("Minimum() returns: %v, wanted: %v", min, randSlice[0])
+ }
+
+ bitmap.ClearRange(uint32(0), uint32(200))
+ min = bitmap.Minimum()
+ bitmapSlice := bitmap.ToSlice()
+ if min != bitmapSlice[0] {
+ t.Errorf("After ClearRange, Minimum() returns: %v, wanted: %v", min, bitmapSlice[0])
+ }
+
+ bitmap.FlipRange(uint32(2), uint32(11))
+ min = bitmap.Minimum()
+ bitmapSlice = bitmap.ToSlice()
+ if min != bitmapSlice[0] {
+ t.Errorf("After Flip, Minimum() returns: %v, wanted: %v", min, bitmapSlice[0])
+ }
+}
+
+func TestMaximum(t *testing.T) {
+ randSlice, bitmap := generateFilledBitmap(0, 1024, 200)
+ max := bitmap.Maximum()
+ if max != randSlice[len(randSlice)-1] {
+ t.Errorf("Maximum() returns: %v, wanted: %v", max, randSlice[len(randSlice)-1])
+ }
+
+ bitmap.ClearRange(uint32(1000), uint32(1025))
+ max = bitmap.Maximum()
+ bitmapSlice := bitmap.ToSlice()
+ if max != bitmapSlice[len(bitmapSlice)-1] {
+ t.Errorf("After ClearRange, Maximum() returns: %v, wanted: %v", max, bitmapSlice[len(bitmapSlice)-1])
+ }
+
+ bitmap.FlipRange(uint32(1001), uint32(1021))
+ max = bitmap.Maximum()
+ bitmapSlice = bitmap.ToSlice()
+ if max != bitmapSlice[len(bitmapSlice)-1] {
+ t.Errorf("After Flip, Maximum() returns: %v, wanted: %v", max, bitmapSlice[len(bitmapSlice)-1])
+ }
+}
+
+func TestBitmapNumOnes(t *testing.T) {
+ randSlice, bitmap := generateFilledBitmap(0, 1024, 200)
+ bitmapOnes := bitmap.GetNumOnes()
+ if bitmapOnes != uint32(200) {
+ t.Errorf("GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 200)
+ }
+ // Remove 10 numbers.
+ for i := 5; i < 15; i++ {
+ bitmap.Remove(randSlice[i])
+ }
+ bitmapOnes = bitmap.GetNumOnes()
+ if bitmapOnes != uint32(190) {
+ t.Errorf("After Remove 10 number, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 190)
+ }
+ // Remove the 10 number again, the length supposed not change.
+ for i := 5; i < 15; i++ {
+ bitmap.Remove(randSlice[i])
+ }
+ bitmapOnes = bitmap.GetNumOnes()
+ if bitmapOnes != uint32(190) {
+ t.Errorf("After Remove the 10 number again, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 190)
+ }
+
+ // Add 10 number.
+ for i := 1080; i < 1090; i++ {
+ bitmap.Add(uint32(i))
+ }
+ bitmapOnes = bitmap.GetNumOnes()
+ if bitmapOnes != uint32(200) {
+ t.Errorf("After Add 10 number, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 200)
+ }
+
+ // Add the 10 number again, the length supposed not change.
+ for i := 1080; i < 1090; i++ {
+ bitmap.Add(uint32(i))
+ }
+ bitmapOnes = bitmap.GetNumOnes()
+ if bitmapOnes != uint32(200) {
+ t.Errorf("After Add the 10 number again, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 200)
+ }
+
+ // Flip 10 bits from 0 to 1.
+ bitmap.FlipRange(uint32(1025), uint32(1035))
+ bitmapOnes = bitmap.GetNumOnes()
+ if bitmapOnes != uint32(210) {
+ t.Errorf("After Flip, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 210)
+ }
+
+ // ClearRange numbers range from [0, 1025).
+ bitmap.ClearRange(uint32(0), uint32(1025))
+ bitmapOnes = bitmap.GetNumOnes()
+ if bitmapOnes != uint32(20) {
+ t.Errorf("After ClearRange, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 20)
+ }
+}
+
+func TestFirstZero(t *testing.T) {
+ bitmap := New(uint32(1000))
+ bitmap.FlipRange(200, 400)
+ for i, j := range map[uint32]uint32{0: 0, 201: 400, 200: 400, 199: 199, 400: 400, 10000: math.MaxInt32} {
+ v := bitmap.FirstZero(i)
+ if v != j {
+ t.Errorf("Minimum() returns: %v, wanted: %v", v, j)
+ }
+ }
+}
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index c613f4932..e4e0dc04f 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -220,6 +220,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/abi/linux/errno",
"//pkg/amutex",
+ "//pkg/bitmap",
"//pkg/bits",
"//pkg/bpf",
"//pkg/cleanup",
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index 8786a70b5..eff556a0c 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -18,10 +18,10 @@ import (
"fmt"
"math"
"strings"
- "sync/atomic"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bitmap"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -84,13 +84,8 @@ type FDTable struct {
// mu protects below.
mu sync.Mutex `state:"nosave"`
- // next is start position to find fd.
- next int32
-
- // used contains the number of non-nil entries. It must be accessed
- // atomically. It may be read atomically without holding mu (but not
- // written).
- used int32
+ // fdBitmap shows which fds are already in use.
+ fdBitmap bitmap.Bitmap `state:"nosave"`
// descriptorTable holds descriptors.
descriptorTable `state:".(map[int32]descriptor)"`
@@ -98,6 +93,8 @@ type FDTable struct {
func (f *FDTable) saveDescriptorTable() map[int32]descriptor {
m := make(map[int32]descriptor)
+ f.mu.Lock()
+ defer f.mu.Unlock()
f.forEach(context.Background(), func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) {
m[fd] = descriptor{
file: file,
@@ -111,12 +108,16 @@ func (f *FDTable) saveDescriptorTable() map[int32]descriptor {
func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) {
ctx := context.Background()
f.initNoLeakCheck() // Initialize table.
- f.used = 0
+ f.fdBitmap = bitmap.New(uint32(math.MaxUint16))
for fd, d := range m {
+ if fd < 0 {
+ panic(fmt.Sprintf("FD is not supposed to be negative. FD: %d", fd))
+ }
+
if file, fileVFS2 := f.setAll(ctx, fd, d.file, d.fileVFS2, d.flags); file != nil || fileVFS2 != nil {
panic("VFS1 or VFS2 files set")
}
-
+ f.fdBitmap.Add(uint32(fd))
// Note that we do _not_ need to acquire a extra table reference here. The
// table reference will already be accounted for in the file, so we drop the
// reference taken by set above.
@@ -189,8 +190,10 @@ func (f *FDTable) DecRef(ctx context.Context) {
func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags)) {
// retries tracks the number of failed TryIncRef attempts for the same FD.
retries := 0
- fd := int32(0)
- for {
+ fds := f.fdBitmap.ToSlice()
+ // Iterate through the fdBitmap.
+ for _, ufd := range fds {
+ fd := int32(ufd)
file, fileVFS2, flags, ok := f.getAll(fd)
if !ok {
break
@@ -218,7 +221,6 @@ func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File,
fileVFS2.DecRef(ctx)
}
retries = 0
- fd++
}
}
@@ -226,6 +228,8 @@ func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File,
func (f *FDTable) String() string {
var buf strings.Builder
ctx := context.Background()
+ f.mu.Lock()
+ defer f.mu.Unlock()
f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) {
switch {
case file != nil:
@@ -250,10 +254,10 @@ func (f *FDTable) String() string {
}
// NewFDs allocates new FDs guaranteed to be the lowest number available
-// greater than or equal to the fd parameter. All files will share the set
+// greater than or equal to the minFD parameter. All files will share the set
// flags. Success is guaranteed to be all or none.
-func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags FDFlags) (fds []int32, err error) {
- if fd < 0 {
+func (f *FDTable) NewFDs(ctx context.Context, minFD int32, files []*fs.File, flags FDFlags) (fds []int32, err error) {
+ if minFD < 0 {
// Don't accept negative FDs.
return nil, unix.EINVAL
}
@@ -267,31 +271,48 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags
if lim.Cur != limits.Infinity {
end = int32(lim.Cur)
}
- if fd >= end {
+ if minFD+int32(len(files)) > end {
return nil, unix.EMFILE
}
}
f.mu.Lock()
- // From f.next to find available fd.
- if fd < f.next {
- fd = f.next
+ // max is used as the largest number in fdBitmap + 1.
+ max := int32(0)
+
+ if !f.fdBitmap.IsEmpty() {
+ max = int32(f.fdBitmap.Maximum())
+ max++
}
+ // Adjust max in case it is less than minFD.
+ if max < minFD {
+ max = minFD
+ }
// Install all entries.
- for i := fd; i < end && len(fds) < len(files); i++ {
- if d, _, _ := f.get(i); d == nil {
- // Set the descriptor.
- f.set(ctx, i, files[len(fds)], flags)
- fds = append(fds, i) // Record the file descriptor.
+ for len(fds) < len(files) {
+ // Try to use free bit in fdBitmap.
+ // If all bits in fdBitmap are used, expand fd to the max.
+ fd := f.fdBitmap.FirstZero(uint32(minFD))
+ if fd == math.MaxInt32 {
+ fd = uint32(max)
+ max++
+ }
+ if fd >= uint32(end) {
+ break
}
+ f.fdBitmap.Add(fd)
+ f.set(ctx, int32(fd), files[len(fds)], flags)
+ fds = append(fds, int32(fd))
+ minFD = int32(fd)
}
// Failure? Unwind existing FDs.
if len(fds) < len(files) {
for _, i := range fds {
f.set(ctx, i, nil, FDFlags{})
+ f.fdBitmap.Remove(uint32(i))
}
f.mu.Unlock()
@@ -305,20 +326,15 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags
return nil, unix.EMFILE
}
- if fd == f.next {
- // Update next search start position.
- f.next = fds[len(fds)-1] + 1
- }
-
f.mu.Unlock()
return fds, nil
}
// NewFDsVFS2 allocates new FDs guaranteed to be the lowest number available
-// greater than or equal to the fd parameter. All files will share the set
+// greater than or equal to the minFD parameter. All files will share the set
// flags. Success is guaranteed to be all or none.
-func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDescription, flags FDFlags) (fds []int32, err error) {
- if fd < 0 {
+func (f *FDTable) NewFDsVFS2(ctx context.Context, minFD int32, files []*vfs.FileDescription, flags FDFlags) (fds []int32, err error) {
+ if minFD < 0 {
// Don't accept negative FDs.
return nil, unix.EINVAL
}
@@ -332,31 +348,47 @@ func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDes
if lim.Cur != limits.Infinity {
end = int32(lim.Cur)
}
- if fd >= end {
+ if minFD >= end {
return nil, unix.EMFILE
}
}
f.mu.Lock()
- // From f.next to find available fd.
- if fd < f.next {
- fd = f.next
+ // max is used as the largest number in fdBitmap + 1.
+ max := int32(0)
+
+ if !f.fdBitmap.IsEmpty() {
+ max = int32(f.fdBitmap.Maximum())
+ max++
}
- // Install all entries.
- for i := fd; i < end && len(fds) < len(files); i++ {
- if d, _, _ := f.getVFS2(i); d == nil {
- // Set the descriptor.
- f.setVFS2(ctx, i, files[len(fds)], flags)
- fds = append(fds, i) // Record the file descriptor.
- }
+ // Adjust max in case it is less than minFD.
+ if max < minFD {
+ max = minFD
}
+ for len(fds) < len(files) {
+ // Try to use free bit in fdBitmap.
+ // If all bits in fdBitmap are used, expand fd to the max.
+ fd := f.fdBitmap.FirstZero(uint32(minFD))
+ if fd == math.MaxInt32 {
+ fd = uint32(max)
+ max++
+ }
+ if fd >= uint32(end) {
+ break
+ }
+ f.fdBitmap.Add(fd)
+ f.setVFS2(ctx, int32(fd), files[len(fds)], flags)
+ fds = append(fds, int32(fd))
+ minFD = int32(fd)
+ }
// Failure? Unwind existing FDs.
if len(fds) < len(files) {
for _, i := range fds {
f.setVFS2(ctx, i, nil, FDFlags{})
+ f.fdBitmap.Remove(uint32(i))
}
f.mu.Unlock()
@@ -370,57 +402,19 @@ func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDes
return nil, unix.EMFILE
}
- if fd == f.next {
- // Update next search start position.
- f.next = fds[len(fds)-1] + 1
- }
-
f.mu.Unlock()
return fds, nil
}
-// NewFDVFS2 allocates a file descriptor greater than or equal to minfd for
+// NewFDVFS2 allocates a file descriptor greater than or equal to minFD for
// the given file description. If it succeeds, it takes a reference on file.
-func (f *FDTable) NewFDVFS2(ctx context.Context, minfd int32, file *vfs.FileDescription, flags FDFlags) (int32, error) {
- if minfd < 0 {
- // Don't accept negative FDs.
- return -1, unix.EINVAL
- }
-
- // Default limit.
- end := int32(math.MaxInt32)
-
- // Ensure we don't get past the provided limit.
- if limitSet := limits.FromContext(ctx); limitSet != nil {
- lim := limitSet.Get(limits.NumberOfFiles)
- if lim.Cur != limits.Infinity {
- end = int32(lim.Cur)
- }
- if minfd >= end {
- return -1, unix.EMFILE
- }
- }
-
- f.mu.Lock()
- defer f.mu.Unlock()
-
- // From f.next to find available fd.
- fd := minfd
- if fd < f.next {
- fd = f.next
- }
- for fd < end {
- if d, _, _ := f.getVFS2(fd); d == nil {
- f.setVFS2(ctx, fd, file, flags)
- if fd == f.next {
- // Update next search start position.
- f.next = fd + 1
- }
- return fd, nil
- }
- fd++
+func (f *FDTable) NewFDVFS2(ctx context.Context, minFD int32, file *vfs.FileDescription, flags FDFlags) (int32, error) {
+ files := []*vfs.FileDescription{file}
+ fileSlice, error := f.NewFDsVFS2(ctx, minFD, files, flags)
+ if error != nil {
+ return -1, error
}
- return -1, unix.EMFILE
+ return fileSlice[0], nil
}
// NewFDAt sets the file reference for the given FD. If there is an active
@@ -469,6 +463,11 @@ func (f *FDTable) newFDAt(ctx context.Context, fd int32, file *fs.File, fileVFS2
defer f.mu.Unlock()
df, dfVFS2 := f.setAll(ctx, fd, file, fileVFS2, flags)
+ // Add fd to fdBitmap.
+ if file != nil || fileVFS2 != nil {
+ f.fdBitmap.Add(uint32(fd))
+ }
+
return df, dfVFS2, nil
}
@@ -573,7 +572,9 @@ func (f *FDTable) GetVFS2(fd int32) (*vfs.FileDescription, FDFlags) {
// Precondition: The caller must be running on the task goroutine, or Task.mu
// must be locked.
func (f *FDTable) GetFDs(ctx context.Context) []int32 {
- fds := make([]int32, 0, int(atomic.LoadInt32(&f.used)))
+ f.mu.Lock()
+ defer f.mu.Unlock()
+ fds := make([]int32, 0, int(f.fdBitmap.GetNumOnes()))
f.forEach(ctx, func(fd int32, _ *fs.File, _ *vfs.FileDescription, _ FDFlags) {
fds = append(fds, fd)
})
@@ -583,13 +584,15 @@ func (f *FDTable) GetFDs(ctx context.Context) []int32 {
// Fork returns an independent FDTable.
func (f *FDTable) Fork(ctx context.Context) *FDTable {
clone := f.k.NewFDTable()
-
+ f.mu.Lock()
+ defer f.mu.Unlock()
f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) {
// The set function here will acquire an appropriate table
// reference for the clone. We don't need anything else.
if df, dfVFS2 := clone.setAll(ctx, fd, file, fileVFS2, flags); df != nil || dfVFS2 != nil {
panic("VFS1 or VFS2 files set")
}
+ clone.fdBitmap.Add(uint32(fd))
})
return clone
}
@@ -604,11 +607,6 @@ func (f *FDTable) Remove(ctx context.Context, fd int32) (*fs.File, *vfs.FileDesc
f.mu.Lock()
- // Update current available position.
- if fd < f.next {
- f.next = fd
- }
-
orig, orig2, _, _ := f.getAll(fd)
// Add reference for caller.
@@ -621,6 +619,7 @@ func (f *FDTable) Remove(ctx context.Context, fd int32) (*fs.File, *vfs.FileDesc
if orig != nil || orig2 != nil {
orig, orig2 = f.setAll(ctx, fd, nil, nil, FDFlags{}) // Zap entry.
+ f.fdBitmap.Remove(uint32(fd))
}
f.mu.Unlock()
@@ -644,16 +643,13 @@ func (f *FDTable) RemoveIf(ctx context.Context, cond func(*fs.File, *vfs.FileDes
f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) {
if cond(file, fileVFS2, flags) {
df, dfVFS2 := f.setAll(ctx, fd, nil, nil, FDFlags{}) // Clear from table.
+ f.fdBitmap.Remove(uint32(fd))
if df != nil {
files = append(files, df)
}
if dfVFS2 != nil {
filesVFS2 = append(filesVFS2, dfVFS2)
}
- // Update current available position.
- if fd < f.next {
- f.next = fd
- }
}
})
f.mu.Unlock()
diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go
index f17f9c59c..2b3e6ef71 100644
--- a/pkg/sentry/kernel/fd_table_unsafe.go
+++ b/pkg/sentry/kernel/fd_table_unsafe.go
@@ -15,9 +15,11 @@
package kernel
import (
+ "math"
"sync/atomic"
"unsafe"
+ "gvisor.dev/gvisor/pkg/bitmap"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -44,6 +46,7 @@ func (f *FDTable) initNoLeakCheck() {
func (f *FDTable) init() {
f.initNoLeakCheck()
f.InitRefs()
+ f.fdBitmap = bitmap.New(uint32(math.MaxUint16))
}
// get gets a file entry.
@@ -162,14 +165,6 @@ func (f *FDTable) setAll(ctx context.Context, fd int32, file *fs.File, fileVFS2
}
}
- // Adjust used.
- switch {
- case orig == nil && desc != nil:
- atomic.AddInt32(&f.used, 1)
- case orig != nil && desc == nil:
- atomic.AddInt32(&f.used, -1)
- }
-
if orig != nil {
switch {
case orig.file != nil:
diff --git a/test/perf/BUILD b/test/perf/BUILD
index 8ac63f11b..4299f08d5 100644
--- a/test/perf/BUILD
+++ b/test/perf/BUILD
@@ -70,6 +70,13 @@ syscall_test(
)
syscall_test(
+ size = "large",
+ add_overlay = True,
+ debug = False,
+ test = "//test/perf/linux:dup_benchmark",
+)
+
+syscall_test(
debug = False,
test = "//test/perf/linux:pipe_benchmark",
)
diff --git a/test/perf/linux/BUILD b/test/perf/linux/BUILD
index 50dd8e808..8a3216550 100644
--- a/test/perf/linux/BUILD
+++ b/test/perf/linux/BUILD
@@ -109,6 +109,22 @@ cc_binary(
)
cc_binary(
+ name = "dup_benchmark",
+ testonly = 1,
+ srcs = [
+ "dup_benchmark.cc",
+ ],
+ deps = [
+ gbenchmark,
+ gtest,
+ "//test/util:fs_util",
+ "//test/util:logging",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ ],
+)
+
+cc_binary(
name = "read_benchmark",
testonly = 1,
srcs = [
diff --git a/test/perf/linux/dup_benchmark.cc b/test/perf/linux/dup_benchmark.cc
new file mode 100644
index 000000000..5d808d225
--- /dev/null
+++ b/test/perf/linux/dup_benchmark.cc
@@ -0,0 +1,55 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "benchmark/benchmark.h"
+#include "test/util/fs_util.h"
+#include "test/util/logging.h"
+#include "test/util/temp_path.h"
+
+namespace gvisor {
+
+namespace {
+
+void BM_Dup(benchmark::State& state) {
+ const int size = state.range(0);
+
+ for (auto _ : state) {
+ std::vector<int> v;
+ for (int i = 0; i < size; i++) {
+ int fd = dup(2);
+ TEST_CHECK(fd != -1);
+ v.push_back(fd);
+ }
+ for (int i = 0; i < size; i++) {
+ int fd = v[i];
+ close(fd);
+ }
+ }
+ state.SetItemsProcessed(state.iterations() * size);
+}
+
+BENCHMARK(BM_Dup)->Range(1, 1 << 15)->UseRealTime();
+
+} // namespace
+
+} // namespace gvisor
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 188c052b3..3383495d0 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -560,6 +560,7 @@ cc_binary(
deps = [
"//test/util:eventfd_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/memory",
gtest,
"//test/util:fs_util",
"//test/util:posix_error",
diff --git a/test/syscalls/linux/dup.cc b/test/syscalls/linux/dup.cc
index ba4e13fb9..8f0974f45 100644
--- a/test/syscalls/linux/dup.cc
+++ b/test/syscalls/linux/dup.cc
@@ -13,9 +13,11 @@
// limitations under the License.
#include <fcntl.h>
+#include <sys/resource.h>
#include <unistd.h>
#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
#include "test/util/eventfd_util.h"
#include "test/util/file_descriptor.h"
#include "test/util/fs_util.h"
@@ -98,6 +100,45 @@ TEST(DupTest, Dup2) {
ASSERT_NO_ERRNO(CheckSameFile(fd, nfd2));
}
+TEST(DupTest, Rlimit) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+ struct rlimit rl = {};
+ EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+ ASSERT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+ constexpr int kFDLimit = 101;
+ // Create a file descriptor that will be above the limit.
+ FileDescriptor aboveLimitFD =
+ ASSERT_NO_ERRNO_AND_VALUE(Dup2(fd, kFDLimit * 2 - 1));
+
+ rl.rlim_cur = kFDLimit;
+ ASSERT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+ ASSERT_THAT(dup3(fd.get(), kFDLimit, 0), SyscallFails());
+
+ std::vector<std::unique_ptr<FileDescriptor>> fds;
+ int prev_fd = fd.get();
+ int used_fds = 0;
+ for (int i = 0; i < kFDLimit; ++i) {
+ int new_fd = dup(fd.get());
+ if (new_fd == -1) {
+ break;
+ }
+ auto f = absl::make_unique<FileDescriptor>(new_fd);
+ EXPECT_LT(new_fd, kFDLimit);
+ EXPECT_GT(new_fd, prev_fd);
+ // Check that all fds in (prev_fd, new_fd) are used.
+ for (int j = prev_fd + 1; j < new_fd; ++j) {
+ if (fcntl(j, F_GETFD) != -1) used_fds++;
+ }
+ prev_fd = new_fd;
+ fds.push_back(std::move(f));
+ }
+ EXPECT_EQ(fds.size() + used_fds, kFDLimit - fd.get() - 1);
+}
+
TEST(DupTest, Dup2SameFD) {
auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));