diff options
286 files changed, 4475 insertions, 7937 deletions
diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index 6f197436c..9eb6d38f4 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -11,14 +11,6 @@ install_pkgs make "linux-headers-$(uname -r)" linux-libc-dev \ graphviz jq curl binutils gnupg gnupg-agent gcc pkg-config \ apt-transport-https ca-certificates software-properties-common -# Install Go 1.16, as only 1.13 is available via apt. If it's installed via apt, -# remove it. -sudo apt-get autoremove -y golang-go -declare -r go_archive=go1.16.6.linux-amd64.tar.gz -wget -c "https://golang.org/dl/${go_archive}" -sudo tar -xzf "${go_archive}" -C /usr/local -sudo ln -s /usr/local/go/bin/go /usr/bin/go - # Setup for parallelization with PARTITION and TOTAL_PARTITIONS. export PARTITION=${BUILDKITE_PARALLEL_JOB:-0} PARTITION=$((${PARTITION}+1)) # 1-indexed, but PARALLEL_JOB is 0-indexed. @@ -130,6 +130,8 @@ analyzers: external: # Enabled. unreachable: external: # Enabled. + exclude: + - ".*protobuf/.*.go" unsafeptr: internal: exclude: @@ -148,6 +150,17 @@ analyzers: external: # Enabled. checkescape: external: # Enabled. + checklinkname: + external: # Enabled. + suppress: + # We don't care to check every single linkname in the Go standard + # library. Suppress findings about stdlib linkname targets we haven't + # described in checklinkname. + # + # Note that we _do_ want to check the signature of the known linkname + # targets in the standard library, so we still need to run + # checklinkname on stdlib generally. + - "linkname to unknown symbol" SA4016: internal: exclude: diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index eb004a7f6..3576396c1 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -41,6 +41,7 @@ go_library( "linux.go", "membarrier.go", "mm.go", + "msgqueue.go", "netdevice.go", "netfilter.go", "netfilter_ipv6.go", diff --git a/pkg/abi/linux/clone.go b/pkg/abi/linux/clone.go index c2cbfca5e..322a4ef5a 100644 --- a/pkg/abi/linux/clone.go +++ b/pkg/abi/linux/clone.go @@ -16,13 +16,16 @@ package linux // Clone constants per clone(2). const ( + CSIGNAL = 0xff + CLONE_VM = 0x100 CLONE_FS = 0x200 CLONE_FILES = 0x400 CLONE_SIGHAND = 0x800 - CLONE_PARENT = 0x8000 + CLONE_PIDFD = 0x1000 CLONE_PTRACE = 0x2000 CLONE_VFORK = 0x4000 + CLONE_PARENT = 0x8000 CLONE_THREAD = 0x10000 CLONE_NEWNS = 0x20000 CLONE_SYSVSEM = 0x40000 @@ -32,10 +35,30 @@ const ( CLONE_DETACHED = 0x400000 CLONE_UNTRACED = 0x800000 CLONE_CHILD_SETTID = 0x1000000 + CLONE_NEWCGROUP = 0x2000000 CLONE_NEWUTS = 0x4000000 CLONE_NEWIPC = 0x8000000 CLONE_NEWUSER = 0x10000000 CLONE_NEWPID = 0x20000000 CLONE_NEWNET = 0x40000000 CLONE_IO = 0x80000000 + + // Only passable via clone3(2). + CLONE_CLEAR_SIGHAND = 0x100000000 + CLONE_INTO_CGROUP = 0x200000000 ) + +// CloneArgs is struct clone_args, from include/uapi/linux/sched.h. +type CloneArgs struct { + Flags uint64 + Pidfd uint64 + ChildTID uint64 + ParentTID uint64 + ExitSignal uint64 + Stack uint64 + StackSize uint64 + TLS uint64 + SetTID uint64 + SetTIDSize uint64 + Cgroup uint64 +} diff --git a/pkg/abi/linux/msgqueue.go b/pkg/abi/linux/msgqueue.go new file mode 100644 index 000000000..e1e8d0357 --- /dev/null +++ b/pkg/abi/linux/msgqueue.go @@ -0,0 +1,108 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "gvisor.dev/gvisor/pkg/marshal/primitive" +) + +// Linux-specific control commands. Source: include/uapi/linux/msg.h +const ( + MSG_STAT = 11 + MSG_INFO = 12 + MSG_STAT_ANY = 13 +) + +// msgrcv(2) options. Source: include/uapi/linux/msg.h +const ( + MSG_NOERROR = 010000 // No error if message is too big. + MSG_EXCEPT = 020000 // Receive any message except of specified type. + MSG_COPY = 040000 // Copy (not remove) all queue messages. +) + +// System-wide limits for message queues. Source: include/uapi/linux/msg.h +const ( + MSGMNI = 32000 // Maximum number of message queue identifiers. + MSGMAX = 8192 // Maximum size of message (bytes). + MSGMNB = 16384 // Default max size of a message queue. +) + +// System-wide limits. Unused. Source: include/uapi/linux/msg.h +const ( + MSGPOOL = (MSGMNI * MSGMNB / 1024) + MSGTQL = MSGMNB + MSGMAP = MSGMNB + MSGSSZ = 16 + + // MSGSEG is simplified due to the inexistance of a ternary operator. + MSGSEG = (MSGPOOL * 1024) / MSGSSZ +) + +// MsqidDS is equivelant to struct msqid64_ds. Source: +// include/uapi/asm-generic/shmbuf.h +// +// +marshal +type MsqidDS struct { + MsgPerm IPCPerm // IPC permissions. + MsgStime TimeT // Last msgsnd time. + MsgRtime TimeT // Last msgrcv time. + MsgCtime TimeT // Last change time. + MsgCbytes uint64 // Current number of bytes on the queue. + MsgQnum uint64 // Number of messages in the queue. + MsgQbytes uint64 // Max number of bytes in the queue. + MsgLspid int32 // PID of last msgsnd. + MsgLrpid int32 // PID of last msgrcv. + unused4 uint64 + unused5 uint64 +} + +// MsgBuf is equivelant to struct msgbuf. Source: include/uapi/linux/msg.h +// +// +marshal dynamic +type MsgBuf struct { + Type primitive.Int64 + Text primitive.ByteSlice +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (b *MsgBuf) SizeBytes() int { + return b.Type.SizeBytes() + b.Text.SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (b *MsgBuf) MarshalBytes(dst []byte) { + b.Type.MarshalUnsafe(dst) + b.Text.MarshalBytes(dst[b.Type.SizeBytes():]) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (b *MsgBuf) UnmarshalBytes(src []byte) { + b.Type.UnmarshalUnsafe(src) + b.Text.UnmarshalBytes(src[b.Type.SizeBytes():]) +} + +// MsgInfo is equivelant to struct msginfo. Source: include/uapi/linux/msg.h +// +// +marshal +type MsgInfo struct { + MsgPool int32 + MsgMap int32 + MsgMax int32 + MsgMnb int32 + MsgMni int32 + MsgSsz int32 + MsgTql int32 + MsgSeg uint16 `marshal:"unaligned"` +} diff --git a/pkg/bitmap/BUILD b/pkg/bitmap/BUILD new file mode 100644 index 000000000..0f1e7006d --- /dev/null +++ b/pkg/bitmap/BUILD @@ -0,0 +1,16 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "bitmap", + srcs = ["bitmap.go"], + visibility = ["//:sandbox"], +) + +go_test( + name = "bitmap_test", + size = "small", + srcs = ["bitmap_test.go"], + library = ":bitmap", +) diff --git a/pkg/bitmap/bitmap.go b/pkg/bitmap/bitmap.go new file mode 100644 index 000000000..12d2fc2b8 --- /dev/null +++ b/pkg/bitmap/bitmap.go @@ -0,0 +1,234 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package bitmap provides the implementation of bitmap. +package bitmap + +import ( + "math" + "math/bits" +) + +// Bitmap implements an efficient bitmap. +// +// +stateify savable +type Bitmap struct { + // numOnes is the number of ones in the bitmap. + numOnes uint32 + + // bitBlock holds the bits. The type of bitBlock is uint64 which means + // each number in bitBlock contains 64 entries. + bitBlock []uint64 +} + +// New create a new empty Bitmap. +func New(size uint32) Bitmap { + b := Bitmap{} + bSize := (size + 63) / 64 + b.bitBlock = make([]uint64, bSize) + return b +} + +// IsEmpty verifies whether the Bitmap is empty. +func (b *Bitmap) IsEmpty() bool { + return b.numOnes == 0 +} + +// Minimum return the smallest value in the Bitmap. +func (b *Bitmap) Minimum() uint32 { + for i := 0; i < len(b.bitBlock); i++ { + if w := b.bitBlock[i]; w != 0 { + r := bits.TrailingZeros64(w) + return uint32(r + i*64) + } + } + return math.MaxInt32 +} + +// FirstZero returns the first unset bit from the range [start, ). +func (b *Bitmap) FirstZero(start uint32) uint32 { + i, nbit := int(start/64), start%64 + n := len(b.bitBlock) + if i >= n { + return math.MaxInt32 + } + w := b.bitBlock[i] | ((1 << nbit) - 1) + for { + if w != ^uint64(0) { + r := bits.TrailingZeros64(^w) + return uint32(r + i*64) + } + i++ + if i == n { + break + } + w = b.bitBlock[i] + } + return math.MaxInt32 +} + +// Maximum return the largest value in the Bitmap. +func (b *Bitmap) Maximum() uint32 { + for i := len(b.bitBlock) - 1; i >= 0; i-- { + if w := b.bitBlock[i]; w != 0 { + r := bits.LeadingZeros64(w) + return uint32(i*64 + 63 - r) + } + } + return uint32(0) +} + +// Add add i to the Bitmap. +func (b *Bitmap) Add(i uint32) { + blockNum, mask := i/64, uint64(1)<<(i%64) + // if blockNum is out of range, extend b.bitBlock + if x, y := int(blockNum), len(b.bitBlock); x >= y { + b.bitBlock = append(b.bitBlock, make([]uint64, x-y+1)...) + } + oldBlock := b.bitBlock[blockNum] + newBlock := oldBlock | mask + if oldBlock != newBlock { + b.bitBlock[blockNum] = newBlock + b.numOnes++ + } +} + +// Remove i from the Bitmap. +func (b *Bitmap) Remove(i uint32) { + blockNum, mask := i/64, uint64(1)<<(i%64) + oldBlock := b.bitBlock[blockNum] + newBlock := oldBlock &^ mask + if oldBlock != newBlock { + b.bitBlock[blockNum] = newBlock + b.numOnes-- + } +} + +// Clone the Bitmap. +func (b *Bitmap) Clone() Bitmap { + bitmap := Bitmap{b.numOnes, make([]uint64, len(b.bitBlock))} + copy(bitmap.bitBlock, b.bitBlock[:]) + return bitmap +} + +// countOnesForBlocks count all 1 bits within b.bitBlock of begin and that of end. +// The begin block and end block are inclusive. +func (b *Bitmap) countOnesForBlocks(begin, end uint32) uint64 { + ones := uint64(0) + beginBlock := begin / 64 + endBlock := end / 64 + for i := beginBlock; i <= endBlock; i++ { + ones += uint64(bits.OnesCount64(b.bitBlock[i])) + } + return ones +} + +// countOnesForAllBlocks count all 1 bits in b.bitBlock. +func (b *Bitmap) countOnesForAllBlocks() uint64 { + ones := uint64(0) + for i := 0; i < len(b.bitBlock); i++ { + ones += uint64(bits.OnesCount64(b.bitBlock[i])) + } + return ones +} + +// flipRange flip the bits within range (begin and end). begin is inclusive and end is exclusive. +func (b *Bitmap) flipRange(begin, end uint32) { + end-- + beginBlock := begin / 64 + endBlock := end / 64 + if beginBlock == endBlock { + b.bitBlock[endBlock] ^= ((^uint64(0) << uint(begin%64)) & ((uint64(1) << (uint(end)%64 + 1)) - 1)) + } else { + b.bitBlock[beginBlock] ^= ^(^uint64(0) << uint(begin%64)) + for i := beginBlock; i < endBlock; i++ { + b.bitBlock[i] = ^b.bitBlock[i] + } + b.bitBlock[endBlock] ^= ((uint64(1) << (uint(end)%64 + 1)) - 1) + } +} + +// clearRange clear the bits within range (begin and end). begin is inclusive and end is exclusive. +func (b *Bitmap) clearRange(begin, end uint32) { + end-- + beginBlock := begin / 64 + endBlock := end / 64 + if beginBlock == endBlock { + b.bitBlock[beginBlock] &= (((uint64(1) << uint(begin%64)) - 1) | ^((uint64(1) << (uint(end)%64 + 1)) - 1)) + } else { + b.bitBlock[beginBlock] &= ((uint64(1) << uint(begin%64)) - 1) + for i := beginBlock + 1; i < endBlock; i++ { + b.bitBlock[i] &= ^b.bitBlock[i] + } + b.bitBlock[endBlock] &= ^((uint64(1) << (uint(end)%64 + 1)) - 1) + } +} + +// ClearRange clear bits within range (begin and end) for the Bitmap. begin is inclusive and end is exclusive. +func (b *Bitmap) ClearRange(begin, end uint32) { + blockRange := end/64 - begin/64 + // When the number of cleared blocks is larger than half of the length of b.bitBlock, + // counting 1s for the entire bitmap has better performance. + if blockRange > uint32(len(b.bitBlock)/2) { + b.clearRange(begin, end) + b.numOnes = uint32(b.countOnesForAllBlocks()) + } else { + oldRangeOnes := b.countOnesForBlocks(begin, end) + b.clearRange(begin, end) + newRangeOnes := b.countOnesForBlocks(begin, end) + b.numOnes += uint32(newRangeOnes - oldRangeOnes) + } +} + +// FlipRange flip bits within range (begin and end) for the Bitmap. begin is inclusive and end is exclusive. +func (b *Bitmap) FlipRange(begin, end uint32) { + blockRange := end/64 - begin/64 + // When the number of flipped blocks is larger than half of the length of b.bitBlock, + // counting 1s for the entire bitmap has better performance. + if blockRange > uint32(len(b.bitBlock)/2) { + b.flipRange(begin, end) + b.numOnes = uint32(b.countOnesForAllBlocks()) + } else { + oldRangeOnes := b.countOnesForBlocks(begin, end) + b.flipRange(begin, end) + newRangeOnes := b.countOnesForBlocks(begin, end) + b.numOnes += uint32(newRangeOnes - oldRangeOnes) + } +} + +// ToSlice transform the Bitmap into slice. For example, a bitmap of [0, 1, 0, 1] +// will return the slice [1, 3]. +func (b *Bitmap) ToSlice() []uint32 { + bitmapSlice := make([]uint32, 0, b.numOnes) + // base is the start number of a bitBlock + base := 0 + for i := 0; i < len(b.bitBlock); i++ { + bitBlock := b.bitBlock[i] + // Iterate through all the numbers held by this bit block. + for bitBlock != 0 { + // Extract the lowest set 1 bit. + j := bitBlock & -bitBlock + // Interpret the bit as the in32 number it represents and add it to result. + bitmapSlice = append(bitmapSlice, uint32((base + int(bits.OnesCount64(j-1))))) + bitBlock ^= j + } + base += 64 + } + return bitmapSlice +} + +// GetNumOnes return the the number of ones in the Bitmap. +func (b *Bitmap) GetNumOnes() uint32 { + return b.numOnes +} diff --git a/pkg/bitmap/bitmap_test.go b/pkg/bitmap/bitmap_test.go new file mode 100644 index 000000000..76ebd779f --- /dev/null +++ b/pkg/bitmap/bitmap_test.go @@ -0,0 +1,306 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bitmap + +import ( + "math" + "reflect" + "testing" +) + +// generateFilledSlice generates a slice fill with numbers. +func generateFilledSlice(min, max, length int) []uint32 { + if max == min { + return []uint32{uint32(min)} + } + if length > (max - min) { + return nil + } + randSlice := make([]uint32, length) + if length != 0 { + rangeNum := uint32((max - min) / length) + randSlice[0], randSlice[length-1] = uint32(min), uint32(max) + for i := 1; i < length-1; i++ { + randSlice[i] = randSlice[i-1] + rangeNum + } + } + return randSlice +} + +// generateFilledBitmap generates a Bitmap filled with fillNum of numbers, +// and returns the slice and bitmap. +func generateFilledBitmap(min, max, fillNum int) ([]uint32, Bitmap) { + bitmap := New(uint32(max)) + randSlice := generateFilledSlice(min, max, fillNum) + for i := 0; i < fillNum; i++ { + bitmap.Add(randSlice[i]) + } + return randSlice, bitmap +} + +func TestNewBitmap(t *testing.T) { + tests := []struct { + name string + size int + expectSize int + }{ + {"length 1", 1, 1}, + {"length 1024", 1024, 16}, + {"length 1025", 1025, 17}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + if bitmap := New(uint32(tt.size)); len(bitmap.bitBlock) != tt.expectSize { + t.Errorf("New created bitmap with %v, bitBlock size: %d, wanted: %d", tt.name, len(bitmap.bitBlock), tt.expectSize) + } + }) + } +} + +func TestAdd(t *testing.T) { + tests := []struct { + name string + bitmapSize int + addNum int + }{ + {"Add with null bitmap.bitBlock", 0, 10}, + {"Add without extending bitBlock", 64, 10}, + {"Add without extending bitblock with margin number", 63, 64}, + {"Add with extended one block", 1024, 1025}, + {"Add with extended more then one block", 1024, 2048}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + bitmap := New(uint32(tt.bitmapSize)) + bitmap.Add(uint32(tt.addNum)) + bitmapSlice := bitmap.ToSlice() + if bitmapSlice[0] != uint32(tt.addNum) { + t.Errorf("%v, get number: %d, wanted: %d.", tt.name, bitmapSlice[0], tt.addNum) + } + }) + } +} + +func TestRemove(t *testing.T) { + bitmap := New(uint32(1024)) + firstSlice := generateFilledSlice(0, 511, 50) + secondSlice := generateFilledSlice(512, 1024, 50) + for i := 0; i < 50; i++ { + bitmap.Add(firstSlice[i]) + bitmap.Add(secondSlice[i]) + } + + for i := 0; i < 50; i++ { + bitmap.Remove(firstSlice[i]) + } + bitmapSlice := bitmap.ToSlice() + if !reflect.DeepEqual(bitmapSlice, secondSlice) { + t.Errorf("After Remove() firstSlice, remained slice: %v, wanted: %v", bitmapSlice, secondSlice) + } + + for i := 0; i < 50; i++ { + bitmap.Remove(secondSlice[i]) + } + bitmapSlice = bitmap.ToSlice() + emptySlice := make([]uint32, 0) + if !reflect.DeepEqual(bitmapSlice, emptySlice) { + t.Errorf("After Remove secondSlice, remained slice: %v, wanted: %v", bitmapSlice, emptySlice) + } + +} + +// Verify flip bits within one bitBlock, one bit and bits cross multi bitBlocks. +func TestFlipRange(t *testing.T) { + tests := []struct { + name string + flipRangeMin int + flipRangeMax int + filledSliceLen int + }{ + {"Flip one number in bitmap", 77, 77, 1}, + {"Flip numbers within one bitBlocks", 5, 60, 20}, + {"Flip numbers that cross multi bitBlocks", 20, 1000, 300}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + fillSlice, bitmap := generateFilledBitmap(tt.flipRangeMin, tt.flipRangeMax, tt.filledSliceLen) + flipFillSlice := make([]uint32, 0) + for i, j := tt.flipRangeMin, 0; i <= tt.flipRangeMax; i++ { + if uint32(i) != fillSlice[j] { + flipFillSlice = append(flipFillSlice, uint32(i)) + } else { + j++ + } + } + + bitmap.FlipRange(uint32(tt.flipRangeMin), uint32(tt.flipRangeMax+1)) + flipBitmapSlice := bitmap.ToSlice() + if !reflect.DeepEqual(flipFillSlice, flipBitmapSlice) { + t.Errorf("%v, flipped slice: %v, wanted: %v", tt.name, flipBitmapSlice, flipFillSlice) + } + }) + } +} + +// Verify clear bits within one bitBlock, one bit and bits cross multi bitBlocks. +func TestClearRange(t *testing.T) { + tests := []struct { + name string + clearRangeMin int + clearRangeMax int + bitmapSize int + }{ + {"ClearRange clear one number", 5, 5, 64}, + {"ClearRange clear numbers within one bitBlock", 4, 61, 64}, + {"ClearRange clear numbers cross multi bitBlocks", 20, 254, 512}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + bitmap := New(uint32(tt.bitmapSize)) + bitmap.FlipRange(uint32(0), uint32(tt.bitmapSize)) + bitmap.ClearRange(uint32(tt.clearRangeMin), uint32(tt.clearRangeMax+1)) + clearedBitmapSlice := bitmap.ToSlice() + clearedSlice := make([]uint32, 0) + for i := 0; i < tt.bitmapSize; i++ { + if i < tt.clearRangeMin || i > tt.clearRangeMax { + clearedSlice = append(clearedSlice, uint32(i)) + } + } + if !reflect.DeepEqual(clearedSlice, clearedBitmapSlice) { + t.Errorf("%v, cleared slice: %v, wanted: %v", tt.name, clearedBitmapSlice, clearedSlice) + } + }) + + } +} + +func TestMinimum(t *testing.T) { + randSlice, bitmap := generateFilledBitmap(0, 1024, 200) + min := bitmap.Minimum() + if min != randSlice[0] { + t.Errorf("Minimum() returns: %v, wanted: %v", min, randSlice[0]) + } + + bitmap.ClearRange(uint32(0), uint32(200)) + min = bitmap.Minimum() + bitmapSlice := bitmap.ToSlice() + if min != bitmapSlice[0] { + t.Errorf("After ClearRange, Minimum() returns: %v, wanted: %v", min, bitmapSlice[0]) + } + + bitmap.FlipRange(uint32(2), uint32(11)) + min = bitmap.Minimum() + bitmapSlice = bitmap.ToSlice() + if min != bitmapSlice[0] { + t.Errorf("After Flip, Minimum() returns: %v, wanted: %v", min, bitmapSlice[0]) + } +} + +func TestMaximum(t *testing.T) { + randSlice, bitmap := generateFilledBitmap(0, 1024, 200) + max := bitmap.Maximum() + if max != randSlice[len(randSlice)-1] { + t.Errorf("Maximum() returns: %v, wanted: %v", max, randSlice[len(randSlice)-1]) + } + + bitmap.ClearRange(uint32(1000), uint32(1025)) + max = bitmap.Maximum() + bitmapSlice := bitmap.ToSlice() + if max != bitmapSlice[len(bitmapSlice)-1] { + t.Errorf("After ClearRange, Maximum() returns: %v, wanted: %v", max, bitmapSlice[len(bitmapSlice)-1]) + } + + bitmap.FlipRange(uint32(1001), uint32(1021)) + max = bitmap.Maximum() + bitmapSlice = bitmap.ToSlice() + if max != bitmapSlice[len(bitmapSlice)-1] { + t.Errorf("After Flip, Maximum() returns: %v, wanted: %v", max, bitmapSlice[len(bitmapSlice)-1]) + } +} + +func TestBitmapNumOnes(t *testing.T) { + randSlice, bitmap := generateFilledBitmap(0, 1024, 200) + bitmapOnes := bitmap.GetNumOnes() + if bitmapOnes != uint32(200) { + t.Errorf("GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 200) + } + // Remove 10 numbers. + for i := 5; i < 15; i++ { + bitmap.Remove(randSlice[i]) + } + bitmapOnes = bitmap.GetNumOnes() + if bitmapOnes != uint32(190) { + t.Errorf("After Remove 10 number, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 190) + } + // Remove the 10 number again, the length supposed not change. + for i := 5; i < 15; i++ { + bitmap.Remove(randSlice[i]) + } + bitmapOnes = bitmap.GetNumOnes() + if bitmapOnes != uint32(190) { + t.Errorf("After Remove the 10 number again, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 190) + } + + // Add 10 number. + for i := 1080; i < 1090; i++ { + bitmap.Add(uint32(i)) + } + bitmapOnes = bitmap.GetNumOnes() + if bitmapOnes != uint32(200) { + t.Errorf("After Add 10 number, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 200) + } + + // Add the 10 number again, the length supposed not change. + for i := 1080; i < 1090; i++ { + bitmap.Add(uint32(i)) + } + bitmapOnes = bitmap.GetNumOnes() + if bitmapOnes != uint32(200) { + t.Errorf("After Add the 10 number again, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 200) + } + + // Flip 10 bits from 0 to 1. + bitmap.FlipRange(uint32(1025), uint32(1035)) + bitmapOnes = bitmap.GetNumOnes() + if bitmapOnes != uint32(210) { + t.Errorf("After Flip, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 210) + } + + // ClearRange numbers range from [0, 1025). + bitmap.ClearRange(uint32(0), uint32(1025)) + bitmapOnes = bitmap.GetNumOnes() + if bitmapOnes != uint32(20) { + t.Errorf("After ClearRange, GetNumOnes() returns: %v, wanted: %v", bitmapOnes, 20) + } +} + +func TestFirstZero(t *testing.T) { + bitmap := New(uint32(1000)) + bitmap.FlipRange(200, 400) + for i, j := range map[uint32]uint32{0: 0, 201: 400, 200: 400, 199: 199, 400: 400, 10000: math.MaxInt32} { + v := bitmap.FirstZero(i) + if v != j { + t.Errorf("Minimum() returns: %v, wanted: %v", v, j) + } + } +} diff --git a/pkg/gohacks/gohacks_unsafe.go b/pkg/gohacks/gohacks_unsafe.go index 09fc14787..bd8ceba19 100644 --- a/pkg/gohacks/gohacks_unsafe.go +++ b/pkg/gohacks/gohacks_unsafe.go @@ -15,7 +15,13 @@ //go:build go1.13 && !go1.18 // +build go1.13,!go1.18 -// Check type signatures when updating Go version. +// //go:linkname directives type-checked by checklinkname. Any other +// non-linkname assumptions outside the Go 1 compatibility guarantee should +// have an accompanied vet check or version guard build tag. + +// Check type signatures and Noescape when updating Go version. +// +// TODO(b/165820485): add these checks to checklinkname. // Package gohacks contains utilities for subverting the Go compiler. package gohacks diff --git a/pkg/procid/procid_amd64.s b/pkg/procid/procid_amd64.s index b5bbfff90..74a8de42c 100644 --- a/pkg/procid/procid_amd64.s +++ b/pkg/procid/procid_amd64.s @@ -15,6 +15,10 @@ //go:build amd64 && go1.8 && !go1.18 && go1.1 // +build amd64,go1.8,!go1.18,go1.1 +// //go:linkname directives type-checked by checklinkname. Any other +// non-linkname assumptions outside the Go 1 compatibility guarantee should +// have an accompanied vet check or version guard build tag. + #include "textflag.h" TEXT ·Current(SB),NOSPLIT,$0-8 diff --git a/pkg/procid/procid_arm64.s b/pkg/procid/procid_arm64.s index 772d96289..48182c4a9 100644 --- a/pkg/procid/procid_arm64.s +++ b/pkg/procid/procid_arm64.s @@ -15,6 +15,10 @@ //go:build arm64 && go1.8 && !go1.18 && go1.1 // +build arm64,go1.8,!go1.18,go1.1 +// //go:linkname directives type-checked by checklinkname. Any other +// non-linkname assumptions outside the Go 1 compatibility guarantee should +// have an accompanied vet check or version guard build tag. + #include "textflag.h" TEXT ·Current(SB),NOSPLIT,$0-8 diff --git a/pkg/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go index 23ec33f92..4a4c0ae26 100644 --- a/pkg/ring0/kernel_amd64.go +++ b/pkg/ring0/kernel_amd64.go @@ -177,7 +177,7 @@ func (c *CPU) TSS() (uint64, uint16, *SegmentDescriptor) { // //go:nosplit func (c *CPU) CR0() uint64 { - return _CR0_PE | _CR0_PG | _CR0_AM | _CR0_ET + return _CR0_PE | _CR0_PG | _CR0_AM | _CR0_ET | _CR0_NE } // CR4 returns the CPU's CR4 value. diff --git a/pkg/ring0/x86.go b/pkg/ring0/x86.go index 7c96cca6b..9a98703da 100644 --- a/pkg/ring0/x86.go +++ b/pkg/ring0/x86.go @@ -25,6 +25,7 @@ import ( const ( _CR0_PE = 1 << 0 _CR0_ET = 1 << 4 + _CR0_NE = 1 << 5 _CR0_AM = 1 << 18 _CR0_PG = 1 << 31 diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD deleted file mode 100644 index 5e8b464a0..000000000 --- a/pkg/sentry/fsimpl/ext/BUILD +++ /dev/null @@ -1,104 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "dirent_list", - out = "dirent_list.go", - package = "ext", - prefix = "dirent", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*dirent", - "Linker": "*dirent", - }, -) - -go_template_instance( - name = "fstree", - out = "fstree.go", - package = "ext", - prefix = "generic", - template = "//pkg/sentry/vfs/genericfstree:generic_fstree", - types = { - "Dentry": "dentry", - }, -) - -go_library( - name = "ext", - srcs = [ - "block_map_file.go", - "dentry.go", - "directory.go", - "dirent_list.go", - "ext.go", - "extent_file.go", - "file_description.go", - "filesystem.go", - "fstree.go", - "inode.go", - "regular_file.go", - "symlink.go", - "utils.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fd", - "//pkg/fspath", - "//pkg/log", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsimpl/ext/disklayout", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/syscalls/linux", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "ext_test", - size = "small", - srcs = [ - "block_map_test.go", - "ext_test.go", - "extent_test.go", - ], - data = [ - "//pkg/sentry/fsimpl/ext:assets/bigfile.txt", - "//pkg/sentry/fsimpl/ext:assets/file.txt", - "//pkg/sentry/fsimpl/ext:assets/tiny.ext2", - "//pkg/sentry/fsimpl/ext:assets/tiny.ext3", - "//pkg/sentry/fsimpl/ext:assets/tiny.ext4", - ], - library = ":ext", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/marshal/primitive", - "//pkg/sentry/contexttest", - "//pkg/sentry/fsimpl/ext/disklayout", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/test/testutil", - "//pkg/usermem", - "@com_github_google_go_cmp//cmp:go_default_library", - "@com_github_google_go_cmp//cmp/cmpopts:go_default_library", - ], -) diff --git a/pkg/sentry/fsimpl/ext/README.md b/pkg/sentry/fsimpl/ext/README.md deleted file mode 100644 index af00cfda8..000000000 --- a/pkg/sentry/fsimpl/ext/README.md +++ /dev/null @@ -1,117 +0,0 @@ -## EXT(2/3/4) File System - -This is a filesystem driver which supports ext2, ext3 and ext4 filesystems. -Linux has specialized drivers for each variant but none which supports all. This -library takes advantage of ext's backward compatibility and understands the -internal organization of on-disk structures to support all variants. - -This driver implementation diverges from the Linux implementations in being more -forgiving about versioning. For instance, if a filesystem contains both extent -based inodes and classical block map based inodes, this driver will not complain -and interpret them both correctly. While in Linux this would be an issue. This -blurs the line between the three ext fs variants. - -Ext2 is considered deprecated as of Red Hat Enterprise Linux 7, and ext3 has -been superseded by ext4 by large performance gains. Thus it is recommended to -upgrade older filesystem images to ext4 using e2fsprogs for better performance. - -### Read Only - -This driver currently only allows read only operations. A lot of the design -decisions are based on this feature. There are plans to implement write (the -process for which is documented in the future work section). - -### Performance - -One of the biggest wins about this driver is that it directly talks to the -underlying block device (or whatever persistent storage is being used), instead -of making expensive RPCs to a gofer. - -Another advantage is that ext fs supports fast concurrent reads. Currently the -device is represented using a `io.ReaderAt` which allows for concurrent reads. -All reads are directly passed to the device driver which intelligently serves -the read requests in the optimal order. There is no congestion due to locking -while reading in the filesystem level. - -Reads are optimized further in the way file data is transferred over to user -memory. Ext fs directly copies over file data from disk into user memory with no -additional allocations on the way. We can only get faster by preloading file -data into memory (see future work section). - -The internal structures used to represent files, inodes and file descriptors use -a lot of inheritance. With the level of indirection that an interface adds with -an internal pointer, it can quickly fragment a structure across memory. As this -runs along side a full blown kernel (which is memory intensive), having a -fragmented struct might hurt performance. Hence these internal structures, -though interfaced, are tightly packed in memory using the same inheritance -pattern that pkg/sentry/vfs uses. The pkg/sentry/fsimpl/ext/disklayout package -makes an execption to this pattern for reasons documented in the package. - -### Security - -This driver also intends to help sandbox the container better by reducing the -surface of the host kernel that the application touches. It prevents the -application from exploiting vulnerabilities in the host filesystem driver. All -`io.ReaderAt.ReadAt()` calls are translated to `pread(2)` which are directly -passed to the device driver in the kernel. Hence this reduces the surface for -attack. - -The application can not affect any host filesystems other than the one passed -via block device by the user. - -### Future Work - -#### Write - -To support write operations we would need to modify the block device underneath. -Currently, the driver does not modify the device at all, not even for updating -the access times for reads. Modifying the filesystem incorrectly can corrupt it -and render it unreadable for other correct ext(x) drivers. Hence caution must be -maintained while modifying metadata structures. - -Ext4 specifically is built for performance and has added a lot of complexity as -to how metadata structures are modified. For instance, files that are organized -via an extent tree which must be balanced and file data blocks must be placed in -the same extent as much as possible to increase locality. Such properties must -be maintained while modifying the tree. - -Ext filesystems boast a lot about locality, which plays a big role in them being -performant. The block allocation algorithm in Linux does a good job in keeping -related data together. This behavior must be maintained as much as possible, -else we might end up degrading the filesystem performance over time. - -Ext4 also supports a wide variety of features which are specialized for varying -use cases. Implementing all of them can get difficult very quickly. - -Ext(x) checksums all its metadata structures to check for corruption, so -modification of any metadata struct must correspond with re-checksumming the -struct. Linux filesystem drivers also order on-disk updates intelligently to not -corrupt the filesystem and also remain performant. The in-memory metadata -structures must be kept in sync with what is on disk. - -There is also replication of some important structures across the filesystem. -All replicas must be updated when their original copy is updated. There is also -provisioning for snapshotting which must be kept in mind, although it should not -affect this implementation unless we allow users to create filesystem snapshots. - -Ext4 also introduced journaling (jbd2). The journal must be updated -appropriately. - -#### Performance - -To improve performance we should implement a buffer cache, and optionally, read -ahead for small files. While doing so we must also keep in mind the memory usage -and have a reasonable cap on how much file data we want to hold in memory. - -#### Features - -Our current implementation will work with most ext4 filesystems for readonly -purposed. However, the following features are not supported yet: - -- Journal -- Snapshotting -- Extended Attributes -- Hash Tree Directories -- Meta Block Groups -- Multiple Mount Protection -- Bigalloc diff --git a/pkg/sentry/fsimpl/ext/assets/README.md b/pkg/sentry/fsimpl/ext/assets/README.md deleted file mode 100644 index 6f1e81b3a..000000000 --- a/pkg/sentry/fsimpl/ext/assets/README.md +++ /dev/null @@ -1,36 +0,0 @@ -### Tiny Ext(2/3/4) Images - -The images are of size 64Kb which supports 64 1k blocks and 16 inodes. This is -the smallest size mkfs.ext(2/3/4) works with. - -These images were generated using the following commands. - -```bash -fallocate -l 64K tiny.ext$VERSION -mkfs.ext$VERSION -j tiny.ext$VERSION -``` - -where `VERSION` is `2`, `3` or `4`. - -You can mount it using: - -```bash -sudo mount -o loop tiny.ext$VERSION $MOUNTPOINT -``` - -`file.txt`, `bigfile.txt` and `symlink.txt` were added to this image by just -mounting it and copying (while preserving links) those files to the mountpoint -directory using: - -```bash -sudo cp -P {file.txt,symlink.txt,bigfile.txt} $MOUNTPOINT -``` - -The files in this directory mirror the contents and organisation of the files -stored in the image. - -You can umount the filesystem using: - -```bash -sudo umount $MOUNTPOINT -``` diff --git a/pkg/sentry/fsimpl/ext/assets/bigfile.txt b/pkg/sentry/fsimpl/ext/assets/bigfile.txt deleted file mode 100644 index 3857cf516..000000000 --- a/pkg/sentry/fsimpl/ext/assets/bigfile.txt +++ /dev/null @@ -1,41 +0,0 @@ -Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus faucibus eleifend orci, ut ornare nibh faucibus eu. Cras at condimentum massa. Nullam luctus, elit non porttitor congue, sapien diam feugiat sapien, sed eleifend nulla mauris non arcu. Sed lacinia mauris magna, eu mollis libero varius sit amet. Donec mollis, quam convallis commodo posuere, dolor nisi placerat nisi, in faucibus augue mi eu lorem. In pharetra consectetur faucibus. Ut euismod ex efficitur egestas tincidunt. Maecenas condimentum ut ante in rutrum. Vivamus sed arcu tempor, faucibus turpis et, lacinia diam. - -Sed in lacus vel nisl interdum bibendum in sed justo. Nunc tellus risus, molestie vitae arcu sed, molestie tempus ligula. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc risus neque, volutpat et ante non, ullamcorper condimentum ante. Aliquam sed metus in urna condimentum convallis. Vivamus ut libero mauris. Proin mollis posuere consequat. Vestibulum placerat mollis est et pulvinar. - -Donec rutrum odio ac diam pharetra, id fermentum magna cursus. Pellentesque in dapibus elit, et condimentum orci. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse euismod dapibus est, id vestibulum mauris. Nulla facilisi. Nulla cursus gravida nisi. Phasellus vestibulum rutrum lectus, a dignissim mauris hendrerit vitae. In at elementum mauris. Integer vel efficitur velit. Nullam fringilla sapien mi, quis luctus neque efficitur ac. Aenean nec quam dapibus nunc commodo pharetra. Proin sapien mi, fermentum aliquet vulputate non, aliquet porttitor diam. Quisque lacinia, urna et finibus fermentum, nunc lacus vehicula ex, sed congue metus lectus ac quam. Aliquam erat volutpat. Suspendisse sodales, dolor ut tincidunt finibus, augue erat varius tellus, a interdum erat sem at nunc. Vestibulum cursus iaculis sapien, vitae feugiat dui auctor quis. - -Pellentesque nec maximus nulla, eu blandit diam. Maecenas quis arcu ornare, congue ante at, vehicula ipsum. Praesent feugiat mauris rutrum sem fermentum, nec luctus ipsum placerat. Pellentesque placerat ipsum at dignissim fringilla. Vivamus et posuere sem, eget hendrerit felis. Aenean vulputate, augue vel mollis feugiat, justo ipsum mollis dolor, eu mollis elit neque ut ipsum. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Fusce bibendum sem quam, vulputate laoreet mi dapibus imperdiet. Sed a purus non nibh pretium aliquet. Integer eget luctus augue, vitae tincidunt magna. Ut eros enim, egestas eu nulla et, lobortis egestas arcu. Cras id ipsum ac justo lacinia rutrum. Vivamus lectus leo, ultricies sed justo at, pellentesque feugiat magna. Ut sollicitudin neque elit, vel ornare mauris commodo id. - -Duis dapibus orci et sapien finibus finibus. Mauris eleifend, lacus at vestibulum maximus, quam ligula pharetra erat, sit amet dapibus neque elit vitae neque. In bibendum sollicitudin erat, eget ultricies tortor malesuada at. Sed sit amet orci turpis. Donec feugiat ligula nibh, molestie tincidunt lectus elementum id. Donec volutpat maximus nibh, in vulputate felis posuere eu. Cras tincidunt ullamcorper lacus. Phasellus porta lorem auctor, congue magna a, commodo elit. - -Etiam auctor mi quis elit sodales, eu pulvinar arcu condimentum. Aenean imperdiet risus et dapibus tincidunt. Nullam tincidunt dictum dui, sed commodo urna rutrum id. Ut mollis libero vel elit laoreet bibendum. Quisque arcu arcu, tincidunt at ultricies id, vulputate nec metus. In tristique posuere quam sit amet volutpat. Vivamus scelerisque et nunc at dapibus. Fusce finibus libero ut ligula pretium rhoncus. Mauris non elit in arcu finibus imperdiet. Pellentesque nec massa odio. Proin rutrum mauris non sagittis efficitur. Aliquam auctor quam at dignissim faucibus. Ut eget ligula in magna posuere ultricies vitae sit amet turpis. Duis maximus odio nulla. Donec gravida sem tristique tempus scelerisque. - -Interdum et malesuada fames ac ante ipsum primis in faucibus. Fusce pharetra magna vulputate aliquet tempus. Duis id hendrerit arcu. Quisque ut ex elit. Integer velit orci, venenatis ut sapien ac, placerat porttitor dui. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nunc hendrerit cursus diam, hendrerit finibus ipsum scelerisque ut. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. - -Nulla non euismod neque. Phasellus vel sapien eu metus pulvinar rhoncus. Suspendisse eu mollis tellus, quis vestibulum tortor. Maecenas interdum dolor sed nulla fermentum maximus. Donec imperdiet ullamcorper condimentum. Nam quis nibh ante. Praesent quis tellus ut tortor pulvinar blandit sit amet ut sapien. Vestibulum est orci, pellentesque vitae tristique sit amet, tristique non felis. - -Vivamus sodales pellentesque varius. Sed vel tempus ligula. Nulla tristique nisl vel dui facilisis, ac sodales augue hendrerit. Proin augue nisi, vestibulum quis augue nec, sagittis tincidunt velit. Vestibulum euismod, nulla nec sodales faucibus, urna sapien vulputate magna, id varius metus sapien ut neque. Duis in mollis urna, in scelerisque enim. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc condimentum dictum turpis, et egestas neque dapibus eget. Quisque fringilla, dui eu venenatis eleifend, erat nibh lacinia urna, at lacinia lacus sapien eu dui. Duis eu erat ut mi lacinia convallis a sed ex. - -Fusce elit metus, tincidunt nec eleifend a, hendrerit nec ligula. Duis placerat finibus sollicitudin. In euismod porta tellus, in luctus justo bibendum bibendum. Maecenas at magna eleifend lectus tincidunt suscipit ut a ligula. Nulla tempor accumsan felis, fermentum dapibus est eleifend vitae. Mauris urna sem, fringilla at ultricies non, ultrices in arcu. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nam vehicula nunc at laoreet imperdiet. Nunc tristique ut risus id aliquet. Integer eleifend massa orci. - -Vestibulum sed ante sollicitudin nisi fringilla bibendum nec vel quam. Sed pretium augue eu ligula congue pulvinar. Donec vitae magna tincidunt, pharetra lacus id, convallis nulla. Cras viverra nisl nisl, varius convallis leo vulputate nec. Morbi at consequat dui, sed aliquet metus. Sed suscipit fermentum mollis. Maecenas nec mi sodales, tincidunt purus in, tristique mauris. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec interdum mi in velit efficitur, quis ultrices ex imperdiet. Sed vestibulum, magna ut tristique pretium, mi ipsum placerat tellus, non tempor enim augue et ex. Pellentesque eget felis quis ante sodales viverra ac sed lacus. Donec suscipit tempus massa, eget laoreet massa molestie at. - -Aenean fringilla dui non aliquet consectetur. Fusce cursus quam nec orci hendrerit faucibus. Donec consequat suscipit enim, non volutpat lectus auctor interdum. Proin lorem purus, maximus vel orci vitae, suscipit egestas turpis. Donec risus urna, congue a sem eu, aliquet placerat odio. Morbi gravida tristique turpis, quis efficitur enim. Nunc interdum gravida ipsum vel facilisis. Nunc congue finibus sollicitudin. Quisque euismod aliquet lectus et tincidunt. Curabitur ultrices sem ut mi fringilla fermentum. Morbi pretium, nisi sit amet dapibus congue, dolor enim consectetur risus, a interdum ligula odio sed odio. Quisque facilisis, mi at suscipit gravida, nunc sapien cursus justo, ut luctus odio nulla quis leo. Integer condimentum lobortis mauris, non egestas tellus lobortis sit amet. - -In sollicitudin velit ac ante vehicula, vitae varius tortor mollis. In hac habitasse platea dictumst. Quisque et orci lorem. Integer malesuada fringilla luctus. Pellentesque malesuada, mi non lobortis porttitor, ante ligula vulputate ante, nec dictum risus eros sit amet sapien. Nulla aliquam lorem libero, ac varius nulla tristique eget. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Ut pellentesque mauris orci, vel consequat mi varius a. Ut sit amet elit vulputate, lacinia metus non, fermentum nisl. Pellentesque eu nisi sed quam egestas blandit. Duis sit amet lobortis dolor. Donec consectetur sem interdum, tristique elit sit amet, sodales lacus. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Fusce id aliquam augue. Sed pretium congue risus vitae lacinia. Vestibulum non vulputate risus, ut malesuada justo. - -Sed odio elit, consectetur ac mauris quis, consequat commodo libero. Fusce sodales velit vulputate pulvinar fermentum. Donec iaculis nec nisl eget faucibus. Mauris at dictum velit. Donec fermentum lectus eu viverra volutpat. Aliquam consequat facilisis lorem, cursus consequat dui bibendum ullamcorper. Pellentesque nulla magna, imperdiet at magna et, cursus egestas enim. Nullam semper molestie lectus sit amet semper. Duis eget tincidunt est. Integer id neque risus. Integer ultricies hendrerit vestibulum. Donec blandit blandit sagittis. Nunc consectetur vitae nisi consectetur volutpat. - -Nulla id lorem fermentum, efficitur magna a, hendrerit dui. Vivamus sagittis orci gravida, bibendum quam eget, molestie est. Phasellus nec enim tincidunt, volutpat sapien non, laoreet diam. Nulla posuere enim nec porttitor lobortis. Donec auctor odio ut orci eleifend, ut eleifend purus convallis. Interdum et malesuada fames ac ante ipsum primis in faucibus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut hendrerit, purus eget viverra tincidunt, sem magna imperdiet libero, et aliquam turpis neque vitae elit. Maecenas semper varius iaculis. Cras non lorem quis quam bibendum eleifend in et libero. Curabitur at purus mauris. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus porta diam sed elit eleifend gravida. - -Nulla facilisi. Ut ultricies diam vel diam consectetur, vel porta augue molestie. Fusce interdum sapien et metus facilisis pellentesque. Nulla convallis sem at nunc vehicula facilisis. Nam ac rutrum purus. Nunc bibendum, dolor sit amet tempus ullamcorper, lorem leo tempor sem, id fringilla nunc augue scelerisque augue. Nullam sit amet rutrum nisl. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Donec sed mauris gravida eros vehicula sagittis at eget orci. Cras elementum, eros at accumsan bibendum, libero neque blandit purus, vitae vestibulum libero massa ac nibh. Integer at placerat nulla. Mauris eu eleifend orci. Aliquam consequat ligula vitae erat porta lobortis. Duis fermentum elit ac aliquet ornare. - -Mauris eget cursus tellus, eget sodales purus. Aliquam malesuada, augue id vulputate finibus, nisi ex bibendum nisl, sit amet laoreet quam urna a dolor. Nullam ultricies, sapien eu laoreet consequat, erat eros dignissim diam, ultrices sodales lectus mauris et leo. Morbi lacinia eu ante at tempus. Sed iaculis finibus magna malesuada efficitur. Donec faucibus erat sit amet elementum feugiat. Praesent a placerat nisi. Etiam lacinia gravida diam, et sollicitudin sapien tincidunt ut. - -Maecenas felis quam, tincidunt vitae venenatis scelerisque, viverra vitae odio. Phasellus enim neque, ultricies suscipit malesuada sit amet, vehicula sit amet purus. Nulla placerat sit amet dui vel tincidunt. Nam quis neque vel magna commodo egestas. Vestibulum sagittis rutrum lorem ut congue. Maecenas vel ultrices tellus. Donec efficitur, urna ac consequat iaculis, lorem felis pharetra eros, eget faucibus orci lectus sit amet arcu. - -Ut a tempus nisi. Nulla facilisi. Praesent vulputate maximus mi et dapibus. Sed sit amet libero ac augue hendrerit efficitur in a sapien. Mauris placerat velit sit amet tellus sollicitudin faucibus. Donec egestas a magna ac suscipit. Duis enim sapien, mollis sed egestas et, vestibulum vel leo. - -Proin quis dapibus dui. Donec eu tincidunt nunc. Vivamus eget purus consectetur, maximus ante vitae, tincidunt elit. Aenean mattis dolor a gravida aliquam. Praesent quis tellus id sem maximus vulputate nec sed nulla. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur metus nulla, volutpat volutpat est eu, hendrerit congue erat. Aliquam sollicitudin augue ante. Sed sollicitudin, magna eu consequat elementum, mi augue ullamcorper felis, molestie imperdiet erat metus iaculis est. Proin ac tortor nisi. Pellentesque quis nisi risus. Integer enim sapien, tincidunt quis tortor id, accumsan venenatis mi. Nulla facilisi. - -Cras pretium sit amet quam congue maximus. Morbi lacus libero, imperdiet commodo massa sed, scelerisque placerat libero. Cras nisl nisi, consectetur sed bibendum eu, venenatis at enim. Proin sodales justo at quam aliquam, a consectetur mi ornare. Donec porta ac est sit amet efficitur. Suspendisse vestibulum tortor id neque imperdiet, id lacinia risus vehicula. Phasellus ac eleifend purus. Mauris vel gravida ante. Aliquam vitae lobortis risus. Sed vehicula consectetur tincidunt. Nam et justo vitae purus molestie consequat. Pellentesque ipsum ex, convallis quis blandit non, gravida et urna. Donec diam ligula amet. diff --git a/pkg/sentry/fsimpl/ext/assets/file.txt b/pkg/sentry/fsimpl/ext/assets/file.txt deleted file mode 100644 index 980a0d5f1..000000000 --- a/pkg/sentry/fsimpl/ext/assets/file.txt +++ /dev/null @@ -1 +0,0 @@ -Hello World! diff --git a/pkg/sentry/fsimpl/ext/assets/symlink.txt b/pkg/sentry/fsimpl/ext/assets/symlink.txt deleted file mode 120000 index 4c330738c..000000000 --- a/pkg/sentry/fsimpl/ext/assets/symlink.txt +++ /dev/null @@ -1 +0,0 @@ -file.txt
\ No newline at end of file diff --git a/pkg/sentry/fsimpl/ext/assets/tiny.ext2 b/pkg/sentry/fsimpl/ext/assets/tiny.ext2 Binary files differdeleted file mode 100644 index 381ade9bf..000000000 --- a/pkg/sentry/fsimpl/ext/assets/tiny.ext2 +++ /dev/null diff --git a/pkg/sentry/fsimpl/ext/assets/tiny.ext3 b/pkg/sentry/fsimpl/ext/assets/tiny.ext3 Binary files differdeleted file mode 100644 index 0e97a324c..000000000 --- a/pkg/sentry/fsimpl/ext/assets/tiny.ext3 +++ /dev/null diff --git a/pkg/sentry/fsimpl/ext/assets/tiny.ext4 b/pkg/sentry/fsimpl/ext/assets/tiny.ext4 Binary files differdeleted file mode 100644 index a6859736d..000000000 --- a/pkg/sentry/fsimpl/ext/assets/tiny.ext4 +++ /dev/null diff --git a/pkg/sentry/fsimpl/ext/benchmark/BUILD b/pkg/sentry/fsimpl/ext/benchmark/BUILD deleted file mode 100644 index 6c5a559fd..000000000 --- a/pkg/sentry/fsimpl/ext/benchmark/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools:defs.bzl", "go_test") - -package(licenses = ["notice"]) - -go_test( - name = "benchmark_test", - size = "small", - srcs = ["benchmark_test.go"], - deps = [ - "//pkg/context", - "//pkg/fspath", - "//pkg/sentry/contexttest", - "//pkg/sentry/fsimpl/ext", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - ], -) diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go deleted file mode 100644 index 2ee7cc7ac..000000000 --- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// These benchmarks emulate memfs benchmarks. Ext4 images must be created -// before this benchmark is run using the `make_deep_ext4.sh` script at -// /tmp/image-{depth}.ext4 for all the depths tested below. -// -// The benchmark itself cannot run the script because the script requires -// sudo privileges to create the file system images. -package benchmark_test - -import ( - "fmt" - "os" - "runtime" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -var depths = []int{1, 2, 3, 8, 64, 100} - -const filename = "file.txt" - -// setUp opens imagePath as an ext Filesystem and returns all necessary -// elements required to run tests. If error is nil, it also returns a tear -// down function which must be called after the test is run for clean up. -func setUp(b *testing.B, imagePath string) (context.Context, *vfs.VirtualFilesystem, *vfs.VirtualDentry, func(), error) { - f, err := os.Open(imagePath) - if err != nil { - return nil, nil, nil, nil, err - } - - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - return nil, nil, nil, nil, err - } - vfsObj.MustRegisterFilesystemType("extfs", ext.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, imagePath, "extfs", &vfs.MountOptions{ - GetFilesystemOptions: vfs.GetFilesystemOptions{ - InternalData: int(f.Fd()), - }, - }) - if err != nil { - f.Close() - return nil, nil, nil, nil, err - } - - root := mntns.Root() - root.IncRef() - - tearDown := func() { - root.DecRef(ctx) - - if err := f.Close(); err != nil { - b.Fatalf("tearDown failed: %v", err) - } - } - return ctx, vfsObj, &root, tearDown, nil -} - -// mount mounts extfs at the path operation passed. Returns a tear down -// function which must be called after the test is run for clean up. -func mount(b *testing.B, imagePath string, vfsfs *vfs.VirtualFilesystem, pop *vfs.PathOperation) func() { - b.Helper() - - f, err := os.Open(imagePath) - if err != nil { - b.Fatalf("could not open image at %s: %v", imagePath, err) - } - - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - if _, err := vfsfs.MountAt(ctx, creds, imagePath, pop, "extfs", &vfs.MountOptions{ - GetFilesystemOptions: vfs.GetFilesystemOptions{ - InternalData: int(f.Fd()), - }, - }); err != nil { - b.Fatalf("failed to mount tmpfs submount: %v", err) - } - return func() { - if err := f.Close(); err != nil { - b.Fatalf("tearDown failed: %v", err) - } - } -} - -// BenchmarkVFS2Ext4fsStat emulates BenchmarkVFS2MemfsStat. -func BenchmarkVFS2Ext4fsStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx, vfsfs, root, tearDown, err := setUp(b, fmt.Sprintf("/tmp/image-%d.ext4", depth)) - if err != nil { - b.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - creds := auth.CredentialsFromContext(ctx) - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - for i := 1; i <= depth; i++ { - filePathBuilder.WriteString(fmt.Sprintf("%d", i)) - filePathBuilder.WriteByte('/') - } - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsfs.StatAt(ctx, creds, &vfs.PathOperation{ - Root: *root, - Start: *root, - Path: fspath.Parse(filePath), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. - if stat.Size > 0 { - b.Fatalf("got wrong file size (%d)", stat.Size) - } - } - }) - } -} - -// BenchmarkVFS2ExtfsMountStat emulates BenchmarkVFS2MemfsMountStat. -func BenchmarkVFS2ExtfsMountStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - // Create root extfs with depth 1 so we can mount extfs again at /1/. - ctx, vfsfs, root, tearDown, err := setUp(b, fmt.Sprintf("/tmp/image-%d.ext4", 1)) - if err != nil { - b.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - creds := auth.CredentialsFromContext(ctx) - mountPointName := "/1/" - pop := vfs.PathOperation{ - Root: *root, - Start: *root, - Path: fspath.Parse(mountPointName), - } - - // Save the mount point for later use. - mountPoint, err := vfsfs.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to mount point: %v", err) - } - defer mountPoint.DecRef(ctx) - - // Create extfs submount. - mountTearDown := mount(b, fmt.Sprintf("/tmp/image-%d.ext4", depth), vfsfs, &pop) - defer mountTearDown() - - var filePathBuilder strings.Builder - filePathBuilder.WriteString(mountPointName) - for i := 1; i <= depth; i++ { - filePathBuilder.WriteString(fmt.Sprintf("%d", i)) - filePathBuilder.WriteByte('/') - } - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsfs.StatAt(ctx, creds, &vfs.PathOperation{ - Root: *root, - Start: *root, - Path: fspath.Parse(filePath), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. touch(1) always creates files of size 0 (empty). - if stat.Size > 0 { - b.Fatalf("got wrong file size (%d)", stat.Size) - } - } - }) - } -} diff --git a/pkg/sentry/fsimpl/ext/benchmark/make_deep_ext4.sh b/pkg/sentry/fsimpl/ext/benchmark/make_deep_ext4.sh deleted file mode 100755 index d0910da1f..000000000 --- a/pkg/sentry/fsimpl/ext/benchmark/make_deep_ext4.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script creates an ext4 image with $1 depth of directories and a file in -# the inner most directory. The created file is at path /1/2/.../depth/file.txt. -# The ext4 image is written to $2. The image is temporarily mounted at -# /tmp/mountpoint. This script must be run with sudo privileges. - -# Usage: -# sudo bash make_deep_ext4.sh {depth} {output path} - -# Check positional arguments. -if [ "$#" -ne 2 ]; then - echo "Usage: sudo bash make_deep_ext4.sh {depth} {output path}" - exit 1 -fi - -# Make sure depth is a non-negative number. -if ! [[ "$1" =~ ^[0-9]+$ ]]; then - echo "Depth must be a non-negative number." - exit 1 -fi - -# Create a 1 MB filesystem image at the requested output path. -rm -f $2 -fallocate -l 1M $2 -if [ $? -ne 0 ]; then - echo "fallocate failed" - exit $? -fi - -# Convert that blank into an ext4 image. -mkfs.ext4 -j $2 -if [ $? -ne 0 ]; then - echo "mkfs.ext4 failed" - exit $? -fi - -# Mount the image. -MOUNTPOINT=/tmp/mountpoint -mkdir -p $MOUNTPOINT -mount -o loop $2 $MOUNTPOINT -if [ $? -ne 0 ]; then - echo "mount failed" - exit $? -fi - -# Create nested directories and the file. -if [ "$1" -eq 0 ]; then - FILEPATH=$MOUNTPOINT/file.txt -else - FILEPATH=$MOUNTPOINT/$(seq -s '/' 1 $1)/file.txt -fi -mkdir -p $(dirname $FILEPATH) || exit -touch $FILEPATH - -# Clean up. -umount $MOUNTPOINT -rm -rf $MOUNTPOINT diff --git a/pkg/sentry/fsimpl/ext/block_map_file.go b/pkg/sentry/fsimpl/ext/block_map_file.go deleted file mode 100644 index 79719faed..000000000 --- a/pkg/sentry/fsimpl/ext/block_map_file.go +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "io" - "math" - - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/marshal/primitive" - "gvisor.dev/gvisor/pkg/syserror" -) - -const ( - // numDirectBlks is the number of direct blocks in ext block map inodes. - numDirectBlks = 12 -) - -// blockMapFile is a type of regular file which uses direct/indirect block -// addressing to store file data. This was deprecated in ext4. -type blockMapFile struct { - regFile regularFile - - // directBlks are the direct blocks numbers. The physical blocks pointed by - // these holds file data. Contains file blocks 0 to 11. - directBlks [numDirectBlks]primitive.Uint32 - - // indirectBlk is the physical block which contains (blkSize/4) direct block - // numbers (as uint32 integers). - indirectBlk primitive.Uint32 - - // doubleIndirectBlk is the physical block which contains (blkSize/4) indirect - // block numbers (as uint32 integers). - doubleIndirectBlk primitive.Uint32 - - // tripleIndirectBlk is the physical block which contains (blkSize/4) doubly - // indirect block numbers (as uint32 integers). - tripleIndirectBlk primitive.Uint32 - - // coverage at (i)th index indicates the amount of file data a node at - // height (i) covers. Height 0 is the direct block. - coverage [4]uint64 -} - -// Compiles only if blockMapFile implements io.ReaderAt. -var _ io.ReaderAt = (*blockMapFile)(nil) - -// newBlockMapFile is the blockMapFile constructor. It initializes the file to -// physical blocks map with (at most) the first 12 (direct) blocks. -func newBlockMapFile(args inodeArgs) (*blockMapFile, error) { - file := &blockMapFile{} - file.regFile.impl = file - file.regFile.inode.init(args, &file.regFile) - - for i := uint(0); i < 4; i++ { - file.coverage[i] = getCoverage(file.regFile.inode.blkSize, i) - } - - blkMap := file.regFile.inode.diskInode.Data() - for i := 0; i < numDirectBlks; i++ { - file.directBlks[i].UnmarshalBytes(blkMap[i*4 : (i+1)*4]) - } - file.indirectBlk.UnmarshalBytes(blkMap[numDirectBlks*4 : (numDirectBlks+1)*4]) - file.doubleIndirectBlk.UnmarshalBytes(blkMap[(numDirectBlks+1)*4 : (numDirectBlks+2)*4]) - file.tripleIndirectBlk.UnmarshalBytes(blkMap[(numDirectBlks+2)*4 : (numDirectBlks+3)*4]) - return file, nil -} - -// ReadAt implements io.ReaderAt.ReadAt. -func (f *blockMapFile) ReadAt(dst []byte, off int64) (int, error) { - if len(dst) == 0 { - return 0, nil - } - - if off < 0 { - return 0, linuxerr.EINVAL - } - - offset := uint64(off) - size := f.regFile.inode.diskInode.Size() - if offset >= size { - return 0, io.EOF - } - - // dirBlksEnd is the file offset until which direct blocks cover file data. - // Direct blocks cover 0 <= file offset < dirBlksEnd. - dirBlksEnd := numDirectBlks * f.coverage[0] - - // indirBlkEnd is the file offset until which the indirect block covers file - // data. The indirect block covers dirBlksEnd <= file offset < indirBlkEnd. - indirBlkEnd := dirBlksEnd + f.coverage[1] - - // doubIndirBlkEnd is the file offset until which the double indirect block - // covers file data. The double indirect block covers the range - // indirBlkEnd <= file offset < doubIndirBlkEnd. - doubIndirBlkEnd := indirBlkEnd + f.coverage[2] - - read := 0 - toRead := len(dst) - if uint64(toRead)+offset > size { - toRead = int(size - offset) - } - for read < toRead { - var err error - var curR int - - // Figure out which block to delegate the read to. - switch { - case offset < dirBlksEnd: - // Direct block. - curR, err = f.read(uint32(f.directBlks[offset/f.regFile.inode.blkSize]), offset%f.regFile.inode.blkSize, 0, dst[read:]) - case offset < indirBlkEnd: - // Indirect block. - curR, err = f.read(uint32(f.indirectBlk), offset-dirBlksEnd, 1, dst[read:]) - case offset < doubIndirBlkEnd: - // Doubly indirect block. - curR, err = f.read(uint32(f.doubleIndirectBlk), offset-indirBlkEnd, 2, dst[read:]) - default: - // Triply indirect block. - curR, err = f.read(uint32(f.tripleIndirectBlk), offset-doubIndirBlkEnd, 3, dst[read:]) - } - - read += curR - offset += uint64(curR) - if err != nil { - return read, err - } - } - - if read < len(dst) { - return read, io.EOF - } - return read, nil -} - -// read is the recursive step of the ReadAt function. It relies on knowing the -// current node's location on disk (curPhyBlk) and its height in the block map -// tree. A height of 0 shows that the current node is actually holding file -// data. relFileOff tells the offset from which we need to start to reading -// under the current node. It is completely relative to the current node. -func (f *blockMapFile) read(curPhyBlk uint32, relFileOff uint64, height uint, dst []byte) (int, error) { - curPhyBlkOff := int64(curPhyBlk) * int64(f.regFile.inode.blkSize) - if height == 0 { - toRead := int(f.regFile.inode.blkSize - relFileOff) - if len(dst) < toRead { - toRead = len(dst) - } - - n, _ := f.regFile.inode.fs.dev.ReadAt(dst[:toRead], curPhyBlkOff+int64(relFileOff)) - if n < toRead { - return n, syserror.EIO - } - return n, nil - } - - childCov := f.coverage[height-1] - startIdx := relFileOff / childCov - endIdx := f.regFile.inode.blkSize / 4 // This is exclusive. - wantEndIdx := (relFileOff + uint64(len(dst))) / childCov - wantEndIdx++ // Make this exclusive. - if wantEndIdx < endIdx { - endIdx = wantEndIdx - } - - read := 0 - curChildOff := relFileOff % childCov - for i := startIdx; i < endIdx; i++ { - var childPhyBlk primitive.Uint32 - err := readFromDisk(f.regFile.inode.fs.dev, curPhyBlkOff+int64(i*4), &childPhyBlk) - if err != nil { - return read, err - } - - n, err := f.read(uint32(childPhyBlk), curChildOff, height-1, dst[read:]) - read += n - if err != nil { - return read, err - } - - curChildOff = 0 - } - - return read, nil -} - -// getCoverage returns the number of bytes a node at the given height covers. -// Height 0 is the file data block itself. Height 1 is the indirect block. -// -// Formula: blkSize * ((blkSize / 4)^height) -func getCoverage(blkSize uint64, height uint) uint64 { - return blkSize * uint64(math.Pow(float64(blkSize/4), float64(height))) -} diff --git a/pkg/sentry/fsimpl/ext/block_map_test.go b/pkg/sentry/fsimpl/ext/block_map_test.go deleted file mode 100644 index ed98b482e..000000000 --- a/pkg/sentry/fsimpl/ext/block_map_test.go +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "bytes" - "math/rand" - "testing" - - "github.com/google/go-cmp/cmp" - "gvisor.dev/gvisor/pkg/marshal/primitive" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" -) - -// These consts are for mocking the block map tree. -const ( - mockBMBlkSize = uint32(16) - mockBMDiskSize = 2500 -) - -// TestBlockMapReader stress tests block map reader functionality. It performs -// random length reads from all possible positions in the block map structure. -func TestBlockMapReader(t *testing.T) { - mockBMFile, want := blockMapSetUp(t) - n := len(want) - - for from := 0; from < n; from++ { - got := make([]byte, n-from) - - if read, err := mockBMFile.ReadAt(got, int64(from)); err != nil { - t.Fatalf("file read operation from offset %d to %d only read %d bytes: %v", from, n, read, err) - } - - if diff := cmp.Diff(got, want[from:]); diff != "" { - t.Fatalf("file data from offset %d to %d mismatched (-want +got):\n%s", from, n, diff) - } - } -} - -// blkNumGen is a number generator which gives block numbers for building the -// block map file on disk. It gives unique numbers in a random order which -// facilitates in creating an extremely fragmented filesystem. -type blkNumGen struct { - nums []uint32 -} - -// newBlkNumGen is the blkNumGen constructor. -func newBlkNumGen() *blkNumGen { - blkNums := &blkNumGen{} - lim := mockBMDiskSize / mockBMBlkSize - blkNums.nums = make([]uint32, lim) - for i := uint32(0); i < lim; i++ { - blkNums.nums[i] = i - } - - rand.Shuffle(int(lim), func(i, j int) { - blkNums.nums[i], blkNums.nums[j] = blkNums.nums[j], blkNums.nums[i] - }) - return blkNums -} - -// next returns the next random block number. -func (n *blkNumGen) next() uint32 { - ret := n.nums[0] - n.nums = n.nums[1:] - return ret -} - -// blockMapSetUp creates a mock disk and a block map file. It initializes the -// block map file with 12 direct block, 1 indirect block, 1 double indirect -// block and 1 triple indirect block (basically fill it till the rim). It -// initializes the disk to reflect the inode. Also returns the file data that -// the inode covers and that is written to disk. -func blockMapSetUp(t *testing.T) (*blockMapFile, []byte) { - mockDisk := make([]byte, mockBMDiskSize) - var fileData []byte - blkNums := newBlkNumGen() - off := 0 - data := make([]byte, (numDirectBlks+3)*(*primitive.Uint32)(nil).SizeBytes()) - - // Write the direct blocks. - for i := 0; i < numDirectBlks; i++ { - curBlkNum := primitive.Uint32(blkNums.next()) - curBlkNum.MarshalBytes(data[off:]) - off += curBlkNum.SizeBytes() - fileData = append(fileData, writeFileDataToBlock(mockDisk, uint32(curBlkNum), 0, blkNums)...) - } - - // Write to indirect block. - indirectBlk := primitive.Uint32(blkNums.next()) - indirectBlk.MarshalBytes(data[off:]) - off += indirectBlk.SizeBytes() - fileData = append(fileData, writeFileDataToBlock(mockDisk, uint32(indirectBlk), 1, blkNums)...) - - // Write to double indirect block. - doublyIndirectBlk := primitive.Uint32(blkNums.next()) - doublyIndirectBlk.MarshalBytes(data[off:]) - off += doublyIndirectBlk.SizeBytes() - fileData = append(fileData, writeFileDataToBlock(mockDisk, uint32(doublyIndirectBlk), 2, blkNums)...) - - // Write to triple indirect block. - triplyIndirectBlk := primitive.Uint32(blkNums.next()) - triplyIndirectBlk.MarshalBytes(data[off:]) - fileData = append(fileData, writeFileDataToBlock(mockDisk, uint32(triplyIndirectBlk), 3, blkNums)...) - - args := inodeArgs{ - fs: &filesystem{ - dev: bytes.NewReader(mockDisk), - }, - diskInode: &disklayout.InodeNew{ - InodeOld: disklayout.InodeOld{ - SizeLo: getMockBMFileFize(), - }, - }, - blkSize: uint64(mockBMBlkSize), - } - copy(args.diskInode.Data(), data) - - mockFile, err := newBlockMapFile(args) - if err != nil { - t.Fatalf("newBlockMapFile failed: %v", err) - } - return mockFile, fileData -} - -// writeFileDataToBlock writes random bytes to the block on disk. -func writeFileDataToBlock(disk []byte, blkNum uint32, height uint, blkNums *blkNumGen) []byte { - if height == 0 { - start := blkNum * mockBMBlkSize - end := start + mockBMBlkSize - rand.Read(disk[start:end]) - return disk[start:end] - } - - var fileData []byte - for off := blkNum * mockBMBlkSize; off < (blkNum+1)*mockBMBlkSize; off += 4 { - curBlkNum := primitive.Uint32(blkNums.next()) - curBlkNum.MarshalBytes(disk[off : off+4]) - fileData = append(fileData, writeFileDataToBlock(disk, uint32(curBlkNum), height-1, blkNums)...) - } - return fileData -} - -// getMockBMFileFize gets the size of the mock block map file which is used for -// testing. -func getMockBMFileFize() uint32 { - return uint32(numDirectBlks*getCoverage(uint64(mockBMBlkSize), 0) + getCoverage(uint64(mockBMBlkSize), 1) + getCoverage(uint64(mockBMBlkSize), 2) + getCoverage(uint64(mockBMBlkSize), 3)) -} diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go deleted file mode 100644 index 9bfed883a..000000000 --- a/pkg/sentry/fsimpl/ext/dentry.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// dentry implements vfs.DentryImpl. -// -// +stateify savable -type dentry struct { - vfsd vfs.Dentry - - // Protected by filesystem.mu. - parent *dentry - name string - - // inode is the inode represented by this dentry. Multiple Dentries may - // share a single non-directory Inode (with hard links). inode is - // immutable. - inode *inode -} - -// Compiles only if dentry implements vfs.DentryImpl. -var _ vfs.DentryImpl = (*dentry)(nil) - -// newDentry is the dentry constructor. -func newDentry(in *inode) *dentry { - d := &dentry{ - inode: in, - } - d.vfsd.Init(d) - return d -} - -// IncRef implements vfs.DentryImpl.IncRef. -func (d *dentry) IncRef() { - d.inode.incRef() -} - -// TryIncRef implements vfs.DentryImpl.TryIncRef. -func (d *dentry) TryIncRef() bool { - return d.inode.tryIncRef() -} - -// DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef(ctx context.Context) { - // FIXME(b/134676337): filesystem.mu may not be locked as required by - // inode.decRef(). - d.inode.decRef() -} - -// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. -// -// TODO(b/134676337): Implement inotify. -func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) {} - -// Watches implements vfs.DentryImpl.Watches. -// -// TODO(b/134676337): Implement inotify. -func (d *dentry) Watches() *vfs.Watches { - return nil -} - -// OnZeroWatches implements vfs.Dentry.OnZeroWatches. -// -// TODO(b/134676337): Implement inotify. -func (d *dentry) OnZeroWatches(context.Context) {} diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go deleted file mode 100644 index cc067c20e..000000000 --- a/pkg/sentry/fsimpl/ext/directory.go +++ /dev/null @@ -1,312 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" -) - -// directory represents a directory inode. It holds the childList in memory. -// -// +stateify savable -type directory struct { - inode inode - - // childCache maps filenames to dentries for children for which dentries - // have been instantiated. childCache is protected by filesystem.mu. - childCache map[string]*dentry - - // mu serializes the changes to childList. - // Lock Order (outermost locks must be taken first): - // directory.mu - // filesystem.mu - mu sync.Mutex `state:"nosave"` - - // childList is a list containing (1) child dirents and (2) fake dirents - // (with diskDirent == nil) that represent the iteration position of - // directoryFDs. childList is used to support directoryFD.IterDirents() - // efficiently. childList is protected by mu. - childList direntList - - // childMap maps the child's filename to the dirent structure stored in - // childList. This adds some data replication but helps in faster path - // traversal. For consistency, key == childMap[key].diskDirent.FileName(). - // Immutable. - childMap map[string]*dirent -} - -// newDirectory is the directory constructor. -func newDirectory(args inodeArgs, newDirent bool) (*directory, error) { - file := &directory{ - childCache: make(map[string]*dentry), - childMap: make(map[string]*dirent), - } - file.inode.init(args, file) - - // Initialize childList by reading dirents from the underlying file. - if args.diskInode.Flags().Index { - // TODO(b/134676337): Support hash tree directories. Currently only the '.' - // and '..' entries are read in. - - // Users cannot navigate this hash tree directory yet. - log.Warningf("hash tree directory being used which is unsupported") - return file, nil - } - - // The dirents are organized in a linear array in the file data. - // Extract the file data and decode the dirents. - regFile, err := newRegularFile(args) - if err != nil { - return nil, err - } - - // buf is used as scratch space for reading in dirents from disk and - // unmarshalling them into dirent structs. - buf := make([]byte, disklayout.DirentSize) - size := args.diskInode.Size() - for off, inc := uint64(0), uint64(0); off < size; off += inc { - toRead := size - off - if toRead > disklayout.DirentSize { - toRead = disklayout.DirentSize - } - if n, err := regFile.impl.ReadAt(buf[:toRead], int64(off)); uint64(n) < toRead { - return nil, err - } - - var curDirent dirent - if newDirent { - curDirent.diskDirent = &disklayout.DirentNew{} - } else { - curDirent.diskDirent = &disklayout.DirentOld{} - } - curDirent.diskDirent.UnmarshalBytes(buf) - - if curDirent.diskDirent.Inode() != 0 && len(curDirent.diskDirent.FileName()) != 0 { - // Inode number and name length fields being set to 0 is used to indicate - // an unused dirent. - file.childList.PushBack(&curDirent) - file.childMap[curDirent.diskDirent.FileName()] = &curDirent - } - - // The next dirent is placed exactly after this dirent record on disk. - inc = uint64(curDirent.diskDirent.RecordSize()) - } - - return file, nil -} - -func (i *inode) isDir() bool { - _, ok := i.impl.(*directory) - return ok -} - -// dirent is the directory.childList node. -// -// +stateify savable -type dirent struct { - diskDirent disklayout.Dirent - - // direntEntry links dirents into their parent directory.childList. - direntEntry -} - -// directoryFD represents a directory file description. It implements -// vfs.FileDescriptionImpl. -// -// +stateify savable -type directoryFD struct { - fileDescription - vfs.DirectoryFileDescriptionDefaultImpl - - // Protected by directory.mu. - iter *dirent - off int64 -} - -// Compiles only if directoryFD implements vfs.FileDescriptionImpl. -var _ vfs.FileDescriptionImpl = (*directoryFD)(nil) - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *directoryFD) Release(ctx context.Context) { - if fd.iter == nil { - return - } - - dir := fd.inode().impl.(*directory) - dir.mu.Lock() - dir.childList.Remove(fd.iter) - dir.mu.Unlock() - fd.iter = nil -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - extfs := fd.filesystem() - dir := fd.inode().impl.(*directory) - - dir.mu.Lock() - defer dir.mu.Unlock() - - // Ensure that fd.iter exists and is not linked into dir.childList. - var child *dirent - if fd.iter == nil { - // Start iteration at the beginning of dir. - child = dir.childList.Front() - fd.iter = &dirent{} - } else { - // Continue iteration from where we left off. - child = fd.iter.Next() - dir.childList.Remove(fd.iter) - } - for ; child != nil; child = child.Next() { - // Skip other directoryFD iterators. - if child.diskDirent != nil { - childType, ok := child.diskDirent.FileType() - if !ok { - // We will need to read the inode off disk. Do not increment - // ref count here because this inode is not being added to the - // dentry tree. - extfs.mu.Lock() - childInode, err := extfs.getOrCreateInodeLocked(child.diskDirent.Inode()) - extfs.mu.Unlock() - if err != nil { - // Usage of the file description after the error is - // undefined. This implementation would continue reading - // from the next dirent. - fd.off++ - dir.childList.InsertAfter(child, fd.iter) - return err - } - childType = fs.ToInodeType(childInode.diskInode.Mode().FileType()) - } - - if err := cb.Handle(vfs.Dirent{ - Name: child.diskDirent.FileName(), - Type: fs.ToDirentType(childType), - Ino: uint64(child.diskDirent.Inode()), - NextOff: fd.off + 1, - }); err != nil { - dir.childList.InsertBefore(child, fd.iter) - return err - } - fd.off++ - } - } - dir.childList.PushBack(fd.iter) - return nil -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - if whence != linux.SEEK_SET && whence != linux.SEEK_CUR { - return 0, linuxerr.EINVAL - } - - dir := fd.inode().impl.(*directory) - - dir.mu.Lock() - defer dir.mu.Unlock() - - // Find resulting offset. - if whence == linux.SEEK_CUR { - offset += fd.off - } - - if offset < 0 { - // lseek(2) specifies that EINVAL should be returned if the resulting offset - // is negative. - return 0, linuxerr.EINVAL - } - - n := int64(len(dir.childMap)) - realWantOff := offset - if realWantOff > n { - realWantOff = n - } - realCurOff := fd.off - if realCurOff > n { - realCurOff = n - } - - // Ensure that fd.iter exists and is linked into dir.childList so we can - // intelligently seek from the optimal position. - if fd.iter == nil { - fd.iter = &dirent{} - dir.childList.PushFront(fd.iter) - } - - // Guess that iterating from the current position is optimal. - child := fd.iter - diff := realWantOff - realCurOff // Shows direction and magnitude of travel. - - // See if starting from the beginning or end is better. - abDiff := diff - if diff < 0 { - abDiff = -diff - } - if abDiff > realWantOff { - // Starting from the beginning is best. - child = dir.childList.Front() - diff = realWantOff - } else if abDiff > (n - realWantOff) { - // Starting from the end is best. - child = dir.childList.Back() - // (n - 1) because the last non-nil dirent represents the (n-1)th offset. - diff = realWantOff - (n - 1) - } - - for child != nil { - // Skip other directoryFD iterators. - if child.diskDirent != nil { - if diff == 0 { - if child != fd.iter { - dir.childList.Remove(fd.iter) - dir.childList.InsertBefore(child, fd.iter) - } - - fd.off = offset - return offset, nil - } - - if diff < 0 { - diff++ - child = child.Prev() - } else { - diff-- - child = child.Next() - } - continue - } - - if diff < 0 { - child = child.Prev() - } else { - child = child.Next() - } - } - - // Reaching here indicates that the offset is beyond the end of the childList. - dir.childList.Remove(fd.iter) - dir.childList.PushBack(fd.iter) - fd.off = offset - return offset, nil -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/BUILD b/pkg/sentry/fsimpl/ext/disklayout/BUILD deleted file mode 100644 index d98a05dd8..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "disklayout", - srcs = [ - "block_group.go", - "block_group_32.go", - "block_group_64.go", - "dirent.go", - "dirent_new.go", - "dirent_old.go", - "disklayout.go", - "extent.go", - "inode.go", - "inode_new.go", - "inode_old.go", - "superblock.go", - "superblock_32.go", - "superblock_64.go", - "superblock_old.go", - "test_utils.go", - ], - marshal = True, - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/marshal", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - ], -) - -go_test( - name = "disklayout_test", - size = "small", - srcs = [ - "block_group_test.go", - "dirent_test.go", - "extent_test.go", - "inode_test.go", - "superblock_test.go", - ], - library = ":disklayout", - deps = ["//pkg/sentry/kernel/time"], -) diff --git a/pkg/sentry/fsimpl/ext/disklayout/block_group.go b/pkg/sentry/fsimpl/ext/disklayout/block_group.go deleted file mode 100644 index 0d56ae9da..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/block_group.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "gvisor.dev/gvisor/pkg/marshal" -) - -// BlockGroup represents a Linux ext block group descriptor. An ext file system -// is split into a series of block groups. This provides an access layer to -// information needed to access and use a block group. -// -// Location: -// - The block group descriptor table is always placed in the blocks -// immediately after the block containing the superblock. -// - The 1st block group descriptor in the original table is in the -// (sb.FirstDataBlock() + 1)th block. -// - See SuperBlock docs to see where the block group descriptor table is -// replicated. -// - sb.BgDescSize() must be used as the block group descriptor entry size -// while reading the table from disk. -// -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#block-group-descriptors. -type BlockGroup interface { - marshal.Marshallable - - // InodeTable returns the absolute block number of the block containing the - // inode table. This points to an array of Inode structs. Inode tables are - // statically allocated at mkfs time. The superblock records the number of - // inodes per group (length of this table) and the size of each inode struct. - InodeTable() uint64 - - // BlockBitmap returns the absolute block number of the block containing the - // block bitmap. This bitmap tracks the usage of data blocks within this block - // group and has its own checksum. - BlockBitmap() uint64 - - // InodeBitmap returns the absolute block number of the block containing the - // inode bitmap. This bitmap tracks the usage of this group's inode table - // entries and has its own checksum. - InodeBitmap() uint64 - - // ExclusionBitmap returns the absolute block number of the snapshot exclusion - // bitmap. - ExclusionBitmap() uint64 - - // FreeBlocksCount returns the number of free blocks in the group. - FreeBlocksCount() uint32 - - // FreeInodesCount returns the number of free inodes in the group. - FreeInodesCount() uint32 - - // DirectoryCount returns the number of inodes that represent directories - // under this block group. - DirectoryCount() uint32 - - // UnusedInodeCount returns the number of unused inodes beyond the last used - // inode in this group's inode table. As a result, we needn’t scan past the - // (InodesPerGroup - UnusedInodeCount())th entry in the inode table. - UnusedInodeCount() uint32 - - // BlockBitmapChecksum returns the block bitmap checksum. This is calculated - // using crc32c(FS UUID + group number + entire bitmap). - BlockBitmapChecksum() uint32 - - // InodeBitmapChecksum returns the inode bitmap checksum. This is calculated - // using crc32c(FS UUID + group number + entire bitmap). - InodeBitmapChecksum() uint32 - - // Checksum returns this block group's checksum. - // - // If SbMetadataCsum feature is set: - // - checksum is crc32c(FS UUID + group number + group descriptor - // structure) & 0xFFFF. - // - // If SbGdtCsum feature is set: - // - checksum is crc16(FS UUID + group number + group descriptor - // structure). - // - // SbMetadataCsum and SbGdtCsum should not be both set. - // If they are, Linux warns and asks to run fsck. - Checksum() uint16 - - // Flags returns BGFlags which represents the block group flags. - Flags() BGFlags -} - -// These are the different block group flags. -const ( - // BgInodeUninit indicates that inode table and bitmap are not initialized. - BgInodeUninit uint16 = 0x1 - - // BgBlockUninit indicates that block bitmap is not initialized. - BgBlockUninit uint16 = 0x2 - - // BgInodeZeroed indicates that inode table is zeroed. - BgInodeZeroed uint16 = 0x4 -) - -// BGFlags represents all the different combinations of block group flags. -type BGFlags struct { - InodeUninit bool - BlockUninit bool - InodeZeroed bool -} - -// ToInt converts a BGFlags struct back to its 16-bit representation. -func (f BGFlags) ToInt() uint16 { - var res uint16 - - if f.InodeUninit { - res |= BgInodeUninit - } - if f.BlockUninit { - res |= BgBlockUninit - } - if f.InodeZeroed { - res |= BgInodeZeroed - } - - return res -} - -// BGFlagsFromInt converts the 16-bit flag representation to a BGFlags struct. -func BGFlagsFromInt(flags uint16) BGFlags { - return BGFlags{ - InodeUninit: flags&BgInodeUninit > 0, - BlockUninit: flags&BgBlockUninit > 0, - InodeZeroed: flags&BgInodeZeroed > 0, - } -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/block_group_32.go b/pkg/sentry/fsimpl/ext/disklayout/block_group_32.go deleted file mode 100644 index a35fa22a0..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/block_group_32.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// BlockGroup32Bit emulates the first half of struct ext4_group_desc in -// fs/ext4/ext4.h. It is the block group descriptor struct for ext2, ext3 and -// 32-bit ext4 filesystems. It implements BlockGroup interface. -// -// +marshal -type BlockGroup32Bit struct { - BlockBitmapLo uint32 - InodeBitmapLo uint32 - InodeTableLo uint32 - FreeBlocksCountLo uint16 - FreeInodesCountLo uint16 - UsedDirsCountLo uint16 - FlagsRaw uint16 - ExcludeBitmapLo uint32 - BlockBitmapChecksumLo uint16 - InodeBitmapChecksumLo uint16 - ItableUnusedLo uint16 - ChecksumRaw uint16 -} - -// Compiles only if BlockGroup32Bit implements BlockGroup. -var _ BlockGroup = (*BlockGroup32Bit)(nil) - -// InodeTable implements BlockGroup.InodeTable. -func (bg *BlockGroup32Bit) InodeTable() uint64 { return uint64(bg.InodeTableLo) } - -// BlockBitmap implements BlockGroup.BlockBitmap. -func (bg *BlockGroup32Bit) BlockBitmap() uint64 { return uint64(bg.BlockBitmapLo) } - -// InodeBitmap implements BlockGroup.InodeBitmap. -func (bg *BlockGroup32Bit) InodeBitmap() uint64 { return uint64(bg.InodeBitmapLo) } - -// ExclusionBitmap implements BlockGroup.ExclusionBitmap. -func (bg *BlockGroup32Bit) ExclusionBitmap() uint64 { return uint64(bg.ExcludeBitmapLo) } - -// FreeBlocksCount implements BlockGroup.FreeBlocksCount. -func (bg *BlockGroup32Bit) FreeBlocksCount() uint32 { return uint32(bg.FreeBlocksCountLo) } - -// FreeInodesCount implements BlockGroup.FreeInodesCount. -func (bg *BlockGroup32Bit) FreeInodesCount() uint32 { return uint32(bg.FreeInodesCountLo) } - -// DirectoryCount implements BlockGroup.DirectoryCount. -func (bg *BlockGroup32Bit) DirectoryCount() uint32 { return uint32(bg.UsedDirsCountLo) } - -// UnusedInodeCount implements BlockGroup.UnusedInodeCount. -func (bg *BlockGroup32Bit) UnusedInodeCount() uint32 { return uint32(bg.ItableUnusedLo) } - -// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum. -func (bg *BlockGroup32Bit) BlockBitmapChecksum() uint32 { return uint32(bg.BlockBitmapChecksumLo) } - -// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum. -func (bg *BlockGroup32Bit) InodeBitmapChecksum() uint32 { return uint32(bg.InodeBitmapChecksumLo) } - -// Checksum implements BlockGroup.Checksum. -func (bg *BlockGroup32Bit) Checksum() uint16 { return bg.ChecksumRaw } - -// Flags implements BlockGroup.Flags. -func (bg *BlockGroup32Bit) Flags() BGFlags { return BGFlagsFromInt(bg.FlagsRaw) } diff --git a/pkg/sentry/fsimpl/ext/disklayout/block_group_64.go b/pkg/sentry/fsimpl/ext/disklayout/block_group_64.go deleted file mode 100644 index d54d1d345..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/block_group_64.go +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// BlockGroup64Bit emulates struct ext4_group_desc in fs/ext4/ext4.h. -// It is the block group descriptor struct for 64-bit ext4 filesystems. -// It implements BlockGroup interface. It is an extension of the 32-bit -// version of BlockGroup. -// -// +marshal -type BlockGroup64Bit struct { - // We embed the 32-bit struct here because 64-bit version is just an extension - // of the 32-bit version. - BlockGroup32Bit - - // 64-bit specific fields. - BlockBitmapHi uint32 - InodeBitmapHi uint32 - InodeTableHi uint32 - FreeBlocksCountHi uint16 - FreeInodesCountHi uint16 - UsedDirsCountHi uint16 - ItableUnusedHi uint16 - ExcludeBitmapHi uint32 - BlockBitmapChecksumHi uint16 - InodeBitmapChecksumHi uint16 - _ uint32 // Padding to 64 bytes. -} - -// Compiles only if BlockGroup64Bit implements BlockGroup. -var _ BlockGroup = (*BlockGroup64Bit)(nil) - -// Methods to override. Checksum() and Flags() are not overridden. - -// InodeTable implements BlockGroup.InodeTable. -func (bg *BlockGroup64Bit) InodeTable() uint64 { - return (uint64(bg.InodeTableHi) << 32) | uint64(bg.InodeTableLo) -} - -// BlockBitmap implements BlockGroup.BlockBitmap. -func (bg *BlockGroup64Bit) BlockBitmap() uint64 { - return (uint64(bg.BlockBitmapHi) << 32) | uint64(bg.BlockBitmapLo) -} - -// InodeBitmap implements BlockGroup.InodeBitmap. -func (bg *BlockGroup64Bit) InodeBitmap() uint64 { - return (uint64(bg.InodeBitmapHi) << 32) | uint64(bg.InodeBitmapLo) -} - -// ExclusionBitmap implements BlockGroup.ExclusionBitmap. -func (bg *BlockGroup64Bit) ExclusionBitmap() uint64 { - return (uint64(bg.ExcludeBitmapHi) << 32) | uint64(bg.ExcludeBitmapLo) -} - -// FreeBlocksCount implements BlockGroup.FreeBlocksCount. -func (bg *BlockGroup64Bit) FreeBlocksCount() uint32 { - return (uint32(bg.FreeBlocksCountHi) << 16) | uint32(bg.FreeBlocksCountLo) -} - -// FreeInodesCount implements BlockGroup.FreeInodesCount. -func (bg *BlockGroup64Bit) FreeInodesCount() uint32 { - return (uint32(bg.FreeInodesCountHi) << 16) | uint32(bg.FreeInodesCountLo) -} - -// DirectoryCount implements BlockGroup.DirectoryCount. -func (bg *BlockGroup64Bit) DirectoryCount() uint32 { - return (uint32(bg.UsedDirsCountHi) << 16) | uint32(bg.UsedDirsCountLo) -} - -// UnusedInodeCount implements BlockGroup.UnusedInodeCount. -func (bg *BlockGroup64Bit) UnusedInodeCount() uint32 { - return (uint32(bg.ItableUnusedHi) << 16) | uint32(bg.ItableUnusedLo) -} - -// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum. -func (bg *BlockGroup64Bit) BlockBitmapChecksum() uint32 { - return (uint32(bg.BlockBitmapChecksumHi) << 16) | uint32(bg.BlockBitmapChecksumLo) -} - -// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum. -func (bg *BlockGroup64Bit) InodeBitmapChecksum() uint32 { - return (uint32(bg.InodeBitmapChecksumHi) << 16) | uint32(bg.InodeBitmapChecksumLo) -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/block_group_test.go b/pkg/sentry/fsimpl/ext/disklayout/block_group_test.go deleted file mode 100644 index e4ce484e4..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/block_group_test.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "testing" -) - -// TestBlockGroupSize tests that the block group descriptor structs are of the -// correct size. -func TestBlockGroupSize(t *testing.T) { - var bgSmall BlockGroup32Bit - assertSize(t, &bgSmall, 32) - var bgBig BlockGroup64Bit - assertSize(t, &bgBig, 64) -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/dirent.go b/pkg/sentry/fsimpl/ext/disklayout/dirent.go deleted file mode 100644 index 568c8cb4c..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/dirent.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "gvisor.dev/gvisor/pkg/marshal" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -const ( - // MaxFileName is the maximum length of an ext fs file's name. - MaxFileName = 255 - - // DirentSize is the size of ext dirent structures. - DirentSize = 263 -) - -var ( - // inodeTypeByFileType maps ext4 file types to vfs inode types. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#ftype. - inodeTypeByFileType = map[uint8]fs.InodeType{ - 0: fs.Anonymous, - 1: fs.RegularFile, - 2: fs.Directory, - 3: fs.CharacterDevice, - 4: fs.BlockDevice, - 5: fs.Pipe, - 6: fs.Socket, - 7: fs.Symlink, - } -) - -// The Dirent interface should be implemented by structs representing ext -// directory entries. These are for the linear classical directories which -// just store a list of dirent structs. A directory is a series of data blocks -// where is each data block contains a linear array of dirents. The last entry -// of the block has a record size that takes it to the end of the block. The -// end of the directory is when you read dirInode.Size() bytes from the blocks. -// -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#linear-classic-directories. -type Dirent interface { - marshal.Marshallable - - // Inode returns the absolute inode number of the underlying inode. - // Inode number 0 signifies an unused dirent. - Inode() uint32 - - // RecordSize returns the record length of this dirent on disk. The next - // dirent in the dirent list should be read after these many bytes from - // the current dirent. Must be a multiple of 4. - RecordSize() uint16 - - // FileName returns the name of the file. Can be at most 255 is length. - FileName() string - - // FileType returns the inode type of the underlying inode. This is a - // performance hack so that we do not have to read the underlying inode struct - // to know the type of inode. This will only work when the SbDirentFileType - // feature is set. If not, the second returned value will be false indicating - // that user code has to use the inode mode to extract the file type. - FileType() (fs.InodeType, bool) -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/dirent_new.go b/pkg/sentry/fsimpl/ext/disklayout/dirent_new.go deleted file mode 100644 index 51f9c2946..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/dirent_new.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// DirentNew represents the ext4 directory entry struct. This emulates Linux's -// ext4_dir_entry_2 struct. The FileName can not be more than 255 bytes so we -// only need 8 bits to store the NameLength. As a result, NameLength has been -// shortened and the other 8 bits are used to encode the file type. Use the -// FileTypeRaw field only if the SbDirentFileType feature is set. -// -// Note: This struct can be of variable size on disk. The one described below -// is of maximum size and the FileName beyond NameLength bytes might contain -// garbage. -// -// +marshal -type DirentNew struct { - InodeNumber uint32 - RecordLength uint16 - NameLength uint8 - FileTypeRaw uint8 - FileNameRaw [MaxFileName]byte `marshal:"unaligned"` -} - -// Compiles only if DirentNew implements Dirent. -var _ Dirent = (*DirentNew)(nil) - -// Inode implements Dirent.Inode. -func (d *DirentNew) Inode() uint32 { return d.InodeNumber } - -// RecordSize implements Dirent.RecordSize. -func (d *DirentNew) RecordSize() uint16 { return d.RecordLength } - -// FileName implements Dirent.FileName. -func (d *DirentNew) FileName() string { - return string(d.FileNameRaw[:d.NameLength]) -} - -// FileType implements Dirent.FileType. -func (d *DirentNew) FileType() (fs.InodeType, bool) { - if inodeType, ok := inodeTypeByFileType[d.FileTypeRaw]; ok { - return inodeType, true - } - - panic(fmt.Sprintf("unknown file type %v", d.FileTypeRaw)) -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/dirent_old.go b/pkg/sentry/fsimpl/ext/disklayout/dirent_old.go deleted file mode 100644 index d4b19e086..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/dirent_old.go +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import "gvisor.dev/gvisor/pkg/sentry/fs" - -// DirentOld represents the old directory entry struct which does not contain -// the file type. This emulates Linux's ext4_dir_entry struct. -// -// Note: This struct can be of variable size on disk. The one described below -// is of maximum size and the FileName beyond NameLength bytes might contain -// garbage. -// -// +marshal -type DirentOld struct { - InodeNumber uint32 - RecordLength uint16 - NameLength uint16 - FileNameRaw [MaxFileName]byte `marshal:"unaligned"` -} - -// Compiles only if DirentOld implements Dirent. -var _ Dirent = (*DirentOld)(nil) - -// Inode implements Dirent.Inode. -func (d *DirentOld) Inode() uint32 { return d.InodeNumber } - -// RecordSize implements Dirent.RecordSize. -func (d *DirentOld) RecordSize() uint16 { return d.RecordLength } - -// FileName implements Dirent.FileName. -func (d *DirentOld) FileName() string { - return string(d.FileNameRaw[:d.NameLength]) -} - -// FileType implements Dirent.FileType. -func (d *DirentOld) FileType() (fs.InodeType, bool) { - return fs.Anonymous, false -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/dirent_test.go b/pkg/sentry/fsimpl/ext/disklayout/dirent_test.go deleted file mode 100644 index 3486864dc..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/dirent_test.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "testing" -) - -// TestDirentSize tests that the dirent structs are of the correct -// size. -func TestDirentSize(t *testing.T) { - var dOld DirentOld - assertSize(t, &dOld, DirentSize) - var dNew DirentNew - assertSize(t, &dNew, DirentSize) -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/disklayout.go b/pkg/sentry/fsimpl/ext/disklayout/disklayout.go deleted file mode 100644 index 0834e9ba8..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/disklayout.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package disklayout provides Linux ext file system's disk level structures -// which can be directly read into from the underlying device. Structs aim to -// emulate structures `exactly` how they are layed out on disk. -// -// This library aims to be compatible with all ext(2/3/4) systems so it -// provides a generic interface for all major structures and various -// implementations (for different versions). The user code is responsible for -// using appropriate implementations based on the underlying device. -// -// Interfacing all major structures here serves a few purposes: -// - Abstracts away the complexity of the underlying structure from client -// code. The client only has to figure out versioning on set up and then -// can use these as black boxes and pass it higher up the stack. -// - Having pointer receivers forces the user to use pointers to these -// heavy structs. Hence, prevents the client code from unintentionally -// copying these by value while passing the interface around. -// - Version-based implementation selection is resolved on set up hence -// avoiding per call overhead of choosing implementation. -// - All interface methods are pretty light weight (do not take in any -// parameters by design). Passing pointer arguments to interface methods -// can lead to heap allocation as the compiler won't be able to perform -// escape analysis on an unknown implementation at compile time. -// -// Notes: -// - All structures on disk are in little-endian order. Only jbd2 (journal) -// structures are in big-endian order. -// - All OS dependent fields in these structures will be interpretted using -// the Linux version of that field. -// - The suffix `Lo` in field names stands for lower bits of that field. -// - The suffix `Hi` in field names stands for upper bits of that field. -// - The suffix `Raw` has been added to indicate that the field is not split -// into Lo and Hi fields and also to resolve name collision with the -// respective interface. -package disklayout diff --git a/pkg/sentry/fsimpl/ext/disklayout/extent.go b/pkg/sentry/fsimpl/ext/disklayout/extent.go deleted file mode 100644 index b13999bfc..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/extent.go +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "gvisor.dev/gvisor/pkg/marshal" -) - -// Extents were introduced in ext4 and provide huge performance gains in terms -// data locality and reduced metadata block usage. Extents are organized in -// extent trees. The root node is contained in inode.BlocksRaw. -// -// Terminology: -// - Physical Block: -// Filesystem data block which is addressed normally wrt the entire -// filesystem (addressed with 48 bits). -// -// - File Block: -// Data block containing *only* file data and addressed wrt to the file -// with only 32 bits. The (i)th file block contains file data from -// byte (i * sb.BlockSize()) to ((i+1) * sb.BlockSize()). - -const ( - // ExtentHeaderSize is the size of the header of an extent tree node. - ExtentHeaderSize = 12 - - // ExtentEntrySize is the size of an entry in an extent tree node. - // This size is the same for both leaf and internal nodes. - ExtentEntrySize = 12 - - // ExtentMagic is the magic number which must be present in the header. - ExtentMagic = 0xf30a -) - -// ExtentEntryPair couples an in-memory ExtendNode with the ExtentEntry that -// points to it. We want to cache these structs in memory to avoid repeated -// disk reads. -// -// Note: This struct itself does not represent an on-disk struct. -type ExtentEntryPair struct { - // Entry points to the child node on disk. - Entry ExtentEntry - // Node points to child node in memory. Is nil if the current node is a leaf. - Node *ExtentNode -} - -// ExtentNode represents an extent tree node. For internal nodes, all Entries -// will be ExtendIdxs. For leaf nodes, they will all be Extents. -// -// Note: This struct itself does not represent an on-disk struct. -type ExtentNode struct { - Header ExtentHeader - Entries []ExtentEntryPair -} - -// ExtentEntry represents an extent tree node entry. The entry can either be -// an ExtentIdx or Extent itself. This exists to simplify navigation logic. -type ExtentEntry interface { - marshal.Marshallable - - // FileBlock returns the first file block number covered by this entry. - FileBlock() uint32 - - // PhysicalBlock returns the child physical block that this entry points to. - PhysicalBlock() uint64 -} - -// ExtentHeader emulates the ext4_extent_header struct in ext4. Each extent -// tree node begins with this and is followed by `NumEntries` number of: -// - Extent if `Depth` == 0 -// - ExtentIdx otherwise -// -// +marshal -type ExtentHeader struct { - // Magic in the extent magic number, must be 0xf30a. - Magic uint16 - - // NumEntries indicates the number of valid entries following the header. - NumEntries uint16 - - // MaxEntries that could follow the header. Used while adding entries. - MaxEntries uint16 - - // Height represents the distance of this node from the farthest leaf. Please - // note that Linux incorrectly calls this `Depth` (which means the distance - // of the node from the root). - Height uint16 - _ uint32 -} - -// ExtentIdx emulates the ext4_extent_idx struct in ext4. Only present in -// internal nodes. Sorted in ascending order based on FirstFileBlock since -// Linux does a binary search on this. This points to a block containing the -// child node. -// -// +marshal -type ExtentIdx struct { - FirstFileBlock uint32 - ChildBlockLo uint32 - ChildBlockHi uint16 - _ uint16 -} - -// Compiles only if ExtentIdx implements ExtentEntry. -var _ ExtentEntry = (*ExtentIdx)(nil) - -// FileBlock implements ExtentEntry.FileBlock. -func (ei *ExtentIdx) FileBlock() uint32 { - return ei.FirstFileBlock -} - -// PhysicalBlock implements ExtentEntry.PhysicalBlock. It returns the -// physical block number of the child block. -func (ei *ExtentIdx) PhysicalBlock() uint64 { - return (uint64(ei.ChildBlockHi) << 32) | uint64(ei.ChildBlockLo) -} - -// Extent represents the ext4_extent struct in ext4. Only present in leaf -// nodes. Sorted in ascending order based on FirstFileBlock since Linux does a -// binary search on this. This points to an array of data blocks containing the -// file data. It covers `Length` data blocks starting from `StartBlock`. -// -// +marshal -type Extent struct { - FirstFileBlock uint32 - Length uint16 - StartBlockHi uint16 - StartBlockLo uint32 -} - -// Compiles only if Extent implements ExtentEntry. -var _ ExtentEntry = (*Extent)(nil) - -// FileBlock implements ExtentEntry.FileBlock. -func (e *Extent) FileBlock() uint32 { - return e.FirstFileBlock -} - -// PhysicalBlock implements ExtentEntry.PhysicalBlock. It returns the -// physical block number of the first data block this extent covers. -func (e *Extent) PhysicalBlock() uint64 { - return (uint64(e.StartBlockHi) << 32) | uint64(e.StartBlockLo) -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/extent_test.go b/pkg/sentry/fsimpl/ext/disklayout/extent_test.go deleted file mode 100644 index c96002e19..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/extent_test.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "testing" -) - -// TestExtentSize tests that the extent structs are of the correct -// size. -func TestExtentSize(t *testing.T) { - var h ExtentHeader - assertSize(t, &h, ExtentHeaderSize) - var i ExtentIdx - assertSize(t, &i, ExtentEntrySize) - var e Extent - assertSize(t, &e, ExtentEntrySize) -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/inode.go b/pkg/sentry/fsimpl/ext/disklayout/inode.go deleted file mode 100644 index ef25040a9..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/inode.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/marshal" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/time" -) - -// Special inodes. See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#special-inodes. -const ( - // RootDirInode is the inode number of the root directory inode. - RootDirInode = 2 -) - -// The Inode interface must be implemented by structs representing ext inodes. -// The inode stores all the metadata pertaining to the file (except for the -// file name which is held by the directory entry). It does NOT expose all -// fields and should be extended if need be. -// -// Some file systems (e.g. FAT) use the directory entry to store all this -// information. Ext file systems do not so that they can support hard links. -// However, ext4 cheats a little bit and duplicates the file type in the -// directory entry for performance gains. -// -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#index-nodes. -type Inode interface { - marshal.Marshallable - - // Mode returns the linux file mode which is majorly used to extract - // information like: - // - File permissions (read/write/execute by user/group/others). - // - Sticky, set UID and GID bits. - // - File type. - // - // Masks to extract this information are provided in pkg/abi/linux/file.go. - Mode() linux.FileMode - - // UID returns the owner UID. - UID() auth.KUID - - // GID returns the owner GID. - GID() auth.KGID - - // Size returns the size of the file in bytes. - Size() uint64 - - // InodeSize returns the size of this inode struct in bytes. - // In ext2 and ext3, the inode struct and inode disk record size was fixed at - // 128 bytes. Ext4 makes it possible for the inode struct to be bigger. - // However, accessing any field beyond the 128 bytes marker must be verified - // using this method. - InodeSize() uint16 - - // AccessTime returns the last access time. Shows when the file was last read. - // - // If InExtendedAttr is set, then this should NOT be used because the - // underlying field is used to store the extended attribute value checksum. - AccessTime() time.Time - - // ChangeTime returns the last change time. Shows when the file meta data - // (like permissions) was last changed. - // - // If InExtendedAttr is set, then this should NOT be used because the - // underlying field is used to store the lower 32 bits of the attribute - // value’s reference count. - ChangeTime() time.Time - - // ModificationTime returns the last modification time. Shows when the file - // content was last modified. - // - // If InExtendedAttr is set, then this should NOT be used because - // the underlying field contains the number of the inode that owns the - // extended attribute. - ModificationTime() time.Time - - // DeletionTime returns the deletion time. Inodes are marked as deleted by - // writing to the underlying field. FS tools can restore files until they are - // actually overwritten. - DeletionTime() time.Time - - // LinksCount returns the number of hard links to this inode. - // - // Normally there is an upper limit on the number of hard links: - // - ext2/ext3 = 32,000 - // - ext4 = 65,000 - // - // This implies that an ext4 directory cannot have more than 64,998 - // subdirectories because each subdirectory will have a hard link to the - // directory via the `..` entry. The directory has hard link via the `.` entry - // of its own. And finally the inode is initiated with 1 hard link (itself). - // - // The underlying value is reset to 1 if all the following hold: - // - Inode is a directory. - // - SbDirNlink is enabled. - // - Number of hard links is incremented past 64,999. - // Hard link value of 1 for a directory would indicate that the number of hard - // links is unknown because a directory can have minimum 2 hard links (itself - // and `.` entry). - LinksCount() uint16 - - // Flags returns InodeFlags which represents the inode flags. - Flags() InodeFlags - - // Data returns the underlying inode.i_block array as a slice so it's - // modifiable. This field is special and is used to store various kinds of - // things depending on the filesystem version and inode type. The underlying - // field name in Linux is a little misleading. - // - In ext2/ext3, it contains the block map. - // - In ext4, it contains the extent tree root node. - // - For inline files, it contains the file contents. - // - For symlinks, it contains the link path (if it fits here). - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#the-contents-of-inode-i-block. - Data() []byte -} - -// Inode flags. This is not comprehensive and flags which were not used in -// the Linux kernel have been excluded. -const ( - // InSync indicates that all writes to the file must be synchronous. - InSync = 0x8 - - // InImmutable indicates that this file is immutable. - InImmutable = 0x10 - - // InAppend indicates that this file can only be appended to. - InAppend = 0x20 - - // InNoDump indicates that teh dump(1) utility should not dump this file. - InNoDump = 0x40 - - // InNoAccessTime indicates that the access time of this inode must not be - // updated. - InNoAccessTime = 0x80 - - // InIndex indicates that this directory has hashed indexes. - InIndex = 0x1000 - - // InJournalData indicates that file data must always be written through a - // journal device. - InJournalData = 0x4000 - - // InDirSync indicates that all the directory entiry data must be written - // synchronously. - InDirSync = 0x10000 - - // InTopDir indicates that this inode is at the top of the directory hierarchy. - InTopDir = 0x20000 - - // InHugeFile indicates that this is a huge file. - InHugeFile = 0x40000 - - // InExtents indicates that this inode uses extents. - InExtents = 0x80000 - - // InExtendedAttr indicates that this inode stores a large extended attribute - // value in its data blocks. - InExtendedAttr = 0x200000 - - // InInline indicates that this inode has inline data. - InInline = 0x10000000 - - // InReserved indicates that this inode is reserved for the ext4 library. - InReserved = 0x80000000 -) - -// InodeFlags represents all possible combinations of inode flags. It aims to -// cover the bit masks and provide a more user-friendly interface. -type InodeFlags struct { - Sync bool - Immutable bool - Append bool - NoDump bool - NoAccessTime bool - Index bool - JournalData bool - DirSync bool - TopDir bool - HugeFile bool - Extents bool - ExtendedAttr bool - Inline bool - Reserved bool -} - -// ToInt converts inode flags back to its 32-bit rep. -func (f InodeFlags) ToInt() uint32 { - var res uint32 - - if f.Sync { - res |= InSync - } - if f.Immutable { - res |= InImmutable - } - if f.Append { - res |= InAppend - } - if f.NoDump { - res |= InNoDump - } - if f.NoAccessTime { - res |= InNoAccessTime - } - if f.Index { - res |= InIndex - } - if f.JournalData { - res |= InJournalData - } - if f.DirSync { - res |= InDirSync - } - if f.TopDir { - res |= InTopDir - } - if f.HugeFile { - res |= InHugeFile - } - if f.Extents { - res |= InExtents - } - if f.ExtendedAttr { - res |= InExtendedAttr - } - if f.Inline { - res |= InInline - } - if f.Reserved { - res |= InReserved - } - - return res -} - -// InodeFlagsFromInt converts the integer representation of inode flags to -// a InodeFlags struct. -func InodeFlagsFromInt(f uint32) InodeFlags { - return InodeFlags{ - Sync: f&InSync > 0, - Immutable: f&InImmutable > 0, - Append: f&InAppend > 0, - NoDump: f&InNoDump > 0, - NoAccessTime: f&InNoAccessTime > 0, - Index: f&InIndex > 0, - JournalData: f&InJournalData > 0, - DirSync: f&InDirSync > 0, - TopDir: f&InTopDir > 0, - HugeFile: f&InHugeFile > 0, - Extents: f&InExtents > 0, - ExtendedAttr: f&InExtendedAttr > 0, - Inline: f&InInline > 0, - Reserved: f&InReserved > 0, - } -} - -// These masks define how users can view/modify inode flags. The rest of the -// flags are for internal kernel usage only. -const ( - InUserReadFlagMask = 0x4BDFFF - InUserWriteFlagMask = 0x4B80FF -) diff --git a/pkg/sentry/fsimpl/ext/disklayout/inode_new.go b/pkg/sentry/fsimpl/ext/disklayout/inode_new.go deleted file mode 100644 index a4503f5cf..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/inode_new.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import "gvisor.dev/gvisor/pkg/sentry/kernel/time" - -// InodeNew represents ext4 inode structure which can be bigger than -// OldInodeSize. The actual size of this struct should be determined using -// inode.ExtraInodeSize. Accessing any field here should be verified with the -// actual size. The extra space between the end of the inode struct and end of -// the inode record can be used to store extended attr. -// -// If the TimeExtra fields are in scope, the lower 2 bits of those are used -// to extend their counter part to be 34 bits wide; the rest (upper) 30 bits -// are used to provide nanoscond precision. Hence, these timestamps will now -// overflow in May 2446. -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps. -// -// +marshal -type InodeNew struct { - InodeOld - - ExtraInodeSize uint16 - ChecksumHi uint16 - ChangeTimeExtra uint32 - ModificationTimeExtra uint32 - AccessTimeExtra uint32 - CreationTime uint32 - CreationTimeExtra uint32 - VersionHi uint32 - ProjectID uint32 -} - -// Compiles only if InodeNew implements Inode. -var _ Inode = (*InodeNew)(nil) - -// fromExtraTime decodes the extra time and constructs the kernel time struct -// with nanosecond precision. -func fromExtraTime(lo int32, extra uint32) time.Time { - // See description above InodeNew for format. - seconds := (int64(extra&0x3) << 32) + int64(lo) - nanoseconds := int64(extra >> 2) - return time.FromUnix(seconds, nanoseconds) -} - -// Only override methods which change due to ext4 specific fields. - -// Size implements Inode.Size. -func (in *InodeNew) Size() uint64 { - return (uint64(in.SizeHi) << 32) | uint64(in.SizeLo) -} - -// InodeSize implements Inode.InodeSize. -func (in *InodeNew) InodeSize() uint16 { - return OldInodeSize + in.ExtraInodeSize -} - -// ChangeTime implements Inode.ChangeTime. -func (in *InodeNew) ChangeTime() time.Time { - // Apply new timestamp logic if inode.ChangeTimeExtra is in scope. - if in.ExtraInodeSize >= 8 { - return fromExtraTime(in.ChangeTimeRaw, in.ChangeTimeExtra) - } - - return in.InodeOld.ChangeTime() -} - -// ModificationTime implements Inode.ModificationTime. -func (in *InodeNew) ModificationTime() time.Time { - // Apply new timestamp logic if inode.ModificationTimeExtra is in scope. - if in.ExtraInodeSize >= 12 { - return fromExtraTime(in.ModificationTimeRaw, in.ModificationTimeExtra) - } - - return in.InodeOld.ModificationTime() -} - -// AccessTime implements Inode.AccessTime. -func (in *InodeNew) AccessTime() time.Time { - // Apply new timestamp logic if inode.AccessTimeExtra is in scope. - if in.ExtraInodeSize >= 16 { - return fromExtraTime(in.AccessTimeRaw, in.AccessTimeExtra) - } - - return in.InodeOld.AccessTime() -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/inode_old.go b/pkg/sentry/fsimpl/ext/disklayout/inode_old.go deleted file mode 100644 index e6b28babf..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/inode_old.go +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/time" -) - -const ( - // OldInodeSize is the inode size in ext2/ext3. - OldInodeSize = 128 -) - -// InodeOld implements Inode interface. It emulates ext2/ext3 inode struct. -// Inode struct size and record size are both 128 bytes for this. -// -// All fields representing time are in seconds since the epoch. Which means that -// they will overflow in January 2038. -// -// +marshal -type InodeOld struct { - ModeRaw uint16 - UIDLo uint16 - SizeLo uint32 - - // The time fields are signed integers because they could be negative to - // represent time before the epoch. - AccessTimeRaw int32 - ChangeTimeRaw int32 - ModificationTimeRaw int32 - DeletionTimeRaw int32 - - GIDLo uint16 - LinksCountRaw uint16 - BlocksCountLo uint32 - FlagsRaw uint32 - VersionLo uint32 // This is OS dependent. - DataRaw [60]byte - Generation uint32 - FileACLLo uint32 - SizeHi uint32 - ObsoFaddr uint32 - - // OS dependent fields have been inlined here. - BlocksCountHi uint16 - FileACLHi uint16 - UIDHi uint16 - GIDHi uint16 - ChecksumLo uint16 - _ uint16 -} - -// Compiles only if InodeOld implements Inode. -var _ Inode = (*InodeOld)(nil) - -// Mode implements Inode.Mode. -func (in *InodeOld) Mode() linux.FileMode { return linux.FileMode(in.ModeRaw) } - -// UID implements Inode.UID. -func (in *InodeOld) UID() auth.KUID { - return auth.KUID((uint32(in.UIDHi) << 16) | uint32(in.UIDLo)) -} - -// GID implements Inode.GID. -func (in *InodeOld) GID() auth.KGID { - return auth.KGID((uint32(in.GIDHi) << 16) | uint32(in.GIDLo)) -} - -// Size implements Inode.Size. -func (in *InodeOld) Size() uint64 { - // In ext2/ext3, in.SizeHi did not exist, it was instead named in.DirACL. - return uint64(in.SizeLo) -} - -// InodeSize implements Inode.InodeSize. -func (in *InodeOld) InodeSize() uint16 { return OldInodeSize } - -// AccessTime implements Inode.AccessTime. -func (in *InodeOld) AccessTime() time.Time { - return time.FromUnix(int64(in.AccessTimeRaw), 0) -} - -// ChangeTime implements Inode.ChangeTime. -func (in *InodeOld) ChangeTime() time.Time { - return time.FromUnix(int64(in.ChangeTimeRaw), 0) -} - -// ModificationTime implements Inode.ModificationTime. -func (in *InodeOld) ModificationTime() time.Time { - return time.FromUnix(int64(in.ModificationTimeRaw), 0) -} - -// DeletionTime implements Inode.DeletionTime. -func (in *InodeOld) DeletionTime() time.Time { - return time.FromUnix(int64(in.DeletionTimeRaw), 0) -} - -// LinksCount implements Inode.LinksCount. -func (in *InodeOld) LinksCount() uint16 { return in.LinksCountRaw } - -// Flags implements Inode.Flags. -func (in *InodeOld) Flags() InodeFlags { return InodeFlagsFromInt(in.FlagsRaw) } - -// Data implements Inode.Data. -func (in *InodeOld) Data() []byte { return in.DataRaw[:] } diff --git a/pkg/sentry/fsimpl/ext/disklayout/inode_test.go b/pkg/sentry/fsimpl/ext/disklayout/inode_test.go deleted file mode 100644 index 90744e956..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/inode_test.go +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "fmt" - "strconv" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/kernel/time" -) - -// TestInodeSize tests that the inode structs are of the correct size. -func TestInodeSize(t *testing.T) { - var iOld InodeOld - assertSize(t, &iOld, OldInodeSize) - - // This was updated from 156 bytes to 160 bytes in Oct 2015. - var iNew InodeNew - assertSize(t, &iNew, 160) -} - -// TestTimestampSeconds tests that the seconds part of [a/c/m] timestamps in -// ext4 inode structs are decoded correctly. -// -// These tests are derived from the table under https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps. -func TestTimestampSeconds(t *testing.T) { - type timestampTest struct { - // msbSet tells if the most significant bit of InodeOld.[X]TimeRaw is set. - // If this is set then the 32-bit time is negative. - msbSet bool - - // lowerBound tells if we should take the lowest possible value of - // InodeOld.[X]TimeRaw while satisfying test.msbSet condition. If set to - // false it tells to take the highest possible value. - lowerBound bool - - // extraBits is InodeNew.[X]TimeExtra. - extraBits uint32 - - // want is the kernel time struct that is expected. - want time.Time - } - - tests := []timestampTest{ - // 1901-12-13 - { - msbSet: true, - lowerBound: true, - extraBits: 0, - want: time.FromUnix(int64(-0x80000000), 0), - }, - - // 1969-12-31 - { - msbSet: true, - lowerBound: false, - extraBits: 0, - want: time.FromUnix(int64(-1), 0), - }, - - // 1970-01-01 - { - msbSet: false, - lowerBound: true, - extraBits: 0, - want: time.FromUnix(int64(0), 0), - }, - - // 2038-01-19 - { - msbSet: false, - lowerBound: false, - extraBits: 0, - want: time.FromUnix(int64(0x7fffffff), 0), - }, - - // 2038-01-19 - { - msbSet: true, - lowerBound: true, - extraBits: 1, - want: time.FromUnix(int64(0x80000000), 0), - }, - - // 2106-02-07 - { - msbSet: true, - lowerBound: false, - extraBits: 1, - want: time.FromUnix(int64(0xffffffff), 0), - }, - - // 2106-02-07 - { - msbSet: false, - lowerBound: true, - extraBits: 1, - want: time.FromUnix(int64(0x100000000), 0), - }, - - // 2174-02-25 - { - msbSet: false, - lowerBound: false, - extraBits: 1, - want: time.FromUnix(int64(0x17fffffff), 0), - }, - - // 2174-02-25 - { - msbSet: true, - lowerBound: true, - extraBits: 2, - want: time.FromUnix(int64(0x180000000), 0), - }, - - // 2242-03-16 - { - msbSet: true, - lowerBound: false, - extraBits: 2, - want: time.FromUnix(int64(0x1ffffffff), 0), - }, - - // 2242-03-16 - { - msbSet: false, - lowerBound: true, - extraBits: 2, - want: time.FromUnix(int64(0x200000000), 0), - }, - - // 2310-04-04 - { - msbSet: false, - lowerBound: false, - extraBits: 2, - want: time.FromUnix(int64(0x27fffffff), 0), - }, - - // 2310-04-04 - { - msbSet: true, - lowerBound: true, - extraBits: 3, - want: time.FromUnix(int64(0x280000000), 0), - }, - - // 2378-04-22 - { - msbSet: true, - lowerBound: false, - extraBits: 3, - want: time.FromUnix(int64(0x2ffffffff), 0), - }, - - // 2378-04-22 - { - msbSet: false, - lowerBound: true, - extraBits: 3, - want: time.FromUnix(int64(0x300000000), 0), - }, - - // 2446-05-10 - { - msbSet: false, - lowerBound: false, - extraBits: 3, - want: time.FromUnix(int64(0x37fffffff), 0), - }, - } - - lowerMSB0 := int32(0) // binary: 00000000 00000000 00000000 00000000 - upperMSB0 := int32(0x7fffffff) // binary: 01111111 11111111 11111111 11111111 - lowerMSB1 := int32(-0x80000000) // binary: 10000000 00000000 00000000 00000000 - upperMSB1 := int32(-1) // binary: 11111111 11111111 11111111 11111111 - - get32BitTime := func(test timestampTest) int32 { - if test.msbSet { - if test.lowerBound { - return lowerMSB1 - } - - return upperMSB1 - } - - if test.lowerBound { - return lowerMSB0 - } - - return upperMSB0 - } - - getTestName := func(test timestampTest) string { - return fmt.Sprintf( - "Tests time decoding with epoch bits 0b%s and 32-bit raw time: MSB set=%t, lower bound=%t", - strconv.FormatInt(int64(test.extraBits), 2), - test.msbSet, - test.lowerBound, - ) - } - - for _, test := range tests { - t.Run(getTestName(test), func(t *testing.T) { - if got := fromExtraTime(get32BitTime(test), test.extraBits); got != test.want { - t.Errorf("Expected: %v, Got: %v", test.want, got) - } - }) - } -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock.go b/pkg/sentry/fsimpl/ext/disklayout/superblock.go deleted file mode 100644 index 70948ebe9..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/superblock.go +++ /dev/null @@ -1,477 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "gvisor.dev/gvisor/pkg/marshal" -) - -const ( - // SbOffset is the absolute offset at which the superblock is placed. - SbOffset = 1024 -) - -// SuperBlock should be implemented by structs representing the ext superblock. -// The superblock holds a lot of information about the enclosing filesystem. -// This interface aims to provide access methods to important information held -// by the superblock. It does NOT expose all fields of the superblock, only the -// ones necessary. This can be expanded when need be. -// -// Location and replication: -// - The superblock is located at offset 1024 in block group 0. -// - Redundant copies of the superblock and group descriptors are kept in -// all groups if SbSparse feature flag is NOT set. If it is set, the -// replicas only exist in groups whose group number is either 0 or a -// power of 3, 5, or 7. -// - There is also a sparse superblock feature v2 in which there are just -// two replicas saved in the block groups pointed by sb.s_backup_bgs. -// -// Replicas should eventually be updated if the superblock is updated. -// -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#super-block. -type SuperBlock interface { - marshal.Marshallable - - // InodesCount returns the total number of inodes in this filesystem. - InodesCount() uint32 - - // BlocksCount returns the total number of data blocks in this filesystem. - BlocksCount() uint64 - - // FreeBlocksCount returns the number of free blocks in this filesystem. - FreeBlocksCount() uint64 - - // FreeInodesCount returns the number of free inodes in this filesystem. - FreeInodesCount() uint32 - - // MountCount returns the number of mounts since the last fsck. - MountCount() uint16 - - // MaxMountCount returns the number of mounts allowed beyond which a fsck is - // needed. - MaxMountCount() uint16 - - // FirstDataBlock returns the absolute block number of the first data block, - // which contains the super block itself. - // - // If the filesystem has 1kb data blocks then this should return 1. For all - // other configurations, this typically returns 0. - FirstDataBlock() uint32 - - // BlockSize returns the size of one data block in this filesystem. - // This can be calculated by 2^(10 + sb.s_log_block_size). This ensures that - // the smallest block size is 1kb. - BlockSize() uint64 - - // BlocksPerGroup returns the number of data blocks in a block group. - BlocksPerGroup() uint32 - - // ClusterSize returns block cluster size (set during mkfs time by admin). - // This can be calculated by 2^(10 + sb.s_log_cluster_size). This ensures that - // the smallest cluster size is 1kb. - // - // sb.s_log_cluster_size must equal sb.s_log_block_size if bigalloc feature - // is NOT set and consequently BlockSize() = ClusterSize() in that case. - ClusterSize() uint64 - - // ClustersPerGroup returns: - // - number of clusters per group if bigalloc is enabled. - // - BlocksPerGroup() otherwise. - ClustersPerGroup() uint32 - - // InodeSize returns the size of the inode disk record size in bytes. Use this - // to iterate over inode arrays on disk. - // - // In ext2 and ext3: - // - Each inode had a disk record of 128 bytes. - // - The inode struct size was fixed at 128 bytes. - // - // In ext4 its possible to allocate larger on-disk inodes: - // - Inode disk record size = sb.s_inode_size (function return value). - // = 256 (default) - // - Inode struct size = 128 + inode.i_extra_isize. - // = 128 + 32 = 160 (default) - InodeSize() uint16 - - // InodesPerGroup returns the number of inodes in a block group. - InodesPerGroup() uint32 - - // BgDescSize returns the size of the block group descriptor struct. - // - // In ext2, ext3, ext4 (without 64-bit feature), the block group descriptor - // is only 32 bytes long. - // In ext4 with 64-bit feature, the block group descriptor expands to AT LEAST - // 64 bytes. It might be bigger than that. - BgDescSize() uint16 - - // CompatibleFeatures returns the CompatFeatures struct which holds all the - // compatible features this fs supports. - CompatibleFeatures() CompatFeatures - - // IncompatibleFeatures returns the CompatFeatures struct which holds all the - // incompatible features this fs supports. - IncompatibleFeatures() IncompatFeatures - - // ReadOnlyCompatibleFeatures returns the CompatFeatures struct which holds all the - // readonly compatible features this fs supports. - ReadOnlyCompatibleFeatures() RoCompatFeatures - - // Magic() returns the magic signature which must be 0xef53. - Magic() uint16 - - // Revision returns the superblock revision. Superblock struct fields from - // offset 0x54 till 0x150 should only be used if superblock has DynamicRev. - Revision() SbRevision -} - -// SbRevision is the type for superblock revisions. -type SbRevision uint32 - -// Super block revisions. -const ( - // OldRev is the good old (original) format. - OldRev SbRevision = 0 - - // DynamicRev is v2 format w/ dynamic inode sizes. - DynamicRev SbRevision = 1 -) - -// Superblock compatible features. -// This is not exhaustive, unused features are not listed. -const ( - // SbDirPrealloc indicates directory preallocation. - SbDirPrealloc = 0x1 - - // SbHasJournal indicates the presence of a journal. jbd2 should only work - // with this being set. - SbHasJournal = 0x4 - - // SbExtAttr indicates extended attributes support. - SbExtAttr = 0x8 - - // SbResizeInode indicates that the fs has reserved GDT blocks (right after - // group descriptors) for fs expansion. - SbResizeInode = 0x10 - - // SbDirIndex indicates that the fs has directory indices. - SbDirIndex = 0x20 - - // SbSparseV2 stands for Sparse superblock version 2. - SbSparseV2 = 0x200 -) - -// CompatFeatures represents a superblock's compatible feature set. If the -// kernel does not understand any of these feature, it can still read/write -// to this fs. -type CompatFeatures struct { - DirPrealloc bool - HasJournal bool - ExtAttr bool - ResizeInode bool - DirIndex bool - SparseV2 bool -} - -// ToInt converts superblock compatible features back to its 32-bit rep. -func (f CompatFeatures) ToInt() uint32 { - var res uint32 - - if f.DirPrealloc { - res |= SbDirPrealloc - } - if f.HasJournal { - res |= SbHasJournal - } - if f.ExtAttr { - res |= SbExtAttr - } - if f.ResizeInode { - res |= SbResizeInode - } - if f.DirIndex { - res |= SbDirIndex - } - if f.SparseV2 { - res |= SbSparseV2 - } - - return res -} - -// CompatFeaturesFromInt converts the integer representation of superblock -// compatible features to CompatFeatures struct. -func CompatFeaturesFromInt(f uint32) CompatFeatures { - return CompatFeatures{ - DirPrealloc: f&SbDirPrealloc > 0, - HasJournal: f&SbHasJournal > 0, - ExtAttr: f&SbExtAttr > 0, - ResizeInode: f&SbResizeInode > 0, - DirIndex: f&SbDirIndex > 0, - SparseV2: f&SbSparseV2 > 0, - } -} - -// Superblock incompatible features. -// This is not exhaustive, unused features are not listed. -const ( - // SbDirentFileType indicates that directory entries record the file type. - // We should use struct DirentNew for dirents then. - SbDirentFileType = 0x2 - - // SbRecovery indicates that the filesystem needs recovery. - SbRecovery = 0x4 - - // SbJournalDev indicates that the filesystem has a separate journal device. - SbJournalDev = 0x8 - - // SbMetaBG indicates that the filesystem is using Meta block groups. Moves - // the group descriptors from the congested first block group into the first - // group of each metablock group to increase the maximum block groups limit - // and hence support much larger filesystems. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#meta-block-groups. - SbMetaBG = 0x10 - - // SbExtents indicates that the filesystem uses extents. Must be set in ext4 - // filesystems. - SbExtents = 0x40 - - // SbIs64Bit indicates that this filesystem addresses blocks with 64-bits. - // Hence can support 2^64 data blocks. - SbIs64Bit = 0x80 - - // SbMMP indicates that this filesystem has multiple mount protection. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#multiple-mount-protection. - SbMMP = 0x100 - - // SbFlexBg indicates that this filesystem has flexible block groups. Several - // block groups are tied into one logical block group so that all the metadata - // for the block groups (bitmaps and inode tables) are close together for - // faster loading. Consequently, large files will be continuous on disk. - // However, this does not affect the placement of redundant superblocks and - // group descriptors. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#flexible-block-groups. - SbFlexBg = 0x200 - - // SbLargeDir shows that large directory enabled. Directory htree can be 3 - // levels deep. Directory htrees are allowed to be 2 levels deep otherwise. - SbLargeDir = 0x4000 - - // SbInlineData allows inline data in inodes for really small files. - SbInlineData = 0x8000 - - // SbEncrypted indicates that this fs contains encrypted inodes. - SbEncrypted = 0x10000 -) - -// IncompatFeatures represents a superblock's incompatible feature set. If the -// kernel does not understand any of these feature, it should refuse to mount. -type IncompatFeatures struct { - DirentFileType bool - Recovery bool - JournalDev bool - MetaBG bool - Extents bool - Is64Bit bool - MMP bool - FlexBg bool - LargeDir bool - InlineData bool - Encrypted bool -} - -// ToInt converts superblock incompatible features back to its 32-bit rep. -func (f IncompatFeatures) ToInt() uint32 { - var res uint32 - - if f.DirentFileType { - res |= SbDirentFileType - } - if f.Recovery { - res |= SbRecovery - } - if f.JournalDev { - res |= SbJournalDev - } - if f.MetaBG { - res |= SbMetaBG - } - if f.Extents { - res |= SbExtents - } - if f.Is64Bit { - res |= SbIs64Bit - } - if f.MMP { - res |= SbMMP - } - if f.FlexBg { - res |= SbFlexBg - } - if f.LargeDir { - res |= SbLargeDir - } - if f.InlineData { - res |= SbInlineData - } - if f.Encrypted { - res |= SbEncrypted - } - - return res -} - -// IncompatFeaturesFromInt converts the integer representation of superblock -// incompatible features to IncompatFeatures struct. -func IncompatFeaturesFromInt(f uint32) IncompatFeatures { - return IncompatFeatures{ - DirentFileType: f&SbDirentFileType > 0, - Recovery: f&SbRecovery > 0, - JournalDev: f&SbJournalDev > 0, - MetaBG: f&SbMetaBG > 0, - Extents: f&SbExtents > 0, - Is64Bit: f&SbIs64Bit > 0, - MMP: f&SbMMP > 0, - FlexBg: f&SbFlexBg > 0, - LargeDir: f&SbLargeDir > 0, - InlineData: f&SbInlineData > 0, - Encrypted: f&SbEncrypted > 0, - } -} - -// Superblock readonly compatible features. -// This is not exhaustive, unused features are not listed. -const ( - // SbSparse indicates sparse superblocks. Only groups with number either 0 or - // a power of 3, 5, or 7 will have redundant copies of the superblock and - // block descriptors. - SbSparse = 0x1 - - // SbLargeFile indicates that this fs has been used to store a file >= 2GiB. - SbLargeFile = 0x2 - - // SbHugeFile indicates that this fs contains files whose sizes are - // represented in units of logicals blocks, not 512-byte sectors. - SbHugeFile = 0x8 - - // SbGdtCsum indicates that group descriptors have checksums. - SbGdtCsum = 0x10 - - // SbDirNlink indicates that the new subdirectory limit is 64,999. Ext3 has a - // 32,000 subdirectory limit. - SbDirNlink = 0x20 - - // SbExtraIsize indicates that large inodes exist on this filesystem. - SbExtraIsize = 0x40 - - // SbHasSnapshot indicates the existence of a snapshot. - SbHasSnapshot = 0x80 - - // SbQuota enables usage tracking for all quota types. - SbQuota = 0x100 - - // SbBigalloc maps to the bigalloc feature. When set, the minimum allocation - // unit becomes a cluster rather than a data block. Then block bitmaps track - // clusters, not data blocks. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#bigalloc. - SbBigalloc = 0x200 - - // SbMetadataCsum indicates that the fs supports metadata checksumming. - SbMetadataCsum = 0x400 - - // SbReadOnly marks this filesystem as readonly. Should refuse to mount in - // read/write mode. - SbReadOnly = 0x1000 -) - -// RoCompatFeatures represents a superblock's readonly compatible feature set. -// If the kernel does not understand any of these feature, it can still mount -// readonly. But if the user wants to mount read/write, the kernel should -// refuse to mount. -type RoCompatFeatures struct { - Sparse bool - LargeFile bool - HugeFile bool - GdtCsum bool - DirNlink bool - ExtraIsize bool - HasSnapshot bool - Quota bool - Bigalloc bool - MetadataCsum bool - ReadOnly bool -} - -// ToInt converts superblock readonly compatible features to its 32-bit rep. -func (f RoCompatFeatures) ToInt() uint32 { - var res uint32 - - if f.Sparse { - res |= SbSparse - } - if f.LargeFile { - res |= SbLargeFile - } - if f.HugeFile { - res |= SbHugeFile - } - if f.GdtCsum { - res |= SbGdtCsum - } - if f.DirNlink { - res |= SbDirNlink - } - if f.ExtraIsize { - res |= SbExtraIsize - } - if f.HasSnapshot { - res |= SbHasSnapshot - } - if f.Quota { - res |= SbQuota - } - if f.Bigalloc { - res |= SbBigalloc - } - if f.MetadataCsum { - res |= SbMetadataCsum - } - if f.ReadOnly { - res |= SbReadOnly - } - - return res -} - -// RoCompatFeaturesFromInt converts the integer representation of superblock -// readonly compatible features to RoCompatFeatures struct. -func RoCompatFeaturesFromInt(f uint32) RoCompatFeatures { - return RoCompatFeatures{ - Sparse: f&SbSparse > 0, - LargeFile: f&SbLargeFile > 0, - HugeFile: f&SbHugeFile > 0, - GdtCsum: f&SbGdtCsum > 0, - DirNlink: f&SbDirNlink > 0, - ExtraIsize: f&SbExtraIsize > 0, - HasSnapshot: f&SbHasSnapshot > 0, - Quota: f&SbQuota > 0, - Bigalloc: f&SbBigalloc > 0, - MetadataCsum: f&SbMetadataCsum > 0, - ReadOnly: f&SbReadOnly > 0, - } -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock_32.go b/pkg/sentry/fsimpl/ext/disklayout/superblock_32.go deleted file mode 100644 index 4dc6080fb..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/superblock_32.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// SuperBlock32Bit implements SuperBlock and represents the 32-bit version of -// the ext4_super_block struct in fs/ext4/ext4.h. Should be used only if -// RevLevel = DynamicRev and 64-bit feature is disabled. -// -// +marshal -type SuperBlock32Bit struct { - // We embed the old superblock struct here because the 32-bit version is just - // an extension of the old version. - SuperBlockOld - - FirstInode uint32 - InodeSizeRaw uint16 - BlockGroupNumber uint16 - FeatureCompat uint32 - FeatureIncompat uint32 - FeatureRoCompat uint32 - UUID [16]byte - VolumeName [16]byte - LastMounted [64]byte - AlgoUsageBitmap uint32 - PreallocBlocks uint8 - PreallocDirBlocks uint8 - ReservedGdtBlocks uint16 - JournalUUID [16]byte - JournalInum uint32 - JournalDev uint32 - LastOrphan uint32 - HashSeed [4]uint32 - DefaultHashVersion uint8 - JnlBackupType uint8 - BgDescSizeRaw uint16 - DefaultMountOpts uint32 - FirstMetaBg uint32 - MkfsTime uint32 - JnlBlocks [17]uint32 -} - -// Compiles only if SuperBlock32Bit implements SuperBlock. -var _ SuperBlock = (*SuperBlock32Bit)(nil) - -// Only override methods which change based on the additional fields above. -// Not overriding SuperBlock.BgDescSize because it would still return 32 here. - -// InodeSize implements SuperBlock.InodeSize. -func (sb *SuperBlock32Bit) InodeSize() uint16 { - return sb.InodeSizeRaw -} - -// CompatibleFeatures implements SuperBlock.CompatibleFeatures. -func (sb *SuperBlock32Bit) CompatibleFeatures() CompatFeatures { - return CompatFeaturesFromInt(sb.FeatureCompat) -} - -// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures. -func (sb *SuperBlock32Bit) IncompatibleFeatures() IncompatFeatures { - return IncompatFeaturesFromInt(sb.FeatureIncompat) -} - -// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures. -func (sb *SuperBlock32Bit) ReadOnlyCompatibleFeatures() RoCompatFeatures { - return RoCompatFeaturesFromInt(sb.FeatureRoCompat) -} diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock_64.go b/pkg/sentry/fsimpl/ext/disklayout/superblock_64.go deleted file mode 100644 index 2c9039327..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/superblock_64.go +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// SuperBlock64Bit implements SuperBlock and represents the 64-bit version of -// the ext4_super_block struct in fs/ext4/ext4.h. This sums up to be exactly -// 1024 bytes (smallest possible block size) and hence the superblock always -// fits in no more than one data block. Should only be used when the 64-bit -// feature is set. -// -// +marshal -type SuperBlock64Bit struct { - // We embed the 32-bit struct here because 64-bit version is just an extension - // of the 32-bit version. - SuperBlock32Bit - - BlocksCountHi uint32 - ReservedBlocksCountHi uint32 - FreeBlocksCountHi uint32 - MinInodeSize uint16 - WantInodeSize uint16 - Flags uint32 - RaidStride uint16 - MmpInterval uint16 - MmpBlock uint64 - RaidStripeWidth uint32 - LogGroupsPerFlex uint8 - ChecksumType uint8 - _ uint16 - KbytesWritten uint64 - SnapshotInum uint32 - SnapshotID uint32 - SnapshotRsrvBlocksCount uint64 - SnapshotList uint32 - ErrorCount uint32 - FirstErrorTime uint32 - FirstErrorInode uint32 - FirstErrorBlock uint64 - FirstErrorFunction [32]byte - FirstErrorLine uint32 - LastErrorTime uint32 - LastErrorInode uint32 - LastErrorLine uint32 - LastErrorBlock uint64 - LastErrorFunction [32]byte - MountOpts [64]byte - UserQuotaInum uint32 - GroupQuotaInum uint32 - OverheadBlocks uint32 - BackupBgs [2]uint32 - EncryptAlgos [4]uint8 - EncryptPwSalt [16]uint8 - LostFoundInode uint32 - ProjectQuotaInode uint32 - ChecksumSeed uint32 - WtimeHi uint8 - MtimeHi uint8 - MkfsTimeHi uint8 - LastCheckHi uint8 - FirstErrorTimeHi uint8 - LastErrorTimeHi uint8 - _ [2]uint8 - Encoding uint16 - EncodingFlags uint16 - _ [95]uint32 - Checksum uint32 -} - -// Compiles only if SuperBlock64Bit implements SuperBlock. -var _ SuperBlock = (*SuperBlock64Bit)(nil) - -// Only override methods which change based on the 64-bit feature. - -// BlocksCount implements SuperBlock.BlocksCount. -func (sb *SuperBlock64Bit) BlocksCount() uint64 { - return (uint64(sb.BlocksCountHi) << 32) | uint64(sb.BlocksCountLo) -} - -// FreeBlocksCount implements SuperBlock.FreeBlocksCount. -func (sb *SuperBlock64Bit) FreeBlocksCount() uint64 { - return (uint64(sb.FreeBlocksCountHi) << 32) | uint64(sb.FreeBlocksCountLo) -} - -// BgDescSize implements SuperBlock.BgDescSize. -func (sb *SuperBlock64Bit) BgDescSize() uint16 { return sb.BgDescSizeRaw } diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock_old.go b/pkg/sentry/fsimpl/ext/disklayout/superblock_old.go deleted file mode 100644 index e4709f23c..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/superblock_old.go +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// SuperBlockOld implements SuperBlock and represents the old version of the -// superblock struct. Should be used only if RevLevel = OldRev. -// -// +marshal -type SuperBlockOld struct { - InodesCountRaw uint32 - BlocksCountLo uint32 - ReservedBlocksCount uint32 - FreeBlocksCountLo uint32 - FreeInodesCountRaw uint32 - FirstDataBlockRaw uint32 - LogBlockSize uint32 - LogClusterSize uint32 - BlocksPerGroupRaw uint32 - ClustersPerGroupRaw uint32 - InodesPerGroupRaw uint32 - Mtime uint32 - Wtime uint32 - MountCountRaw uint16 - MaxMountCountRaw uint16 - MagicRaw uint16 - State uint16 - Errors uint16 - MinorRevLevel uint16 - LastCheck uint32 - CheckInterval uint32 - CreatorOS uint32 - RevLevel uint32 - DefResUID uint16 - DefResGID uint16 -} - -// Compiles only if SuperBlockOld implements SuperBlock. -var _ SuperBlock = (*SuperBlockOld)(nil) - -// InodesCount implements SuperBlock.InodesCount. -func (sb *SuperBlockOld) InodesCount() uint32 { return sb.InodesCountRaw } - -// BlocksCount implements SuperBlock.BlocksCount. -func (sb *SuperBlockOld) BlocksCount() uint64 { return uint64(sb.BlocksCountLo) } - -// FreeBlocksCount implements SuperBlock.FreeBlocksCount. -func (sb *SuperBlockOld) FreeBlocksCount() uint64 { return uint64(sb.FreeBlocksCountLo) } - -// FreeInodesCount implements SuperBlock.FreeInodesCount. -func (sb *SuperBlockOld) FreeInodesCount() uint32 { return sb.FreeInodesCountRaw } - -// MountCount implements SuperBlock.MountCount. -func (sb *SuperBlockOld) MountCount() uint16 { return sb.MountCountRaw } - -// MaxMountCount implements SuperBlock.MaxMountCount. -func (sb *SuperBlockOld) MaxMountCount() uint16 { return sb.MaxMountCountRaw } - -// FirstDataBlock implements SuperBlock.FirstDataBlock. -func (sb *SuperBlockOld) FirstDataBlock() uint32 { return sb.FirstDataBlockRaw } - -// BlockSize implements SuperBlock.BlockSize. -func (sb *SuperBlockOld) BlockSize() uint64 { return 1 << (10 + sb.LogBlockSize) } - -// BlocksPerGroup implements SuperBlock.BlocksPerGroup. -func (sb *SuperBlockOld) BlocksPerGroup() uint32 { return sb.BlocksPerGroupRaw } - -// ClusterSize implements SuperBlock.ClusterSize. -func (sb *SuperBlockOld) ClusterSize() uint64 { return 1 << (10 + sb.LogClusterSize) } - -// ClustersPerGroup implements SuperBlock.ClustersPerGroup. -func (sb *SuperBlockOld) ClustersPerGroup() uint32 { return sb.ClustersPerGroupRaw } - -// InodeSize implements SuperBlock.InodeSize. -func (sb *SuperBlockOld) InodeSize() uint16 { return OldInodeSize } - -// InodesPerGroup implements SuperBlock.InodesPerGroup. -func (sb *SuperBlockOld) InodesPerGroup() uint32 { return sb.InodesPerGroupRaw } - -// BgDescSize implements SuperBlock.BgDescSize. -func (sb *SuperBlockOld) BgDescSize() uint16 { return 32 } - -// CompatibleFeatures implements SuperBlock.CompatibleFeatures. -func (sb *SuperBlockOld) CompatibleFeatures() CompatFeatures { return CompatFeatures{} } - -// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures. -func (sb *SuperBlockOld) IncompatibleFeatures() IncompatFeatures { return IncompatFeatures{} } - -// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures. -func (sb *SuperBlockOld) ReadOnlyCompatibleFeatures() RoCompatFeatures { return RoCompatFeatures{} } - -// Magic implements SuperBlock.Magic. -func (sb *SuperBlockOld) Magic() uint16 { return sb.MagicRaw } - -// Revision implements SuperBlock.Revision. -func (sb *SuperBlockOld) Revision() SbRevision { return SbRevision(sb.RevLevel) } diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock_test.go b/pkg/sentry/fsimpl/ext/disklayout/superblock_test.go deleted file mode 100644 index b734b6987..000000000 --- a/pkg/sentry/fsimpl/ext/disklayout/superblock_test.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "testing" -) - -// TestSuperBlockSize tests that the superblock structs are of the correct -// size. -func TestSuperBlockSize(t *testing.T) { - var sbOld SuperBlockOld - assertSize(t, &sbOld, 84) - var sb32 SuperBlock32Bit - assertSize(t, &sb32, 336) - var sb64 SuperBlock64Bit - assertSize(t, &sb64, 1024) -} diff --git a/pkg/sentry/fsimpl/ext/ext.go b/pkg/sentry/fsimpl/ext/ext.go deleted file mode 100644 index 80854b501..000000000 --- a/pkg/sentry/fsimpl/ext/ext.go +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package ext implements readonly ext(2/3/4) filesystems. -package ext - -import ( - "errors" - "fmt" - "io" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// Name is the name of this filesystem. -const Name = "ext" - -// FilesystemType implements vfs.FilesystemType. -// -// +stateify savable -type FilesystemType struct{} - -// getDeviceFd returns an io.ReaderAt to the underlying device. -// Currently there are two ways of mounting an ext(2/3/4) fs: -// 1. Specify a mount with our internal special MountType in the OCI spec. -// 2. Expose the device to the container and mount it from application layer. -func getDeviceFd(source string, opts vfs.GetFilesystemOptions) (io.ReaderAt, error) { - if opts.InternalData == nil { - // User mount call. - // TODO(b/134676337): Open the device specified by `source` and return that. - panic("unimplemented") - } - - // GetFilesystem call originated from within the sentry. - devFd, ok := opts.InternalData.(int) - if !ok { - return nil, errors.New("internal data for ext fs must be an int containing the file descriptor to device") - } - - if devFd < 0 { - return nil, fmt.Errorf("ext device file descriptor is not valid: %d", devFd) - } - - // The fd.ReadWriter returned from fd.NewReadWriter() does not take ownership - // of the file descriptor and hence will not close it when it is garbage - // collected. - return fd.NewReadWriter(devFd), nil -} - -// isCompatible checks if the superblock has feature sets which are compatible. -// We only need to check the superblock incompatible feature set since we are -// mounting readonly. We will also need to check readonly compatible feature -// set when mounting for read/write. -func isCompatible(sb disklayout.SuperBlock) bool { - // Please note that what is being checked is limited based on the fact that we - // are mounting readonly and that we are not journaling. When mounting - // read/write or with a journal, this must be reevaluated. - incompatFeatures := sb.IncompatibleFeatures() - if incompatFeatures.MetaBG { - log.Warningf("ext fs: meta block groups are not supported") - return false - } - if incompatFeatures.MMP { - log.Warningf("ext fs: multiple mount protection is not supported") - return false - } - if incompatFeatures.Encrypted { - log.Warningf("ext fs: encrypted inodes not supported") - return false - } - if incompatFeatures.InlineData { - log.Warningf("ext fs: inline files not supported") - return false - } - return true -} - -// Name implements vfs.FilesystemType.Name. -func (FilesystemType) Name() string { - return Name -} - -// Release implements vfs.FilesystemType.Release. -func (FilesystemType) Release(ctx context.Context) {} - -// GetFilesystem implements vfs.FilesystemType.GetFilesystem. -func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - // TODO(b/134676337): Ensure that the user is mounting readonly. If not, - // EACCESS should be returned according to mount(2). Filesystem independent - // flags (like readonly) are currently not available in pkg/sentry/vfs. - - devMinor, err := vfsObj.GetAnonBlockDevMinor() - if err != nil { - return nil, nil, err - } - - dev, err := getDeviceFd(source, opts) - if err != nil { - return nil, nil, err - } - - fs := filesystem{ - dev: dev, - inodeCache: make(map[uint32]*inode), - devMinor: devMinor, - } - fs.vfsfs.Init(vfsObj, &fsType, &fs) - fs.sb, err = readSuperBlock(dev) - if err != nil { - fs.vfsfs.DecRef(ctx) - return nil, nil, err - } - - if fs.sb.Magic() != linux.EXT_SUPER_MAGIC { - // mount(2) specifies that EINVAL should be returned if the superblock is - // invalid. - fs.vfsfs.DecRef(ctx) - return nil, nil, linuxerr.EINVAL - } - - // Refuse to mount if the filesystem is incompatible. - if !isCompatible(fs.sb) { - fs.vfsfs.DecRef(ctx) - return nil, nil, linuxerr.EINVAL - } - - fs.bgs, err = readBlockGroups(dev, fs.sb) - if err != nil { - fs.vfsfs.DecRef(ctx) - return nil, nil, err - } - - rootInode, err := fs.getOrCreateInodeLocked(disklayout.RootDirInode) - if err != nil { - fs.vfsfs.DecRef(ctx) - return nil, nil, err - } - rootInode.incRef() - - return &fs.vfsfs, &newDentry(rootInode).vfsd, nil -} diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go deleted file mode 100644 index db712e71f..000000000 --- a/pkg/sentry/fsimpl/ext/ext_test.go +++ /dev/null @@ -1,926 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "fmt" - "io" - "os" - "path" - "sort" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/test/testutil" - "gvisor.dev/gvisor/pkg/usermem" -) - -const ( - assetsDir = "pkg/sentry/fsimpl/ext/assets" -) - -var ( - ext2ImagePath = path.Join(assetsDir, "tiny.ext2") - ext3ImagePath = path.Join(assetsDir, "tiny.ext3") - ext4ImagePath = path.Join(assetsDir, "tiny.ext4") -) - -// setUp opens imagePath as an ext Filesystem and returns all necessary -// elements required to run tests. If error is non-nil, it also returns a tear -// down function which must be called after the test is run for clean up. -func setUp(t *testing.T, imagePath string) (context.Context, *vfs.VirtualFilesystem, *vfs.VirtualDentry, func(), error) { - localImagePath, err := testutil.FindFile(imagePath) - if err != nil { - return nil, nil, nil, nil, fmt.Errorf("failed to open local image at path %s: %v", imagePath, err) - } - - f, err := os.Open(localImagePath) - if err != nil { - return nil, nil, nil, nil, err - } - - ctx := contexttest.Context(t) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - t.Fatalf("VFS init: %v", err) - } - vfsObj.MustRegisterFilesystemType("extfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, localImagePath, "extfs", &vfs.MountOptions{ - GetFilesystemOptions: vfs.GetFilesystemOptions{ - InternalData: int(f.Fd()), - }, - }) - if err != nil { - f.Close() - return nil, nil, nil, nil, err - } - - root := mntns.Root() - root.IncRef() - - tearDown := func() { - root.DecRef(ctx) - - if err := f.Close(); err != nil { - t.Fatalf("tearDown failed: %v", err) - } - } - return ctx, vfsObj, &root, tearDown, nil -} - -// TODO(b/134676337): Test vfs.FilesystemImpl.ReadlinkAt and -// vfs.FilesystemImpl.StatFSAt which are not implemented in -// vfs.VirtualFilesystem yet. - -// TestSeek tests vfs.FileDescriptionImpl.Seek functionality. -func TestSeek(t *testing.T) { - type seekTest struct { - name string - image string - path string - } - - tests := []seekTest{ - { - name: "ext4 root dir seek", - image: ext4ImagePath, - path: "/", - }, - { - name: "ext3 root dir seek", - image: ext3ImagePath, - path: "/", - }, - { - name: "ext2 root dir seek", - image: ext2ImagePath, - path: "/", - }, - { - name: "ext4 reg file seek", - image: ext4ImagePath, - path: "/file.txt", - }, - { - name: "ext3 reg file seek", - image: ext3ImagePath, - path: "/file.txt", - }, - { - name: "ext2 reg file seek", - image: ext2ImagePath, - path: "/file.txt", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ctx, vfsfs, root, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - fd, err := vfsfs.OpenAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt failed: %v", err) - } - - if n, err := fd.Seek(ctx, 0, linux.SEEK_SET); n != 0 || err != nil { - t.Errorf("expected seek position 0, got %d and error %v", n, err) - } - - stat, err := fd.Stat(ctx, vfs.StatOptions{}) - if err != nil { - t.Errorf("fd.stat failed for file %s in image %s: %v", test.path, test.image, err) - } - - // We should be able to seek beyond the end of file. - size := int64(stat.Size) - if n, err := fd.Seek(ctx, size, linux.SEEK_SET); n != size || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", size, n, err) - } - - // EINVAL should be returned if the resulting offset is negative. - if _, err := fd.Seek(ctx, -1, linux.SEEK_SET); !linuxerr.Equals(linuxerr.EINVAL, err) { - t.Errorf("expected error EINVAL but got %v", err) - } - - if n, err := fd.Seek(ctx, 3, linux.SEEK_CUR); n != size+3 || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", size+3, n, err) - } - - // Make sure negative offsets work with SEEK_CUR. - if n, err := fd.Seek(ctx, -2, linux.SEEK_CUR); n != size+1 || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", size+1, n, err) - } - - // EINVAL should be returned if the resulting offset is negative. - if _, err := fd.Seek(ctx, -(size + 2), linux.SEEK_CUR); !linuxerr.Equals(linuxerr.EINVAL, err) { - t.Errorf("expected error EINVAL but got %v", err) - } - - // Make sure SEEK_END works with regular files. - if _, ok := fd.Impl().(*regularFileFD); ok { - // Seek back to 0. - if n, err := fd.Seek(ctx, -size, linux.SEEK_END); n != 0 || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", 0, n, err) - } - - // Seek forward beyond EOF. - if n, err := fd.Seek(ctx, 1, linux.SEEK_END); n != size+1 || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", size+1, n, err) - } - - // EINVAL should be returned if the resulting offset is negative. - if _, err := fd.Seek(ctx, -(size + 1), linux.SEEK_END); !linuxerr.Equals(linuxerr.EINVAL, err) { - t.Errorf("expected error EINVAL but got %v", err) - } - } - }) - } -} - -// TestStatAt tests filesystem.StatAt functionality. -func TestStatAt(t *testing.T) { - type statAtTest struct { - name string - image string - path string - want linux.Statx - } - - tests := []statAtTest{ - { - name: "ext4 statx small file", - image: ext4ImagePath, - path: "/file.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13, - }, - }, - { - name: "ext3 statx small file", - image: ext3ImagePath, - path: "/file.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13, - }, - }, - { - name: "ext2 statx small file", - image: ext2ImagePath, - path: "/file.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13, - }, - }, - { - name: "ext4 statx big file", - image: ext4ImagePath, - path: "/bigfile.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13042, - }, - }, - { - name: "ext3 statx big file", - image: ext3ImagePath, - path: "/bigfile.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13042, - }, - }, - { - name: "ext2 statx big file", - image: ext2ImagePath, - path: "/bigfile.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13042, - }, - }, - { - name: "ext4 statx symlink file", - image: ext4ImagePath, - path: "/symlink.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0777 | linux.ModeSymlink, - Size: 8, - }, - }, - { - name: "ext3 statx symlink file", - image: ext3ImagePath, - path: "/symlink.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0777 | linux.ModeSymlink, - Size: 8, - }, - }, - { - name: "ext2 statx symlink file", - image: ext2ImagePath, - path: "/symlink.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0777 | linux.ModeSymlink, - Size: 8, - }, - }, - } - - // Ignore the fields that are not supported by filesystem.StatAt yet and - // those which are likely to change as the image does. - ignoredFields := map[string]bool{ - "Attributes": true, - "AttributesMask": true, - "Atime": true, - "Blocks": true, - "Btime": true, - "Ctime": true, - "DevMajor": true, - "DevMinor": true, - "Ino": true, - "Mask": true, - "Mtime": true, - "RdevMajor": true, - "RdevMinor": true, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ctx, vfsfs, root, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - got, err := vfsfs.StatAt(ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)}, - &vfs.StatOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.StatAt failed for file %s in image %s: %v", test.path, test.image, err) - } - - cmpIgnoreFields := cmp.FilterPath(func(p cmp.Path) bool { - _, ok := ignoredFields[p.String()] - return ok - }, cmp.Ignore()) - if diff := cmp.Diff(got, test.want, cmpIgnoreFields, cmpopts.IgnoreUnexported(linux.Statx{})); diff != "" { - t.Errorf("stat mismatch (-want +got):\n%s", diff) - } - }) - } -} - -// TestRead tests the read functionality for vfs file descriptions. -func TestRead(t *testing.T) { - type readTest struct { - name string - image string - absPath string - } - - tests := []readTest{ - { - name: "ext4 read small file", - image: ext4ImagePath, - absPath: "/file.txt", - }, - { - name: "ext3 read small file", - image: ext3ImagePath, - absPath: "/file.txt", - }, - { - name: "ext2 read small file", - image: ext2ImagePath, - absPath: "/file.txt", - }, - { - name: "ext4 read big file", - image: ext4ImagePath, - absPath: "/bigfile.txt", - }, - { - name: "ext3 read big file", - image: ext3ImagePath, - absPath: "/bigfile.txt", - }, - { - name: "ext2 read big file", - image: ext2ImagePath, - absPath: "/bigfile.txt", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ctx, vfsfs, root, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - fd, err := vfsfs.OpenAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.absPath)}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt failed: %v", err) - } - - // Get a local file descriptor and compare its functionality with a vfs file - // description for the same file. - localFile, err := testutil.FindFile(path.Join(assetsDir, test.absPath)) - if err != nil { - t.Fatalf("testutil.FindFile failed for %s: %v", test.absPath, err) - } - - f, err := os.Open(localFile) - if err != nil { - t.Fatalf("os.Open failed for %s: %v", localFile, err) - } - defer f.Close() - - // Read the entire file by reading one byte repeatedly. Doing this stress - // tests the underlying file reader implementation. - got := make([]byte, 1) - want := make([]byte, 1) - for { - n, err := f.Read(want) - fd.Read(ctx, usermem.BytesIOSequence(got), vfs.ReadOptions{}) - - if diff := cmp.Diff(got, want); diff != "" { - t.Errorf("file data mismatch (-want +got):\n%s", diff) - } - - // Make sure there is no more file data left after getting EOF. - if n == 0 || err == io.EOF { - if n, _ := fd.Read(ctx, usermem.BytesIOSequence(got), vfs.ReadOptions{}); n != 0 { - t.Errorf("extra unexpected file data in file %s in image %s", test.absPath, test.image) - } - - break - } - - if err != nil { - t.Fatalf("read failed: %v", err) - } - } - }) - } -} - -// iterDirentsCb is a simple callback which just keeps adding the dirents to an -// internal list. Implements vfs.IterDirentsCallback. -type iterDirentsCb struct { - dirents []vfs.Dirent -} - -// Compiles only if iterDirentCb implements vfs.IterDirentsCallback. -var _ vfs.IterDirentsCallback = (*iterDirentsCb)(nil) - -// newIterDirentsCb is the iterDirent -func newIterDirentCb() *iterDirentsCb { - return &iterDirentsCb{dirents: make([]vfs.Dirent, 0)} -} - -// Handle implements vfs.IterDirentsCallback.Handle. -func (cb *iterDirentsCb) Handle(dirent vfs.Dirent) error { - cb.dirents = append(cb.dirents, dirent) - return nil -} - -// TestIterDirents tests the FileDescriptionImpl.IterDirents functionality. -func TestIterDirents(t *testing.T) { - type iterDirentTest struct { - name string - image string - path string - want []vfs.Dirent - } - - wantDirents := []vfs.Dirent{ - { - Name: ".", - Type: linux.DT_DIR, - }, - { - Name: "..", - Type: linux.DT_DIR, - }, - { - Name: "lost+found", - Type: linux.DT_DIR, - }, - { - Name: "file.txt", - Type: linux.DT_REG, - }, - { - Name: "bigfile.txt", - Type: linux.DT_REG, - }, - { - Name: "symlink.txt", - Type: linux.DT_LNK, - }, - } - tests := []iterDirentTest{ - { - name: "ext4 root dir iteration", - image: ext4ImagePath, - path: "/", - want: wantDirents, - }, - { - name: "ext3 root dir iteration", - image: ext3ImagePath, - path: "/", - want: wantDirents, - }, - { - name: "ext2 root dir iteration", - image: ext2ImagePath, - path: "/", - want: wantDirents, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ctx, vfsfs, root, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - fd, err := vfsfs.OpenAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt failed: %v", err) - } - - cb := &iterDirentsCb{} - if err = fd.IterDirents(ctx, cb); err != nil { - t.Fatalf("dir fd.IterDirents() failed: %v", err) - } - - sort.Slice(cb.dirents, func(i int, j int) bool { return cb.dirents[i].Name < cb.dirents[j].Name }) - sort.Slice(test.want, func(i int, j int) bool { return test.want[i].Name < test.want[j].Name }) - - // Ignore the inode number and offset of dirents because those are likely to - // change as the underlying image changes. - cmpIgnoreFields := cmp.FilterPath(func(p cmp.Path) bool { - return p.String() == "Ino" || p.String() == "NextOff" - }, cmp.Ignore()) - if diff := cmp.Diff(cb.dirents, test.want, cmpIgnoreFields); diff != "" { - t.Errorf("dirents mismatch (-want +got):\n%s", diff) - } - }) - } -} - -// TestRootDir tests that the root directory inode is correctly initialized and -// returned from setUp. -func TestRootDir(t *testing.T) { - type inodeProps struct { - Mode linux.FileMode - UID auth.KUID - GID auth.KGID - Size uint64 - InodeSize uint16 - Links uint16 - Flags disklayout.InodeFlags - } - - type rootDirTest struct { - name string - image string - wantInode inodeProps - } - - tests := []rootDirTest{ - { - name: "ext4 root dir", - image: ext4ImagePath, - wantInode: inodeProps{ - Mode: linux.ModeDirectory | 0755, - Size: 0x400, - InodeSize: 0x80, - Links: 3, - Flags: disklayout.InodeFlags{Extents: true}, - }, - }, - { - name: "ext3 root dir", - image: ext3ImagePath, - wantInode: inodeProps{ - Mode: linux.ModeDirectory | 0755, - Size: 0x400, - InodeSize: 0x80, - Links: 3, - }, - }, - { - name: "ext2 root dir", - image: ext2ImagePath, - wantInode: inodeProps{ - Mode: linux.ModeDirectory | 0755, - Size: 0x400, - InodeSize: 0x80, - Links: 3, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - _, _, vd, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - d, ok := vd.Dentry().Impl().(*dentry) - if !ok { - t.Fatalf("ext dentry of incorrect type: %T", vd.Dentry().Impl()) - } - - // Offload inode contents into local structs for comparison. - gotInode := inodeProps{ - Mode: d.inode.diskInode.Mode(), - UID: d.inode.diskInode.UID(), - GID: d.inode.diskInode.GID(), - Size: d.inode.diskInode.Size(), - InodeSize: d.inode.diskInode.InodeSize(), - Links: d.inode.diskInode.LinksCount(), - Flags: d.inode.diskInode.Flags(), - } - - if diff := cmp.Diff(gotInode, test.wantInode); diff != "" { - t.Errorf("inode mismatch (-want +got):\n%s", diff) - } - }) - } -} - -// TestFilesystemInit tests that the filesystem superblock and block group -// descriptors are correctly read in and initialized. -func TestFilesystemInit(t *testing.T) { - // sb only contains the immutable properties of the superblock. - type sb struct { - InodesCount uint32 - BlocksCount uint64 - MaxMountCount uint16 - FirstDataBlock uint32 - BlockSize uint64 - BlocksPerGroup uint32 - ClusterSize uint64 - ClustersPerGroup uint32 - InodeSize uint16 - InodesPerGroup uint32 - BgDescSize uint16 - Magic uint16 - Revision disklayout.SbRevision - CompatFeatures disklayout.CompatFeatures - IncompatFeatures disklayout.IncompatFeatures - RoCompatFeatures disklayout.RoCompatFeatures - } - - // bg only contains the immutable properties of the block group descriptor. - type bg struct { - InodeTable uint64 - BlockBitmap uint64 - InodeBitmap uint64 - ExclusionBitmap uint64 - Flags disklayout.BGFlags - } - - type fsInitTest struct { - name string - image string - wantSb sb - wantBgs []bg - } - - tests := []fsInitTest{ - { - name: "ext4 filesystem init", - image: ext4ImagePath, - wantSb: sb{ - InodesCount: 0x10, - BlocksCount: 0x40, - MaxMountCount: 0xffff, - FirstDataBlock: 0x1, - BlockSize: 0x400, - BlocksPerGroup: 0x2000, - ClusterSize: 0x400, - ClustersPerGroup: 0x2000, - InodeSize: 0x80, - InodesPerGroup: 0x10, - BgDescSize: 0x40, - Magic: linux.EXT_SUPER_MAGIC, - Revision: disklayout.DynamicRev, - CompatFeatures: disklayout.CompatFeatures{ - ExtAttr: true, - ResizeInode: true, - DirIndex: true, - }, - IncompatFeatures: disklayout.IncompatFeatures{ - DirentFileType: true, - Extents: true, - Is64Bit: true, - FlexBg: true, - }, - RoCompatFeatures: disklayout.RoCompatFeatures{ - Sparse: true, - LargeFile: true, - HugeFile: true, - DirNlink: true, - ExtraIsize: true, - MetadataCsum: true, - }, - }, - wantBgs: []bg{ - { - InodeTable: 0x23, - BlockBitmap: 0x3, - InodeBitmap: 0x13, - Flags: disklayout.BGFlags{ - InodeZeroed: true, - }, - }, - }, - }, - { - name: "ext3 filesystem init", - image: ext3ImagePath, - wantSb: sb{ - InodesCount: 0x10, - BlocksCount: 0x40, - MaxMountCount: 0xffff, - FirstDataBlock: 0x1, - BlockSize: 0x400, - BlocksPerGroup: 0x2000, - ClusterSize: 0x400, - ClustersPerGroup: 0x2000, - InodeSize: 0x80, - InodesPerGroup: 0x10, - BgDescSize: 0x20, - Magic: linux.EXT_SUPER_MAGIC, - Revision: disklayout.DynamicRev, - CompatFeatures: disklayout.CompatFeatures{ - ExtAttr: true, - ResizeInode: true, - DirIndex: true, - }, - IncompatFeatures: disklayout.IncompatFeatures{ - DirentFileType: true, - }, - RoCompatFeatures: disklayout.RoCompatFeatures{ - Sparse: true, - LargeFile: true, - }, - }, - wantBgs: []bg{ - { - InodeTable: 0x5, - BlockBitmap: 0x3, - InodeBitmap: 0x4, - Flags: disklayout.BGFlags{ - InodeZeroed: true, - }, - }, - }, - }, - { - name: "ext2 filesystem init", - image: ext2ImagePath, - wantSb: sb{ - InodesCount: 0x10, - BlocksCount: 0x40, - MaxMountCount: 0xffff, - FirstDataBlock: 0x1, - BlockSize: 0x400, - BlocksPerGroup: 0x2000, - ClusterSize: 0x400, - ClustersPerGroup: 0x2000, - InodeSize: 0x80, - InodesPerGroup: 0x10, - BgDescSize: 0x20, - Magic: linux.EXT_SUPER_MAGIC, - Revision: disklayout.DynamicRev, - CompatFeatures: disklayout.CompatFeatures{ - ExtAttr: true, - ResizeInode: true, - DirIndex: true, - }, - IncompatFeatures: disklayout.IncompatFeatures{ - DirentFileType: true, - }, - RoCompatFeatures: disklayout.RoCompatFeatures{ - Sparse: true, - LargeFile: true, - }, - }, - wantBgs: []bg{ - { - InodeTable: 0x5, - BlockBitmap: 0x3, - InodeBitmap: 0x4, - Flags: disklayout.BGFlags{ - InodeZeroed: true, - }, - }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - _, _, vd, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - fs, ok := vd.Mount().Filesystem().Impl().(*filesystem) - if !ok { - t.Fatalf("ext filesystem of incorrect type: %T", vd.Mount().Filesystem().Impl()) - } - - // Offload superblock and block group descriptors contents into - // local structs for comparison. - totalFreeInodes := uint32(0) - totalFreeBlocks := uint64(0) - gotSb := sb{ - InodesCount: fs.sb.InodesCount(), - BlocksCount: fs.sb.BlocksCount(), - MaxMountCount: fs.sb.MaxMountCount(), - FirstDataBlock: fs.sb.FirstDataBlock(), - BlockSize: fs.sb.BlockSize(), - BlocksPerGroup: fs.sb.BlocksPerGroup(), - ClusterSize: fs.sb.ClusterSize(), - ClustersPerGroup: fs.sb.ClustersPerGroup(), - InodeSize: fs.sb.InodeSize(), - InodesPerGroup: fs.sb.InodesPerGroup(), - BgDescSize: fs.sb.BgDescSize(), - Magic: fs.sb.Magic(), - Revision: fs.sb.Revision(), - CompatFeatures: fs.sb.CompatibleFeatures(), - IncompatFeatures: fs.sb.IncompatibleFeatures(), - RoCompatFeatures: fs.sb.ReadOnlyCompatibleFeatures(), - } - gotNumBgs := len(fs.bgs) - gotBgs := make([]bg, gotNumBgs) - for i := 0; i < gotNumBgs; i++ { - gotBgs[i].InodeTable = fs.bgs[i].InodeTable() - gotBgs[i].BlockBitmap = fs.bgs[i].BlockBitmap() - gotBgs[i].InodeBitmap = fs.bgs[i].InodeBitmap() - gotBgs[i].ExclusionBitmap = fs.bgs[i].ExclusionBitmap() - gotBgs[i].Flags = fs.bgs[i].Flags() - - totalFreeInodes += fs.bgs[i].FreeInodesCount() - totalFreeBlocks += uint64(fs.bgs[i].FreeBlocksCount()) - } - - if diff := cmp.Diff(gotSb, test.wantSb); diff != "" { - t.Errorf("superblock mismatch (-want +got):\n%s", diff) - } - - if diff := cmp.Diff(gotBgs, test.wantBgs); diff != "" { - t.Errorf("block group descriptors mismatch (-want +got):\n%s", diff) - } - - if diff := cmp.Diff(totalFreeInodes, fs.sb.FreeInodesCount()); diff != "" { - t.Errorf("total free inodes mismatch (-want +got):\n%s", diff) - } - - if diff := cmp.Diff(totalFreeBlocks, fs.sb.FreeBlocksCount()); diff != "" { - t.Errorf("total free blocks mismatch (-want +got):\n%s", diff) - } - }) - } -} diff --git a/pkg/sentry/fsimpl/ext/extent_file.go b/pkg/sentry/fsimpl/ext/extent_file.go deleted file mode 100644 index f449bc8bd..000000000 --- a/pkg/sentry/fsimpl/ext/extent_file.go +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "io" - "sort" - - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" - "gvisor.dev/gvisor/pkg/syserror" -) - -// extentFile is a type of regular file which uses extents to store file data. -// -// +stateify savable -type extentFile struct { - regFile regularFile - - // root is the root extent node. This lives in the 60 byte diskInode.Data(). - // Immutable. - root disklayout.ExtentNode -} - -// Compiles only if extentFile implements io.ReaderAt. -var _ io.ReaderAt = (*extentFile)(nil) - -// newExtentFile is the extent file constructor. It reads the entire extent -// tree into memory. -// TODO(b/134676337): Build extent tree on demand to reduce memory usage. -func newExtentFile(args inodeArgs) (*extentFile, error) { - file := &extentFile{} - file.regFile.impl = file - file.regFile.inode.init(args, &file.regFile) - err := file.buildExtTree() - if err != nil { - return nil, err - } - return file, nil -} - -// buildExtTree builds the extent tree by reading it from disk by doing -// running a simple DFS. It first reads the root node from the inode struct in -// memory. Then it recursively builds the rest of the tree by reading it off -// disk. -// -// Precondition: inode flag InExtents must be set. -func (f *extentFile) buildExtTree() error { - rootNodeData := f.regFile.inode.diskInode.Data() - - f.root.Header.UnmarshalBytes(rootNodeData[:disklayout.ExtentHeaderSize]) - - // Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries. - if f.root.Header.NumEntries > 4 { - // read(2) specifies that EINVAL should be returned if the file is unsuitable - // for reading. - return linuxerr.EINVAL - } - - f.root.Entries = make([]disklayout.ExtentEntryPair, f.root.Header.NumEntries) - for i, off := uint16(0), disklayout.ExtentEntrySize; i < f.root.Header.NumEntries; i, off = i+1, off+disklayout.ExtentEntrySize { - var curEntry disklayout.ExtentEntry - if f.root.Header.Height == 0 { - // Leaf node. - curEntry = &disklayout.Extent{} - } else { - // Internal node. - curEntry = &disklayout.ExtentIdx{} - } - curEntry.UnmarshalBytes(rootNodeData[off : off+disklayout.ExtentEntrySize]) - f.root.Entries[i].Entry = curEntry - } - - // If this node is internal, perform DFS. - if f.root.Header.Height > 0 { - for i := uint16(0); i < f.root.Header.NumEntries; i++ { - var err error - if f.root.Entries[i].Node, err = f.buildExtTreeFromDisk(f.root.Entries[i].Entry); err != nil { - return err - } - } - } - - return nil -} - -// buildExtTreeFromDisk reads the extent tree nodes from disk and recursively -// builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to -// by the ExtentEntry. -func (f *extentFile) buildExtTreeFromDisk(entry disklayout.ExtentEntry) (*disklayout.ExtentNode, error) { - var header disklayout.ExtentHeader - off := entry.PhysicalBlock() * f.regFile.inode.blkSize - err := readFromDisk(f.regFile.inode.fs.dev, int64(off), &header) - if err != nil { - return nil, err - } - - entries := make([]disklayout.ExtentEntryPair, header.NumEntries) - for i, off := uint16(0), off+disklayout.ExtentEntrySize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentEntrySize { - var curEntry disklayout.ExtentEntry - if header.Height == 0 { - // Leaf node. - curEntry = &disklayout.Extent{} - } else { - // Internal node. - curEntry = &disklayout.ExtentIdx{} - } - - err := readFromDisk(f.regFile.inode.fs.dev, int64(off), curEntry) - if err != nil { - return nil, err - } - entries[i].Entry = curEntry - } - - // If this node is internal, perform DFS. - if header.Height > 0 { - for i := uint16(0); i < header.NumEntries; i++ { - var err error - entries[i].Node, err = f.buildExtTreeFromDisk(entries[i].Entry) - if err != nil { - return nil, err - } - } - } - - return &disklayout.ExtentNode{header, entries}, nil -} - -// ReadAt implements io.ReaderAt.ReadAt. -func (f *extentFile) ReadAt(dst []byte, off int64) (int, error) { - if len(dst) == 0 { - return 0, nil - } - - if off < 0 { - return 0, linuxerr.EINVAL - } - - if uint64(off) >= f.regFile.inode.diskInode.Size() { - return 0, io.EOF - } - - n, err := f.read(&f.root, uint64(off), dst) - if n < len(dst) && err == nil { - err = io.EOF - } - return n, err -} - -// read is the recursive step of extentFile.ReadAt which traverses the extent -// tree from the node passed and reads file data. -func (f *extentFile) read(node *disklayout.ExtentNode, off uint64, dst []byte) (int, error) { - // Perform a binary search for the node covering bytes starting at r.fileOff. - // A highly fragmented filesystem can have upto 340 entries and so linear - // search should be avoided. Finds the first entry which does not cover the - // file block we want and subtracts 1 to get the desired index. - fileBlk := uint32(off / f.regFile.inode.blkSize) - n := len(node.Entries) - found := sort.Search(n, func(i int) bool { - return node.Entries[i].Entry.FileBlock() > fileBlk - }) - 1 - - // We should be in this recursive step only if the data we want exists under - // the current node. - if found < 0 { - panic("searching for a file block in an extent entry which does not cover it") - } - - read := 0 - toRead := len(dst) - var curR int - var err error - for i := found; i < n && read < toRead; i++ { - if node.Header.Height == 0 { - curR, err = f.readFromExtent(node.Entries[i].Entry.(*disklayout.Extent), off, dst[read:]) - } else { - curR, err = f.read(node.Entries[i].Node, off, dst[read:]) - } - - read += curR - off += uint64(curR) - if err != nil { - return read, err - } - } - - return read, nil -} - -// readFromExtent reads file data from the extent. It takes advantage of the -// sequential nature of extents and reads file data from multiple blocks in one -// call. -// -// A non-nil error indicates that this is a partial read and there is probably -// more to read from this extent. The caller should propagate the error upward -// and not move to the next extent in the tree. -// -// A subsequent call to extentReader.Read should continue reading from where we -// left off as expected. -func (f *extentFile) readFromExtent(ex *disklayout.Extent, off uint64, dst []byte) (int, error) { - curFileBlk := uint32(off / f.regFile.inode.blkSize) - exFirstFileBlk := ex.FileBlock() - exLastFileBlk := exFirstFileBlk + uint32(ex.Length) // This is exclusive. - - // We should be in this recursive step only if the data we want exists under - // the current extent. - if curFileBlk < exFirstFileBlk || exLastFileBlk <= curFileBlk { - panic("searching for a file block in an extent which does not cover it") - } - - curPhyBlk := uint64(curFileBlk-exFirstFileBlk) + ex.PhysicalBlock() - readStart := curPhyBlk*f.regFile.inode.blkSize + (off % f.regFile.inode.blkSize) - - endPhyBlk := ex.PhysicalBlock() + uint64(ex.Length) - extentEnd := endPhyBlk * f.regFile.inode.blkSize // This is exclusive. - - toRead := int(extentEnd - readStart) - if len(dst) < toRead { - toRead = len(dst) - } - - n, _ := f.regFile.inode.fs.dev.ReadAt(dst[:toRead], int64(readStart)) - if n < toRead { - return n, syserror.EIO - } - return n, nil -} diff --git a/pkg/sentry/fsimpl/ext/extent_test.go b/pkg/sentry/fsimpl/ext/extent_test.go deleted file mode 100644 index 985f76ac0..000000000 --- a/pkg/sentry/fsimpl/ext/extent_test.go +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "bytes" - "math/rand" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" -) - -const ( - // mockExtentBlkSize is the mock block size used for testing. - // No block has more than 1 header + 4 entries. - mockExtentBlkSize = uint64(64) -) - -// The tree described below looks like: -// -// 0.{Head}[Idx][Idx] -// / \ -// / \ -// 1.{Head}[Ext][Ext] 2.{Head}[Idx] -// / | \ -// [Phy] [Phy, Phy] 3.{Head}[Ext] -// | -// [Phy, Phy, Phy] -// -// Legend: -// - Head = ExtentHeader -// - Idx = ExtentIdx -// - Ext = Extent -// - Phy = Physical Block -// -// Please note that ext4 might not construct extent trees looking like this. -// This is purely for testing the tree traversal logic. -var ( - node3 = &disklayout.ExtentNode{ - Header: disklayout.ExtentHeader{ - Magic: disklayout.ExtentMagic, - NumEntries: 1, - MaxEntries: 4, - Height: 0, - }, - Entries: []disklayout.ExtentEntryPair{ - { - Entry: &disklayout.Extent{ - FirstFileBlock: 3, - Length: 3, - StartBlockLo: 6, - }, - Node: nil, - }, - }, - } - - node2 = &disklayout.ExtentNode{ - Header: disklayout.ExtentHeader{ - Magic: disklayout.ExtentMagic, - NumEntries: 1, - MaxEntries: 4, - Height: 1, - }, - Entries: []disklayout.ExtentEntryPair{ - { - Entry: &disklayout.ExtentIdx{ - FirstFileBlock: 3, - ChildBlockLo: 2, - }, - Node: node3, - }, - }, - } - - node1 = &disklayout.ExtentNode{ - Header: disklayout.ExtentHeader{ - Magic: disklayout.ExtentMagic, - NumEntries: 2, - MaxEntries: 4, - Height: 0, - }, - Entries: []disklayout.ExtentEntryPair{ - { - Entry: &disklayout.Extent{ - FirstFileBlock: 0, - Length: 1, - StartBlockLo: 3, - }, - Node: nil, - }, - { - Entry: &disklayout.Extent{ - FirstFileBlock: 1, - Length: 2, - StartBlockLo: 4, - }, - Node: nil, - }, - }, - } - - node0 = &disklayout.ExtentNode{ - Header: disklayout.ExtentHeader{ - Magic: disklayout.ExtentMagic, - NumEntries: 2, - MaxEntries: 4, - Height: 2, - }, - Entries: []disklayout.ExtentEntryPair{ - { - Entry: &disklayout.ExtentIdx{ - FirstFileBlock: 0, - ChildBlockLo: 0, - }, - Node: node1, - }, - { - Entry: &disklayout.ExtentIdx{ - FirstFileBlock: 3, - ChildBlockLo: 1, - }, - Node: node2, - }, - }, - } -) - -// TestExtentReader stress tests extentReader functionality. It performs random -// length reads from all possible positions in the extent tree. -func TestExtentReader(t *testing.T) { - mockExtentFile, want := extentTreeSetUp(t, node0) - n := len(want) - - for from := 0; from < n; from++ { - got := make([]byte, n-from) - - if read, err := mockExtentFile.ReadAt(got, int64(from)); err != nil { - t.Fatalf("file read operation from offset %d to %d only read %d bytes: %v", from, n, read, err) - } - - if diff := cmp.Diff(got, want[from:]); diff != "" { - t.Fatalf("file data from offset %d to %d mismatched (-want +got):\n%s", from, n, diff) - } - } -} - -// TestBuildExtentTree tests the extent tree building logic. -func TestBuildExtentTree(t *testing.T) { - mockExtentFile, _ := extentTreeSetUp(t, node0) - - opt := cmpopts.IgnoreUnexported(disklayout.ExtentIdx{}, disklayout.ExtentHeader{}) - if diff := cmp.Diff(&mockExtentFile.root, node0, opt); diff != "" { - t.Errorf("extent tree mismatch (-want +got):\n%s", diff) - } -} - -// extentTreeSetUp writes the passed extent tree to a mock disk as an extent -// tree. It also constucts a mock extent file with the same tree built in it. -// It also writes random data file data and returns it. -func extentTreeSetUp(t *testing.T, root *disklayout.ExtentNode) (*extentFile, []byte) { - t.Helper() - - mockDisk := make([]byte, mockExtentBlkSize*10) - mockExtentFile := &extentFile{} - args := inodeArgs{ - fs: &filesystem{ - dev: bytes.NewReader(mockDisk), - }, - diskInode: &disklayout.InodeNew{ - InodeOld: disklayout.InodeOld{ - SizeLo: uint32(mockExtentBlkSize) * getNumPhyBlks(root), - }, - }, - blkSize: mockExtentBlkSize, - } - mockExtentFile.regFile.inode.init(args, &mockExtentFile.regFile) - - fileData := writeTree(&mockExtentFile.regFile.inode, mockDisk, node0, mockExtentBlkSize) - - if err := mockExtentFile.buildExtTree(); err != nil { - t.Fatalf("inode.buildExtTree failed: %v", err) - } - return mockExtentFile, fileData -} - -// writeTree writes the tree represented by `root` to the inode and disk. It -// also writes random file data on disk. -func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, mockExtentBlkSize uint64) []byte { - rootData := in.diskInode.Data() - root.Header.MarshalBytes(rootData) - off := root.Header.SizeBytes() - for _, ep := range root.Entries { - ep.Entry.MarshalBytes(rootData[off:]) - off += ep.Entry.SizeBytes() - } - - var fileData []byte - for _, ep := range root.Entries { - if root.Header.Height == 0 { - fileData = append(fileData, writeFileDataToExtent(disk, ep.Entry.(*disklayout.Extent))...) - } else { - fileData = append(fileData, writeTreeToDisk(disk, ep)...) - } - } - return fileData -} - -// writeTreeToDisk is the recursive step for writeTree which writes the tree -// on the disk only. Also writes random file data on disk. -func writeTreeToDisk(disk []byte, curNode disklayout.ExtentEntryPair) []byte { - nodeData := disk[curNode.Entry.PhysicalBlock()*mockExtentBlkSize:] - curNode.Node.Header.MarshalBytes(nodeData) - off := curNode.Node.Header.SizeBytes() - for _, ep := range curNode.Node.Entries { - ep.Entry.MarshalBytes(nodeData[off:]) - off += ep.Entry.SizeBytes() - } - - var fileData []byte - for _, ep := range curNode.Node.Entries { - if curNode.Node.Header.Height == 0 { - fileData = append(fileData, writeFileDataToExtent(disk, ep.Entry.(*disklayout.Extent))...) - } else { - fileData = append(fileData, writeTreeToDisk(disk, ep)...) - } - } - return fileData -} - -// writeFileDataToExtent writes random bytes to the blocks on disk that the -// passed extent points to. -func writeFileDataToExtent(disk []byte, ex *disklayout.Extent) []byte { - phyExStartBlk := ex.PhysicalBlock() - phyExStartOff := phyExStartBlk * mockExtentBlkSize - phyExEndOff := phyExStartOff + uint64(ex.Length)*mockExtentBlkSize - rand.Read(disk[phyExStartOff:phyExEndOff]) - return disk[phyExStartOff:phyExEndOff] -} - -// getNumPhyBlks returns the number of physical blocks covered under the node. -func getNumPhyBlks(node *disklayout.ExtentNode) uint32 { - var res uint32 - for _, ep := range node.Entries { - if node.Header.Height == 0 { - res += uint32(ep.Entry.(*disklayout.Extent).Length) - } else { - res += getNumPhyBlks(ep.Node) - } - } - return res -} diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go deleted file mode 100644 index 2e9033c1d..000000000 --- a/pkg/sentry/fsimpl/ext/file_description.go +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// fileDescription is embedded by ext implementations of -// vfs.FileDescriptionImpl. -type fileDescription struct { - vfsfd vfs.FileDescription - vfs.FileDescriptionDefaultImpl - vfs.LockFD -} - -func (fd *fileDescription) filesystem() *filesystem { - return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) -} - -func (fd *fileDescription) inode() *inode { - return fd.vfsfd.Dentry().Impl().(*dentry).inode -} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - var stat linux.Statx - fd.inode().statTo(&stat) - return stat, nil -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - if opts.Stat.Mask == 0 { - return nil - } - return linuxerr.EPERM -} - -// SetStat implements vfs.FileDescriptionImpl.StatFS. -func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { - var stat linux.Statfs - fd.filesystem().statTo(&stat) - return stat, nil -} - -// Sync implements vfs.FileDescriptionImpl.Sync. -func (fd *fileDescription) Sync(ctx context.Context) error { - return nil -} diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go deleted file mode 100644 index bcc7588da..000000000 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "errors" - "io" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" -) - -var ( - // errResolveDirent indicates that the vfs.ResolvingPath.Component() does - // not exist on the dentry tree but does exist on disk. So it has to be read in - // using the in-memory dirent and added to the dentry tree. Usually indicates - // the need to lock filesystem.mu for writing. - errResolveDirent = errors.New("resolve path component using dirent") -) - -// filesystem implements vfs.FilesystemImpl. -// -// +stateify savable -type filesystem struct { - vfsfs vfs.Filesystem - - // mu serializes changes to the Dentry tree. - mu sync.RWMutex `state:"nosave"` - - // dev represents the underlying fs device. It does not require protection - // because io.ReaderAt permits concurrent read calls to it. It translates to - // the pread syscall which passes on the read request directly to the device - // driver. Device drivers are intelligent in serving multiple concurrent read - // requests in the optimal order (taking locality into consideration). - dev io.ReaderAt - - // inodeCache maps absolute inode numbers to the corresponding Inode struct. - // Inodes should be removed from this once their reference count hits 0. - // - // Protected by mu because most additions (see IterDirents) and all removals - // from this corresponds to a change in the dentry tree. - inodeCache map[uint32]*inode - - // sb represents the filesystem superblock. Immutable after initialization. - sb disklayout.SuperBlock - - // bgs represents all the block group descriptors for the filesystem. - // Immutable after initialization. - bgs []disklayout.BlockGroup - - // devMinor is this filesystem's device minor number. Immutable after - // initialization. - devMinor uint32 -} - -// Compiles only if filesystem implements vfs.FilesystemImpl. -var _ vfs.FilesystemImpl = (*filesystem)(nil) - -// stepLocked resolves rp.Component() in parent directory vfsd. The write -// parameter passed tells if the caller has acquired filesystem.mu for writing -// or not. If set to true, an existing inode on disk can be added to the dentry -// tree if not present already. -// -// stepLocked is loosely analogous to fs/namei.c:walk_component(). -// -// Preconditions: -// * filesystem.mu must be locked (for writing if write param is true). -// * !rp.Done(). -// * inode == vfsd.Impl().(*Dentry).inode. -func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write bool) (*vfs.Dentry, *inode, error) { - if !inode.isDir() { - return nil, nil, linuxerr.ENOTDIR - } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { - return nil, nil, err - } - - for { - name := rp.Component() - if name == "." { - rp.Advance() - return vfsd, inode, nil - } - d := vfsd.Impl().(*dentry) - if name == ".." { - isRoot, err := rp.CheckRoot(ctx, vfsd) - if err != nil { - return nil, nil, err - } - if isRoot || d.parent == nil { - rp.Advance() - return vfsd, inode, nil - } - if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil { - return nil, nil, err - } - rp.Advance() - return &d.parent.vfsd, d.parent.inode, nil - } - - dir := inode.impl.(*directory) - child, ok := dir.childCache[name] - if !ok { - // We may need to instantiate a new dentry for this child. - childDirent, ok := dir.childMap[name] - if !ok { - // The underlying inode does not exist on disk. - return nil, nil, syserror.ENOENT - } - - if !write { - // filesystem.mu must be held for writing to add to the dentry tree. - return nil, nil, errResolveDirent - } - - // Create and add the component's dirent to the dentry tree. - fs := rp.Mount().Filesystem().Impl().(*filesystem) - childInode, err := fs.getOrCreateInodeLocked(childDirent.diskDirent.Inode()) - if err != nil { - return nil, nil, err - } - // incRef because this is being added to the dentry tree. - childInode.incRef() - child = newDentry(childInode) - child.parent = d - child.name = name - dir.childCache[name] = child - } - if err := rp.CheckMount(ctx, &child.vfsd); err != nil { - return nil, nil, err - } - if child.inode.isSymlink() && rp.ShouldFollowSymlink() { - if err := rp.HandleSymlink(child.inode.impl.(*symlink).target); err != nil { - return nil, nil, err - } - continue - } - rp.Advance() - return &child.vfsd, child.inode, nil - } -} - -// walkLocked resolves rp to an existing file. The write parameter -// passed tells if the caller has acquired filesystem.mu for writing or not. -// If set to true, additions can be made to the dentry tree while walking. -// If errResolveDirent is returned, the walk needs to be continued with an -// upgraded filesystem.mu. -// -// walkLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). -// -// Preconditions: -// * filesystem.mu must be locked (for writing if write param is true). -func walkLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Done() { - var err error - vfsd, inode, err = stepLocked(ctx, rp, vfsd, inode, write) - if err != nil { - return nil, nil, err - } - } - if rp.MustBeDir() && !inode.isDir() { - return nil, nil, linuxerr.ENOTDIR - } - return vfsd, inode, nil -} - -// walkParentLocked resolves all but the last path component of rp to an -// existing directory. It does not check that the returned directory is -// searchable by the provider of rp. The write parameter passed tells if the -// caller has acquired filesystem.mu for writing or not. If set to true, -// additions can be made to the dentry tree while walking. -// If errResolveDirent is returned, the walk needs to be continued with an -// upgraded filesystem.mu. -// -// walkParentLocked is loosely analogous to Linux's fs/namei.c:path_parentat(). -// -// Preconditions: -// * filesystem.mu must be locked (for writing if write param is true). -// * !rp.Done(). -func walkParentLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Final() { - var err error - vfsd, inode, err = stepLocked(ctx, rp, vfsd, inode, write) - if err != nil { - return nil, nil, err - } - } - if !inode.isDir() { - return nil, nil, linuxerr.ENOTDIR - } - return vfsd, inode, nil -} - -// walk resolves rp to an existing file. If parent is set to true, it resolves -// the rp till the parent of the last component which should be an existing -// directory. If parent is false then resolves rp entirely. Attemps to resolve -// the path as far as it can with a read lock and upgrades the lock if needed. -func (fs *filesystem) walk(ctx context.Context, rp *vfs.ResolvingPath, parent bool) (*vfs.Dentry, *inode, error) { - var ( - vfsd *vfs.Dentry - inode *inode - err error - ) - - // Try walking with the hopes that all dentries have already been pulled out - // of disk. This reduces congestion (allows concurrent walks). - fs.mu.RLock() - if parent { - vfsd, inode, err = walkParentLocked(ctx, rp, false) - } else { - vfsd, inode, err = walkLocked(ctx, rp, false) - } - fs.mu.RUnlock() - - if err == errResolveDirent { - // Upgrade lock and continue walking. Lock upgrading in the middle of the - // walk is fine as this is a read only filesystem. - fs.mu.Lock() - if parent { - vfsd, inode, err = walkParentLocked(ctx, rp, true) - } else { - vfsd, inode, err = walkLocked(ctx, rp, true) - } - fs.mu.Unlock() - } - - return vfsd, inode, err -} - -// getOrCreateInodeLocked gets the inode corresponding to the inode number passed in. -// It creates a new one with the given inode number if one does not exist. -// The caller must increment the ref count if adding this to the dentry tree. -// -// Precondition: must be holding fs.mu for writing. -func (fs *filesystem) getOrCreateInodeLocked(inodeNum uint32) (*inode, error) { - if in, ok := fs.inodeCache[inodeNum]; ok { - return in, nil - } - - in, err := newInode(fs, inodeNum) - if err != nil { - return nil, err - } - - fs.inodeCache[inodeNum] = in - return in, nil -} - -// statTo writes the statfs fields to the output parameter. -func (fs *filesystem) statTo(stat *linux.Statfs) { - stat.Type = uint64(fs.sb.Magic()) - stat.BlockSize = int64(fs.sb.BlockSize()) - stat.Blocks = fs.sb.BlocksCount() - stat.BlocksFree = fs.sb.FreeBlocksCount() - stat.BlocksAvailable = fs.sb.FreeBlocksCount() - stat.Files = uint64(fs.sb.InodesCount()) - stat.FilesFree = uint64(fs.sb.FreeInodesCount()) - stat.NameLength = disklayout.MaxFileName - stat.FragmentSize = int64(fs.sb.BlockSize()) - // TODO(b/134676337): Set Statfs.Flags and Statfs.FSID. -} - -// AccessAt implements vfs.Filesystem.Impl.AccessAt. -func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { - _, inode, err := fs.walk(ctx, rp, false) - if err != nil { - return err - } - return inode.checkPermissions(rp.Credentials(), ats) -} - -// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. -func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { - vfsd, inode, err := fs.walk(ctx, rp, false) - if err != nil { - return nil, err - } - - if opts.CheckSearchable { - if !inode.isDir() { - return nil, linuxerr.ENOTDIR - } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { - return nil, err - } - } - - inode.incRef() - return vfsd, nil -} - -// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. -func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { - vfsd, inode, err := fs.walk(ctx, rp, true) - if err != nil { - return nil, err - } - inode.incRef() - return vfsd, nil -} - -// OpenAt implements vfs.FilesystemImpl.OpenAt. -func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - vfsd, inode, err := fs.walk(ctx, rp, false) - if err != nil { - return nil, err - } - - // EROFS is returned if write access is needed. - if vfs.MayWriteFileWithOpenFlags(opts.Flags) || opts.Flags&(linux.O_CREAT|linux.O_EXCL|linux.O_TMPFILE) != 0 { - return nil, linuxerr.EROFS - } - return inode.open(rp, vfsd, &opts) -} - -// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. -func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { - _, inode, err := fs.walk(ctx, rp, false) - if err != nil { - return "", err - } - symlink, ok := inode.impl.(*symlink) - if !ok { - return "", linuxerr.EINVAL - } - return symlink.target, nil -} - -// StatAt implements vfs.FilesystemImpl.StatAt. -func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { - _, inode, err := fs.walk(ctx, rp, false) - if err != nil { - return linux.Statx{}, err - } - var stat linux.Statx - inode.statTo(&stat) - return stat, nil -} - -// StatFSAt implements vfs.FilesystemImpl.StatFSAt. -func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { - if _, _, err := fs.walk(ctx, rp, false); err != nil { - return linux.Statfs{}, err - } - - var stat linux.Statfs - fs.statTo(&stat) - return stat, nil -} - -// Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release(ctx context.Context) { - fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) -} - -// Sync implements vfs.FilesystemImpl.Sync. -func (fs *filesystem) Sync(ctx context.Context) error { - // This is a readonly filesystem for now. - return nil -} - -// The vfs.FilesystemImpl functions below return EROFS because their respective -// man pages say that EROFS must be returned if the path resolves to a file on -// this read-only filesystem. - -// LinkAt implements vfs.FilesystemImpl.LinkAt. -func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { - if rp.Done() { - return linuxerr.EEXIST - } - - if _, _, err := fs.walk(ctx, rp, true); err != nil { - return err - } - - return linuxerr.EROFS -} - -// MkdirAt implements vfs.FilesystemImpl.MkdirAt. -func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { - if rp.Done() { - return linuxerr.EEXIST - } - - if _, _, err := fs.walk(ctx, rp, true); err != nil { - return err - } - - return linuxerr.EROFS -} - -// MknodAt implements vfs.FilesystemImpl.MknodAt. -func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { - if rp.Done() { - return linuxerr.EEXIST - } - - _, _, err := fs.walk(ctx, rp, true) - if err != nil { - return err - } - - return linuxerr.EROFS -} - -// RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { - if rp.Done() { - return syserror.ENOENT - } - - _, _, err := fs.walk(ctx, rp, false) - if err != nil { - return err - } - - return linuxerr.EROFS -} - -// RmdirAt implements vfs.FilesystemImpl.RmdirAt. -func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { - _, inode, err := fs.walk(ctx, rp, false) - if err != nil { - return err - } - - if !inode.isDir() { - return linuxerr.ENOTDIR - } - - return linuxerr.EROFS -} - -// SetStatAt implements vfs.FilesystemImpl.SetStatAt. -func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { - _, _, err := fs.walk(ctx, rp, false) - if err != nil { - return err - } - - return linuxerr.EROFS -} - -// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. -func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { - if rp.Done() { - return linuxerr.EEXIST - } - - _, _, err := fs.walk(ctx, rp, true) - if err != nil { - return err - } - - return linuxerr.EROFS -} - -// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. -func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { - _, inode, err := fs.walk(ctx, rp, false) - if err != nil { - return err - } - - if inode.isDir() { - return syserror.EISDIR - } - - return linuxerr.EROFS -} - -// BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. -func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { - _, inode, err := fs.walk(ctx, rp, false) - if err != nil { - return nil, err - } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { - return nil, err - } - - // TODO(b/134676337): Support sockets. - return nil, linuxerr.ECONNREFUSED -} - -// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. -func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { - _, _, err := fs.walk(ctx, rp, false) - if err != nil { - return nil, err - } - return nil, linuxerr.ENOTSUP -} - -// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. -func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { - _, _, err := fs.walk(ctx, rp, false) - if err != nil { - return "", err - } - return "", linuxerr.ENOTSUP -} - -// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. -func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { - _, _, err := fs.walk(ctx, rp, false) - if err != nil { - return err - } - return linuxerr.ENOTSUP -} - -// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. -func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { - _, _, err := fs.walk(ctx, rp, false) - if err != nil { - return err - } - return linuxerr.ENOTSUP -} - -// PrependPath implements vfs.FilesystemImpl.PrependPath. -func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) -} - -// MountOptions implements vfs.FilesystemImpl.MountOptions. -func (fs *filesystem) MountOptions() string { - return "" -} diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go deleted file mode 100644 index 46658f855..000000000 --- a/pkg/sentry/fsimpl/ext/inode.go +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "fmt" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// inode represents an ext inode. -// -// inode uses the same inheritance pattern that pkg/sentry/vfs structures use. -// This has been done to increase memory locality. -// -// Implementations: -// inode -- -// |-- dir -// |-- symlink -// |-- regular-- -// |-- extent file -// |-- block map file -// -// +stateify savable -type inode struct { - // refs is a reference count. refs is accessed using atomic memory operations. - refs int64 - - // fs is the containing filesystem. - fs *filesystem - - // inodeNum is the inode number of this inode on disk. This is used to - // identify inodes within the ext filesystem. - inodeNum uint32 - - // blkSize is the fs data block size. Same as filesystem.sb.BlockSize(). - blkSize uint64 - - // diskInode gives us access to the inode struct on disk. Immutable. - diskInode disklayout.Inode - - locks vfs.FileLocks - - // This is immutable. The first field of the implementations must have inode - // as the first field to ensure temporality. - impl interface{} -} - -// incRef increments the inode ref count. -func (in *inode) incRef() { - atomic.AddInt64(&in.refs, 1) -} - -// tryIncRef tries to increment the ref count. Returns true if successful. -func (in *inode) tryIncRef() bool { - for { - refs := atomic.LoadInt64(&in.refs) - if refs == 0 { - return false - } - if atomic.CompareAndSwapInt64(&in.refs, refs, refs+1) { - return true - } - } -} - -// decRef decrements the inode ref count and releases the inode resources if -// the ref count hits 0. -// -// Precondition: Must have locked filesystem.mu. -func (in *inode) decRef() { - if refs := atomic.AddInt64(&in.refs, -1); refs == 0 { - delete(in.fs.inodeCache, in.inodeNum) - } else if refs < 0 { - panic("ext.inode.decRef() called without holding a reference") - } -} - -// newInode is the inode constructor. Reads the inode off disk. Identifies -// inodes based on the absolute inode number on disk. -func newInode(fs *filesystem, inodeNum uint32) (*inode, error) { - if inodeNum == 0 { - panic("inode number 0 on ext filesystems is not possible") - } - - inodeRecordSize := fs.sb.InodeSize() - var diskInode disklayout.Inode - if inodeRecordSize == disklayout.OldInodeSize { - diskInode = &disklayout.InodeOld{} - } else { - diskInode = &disklayout.InodeNew{} - } - - // Calculate where the inode is actually placed. - inodesPerGrp := fs.sb.InodesPerGroup() - blkSize := fs.sb.BlockSize() - inodeTableOff := fs.bgs[getBGNum(inodeNum, inodesPerGrp)].InodeTable() * blkSize - inodeOff := inodeTableOff + uint64(uint32(inodeRecordSize)*getBGOff(inodeNum, inodesPerGrp)) - - if err := readFromDisk(fs.dev, int64(inodeOff), diskInode); err != nil { - return nil, err - } - - // Build the inode based on its type. - args := inodeArgs{ - fs: fs, - inodeNum: inodeNum, - blkSize: blkSize, - diskInode: diskInode, - } - - switch diskInode.Mode().FileType() { - case linux.ModeSymlink: - f, err := newSymlink(args) - if err != nil { - return nil, err - } - return &f.inode, nil - case linux.ModeRegular: - f, err := newRegularFile(args) - if err != nil { - return nil, err - } - return &f.inode, nil - case linux.ModeDirectory: - f, err := newDirectory(args, fs.sb.IncompatibleFeatures().DirentFileType) - if err != nil { - return nil, err - } - return &f.inode, nil - default: - // TODO(b/134676337): Return appropriate errors for sockets, pipes and devices. - return nil, linuxerr.EINVAL - } -} - -type inodeArgs struct { - fs *filesystem - inodeNum uint32 - blkSize uint64 - diskInode disklayout.Inode -} - -func (in *inode) init(args inodeArgs, impl interface{}) { - in.fs = args.fs - in.inodeNum = args.inodeNum - in.blkSize = args.blkSize - in.diskInode = args.diskInode - in.impl = impl -} - -// open creates and returns a file description for the dentry passed in. -func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { - ats := vfs.AccessTypesForOpenFlags(opts) - if err := in.checkPermissions(rp.Credentials(), ats); err != nil { - return nil, err - } - mnt := rp.Mount() - switch in.impl.(type) { - case *regularFile: - var fd regularFileFD - fd.LockFD.Init(&in.locks) - if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { - return nil, err - } - return &fd.vfsfd, nil - case *directory: - // Can't open directories writably. This check is not necessary for a read - // only filesystem but will be required when write is implemented. - if ats&vfs.MayWrite != 0 { - return nil, syserror.EISDIR - } - var fd directoryFD - fd.LockFD.Init(&in.locks) - if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { - return nil, err - } - return &fd.vfsfd, nil - case *symlink: - if opts.Flags&linux.O_PATH == 0 { - // Can't open symlinks without O_PATH. - return nil, linuxerr.ELOOP - } - var fd symlinkFD - fd.LockFD.Init(&in.locks) - if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { - return nil, err - } - return &fd.vfsfd, nil - default: - panic(fmt.Sprintf("unknown inode type: %T", in.impl)) - } -} - -func (in *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error { - return vfs.GenericCheckPermissions(creds, ats, in.diskInode.Mode(), in.diskInode.UID(), in.diskInode.GID()) -} - -// statTo writes the statx fields to the output parameter. -func (in *inode) statTo(stat *linux.Statx) { - stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | - linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_SIZE | - linux.STATX_ATIME | linux.STATX_CTIME | linux.STATX_MTIME - stat.Blksize = uint32(in.blkSize) - stat.Mode = uint16(in.diskInode.Mode()) - stat.Nlink = uint32(in.diskInode.LinksCount()) - stat.UID = uint32(in.diskInode.UID()) - stat.GID = uint32(in.diskInode.GID()) - stat.Ino = uint64(in.inodeNum) - stat.Size = in.diskInode.Size() - stat.Atime = in.diskInode.AccessTime().StatxTimestamp() - stat.Ctime = in.diskInode.ChangeTime().StatxTimestamp() - stat.Mtime = in.diskInode.ModificationTime().StatxTimestamp() - stat.DevMajor = linux.UNNAMED_MAJOR - stat.DevMinor = in.fs.devMinor - // TODO(b/134676337): Set stat.Blocks which is the number of 512 byte blocks - // (including metadata blocks) required to represent this file. -} - -// getBGNum returns the block group number that a given inode belongs to. -func getBGNum(inodeNum uint32, inodesPerGrp uint32) uint32 { - return (inodeNum - 1) / inodesPerGrp -} - -// getBGOff returns the offset at which the given inode lives in the block -// group's inode table, i.e. the index of the inode in the inode table. -func getBGOff(inodeNum uint32, inodesPerGrp uint32) uint32 { - return (inodeNum - 1) % inodesPerGrp -} diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go deleted file mode 100644 index 6613f0e1d..000000000 --- a/pkg/sentry/fsimpl/ext/regular_file.go +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "io" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" -) - -// regularFile represents a regular file's inode. This too follows the -// inheritance pattern prevelant in the vfs layer described in -// pkg/sentry/vfs/README.md. -// -// +stateify savable -type regularFile struct { - inode inode - - // This is immutable. The first field of fileReader implementations must be - // regularFile to ensure temporality. - // io.ReaderAt is more strict than io.Reader in the sense that a partial read - // is always accompanied by an error. If a read spans past the end of file, a - // partial read (within file range) is done and io.EOF is returned. - impl io.ReaderAt -} - -// newRegularFile is the regularFile constructor. It figures out what kind of -// file this is and initializes the fileReader. -func newRegularFile(args inodeArgs) (*regularFile, error) { - if args.diskInode.Flags().Extents { - file, err := newExtentFile(args) - if err != nil { - return nil, err - } - return &file.regFile, nil - } - - file, err := newBlockMapFile(args) - if err != nil { - return nil, err - } - return &file.regFile, nil -} - -func (in *inode) isRegular() bool { - _, ok := in.impl.(*regularFile) - return ok -} - -// directoryFD represents a directory file description. It implements -// vfs.FileDescriptionImpl. -// -// +stateify savable -type regularFileFD struct { - fileDescription - vfs.LockFD - - // off is the file offset. off is accessed using atomic memory operations. - off int64 - - // offMu serializes operations that may mutate off. - offMu sync.Mutex `state:"nosave"` -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *regularFileFD) Release(context.Context) {} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - safeReader := safemem.FromIOReaderAt{ - ReaderAt: fd.inode().impl.(*regularFile).impl, - Offset: offset, - } - - // Copies data from disk directly into usermem without any intermediate - // allocations (if dst is converted into BlockSeq such that it does not need - // safe copying). - return dst.CopyOutFrom(ctx, safeReader) -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - n, err := fd.PRead(ctx, dst, fd.off, opts) - fd.offMu.Lock() - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - // write(2) specifies that EBADF must be returned if the fd is not open for - // writing. - return 0, linuxerr.EBADF -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - n, err := fd.PWrite(ctx, src, fd.off, opts) - fd.offMu.Lock() - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *regularFileFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - return linuxerr.ENOTDIR -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fd.offMu.Lock() - defer fd.offMu.Unlock() - switch whence { - case linux.SEEK_SET: - // Use offset as specified. - case linux.SEEK_CUR: - offset += fd.off - case linux.SEEK_END: - offset += int64(fd.inode().diskInode.Size()) - default: - return 0, linuxerr.EINVAL - } - if offset < 0 { - return 0, linuxerr.EINVAL - } - fd.off = offset - return offset, nil -} - -// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. -func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - // TODO(b/134676337): Implement mmap(2). - return linuxerr.ENODEV -} diff --git a/pkg/sentry/fsimpl/ext/symlink.go b/pkg/sentry/fsimpl/ext/symlink.go deleted file mode 100644 index 385651dc3..000000000 --- a/pkg/sentry/fsimpl/ext/symlink.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/usermem" -) - -// symlink represents a symlink inode. -// -// +stateify savable -type symlink struct { - inode inode - target string // immutable -} - -// newSymlink is the symlink constructor. It reads out the symlink target from -// the inode (however it might have been stored). -func newSymlink(args inodeArgs) (*symlink, error) { - var link []byte - - // If the symlink target is lesser than 60 bytes, its stores in inode.Data(). - // Otherwise either extents or block maps will be used to store the link. - size := args.diskInode.Size() - if size < 60 { - link = args.diskInode.Data()[:size] - } else { - // Create a regular file out of this inode and read out the target. - regFile, err := newRegularFile(args) - if err != nil { - return nil, err - } - - link = make([]byte, size) - if n, err := regFile.impl.ReadAt(link, 0); uint64(n) < size { - return nil, err - } - } - - file := &symlink{target: string(link)} - file.inode.init(args, file) - return file, nil -} - -func (in *inode) isSymlink() bool { - _, ok := in.impl.(*symlink) - return ok -} - -// symlinkFD represents a symlink file description and implements -// vfs.FileDescriptionImpl. which may only be used if open options contains -// O_PATH. For this reason most of the functions return EBADF. -// -// +stateify savable -type symlinkFD struct { - fileDescription - vfs.NoLockFD -} - -// Compiles only if symlinkFD implements vfs.FileDescriptionImpl. -var _ vfs.FileDescriptionImpl = (*symlinkFD)(nil) - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *symlinkFD) Release(context.Context) {} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *symlinkFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return 0, linuxerr.EBADF -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *symlinkFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - return 0, linuxerr.EBADF -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *symlinkFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, linuxerr.EBADF -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *symlinkFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return 0, linuxerr.EBADF -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *symlinkFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - return linuxerr.ENOTDIR -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *symlinkFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - return 0, linuxerr.EBADF -} - -// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. -func (fd *symlinkFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - return linuxerr.EBADF -} diff --git a/pkg/sentry/fsimpl/ext/utils.go b/pkg/sentry/fsimpl/ext/utils.go deleted file mode 100644 index 58ef7b9b8..000000000 --- a/pkg/sentry/fsimpl/ext/utils.go +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "io" - - "gvisor.dev/gvisor/pkg/marshal" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" - "gvisor.dev/gvisor/pkg/syserror" -) - -// readFromDisk performs a binary read from disk into the given struct from -// the absolute offset provided. -func readFromDisk(dev io.ReaderAt, abOff int64, v marshal.Marshallable) error { - n := v.SizeBytes() - buf := make([]byte, n) - if read, _ := dev.ReadAt(buf, abOff); read < int(n) { - return syserror.EIO - } - - v.UnmarshalBytes(buf) - return nil -} - -// readSuperBlock reads the SuperBlock from block group 0 in the underlying -// device. There are three versions of the superblock. This function identifies -// and returns the correct version. -func readSuperBlock(dev io.ReaderAt) (disklayout.SuperBlock, error) { - var sb disklayout.SuperBlock = &disklayout.SuperBlockOld{} - if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil { - return nil, err - } - if sb.Revision() == disklayout.OldRev { - return sb, nil - } - - sb = &disklayout.SuperBlock32Bit{} - if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil { - return nil, err - } - if !sb.IncompatibleFeatures().Is64Bit { - return sb, nil - } - - sb = &disklayout.SuperBlock64Bit{} - if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil { - return nil, err - } - return sb, nil -} - -// blockGroupsCount returns the number of block groups in the ext fs. -func blockGroupsCount(sb disklayout.SuperBlock) uint64 { - blocksCount := sb.BlocksCount() - blocksPerGroup := uint64(sb.BlocksPerGroup()) - - // Round up the result. float64 can compromise precision so do it manually. - return (blocksCount + blocksPerGroup - 1) / blocksPerGroup -} - -// readBlockGroups reads the block group descriptor table from block group 0 in -// the underlying device. -func readBlockGroups(dev io.ReaderAt, sb disklayout.SuperBlock) ([]disklayout.BlockGroup, error) { - bgCount := blockGroupsCount(sb) - bgdSize := uint64(sb.BgDescSize()) - is64Bit := sb.IncompatibleFeatures().Is64Bit - bgds := make([]disklayout.BlockGroup, bgCount) - - for i, off := uint64(0), uint64(sb.FirstDataBlock()+1)*sb.BlockSize(); i < bgCount; i, off = i+1, off+bgdSize { - if is64Bit { - bgds[i] = &disklayout.BlockGroup64Bit{} - } else { - bgds[i] = &disklayout.BlockGroup32Bit{} - } - - if err := readFromDisk(dev, int64(off), bgds[i]); err != nil { - return nil, err - } - } - return bgds, nil -} diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index ec8d58cc9..25d2e39d6 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -1161,6 +1161,13 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs if !d.isSynthetic() { if stat.Mask != 0 { + if stat.Mask&linux.STATX_SIZE != 0 { + // d.dataMu must be held around the update to both the remote + // file's size and d.size to serialize with writeback (which + // might otherwise write data back up to the old d.size after + // the remote file has been truncated). + d.dataMu.Lock() + } if err := d.file.setAttr(ctx, p9.SetAttrMask{ Permissions: stat.Mask&linux.STATX_MODE != 0, UID: stat.Mask&linux.STATX_UID != 0, @@ -1180,13 +1187,16 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs MTimeSeconds: uint64(stat.Mtime.Sec), MTimeNanoSeconds: uint64(stat.Mtime.Nsec), }); err != nil { + if stat.Mask&linux.STATX_SIZE != 0 { + d.dataMu.Unlock() // +checklocksforce: locked conditionally above + } return err } if stat.Mask&linux.STATX_SIZE != 0 { // d.size should be kept up to date, and privatized // copy-on-write mappings of truncated pages need to be // invalidated, even if InteropModeShared is in effect. - d.updateSizeLocked(stat.Size) + d.updateSizeAndUnlockDataMuLocked(stat.Size) // +checklocksforce: locked conditionally above } } if d.fs.opts.interop == InteropModeShared { @@ -1249,6 +1259,14 @@ func (d *dentry) doAllocate(ctx context.Context, offset, length uint64, allocate // Preconditions: d.metadataMu must be locked. func (d *dentry) updateSizeLocked(newSize uint64) { d.dataMu.Lock() + d.updateSizeAndUnlockDataMuLocked(newSize) +} + +// Preconditions: d.metadataMu and d.dataMu must be locked. +// +// Postconditions: d.dataMu is unlocked. +// +checklocksrelease:d.dataMu +func (d *dentry) updateSizeAndUnlockDataMuLocked(newSize uint64) { oldSize := d.size atomic.StoreUint64(&d.size, newSize) // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings @@ -1257,9 +1275,9 @@ func (d *dentry) updateSizeLocked(newSize uint64) { // contents beyond the new d.size. (We are still holding d.metadataMu, // so we can't race with Write or another truncate.) d.dataMu.Unlock() - if d.size < oldSize { + if newSize < oldSize { oldpgend, _ := hostarch.PageRoundUp(oldSize) - newpgend, _ := hostarch.PageRoundUp(d.size) + newpgend, _ := hostarch.PageRoundUp(newSize) if oldpgend != newpgend { d.mapsMu.Lock() d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ @@ -1275,8 +1293,8 @@ func (d *dentry) updateSizeLocked(newSize uint64) { // truncated pages have been removed from the remote file, they // should be dropped without being written back. d.dataMu.Lock() - d.cache.Truncate(d.size, d.fs.mfp.MemoryFile()) - d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend}) + d.cache.Truncate(newSize, d.fs.mfp.MemoryFile()) + d.dirty.KeepClean(memmap.MappableRange{newSize, oldpgend}) d.dataMu.Unlock() } } diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 91405fe66..947dbe05f 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -79,17 +79,22 @@ func (fd *regularFileFD) OnClose(ctx context.Context) error { if !fd.vfsfd.IsWritable() { return nil } - // Skip flushing if there are client-buffered writes, since (as with the - // VFS1 client) we don't flush buffered writes on close anyway. d := fd.dentry() - if d.fs.opts.interop != InteropModeExclusive { - return nil - } - d.dataMu.RLock() - haveDirtyPages := !d.dirty.IsEmpty() - d.dataMu.RUnlock() - if haveDirtyPages { - return nil + if d.fs.opts.interop == InteropModeExclusive { + // d may have dirty pages that we won't write back now (and wouldn't + // have in VFS1), making a flushf RPC ineffective. If this is the case, + // skip the flushf. + // + // Note that it's also possible to have dirty pages under other interop + // modes if forcePageCache is in effect; we conservatively assume that + // applications have some way of tolerating this and still want the + // flushf. + d.dataMu.RLock() + haveDirtyPages := !d.dirty.IsEmpty() + d.dataMu.RUnlock() + if haveDirtyPages { + return nil + } } d.handleMu.RLock() defer d.handleMu.RUnlock() @@ -707,14 +712,8 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts) } -func (d *dentry) mayCachePages() bool { - if d.fs.opts.forcePageCache { - return true - } - if d.fs.opts.interop == InteropModeShared { - return false - } - return atomic.LoadInt32(&d.mmapFD) >= 0 +func (fs *filesystem) mayCachePagesInMemoryFile() bool { + return fs.opts.forcePageCache || fs.opts.interop != InteropModeShared } // AddMapping implements memmap.Mappable.AddMapping. @@ -726,7 +725,7 @@ func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar host for _, r := range mapped { d.pf.hostFileMapper.IncRefOn(r) } - if d.mayCachePages() { + if d.fs.mayCachePagesInMemoryFile() { // d.Evict() will refuse to evict memory-mapped pages, so tell the // MemoryFile to not bother trying. mf := d.fs.mfp.MemoryFile() @@ -745,7 +744,7 @@ func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar h for _, r := range unmapped { d.pf.hostFileMapper.DecRefOn(r) } - if d.mayCachePages() { + if d.fs.mayCachePagesInMemoryFile() { // Pages that are no longer referenced by any application memory // mappings are now considered unused; allow MemoryFile to evict them // when necessary. diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index cbbc0935a..f54811edf 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -78,7 +78,7 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns "smaps": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &smapsData{task: task}), "stat": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), "statm": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statmData{task: task}), - "status": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statusData{task: task, pidns: pidns}), + "status": fs.newStatusInode(ctx, task, pidns, fs.NextIno(), 0444), "uid_map": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}), } if isThreadGroup { diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 5bb6bc372..0ce3ed797 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -661,34 +661,119 @@ func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error { return nil } -// statusData implements vfs.DynamicBytesSource for /proc/[pid]/status. +// statusInode implements kernfs.Inode for /proc/[pid]/status. // // +stateify savable -type statusData struct { - kernfs.DynamicBytesFile +type statusInode struct { + kernfs.InodeAttrs + kernfs.InodeNoStatFS + kernfs.InodeNoopRefCount + kernfs.InodeNotDirectory + kernfs.InodeNotSymlink task *kernel.Task pidns *kernel.PIDNamespace + locks vfs.FileLocks } -var _ dynamicInode = (*statusData)(nil) +// statusFD implements vfs.FileDescriptionImpl and vfs.DynamicByteSource for +// /proc/[pid]/status. +// +// +stateify savable +type statusFD struct { + statusFDLowerBase + vfs.DynamicBytesFileDescriptionImpl + vfs.LockFD + + vfsfd vfs.FileDescription + + inode *statusInode + task *kernel.Task + pidns *kernel.PIDNamespace + userns *auth.UserNamespace // equivalent to struct file::f_cred::user_ns +} + +// statusFDLowerBase is a dumb hack to ensure that statusFD prefers +// vfs.DynamicBytesFileDescriptionImpl methods to vfs.FileDescriptinDefaultImpl +// methods. +// +// +stateify savable +type statusFDLowerBase struct { + vfs.FileDescriptionDefaultImpl +} + +func (fs *filesystem) newStatusInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, ino uint64, perm linux.FileMode) kernfs.Inode { + // Note: credentials are overridden by taskOwnedInode. + inode := &statusInode{ + task: task, + pidns: pidns, + } + inode.InodeAttrs.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeRegular|perm) + return &taskOwnedInode{Inode: inode, owner: task} +} + +// Open implements kernfs.Inode.Open. +func (s *statusInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + fd := &statusFD{ + inode: s, + task: s.task, + pidns: s.pidns, + userns: rp.Credentials().UserNamespace, + } + fd.LockFD.Init(&s.locks) + if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } + fd.SetDataSource(fd) + return &fd.vfsfd, nil +} + +// SetStat implements kernfs.Inode.SetStat. +func (*statusInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { + return linuxerr.EPERM +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (s *statusFD) Release(ctx context.Context) { +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (s *statusFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + fs := s.vfsfd.VirtualDentry().Mount().Filesystem() + return s.inode.Stat(ctx, fs, opts) +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (s *statusFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + return linuxerr.EPERM +} // Generate implements vfs.DynamicBytesSource.Generate. -func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error { +func (s *statusFD) Generate(ctx context.Context, buf *bytes.Buffer) error { fmt.Fprintf(buf, "Name:\t%s\n", s.task.Name()) fmt.Fprintf(buf, "State:\t%s\n", s.task.StateStatus()) fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.task.ThreadGroup())) fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.task)) + ppid := kernel.ThreadID(0) if parent := s.task.Parent(); parent != nil { ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup()) } fmt.Fprintf(buf, "PPid:\t%d\n", ppid) + tpid := kernel.ThreadID(0) if tracer := s.task.Tracer(); tracer != nil { tpid = s.pidns.IDOfTask(tracer) } fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid) + + creds := s.task.Credentials() + ruid := creds.RealKUID.In(s.userns).OrOverflow() + euid := creds.EffectiveKUID.In(s.userns).OrOverflow() + suid := creds.SavedKUID.In(s.userns).OrOverflow() + rgid := creds.RealKGID.In(s.userns).OrOverflow() + egid := creds.EffectiveKGID.In(s.userns).OrOverflow() + sgid := creds.SavedKGID.In(s.userns).OrOverflow() var fds int var vss, rss, data uint64 s.task.WithMuLocked(func(t *kernel.Task) { @@ -701,12 +786,26 @@ func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error { data = mm.VirtualDataSize() } }) + // Filesystem user/group IDs aren't implemented; effective UID/GID are used + // instead. + fmt.Fprintf(buf, "Uid:\t%d\t%d\t%d\t%d\n", ruid, euid, suid, euid) + fmt.Fprintf(buf, "Gid:\t%d\t%d\t%d\t%d\n", rgid, egid, sgid, egid) fmt.Fprintf(buf, "FDSize:\t%d\n", fds) + buf.WriteString("Groups:\t ") + // There is a space between each pair of supplemental GIDs, as well as an + // unconditional trailing space that some applications actually depend on. + var sep string + for _, kgid := range creds.ExtraKGIDs { + fmt.Fprintf(buf, "%s%d", sep, kgid.In(s.userns).OrOverflow()) + sep = " " + } + buf.WriteString(" \n") + fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10) fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10) fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10) + fmt.Fprintf(buf, "Threads:\t%d\n", s.task.ThreadGroup().Count()) - creds := s.task.Credentials() fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps) fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps) fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps) diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go index 80dda1559..b121fc1b4 100644 --- a/pkg/sentry/inet/inet.go +++ b/pkg/sentry/inet/inet.go @@ -27,6 +27,9 @@ type Stack interface { // integers. Interfaces() map[int32]Interface + // RemoveInterface removes the specified network interface. + RemoveInterface(idx int32) error + // InterfaceAddrs returns all network interface addresses as a mapping from // interface indexes to a slice of associated interface address properties. InterfaceAddrs() map[int32][]InterfaceAddr diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go index 218d9dafc..621f47e1f 100644 --- a/pkg/sentry/inet/test_stack.go +++ b/pkg/sentry/inet/test_stack.go @@ -45,23 +45,29 @@ func NewTestStack() *TestStack { } } -// Interfaces implements Stack.Interfaces. +// Interfaces implements Stack. func (s *TestStack) Interfaces() map[int32]Interface { return s.InterfacesMap } -// InterfaceAddrs implements Stack.InterfaceAddrs. +// RemoveInterface implements Stack. +func (s *TestStack) RemoveInterface(idx int32) error { + delete(s.InterfacesMap, idx) + return nil +} + +// InterfaceAddrs implements Stack. func (s *TestStack) InterfaceAddrs() map[int32][]InterfaceAddr { return s.InterfaceAddrsMap } -// AddInterfaceAddr implements Stack.AddInterfaceAddr. +// AddInterfaceAddr implements Stack. func (s *TestStack) AddInterfaceAddr(idx int32, addr InterfaceAddr) error { s.InterfaceAddrsMap[idx] = append(s.InterfaceAddrsMap[idx], addr) return nil } -// RemoveInterfaceAddr implements Stack.RemoveInterfaceAddr. +// RemoveInterfaceAddr implements Stack. func (s *TestStack) RemoveInterfaceAddr(idx int32, addr InterfaceAddr) error { interfaceAddrs, ok := s.InterfaceAddrsMap[idx] if !ok { @@ -79,94 +85,94 @@ func (s *TestStack) RemoveInterfaceAddr(idx int32, addr InterfaceAddr) error { return nil } -// SupportsIPv6 implements Stack.SupportsIPv6. +// SupportsIPv6 implements Stack. func (s *TestStack) SupportsIPv6() bool { return s.SupportsIPv6Flag } -// TCPReceiveBufferSize implements Stack.TCPReceiveBufferSize. +// TCPReceiveBufferSize implements Stack. func (s *TestStack) TCPReceiveBufferSize() (TCPBufferSize, error) { return s.TCPRecvBufSize, nil } -// SetTCPReceiveBufferSize implements Stack.SetTCPReceiveBufferSize. +// SetTCPReceiveBufferSize implements Stack. func (s *TestStack) SetTCPReceiveBufferSize(size TCPBufferSize) error { s.TCPRecvBufSize = size return nil } -// TCPSendBufferSize implements Stack.TCPSendBufferSize. +// TCPSendBufferSize implements Stack. func (s *TestStack) TCPSendBufferSize() (TCPBufferSize, error) { return s.TCPSendBufSize, nil } -// SetTCPSendBufferSize implements Stack.SetTCPSendBufferSize. +// SetTCPSendBufferSize implements Stack. func (s *TestStack) SetTCPSendBufferSize(size TCPBufferSize) error { s.TCPSendBufSize = size return nil } -// TCPSACKEnabled implements Stack.TCPSACKEnabled. +// TCPSACKEnabled implements Stack. func (s *TestStack) TCPSACKEnabled() (bool, error) { return s.TCPSACKFlag, nil } -// SetTCPSACKEnabled implements Stack.SetTCPSACKEnabled. +// SetTCPSACKEnabled implements Stack. func (s *TestStack) SetTCPSACKEnabled(enabled bool) error { s.TCPSACKFlag = enabled return nil } -// TCPRecovery implements Stack.TCPRecovery. +// TCPRecovery implements Stack. func (s *TestStack) TCPRecovery() (TCPLossRecovery, error) { return s.Recovery, nil } -// SetTCPRecovery implements Stack.SetTCPRecovery. +// SetTCPRecovery implements Stack. func (s *TestStack) SetTCPRecovery(recovery TCPLossRecovery) error { s.Recovery = recovery return nil } -// Statistics implements inet.Stack.Statistics. +// Statistics implements Stack. func (s *TestStack) Statistics(stat interface{}, arg string) error { return nil } -// RouteTable implements Stack.RouteTable. +// RouteTable implements Stack. func (s *TestStack) RouteTable() []Route { return s.RouteList } -// Resume implements Stack.Resume. +// Resume implements Stack. func (s *TestStack) Resume() {} -// RegisteredEndpoints implements inet.Stack.RegisteredEndpoints. +// RegisteredEndpoints implements Stack. func (s *TestStack) RegisteredEndpoints() []stack.TransportEndpoint { return nil } -// CleanupEndpoints implements inet.Stack.CleanupEndpoints. +// CleanupEndpoints implements Stack. func (s *TestStack) CleanupEndpoints() []stack.TransportEndpoint { return nil } -// RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints. +// RestoreCleanupEndpoints implements Stack. func (s *TestStack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {} -// SetForwarding implements inet.Stack.SetForwarding. +// SetForwarding implements Stack. func (s *TestStack) SetForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) error { s.IPForwarding = enable return nil } -// PortRange implements inet.Stack.PortRange. +// PortRange implements Stack. func (*TestStack) PortRange() (uint16, uint16) { // Use the default Linux values per net/ipv4/af_inet.c:inet_init_net(). return 32768, 28232 } -// SetPortRange implements inet.Stack.SetPortRange. +// SetPortRange implements Stack. func (*TestStack) SetPortRange(start uint16, end uint16) error { // No-op. return nil diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 26614b029..e4e0dc04f 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -220,6 +220,7 @@ go_library( "//pkg/abi/linux", "//pkg/abi/linux/errno", "//pkg/amutex", + "//pkg/bitmap", "//pkg/bits", "//pkg/bpf", "//pkg/cleanup", @@ -256,6 +257,7 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/futex", + "//pkg/sentry/kernel/msgqueue", "//pkg/sentry/kernel/sched", "//pkg/sentry/kernel/semaphore", "//pkg/sentry/kernel/shm", diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 8786a70b5..eff556a0c 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -18,10 +18,10 @@ import ( "fmt" "math" "strings" - "sync/atomic" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bitmap" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -84,13 +84,8 @@ type FDTable struct { // mu protects below. mu sync.Mutex `state:"nosave"` - // next is start position to find fd. - next int32 - - // used contains the number of non-nil entries. It must be accessed - // atomically. It may be read atomically without holding mu (but not - // written). - used int32 + // fdBitmap shows which fds are already in use. + fdBitmap bitmap.Bitmap `state:"nosave"` // descriptorTable holds descriptors. descriptorTable `state:".(map[int32]descriptor)"` @@ -98,6 +93,8 @@ type FDTable struct { func (f *FDTable) saveDescriptorTable() map[int32]descriptor { m := make(map[int32]descriptor) + f.mu.Lock() + defer f.mu.Unlock() f.forEach(context.Background(), func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { m[fd] = descriptor{ file: file, @@ -111,12 +108,16 @@ func (f *FDTable) saveDescriptorTable() map[int32]descriptor { func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) { ctx := context.Background() f.initNoLeakCheck() // Initialize table. - f.used = 0 + f.fdBitmap = bitmap.New(uint32(math.MaxUint16)) for fd, d := range m { + if fd < 0 { + panic(fmt.Sprintf("FD is not supposed to be negative. FD: %d", fd)) + } + if file, fileVFS2 := f.setAll(ctx, fd, d.file, d.fileVFS2, d.flags); file != nil || fileVFS2 != nil { panic("VFS1 or VFS2 files set") } - + f.fdBitmap.Add(uint32(fd)) // Note that we do _not_ need to acquire a extra table reference here. The // table reference will already be accounted for in the file, so we drop the // reference taken by set above. @@ -189,8 +190,10 @@ func (f *FDTable) DecRef(ctx context.Context) { func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags)) { // retries tracks the number of failed TryIncRef attempts for the same FD. retries := 0 - fd := int32(0) - for { + fds := f.fdBitmap.ToSlice() + // Iterate through the fdBitmap. + for _, ufd := range fds { + fd := int32(ufd) file, fileVFS2, flags, ok := f.getAll(fd) if !ok { break @@ -218,7 +221,6 @@ func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File, fileVFS2.DecRef(ctx) } retries = 0 - fd++ } } @@ -226,6 +228,8 @@ func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File, func (f *FDTable) String() string { var buf strings.Builder ctx := context.Background() + f.mu.Lock() + defer f.mu.Unlock() f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { switch { case file != nil: @@ -250,10 +254,10 @@ func (f *FDTable) String() string { } // NewFDs allocates new FDs guaranteed to be the lowest number available -// greater than or equal to the fd parameter. All files will share the set +// greater than or equal to the minFD parameter. All files will share the set // flags. Success is guaranteed to be all or none. -func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags FDFlags) (fds []int32, err error) { - if fd < 0 { +func (f *FDTable) NewFDs(ctx context.Context, minFD int32, files []*fs.File, flags FDFlags) (fds []int32, err error) { + if minFD < 0 { // Don't accept negative FDs. return nil, unix.EINVAL } @@ -267,31 +271,48 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags if lim.Cur != limits.Infinity { end = int32(lim.Cur) } - if fd >= end { + if minFD+int32(len(files)) > end { return nil, unix.EMFILE } } f.mu.Lock() - // From f.next to find available fd. - if fd < f.next { - fd = f.next + // max is used as the largest number in fdBitmap + 1. + max := int32(0) + + if !f.fdBitmap.IsEmpty() { + max = int32(f.fdBitmap.Maximum()) + max++ } + // Adjust max in case it is less than minFD. + if max < minFD { + max = minFD + } // Install all entries. - for i := fd; i < end && len(fds) < len(files); i++ { - if d, _, _ := f.get(i); d == nil { - // Set the descriptor. - f.set(ctx, i, files[len(fds)], flags) - fds = append(fds, i) // Record the file descriptor. + for len(fds) < len(files) { + // Try to use free bit in fdBitmap. + // If all bits in fdBitmap are used, expand fd to the max. + fd := f.fdBitmap.FirstZero(uint32(minFD)) + if fd == math.MaxInt32 { + fd = uint32(max) + max++ + } + if fd >= uint32(end) { + break } + f.fdBitmap.Add(fd) + f.set(ctx, int32(fd), files[len(fds)], flags) + fds = append(fds, int32(fd)) + minFD = int32(fd) } // Failure? Unwind existing FDs. if len(fds) < len(files) { for _, i := range fds { f.set(ctx, i, nil, FDFlags{}) + f.fdBitmap.Remove(uint32(i)) } f.mu.Unlock() @@ -305,20 +326,15 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags return nil, unix.EMFILE } - if fd == f.next { - // Update next search start position. - f.next = fds[len(fds)-1] + 1 - } - f.mu.Unlock() return fds, nil } // NewFDsVFS2 allocates new FDs guaranteed to be the lowest number available -// greater than or equal to the fd parameter. All files will share the set +// greater than or equal to the minFD parameter. All files will share the set // flags. Success is guaranteed to be all or none. -func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDescription, flags FDFlags) (fds []int32, err error) { - if fd < 0 { +func (f *FDTable) NewFDsVFS2(ctx context.Context, minFD int32, files []*vfs.FileDescription, flags FDFlags) (fds []int32, err error) { + if minFD < 0 { // Don't accept negative FDs. return nil, unix.EINVAL } @@ -332,31 +348,47 @@ func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDes if lim.Cur != limits.Infinity { end = int32(lim.Cur) } - if fd >= end { + if minFD >= end { return nil, unix.EMFILE } } f.mu.Lock() - // From f.next to find available fd. - if fd < f.next { - fd = f.next + // max is used as the largest number in fdBitmap + 1. + max := int32(0) + + if !f.fdBitmap.IsEmpty() { + max = int32(f.fdBitmap.Maximum()) + max++ } - // Install all entries. - for i := fd; i < end && len(fds) < len(files); i++ { - if d, _, _ := f.getVFS2(i); d == nil { - // Set the descriptor. - f.setVFS2(ctx, i, files[len(fds)], flags) - fds = append(fds, i) // Record the file descriptor. - } + // Adjust max in case it is less than minFD. + if max < minFD { + max = minFD } + for len(fds) < len(files) { + // Try to use free bit in fdBitmap. + // If all bits in fdBitmap are used, expand fd to the max. + fd := f.fdBitmap.FirstZero(uint32(minFD)) + if fd == math.MaxInt32 { + fd = uint32(max) + max++ + } + if fd >= uint32(end) { + break + } + f.fdBitmap.Add(fd) + f.setVFS2(ctx, int32(fd), files[len(fds)], flags) + fds = append(fds, int32(fd)) + minFD = int32(fd) + } // Failure? Unwind existing FDs. if len(fds) < len(files) { for _, i := range fds { f.setVFS2(ctx, i, nil, FDFlags{}) + f.fdBitmap.Remove(uint32(i)) } f.mu.Unlock() @@ -370,57 +402,19 @@ func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDes return nil, unix.EMFILE } - if fd == f.next { - // Update next search start position. - f.next = fds[len(fds)-1] + 1 - } - f.mu.Unlock() return fds, nil } -// NewFDVFS2 allocates a file descriptor greater than or equal to minfd for +// NewFDVFS2 allocates a file descriptor greater than or equal to minFD for // the given file description. If it succeeds, it takes a reference on file. -func (f *FDTable) NewFDVFS2(ctx context.Context, minfd int32, file *vfs.FileDescription, flags FDFlags) (int32, error) { - if minfd < 0 { - // Don't accept negative FDs. - return -1, unix.EINVAL - } - - // Default limit. - end := int32(math.MaxInt32) - - // Ensure we don't get past the provided limit. - if limitSet := limits.FromContext(ctx); limitSet != nil { - lim := limitSet.Get(limits.NumberOfFiles) - if lim.Cur != limits.Infinity { - end = int32(lim.Cur) - } - if minfd >= end { - return -1, unix.EMFILE - } - } - - f.mu.Lock() - defer f.mu.Unlock() - - // From f.next to find available fd. - fd := minfd - if fd < f.next { - fd = f.next - } - for fd < end { - if d, _, _ := f.getVFS2(fd); d == nil { - f.setVFS2(ctx, fd, file, flags) - if fd == f.next { - // Update next search start position. - f.next = fd + 1 - } - return fd, nil - } - fd++ +func (f *FDTable) NewFDVFS2(ctx context.Context, minFD int32, file *vfs.FileDescription, flags FDFlags) (int32, error) { + files := []*vfs.FileDescription{file} + fileSlice, error := f.NewFDsVFS2(ctx, minFD, files, flags) + if error != nil { + return -1, error } - return -1, unix.EMFILE + return fileSlice[0], nil } // NewFDAt sets the file reference for the given FD. If there is an active @@ -469,6 +463,11 @@ func (f *FDTable) newFDAt(ctx context.Context, fd int32, file *fs.File, fileVFS2 defer f.mu.Unlock() df, dfVFS2 := f.setAll(ctx, fd, file, fileVFS2, flags) + // Add fd to fdBitmap. + if file != nil || fileVFS2 != nil { + f.fdBitmap.Add(uint32(fd)) + } + return df, dfVFS2, nil } @@ -573,7 +572,9 @@ func (f *FDTable) GetVFS2(fd int32) (*vfs.FileDescription, FDFlags) { // Precondition: The caller must be running on the task goroutine, or Task.mu // must be locked. func (f *FDTable) GetFDs(ctx context.Context) []int32 { - fds := make([]int32, 0, int(atomic.LoadInt32(&f.used))) + f.mu.Lock() + defer f.mu.Unlock() + fds := make([]int32, 0, int(f.fdBitmap.GetNumOnes())) f.forEach(ctx, func(fd int32, _ *fs.File, _ *vfs.FileDescription, _ FDFlags) { fds = append(fds, fd) }) @@ -583,13 +584,15 @@ func (f *FDTable) GetFDs(ctx context.Context) []int32 { // Fork returns an independent FDTable. func (f *FDTable) Fork(ctx context.Context) *FDTable { clone := f.k.NewFDTable() - + f.mu.Lock() + defer f.mu.Unlock() f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { // The set function here will acquire an appropriate table // reference for the clone. We don't need anything else. if df, dfVFS2 := clone.setAll(ctx, fd, file, fileVFS2, flags); df != nil || dfVFS2 != nil { panic("VFS1 or VFS2 files set") } + clone.fdBitmap.Add(uint32(fd)) }) return clone } @@ -604,11 +607,6 @@ func (f *FDTable) Remove(ctx context.Context, fd int32) (*fs.File, *vfs.FileDesc f.mu.Lock() - // Update current available position. - if fd < f.next { - f.next = fd - } - orig, orig2, _, _ := f.getAll(fd) // Add reference for caller. @@ -621,6 +619,7 @@ func (f *FDTable) Remove(ctx context.Context, fd int32) (*fs.File, *vfs.FileDesc if orig != nil || orig2 != nil { orig, orig2 = f.setAll(ctx, fd, nil, nil, FDFlags{}) // Zap entry. + f.fdBitmap.Remove(uint32(fd)) } f.mu.Unlock() @@ -644,16 +643,13 @@ func (f *FDTable) RemoveIf(ctx context.Context, cond func(*fs.File, *vfs.FileDes f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { if cond(file, fileVFS2, flags) { df, dfVFS2 := f.setAll(ctx, fd, nil, nil, FDFlags{}) // Clear from table. + f.fdBitmap.Remove(uint32(fd)) if df != nil { files = append(files, df) } if dfVFS2 != nil { filesVFS2 = append(filesVFS2, dfVFS2) } - // Update current available position. - if fd < f.next { - f.next = fd - } } }) f.mu.Unlock() diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go index f17f9c59c..2b3e6ef71 100644 --- a/pkg/sentry/kernel/fd_table_unsafe.go +++ b/pkg/sentry/kernel/fd_table_unsafe.go @@ -15,9 +15,11 @@ package kernel import ( + "math" "sync/atomic" "unsafe" + "gvisor.dev/gvisor/pkg/bitmap" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -44,6 +46,7 @@ func (f *FDTable) initNoLeakCheck() { func (f *FDTable) init() { f.initNoLeakCheck() f.InitRefs() + f.fdBitmap = bitmap.New(uint32(math.MaxUint16)) } // get gets a file entry. @@ -162,14 +165,6 @@ func (f *FDTable) setAll(ctx context.Context, fd int32, file *fs.File, fileVFS2 } } - // Adjust used. - switch { - case orig == nil && desc != nil: - atomic.AddInt32(&f.used, 1) - case orig != nil && desc == nil: - atomic.AddInt32(&f.used, -1) - } - if orig != nil { switch { case orig.file != nil: diff --git a/pkg/sentry/kernel/ipc/BUILD b/pkg/sentry/kernel/ipc/BUILD new file mode 100644 index 000000000..e42a94e15 --- /dev/null +++ b/pkg/sentry/kernel/ipc/BUILD @@ -0,0 +1,20 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "ipc", + srcs = [ + "object.go", + "registry.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/errors/linuxerr", + "//pkg/log", + "//pkg/sentry/fs", + "//pkg/sentry/kernel/auth", + ], +) diff --git a/pkg/sentry/kernel/ipc/object.go b/pkg/sentry/kernel/ipc/object.go new file mode 100644 index 000000000..387b35e7e --- /dev/null +++ b/pkg/sentry/kernel/ipc/object.go @@ -0,0 +1,115 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ipc defines functionality and utilities common to sysvipc mechanisms. +// +// Lock ordering: [shm/semaphore/msgqueue].Registry.mu -> Mechanism +package ipc + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +// Key is a user-provided identifier for IPC objects. +type Key int32 + +// ID is a kernel identifier for IPC objects. +type ID int32 + +// Object represents an abstract IPC object with fields common to all IPC +// mechanisms. +// +// +stateify savable +type Object struct { + // User namespace which owns the IPC namespace which owns the IPC object. + // Immutable. + UserNS *auth.UserNamespace + + // ID is a kernel identifier for the IPC object. Immutable. + ID ID + + // Key is a user-provided identifier for the IPC object. Immutable. + Key Key + + // Creator is the user who created the IPC object. Immutable. + Creator fs.FileOwner + + // Owner is the current owner of the IPC object. + Owner fs.FileOwner + + // Perms is the access permissions the IPC object. + Perms fs.FilePermissions +} + +// Mechanism represents a SysV mechanism that holds an IPC object. It can also +// be looked at as a container for an ipc.Object, which is by definition a fully +// functional SysV object. +type Mechanism interface { + // Lock behaves the same as Mutex.Lock on the mechanism. + Lock() + + // Unlock behaves the same as Mutex.Unlock on the mechanism. + Unlock() + + // Object returns a pointer to the mechanism's ipc.Object. Mechanism.Lock, + // and Mechanism.Unlock should be used when the object is used. + Object() *Object + + // Destroy destroys the mechanism. + Destroy() +} + +// NewObject returns a new, initialized ipc.Object. The newly returned object +// doesn't have a valid ID. When the object is registered, the registry assigns +// it a new unique ID. +func NewObject(un *auth.UserNamespace, key Key, creator, owner fs.FileOwner, perms fs.FilePermissions) *Object { + return &Object{ + UserNS: un, + Key: key, + Creator: creator, + Owner: owner, + Perms: perms, + } +} + +// CheckOwnership verifies whether an IPC object may be accessed using creds as +// an owner. See ipc/util.c:ipcctl_obtain_check() in Linux. +func (o *Object) CheckOwnership(creds *auth.Credentials) bool { + if o.Owner.UID == creds.EffectiveKUID || o.Creator.UID == creds.EffectiveKUID { + return true + } + + // Tasks with CAP_SYS_ADMIN may bypass ownership checks. Strangely, Linux + // doesn't use CAP_IPC_OWNER for this despite CAP_IPC_OWNER being documented + // for use to "override IPC ownership checks". + return creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, o.UserNS) +} + +// CheckPermissions verifies whether an IPC object is accessible using creds for +// access described by req. See ipc/util.c:ipcperms() in Linux. +func (o *Object) CheckPermissions(creds *auth.Credentials, req fs.PermMask) bool { + p := o.Perms.Other + if o.Owner.UID == creds.EffectiveKUID { + p = o.Perms.User + } else if creds.InGroup(o.Owner.GID) { + p = o.Perms.Group + } + + if p.SupersetOf(req) { + return true + } + return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, o.UserNS) +} diff --git a/pkg/sentry/kernel/ipc/registry.go b/pkg/sentry/kernel/ipc/registry.go new file mode 100644 index 000000000..91de19070 --- /dev/null +++ b/pkg/sentry/kernel/ipc/registry.go @@ -0,0 +1,196 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipc + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +// Registry is similar to Object, but for registries. It represent an abstract +// SysV IPC registry with fields common to all SysV registries. Registry is not +// thread-safe, and should be protected using a mutex. +// +// +stateify savable +type Registry struct { + // UserNS owning the IPC namespace this registry belongs to. Immutable. + UserNS *auth.UserNamespace + + // objects is a map of IDs to IPC mechanisms. + objects map[ID]Mechanism + + // KeysToIDs maps a lookup key to an ID. + keysToIDs map[Key]ID + + // lastIDUsed is used to find the next available ID for object creation. + lastIDUsed ID +} + +// NewRegistry return a new, initialized ipc.Registry. +func NewRegistry(userNS *auth.UserNamespace) *Registry { + return &Registry{ + UserNS: userNS, + objects: make(map[ID]Mechanism), + keysToIDs: make(map[Key]ID), + } +} + +// Find uses key to search for and return a SysV mechanism. Find returns an +// error if an object is found by shouldn't be, or if the user doesn't have +// permission to use the object. If no object is found, Find checks create +// flag, and returns an error only if it's false. +func (r *Registry) Find(ctx context.Context, key Key, mode linux.FileMode, create, exclusive bool) (Mechanism, error) { + if id, ok := r.keysToIDs[key]; ok { + mech := r.objects[id] + mech.Lock() + defer mech.Unlock() + + obj := mech.Object() + creds := auth.CredentialsFromContext(ctx) + if !obj.CheckPermissions(creds, fs.PermsFromMode(mode)) { + // The [calling process / user] does not have permission to access + // the set, and does not have the CAP_IPC_OWNER capability in the + // user namespace that governs its IPC namespace. + return nil, linuxerr.EACCES + } + + if create && exclusive { + // IPC_CREAT and IPC_EXCL were specified, but an object already + // exists for key. + return nil, linuxerr.EEXIST + } + return mech, nil + } + + if !create { + // No object exists for key and msgflg did not specify IPC_CREAT. + return nil, linuxerr.ENOENT + } + + return nil, nil +} + +// Register adds the given object into Registry.Objects, and assigns it a new +// ID. It returns an error if all IDs are exhausted. +func (r *Registry) Register(m Mechanism) error { + id, err := r.newID() + if err != nil { + return err + } + + obj := m.Object() + obj.ID = id + + r.objects[id] = m + r.keysToIDs[obj.Key] = id + + return nil +} + +// newID finds the first unused ID in the registry, and returns an error if +// non is found. +func (r *Registry) newID() (ID, error) { + // Find the next available ID. + for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { + // Handle wrap around. + if id < 0 { + id = 0 + continue + } + if r.objects[id] == nil { + r.lastIDUsed = id + return id, nil + } + } + + log.Warningf("ids exhausted, they may be leaking") + + // The man pages for shmget(2) mention that ENOSPC should be used if "All + // possible shared memory IDs have been taken (SHMMNI)". Other SysV + // mechanisms don't have a specific errno for running out of IDs, but they + // return ENOSPC if the max number of objects is exceeded, so we assume that + // it's the same case. + return 0, linuxerr.ENOSPC +} + +// Remove removes the mechanism with the given id from the registry, and calls +// mechanism.Destroy to perform mechanism-specific removal. +func (r *Registry) Remove(id ID, creds *auth.Credentials) error { + mech := r.objects[id] + if mech == nil { + return linuxerr.EINVAL + } + + mech.Lock() + defer mech.Unlock() + + obj := mech.Object() + + // The effective user ID of the calling process must match the creator or + // owner of the [mechanism], or the caller must be privileged. + if !obj.CheckOwnership(creds) { + return linuxerr.EPERM + } + + delete(r.objects, obj.ID) + delete(r.keysToIDs, obj.Key) + mech.Destroy() + + return nil +} + +// ForAllObjects executes a given function for all given objects. +func (r *Registry) ForAllObjects(f func(o Mechanism)) { + for _, o := range r.objects { + f(o) + } +} + +// FindByID returns the mechanism with the given ID, nil if non exists. +func (r *Registry) FindByID(id ID) Mechanism { + return r.objects[id] +} + +// DissociateKey removes the association between a mechanism and its key +// (deletes it from r.keysToIDs), preventing it from being discovered by any new +// process, but not necessarily destroying it. If the given key doesn't exist, +// nothing is changed. +func (r *Registry) DissociateKey(key Key) { + delete(r.keysToIDs, key) +} + +// DissociateID removes the association between a mechanism and its ID (deletes +// it from r.objects). An ID can't be removed unless the associated key is +// removed already, this is done to prevent the users from acquiring nil a +// Mechanism. +// +// Precondition: must be preceded by a call to r.DissociateKey. +func (r *Registry) DissociateID(id ID) { + delete(r.objects, id) +} + +// ObjectCount returns the number of registered objects. +func (r *Registry) ObjectCount() int { + return len(r.objects) +} + +// LastIDUsed returns the last used ID. +func (r *Registry) LastIDUsed() ID { + return r.lastIDUsed +} diff --git a/pkg/sentry/kernel/ipc_namespace.go b/pkg/sentry/kernel/ipc_namespace.go index 9545bb5ef..0b101b1bb 100644 --- a/pkg/sentry/kernel/ipc_namespace.go +++ b/pkg/sentry/kernel/ipc_namespace.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/msgqueue" "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" ) @@ -30,6 +31,7 @@ type IPCNamespace struct { // User namespace which owns this IPC namespace. Immutable. userNS *auth.UserNamespace + queues *msgqueue.Registry semaphores *semaphore.Registry shms *shm.Registry } @@ -38,6 +40,7 @@ type IPCNamespace struct { func NewIPCNamespace(userNS *auth.UserNamespace) *IPCNamespace { ns := &IPCNamespace{ userNS: userNS, + queues: msgqueue.NewRegistry(userNS), semaphores: semaphore.NewRegistry(userNS), shms: shm.NewRegistry(userNS), } @@ -45,6 +48,11 @@ func NewIPCNamespace(userNS *auth.UserNamespace) *IPCNamespace { return ns } +// MsgqueueRegistry returns the message queue registry for this namespace. +func (i *IPCNamespace) MsgqueueRegistry() *msgqueue.Registry { + return i.queues +} + // SemaphoreRegistry returns the semaphore set registry for this namespace. func (i *IPCNamespace) SemaphoreRegistry() *semaphore.Registry { return i.semaphores diff --git a/pkg/sentry/kernel/msgqueue/BUILD b/pkg/sentry/kernel/msgqueue/BUILD new file mode 100644 index 000000000..5ec11e1f6 --- /dev/null +++ b/pkg/sentry/kernel/msgqueue/BUILD @@ -0,0 +1,36 @@ +load("//tools:defs.bzl", "go_library") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "message_list", + out = "message_list.go", + package = "msgqueue", + prefix = "msg", + template = "//pkg/ilist:generic_list", + types = { + "Element": "*Message", + "Linker": "*Message", + }, +) + +go_library( + name = "msgqueue", + srcs = [ + "message_list.go", + "msgqueue.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/errors/linuxerr", + "//pkg/sentry/fs", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/ipc", + "//pkg/sentry/kernel/time", + "//pkg/sync", + "//pkg/waiter", + ], +) diff --git a/pkg/sentry/kernel/msgqueue/msgqueue.go b/pkg/sentry/kernel/msgqueue/msgqueue.go new file mode 100644 index 000000000..3ce926950 --- /dev/null +++ b/pkg/sentry/kernel/msgqueue/msgqueue.go @@ -0,0 +1,220 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package msgqueue implements System V message queues. +package msgqueue + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/waiter" +) + +const ( + // System-wide limit for maximum number of queues. + maxQueues = linux.MSGMNI + + // Maximum size of a queue in bytes. + maxQueueBytes = linux.MSGMNB + + // Maximum size of a message in bytes. + maxMessageBytes = linux.MSGMAX +) + +// Registry contains a set of message queues that can be referenced using keys +// or IDs. +// +// +stateify savable +type Registry struct { + // mu protects all the fields below. + mu sync.Mutex `state:"nosave"` + + // reg defines basic fields and operations needed for all SysV registries. + reg *ipc.Registry +} + +// NewRegistry returns a new Registry ready to be used. +func NewRegistry(userNS *auth.UserNamespace) *Registry { + return &Registry{ + reg: ipc.NewRegistry(userNS), + } +} + +// Queue represents a SysV message queue, described by sysvipc(7). +// +// +stateify savable +type Queue struct { + // registry is the registry owning this queue. Immutable. + registry *Registry + + // mu protects all the fields below. + mu sync.Mutex `state:"nosave"` + + // dead is set to true when a queue is removed from the registry and should + // not be used. Operations on the queue should check dead, and return + // EIDRM if set to true. + dead bool + + // obj defines basic fields that should be included in all SysV IPC objects. + obj *ipc.Object + + // senders holds a queue of blocked message senders. Senders are notified + // when enough space is available in the queue to insert their message. + senders waiter.Queue + + // receivers holds a queue of blocked receivers. Receivers are notified + // when a new message is inserted into the queue and can be received. + receivers waiter.Queue + + // messages is a list of sent messages. + messages msgList + + // sendTime is the last time a msgsnd was perfomed. + sendTime ktime.Time + + // receiveTime is the last time a msgrcv was performed. + receiveTime ktime.Time + + // changeTime is the last time the queue was modified using msgctl. + changeTime ktime.Time + + // byteCount is the current number of message bytes in the queue. + byteCount uint64 + + // messageCount is the current number of messages in the queue. + messageCount uint64 + + // maxBytes is the maximum allowed number of bytes in the queue, and is also + // used as a limit for the number of total possible messages. + maxBytes uint64 + + // sendPID is the PID of the process that performed the last msgsnd. + sendPID int32 + + // receivePID is the PID of the process that performed the last msgrcv. + receivePID int32 +} + +// Message represents a message exchanged through a Queue via msgsnd(2) and +// msgrcv(2). +// +// +stateify savable +type Message struct { + msgEntry + + // mType is an integer representing the type of the sent message. + mType int64 + + // mText is an untyped block of memory. + mText []byte + + // mSize is the size of mText. + mSize uint64 +} + +// FindOrCreate creates a new message queue or returns an existing one. See +// msgget(2). +func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, mode linux.FileMode, private, create, exclusive bool) (*Queue, error) { + r.mu.Lock() + defer r.mu.Unlock() + + if !private { + queue, err := r.reg.Find(ctx, key, mode, create, exclusive) + if err != nil { + return nil, err + } + + if queue != nil { + return queue.(*Queue), nil + } + } + + // Check system-wide limits. + if r.reg.ObjectCount() >= maxQueues { + return nil, linuxerr.ENOSPC + } + + return r.newQueueLocked(ctx, key, fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode)) +} + +// newQueueLocked creates a new queue using the given fields. An error is +// returned if there're no more available identifiers. +// +// Precondition: r.mu must be held. +func (r *Registry) newQueueLocked(ctx context.Context, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions) (*Queue, error) { + q := &Queue{ + registry: r, + obj: ipc.NewObject(r.reg.UserNS, key, creator, creator, perms), + sendTime: ktime.ZeroTime, + receiveTime: ktime.ZeroTime, + changeTime: ktime.NowFromContext(ctx), + maxBytes: maxQueueBytes, + } + + err := r.reg.Register(q) + if err != nil { + return nil, err + } + return q, nil +} + +// Remove removes the queue with specified ID. All waiters (readers and +// writers) and writers will be awakened and fail. Remove will return an error +// if the ID is invalid, or the the user doesn't have privileges. +func (r *Registry) Remove(id ipc.ID, creds *auth.Credentials) error { + r.mu.Lock() + defer r.mu.Unlock() + + r.reg.Remove(id, creds) + return nil +} + +// Lock implements ipc.Mechanism.Lock. +func (q *Queue) Lock() { + q.mu.Lock() +} + +// Unlock implements ipc.mechanism.Unlock. +// +// +checklocksignore +func (q *Queue) Unlock() { + q.mu.Unlock() +} + +// Object implements ipc.Mechanism.Object. +func (q *Queue) Object() *ipc.Object { + return q.obj +} + +// Destroy implements ipc.Mechanism.Destroy. +func (q *Queue) Destroy() { + q.dead = true + + // Notify waiters. Senders and receivers will try to run, and return an + // error (EIDRM). Waiters should remove themselves from the queue after + // waking up. + q.senders.Notify(waiter.EventOut) + q.receivers.Notify(waiter.EventIn) +} + +// ID returns queue's ID. +func (q *Queue) ID() ipc.ID { + return q.obj.ID +} diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index 21358ec92..079294f81 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -768,14 +768,14 @@ const ( // ptraceClone is called at the end of a clone or fork syscall to check if t // should enter PTRACE_EVENT_CLONE, PTRACE_EVENT_FORK, or PTRACE_EVENT_VFORK // stop. child is the new task. -func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions) bool { +func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, args *linux.CloneArgs) bool { if !t.hasTracer() { return false } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() event := false - if !opts.Untraced { + if args.Flags&linux.CLONE_UNTRACED == 0 { switch kind { case ptraceCloneKindClone: if t.ptraceOpts.TraceClone { @@ -810,7 +810,7 @@ func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions // clone(2)'s documentation of CLONE_UNTRACED and CLONE_PTRACE is // confusingly wrong; see kernel/fork.c:_do_fork() => copy_process() => // include/linux/ptrace.h:ptrace_init_task(). - if event || opts.InheritTracer { + if event || args.Flags&linux.CLONE_PTRACE != 0 { tracer := t.Tracer() if tracer != nil { child.ptraceTracer.Store(tracer) diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index a787c00a8..2ae08ed12 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -26,9 +26,9 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/errors/linuxerr", - "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/ipc", "//pkg/sentry/kernel/time", "//pkg/sync", "//pkg/syserror", @@ -41,10 +41,11 @@ go_test( srcs = ["semaphore_test.go"], library = ":semaphore", deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/contexttest", - "//pkg/sentry/kernel/auth", - "//pkg/syserror", + "//pkg/abi/linux", # keep + "//pkg/context", # keep + "//pkg/sentry/contexttest", # keep + "//pkg/sentry/kernel/auth", # keep + "//pkg/sentry/kernel/ipc", # keep + "//pkg/syserror", # keep ], ) diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index 485c3a788..8610d3fc1 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -21,9 +21,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -47,15 +47,15 @@ const ( // // +stateify savable type Registry struct { - // userNS owning the ipc name this registry belongs to. Immutable. - userNS *auth.UserNamespace // mu protects all fields below. - mu sync.Mutex `state:"nosave"` - semaphores map[int32]*Set - lastIDUsed int32 + mu sync.Mutex `state:"nosave"` + + // reg defines basic fields and operations needed for all SysV registries. + reg *ipc.Registry + // indexes maintains a mapping between a set's index in virtual array and // its identifier. - indexes map[int32]int32 + indexes map[int32]ipc.ID } // Set represents a set of semaphores that can be operated atomically. @@ -65,19 +65,11 @@ type Set struct { // registry owning this sem set. Immutable. registry *Registry - // Id is a handle that identifies the set. - ID int32 - - // key is an user provided key that can be shared between processes. - key int32 + // mu protects all fields below. + mu sync.Mutex `state:"nosave"` - // creator is the user that created the set. Immutable. - creator fs.FileOwner + obj *ipc.Object - // mu protects all fields below. - mu sync.Mutex `state:"nosave"` - owner fs.FileOwner - perms fs.FilePermissions opTime ktime.Time changeTime ktime.Time @@ -115,9 +107,8 @@ type waiter struct { // NewRegistry creates a new semaphore set registry. func NewRegistry(userNS *auth.UserNamespace) *Registry { return &Registry{ - userNS: userNS, - semaphores: make(map[int32]*Set), - indexes: make(map[int32]int32), + reg: ipc.NewRegistry(userNS), + indexes: make(map[int32]ipc.ID), } } @@ -126,7 +117,7 @@ func NewRegistry(userNS *auth.UserNamespace) *Registry { // a new set is always created. If create is false, it fails if a set cannot // be found. If exclusive is true, it fails if a set with the same key already // exists. -func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) { +func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) { if nsems < 0 || nsems > semsMax { return nil, linuxerr.EINVAL } @@ -135,31 +126,19 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu defer r.mu.Unlock() if !private { - // Look up an existing semaphore. - if set := r.findByKey(key); set != nil { - set.mu.Lock() - defer set.mu.Unlock() - - // Check that caller can access semaphore set. - creds := auth.CredentialsFromContext(ctx) - if !set.checkPerms(creds, fs.PermsFromMode(mode)) { - return nil, linuxerr.EACCES - } + set, err := r.reg.Find(ctx, key, mode, create, exclusive) + if err != nil { + return nil, err + } - // Validate parameters. + // Validate semaphore-specific parameters. + if set != nil { + set := set.(*Set) if nsems > int32(set.Size()) { return nil, linuxerr.EINVAL } - if create && exclusive { - return nil, linuxerr.EEXIST - } return set, nil } - - if !create { - // Semaphore not found and should not be created. - return nil, syserror.ENOENT - } } // Zero is only valid if an existing set is found. @@ -169,9 +148,9 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu // Apply system limits. // - // Map semaphores and map indexes in a registry are of the same size, - // check map semaphores only here for the system limit. - if len(r.semaphores) >= setsMax { + // Map reg.objects and map indexes in a registry are of the same size, + // check map reg.objects only here for the system limit. + if r.reg.ObjectCount() >= setsMax { return nil, syserror.ENOSPC } if r.totalSems() > int(semsTotalMax-nsems) { @@ -179,9 +158,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu } // Finally create a new set. - owner := fs.FileOwnerFromContext(ctx) - perms := fs.FilePermsFromMode(mode) - return r.newSet(ctx, key, owner, owner, perms, nsems) + return r.newSetLocked(ctx, key, fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode), nsems) } // IPCInfo returns information about system-wide semaphore limits and parameters. @@ -208,7 +185,7 @@ func (r *Registry) SemInfo() *linux.SemInfo { defer r.mu.Unlock() info := r.IPCInfo() - info.SemUsz = uint32(len(r.semaphores)) + info.SemUsz = uint32(r.reg.ObjectCount()) info.SemAem = uint32(r.totalSems()) return info @@ -231,77 +208,59 @@ func (r *Registry) HighestIndex() int32 { return highestIndex } -// RemoveID removes set with give 'id' from the registry and marks the set as +// Remove removes set with give 'id' from the registry and marks the set as // dead. All waiters will be awakened and fail. -func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error { +func (r *Registry) Remove(id ipc.ID, creds *auth.Credentials) error { r.mu.Lock() defer r.mu.Unlock() - set := r.semaphores[id] - if set == nil { - return linuxerr.EINVAL - } index, found := r.findIndexByID(id) if !found { - // Inconsistent state. - panic(fmt.Sprintf("unable to find an index for ID: %d", id)) + return linuxerr.EINVAL } + delete(r.indexes, index) - set.mu.Lock() - defer set.mu.Unlock() + r.reg.Remove(id, creds) - // "The effective user ID of the calling process must match the creator or - // owner of the semaphore set, or the caller must be privileged." - if !set.checkCredentials(creds) && !set.checkCapability(creds) { - return linuxerr.EACCES - } - - delete(r.semaphores, set.ID) - delete(r.indexes, index) - set.destroy() return nil } -func (r *Registry) newSet(ctx context.Context, key int32, owner, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) { +// newSetLocked creates a new Set using given fields. An error is returned if there +// are no more available identifiers. +// +// Precondition: r.mu must be held. +func (r *Registry) newSetLocked(ctx context.Context, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) { set := &Set{ registry: r, - key: key, - owner: owner, - creator: owner, - perms: perms, + obj: ipc.NewObject(r.reg.UserNS, ipc.Key(key), creator, creator, perms), changeTime: ktime.NowFromContext(ctx), sems: make([]sem, nsems), } - // Find the next available ID. - for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { - // Handle wrap around. - if id < 0 { - id = 0 - continue - } - if r.semaphores[id] == nil { - index, found := r.findFirstAvailableIndex() - if !found { - panic("unable to find an available index") - } - r.indexes[index] = id - r.lastIDUsed = id - r.semaphores[id] = set - set.ID = id - return set, nil - } + err := r.reg.Register(set) + if err != nil { + return nil, err } - log.Warningf("Semaphore map is full, they must be leaking") - return nil, syserror.ENOMEM + index, found := r.findFirstAvailableIndex() + if !found { + // See linux, ipc/sem.c:newary(). + return nil, linuxerr.ENOSPC + } + r.indexes[index] = set.obj.ID + + return set, nil } // FindByID looks up a set given an ID. -func (r *Registry) FindByID(id int32) *Set { +func (r *Registry) FindByID(id ipc.ID) *Set { r.mu.Lock() defer r.mu.Unlock() - return r.semaphores[id] + mech := r.reg.FindByID(id) + if mech == nil { + return nil + } + return mech.(*Set) } // FindByIndex looks up a set given an index. @@ -313,19 +272,10 @@ func (r *Registry) FindByIndex(index int32) *Set { if !present { return nil } - return r.semaphores[id] + return r.reg.FindByID(id).(*Set) } -func (r *Registry) findByKey(key int32) *Set { - for _, v := range r.semaphores { - if v.key == key { - return v - } - } - return nil -} - -func (r *Registry) findIndexByID(id int32) (int32, bool) { +func (r *Registry) findIndexByID(id ipc.ID) (int32, bool) { for k, v := range r.indexes { if v == id { return k, true @@ -345,12 +295,36 @@ func (r *Registry) findFirstAvailableIndex() (int32, bool) { func (r *Registry) totalSems() int { totalSems := 0 - for _, v := range r.semaphores { - totalSems += v.Size() - } + r.reg.ForAllObjects( + func(o ipc.Mechanism) { + totalSems += o.(*Set).Size() + }, + ) return totalSems } +// ID returns semaphore's ID. +func (s *Set) ID() ipc.ID { + return s.obj.ID +} + +// Object implements ipc.Mechanism.Object. +func (s *Set) Object() *ipc.Object { + return s.obj +} + +// Lock implements ipc.Mechanism.Lock. +func (s *Set) Lock() { + s.mu.Lock() +} + +// Unlock implements ipc.mechanism.Unlock. +// +// +checklocksignore +func (s *Set) Unlock() { + s.mu.Unlock() +} + func (s *Set) findSem(num int32) *sem { if num < 0 || int(num) >= s.Size() { return nil @@ -370,12 +344,12 @@ func (s *Set) Change(ctx context.Context, creds *auth.Credentials, owner fs.File // "The effective UID of the calling process must match the owner or creator // of the semaphore set, or the caller must be privileged." - if !s.checkCredentials(creds) && !s.checkCapability(creds) { + if !s.obj.CheckOwnership(creds) { return linuxerr.EACCES } - s.owner = owner - s.perms = perms + s.obj.Owner = owner + s.obj.Perms = perms s.changeTime = ktime.NowFromContext(ctx) return nil } @@ -395,18 +369,18 @@ func (s *Set) semStat(creds *auth.Credentials, permMask fs.PermMask) (*linux.Sem s.mu.Lock() defer s.mu.Unlock() - if !s.checkPerms(creds, permMask) { + if !s.obj.CheckPermissions(creds, permMask) { return nil, linuxerr.EACCES } return &linux.SemidDS{ SemPerm: linux.IPCPerm{ - Key: uint32(s.key), - UID: uint32(creds.UserNamespace.MapFromKUID(s.owner.UID)), - GID: uint32(creds.UserNamespace.MapFromKGID(s.owner.GID)), - CUID: uint32(creds.UserNamespace.MapFromKUID(s.creator.UID)), - CGID: uint32(creds.UserNamespace.MapFromKGID(s.creator.GID)), - Mode: uint16(s.perms.LinuxMode()), + Key: uint32(s.obj.Key), + UID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Owner.UID)), + GID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Owner.GID)), + CUID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Creator.UID)), + CGID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Creator.GID)), + Mode: uint16(s.obj.Perms.LinuxMode()), Seq: 0, // IPC sequence not supported. }, SemOTime: s.opTime.TimeT(), @@ -425,7 +399,7 @@ func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Cred defer s.mu.Unlock() // "The calling process must have alter permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Write: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Write: true}) { return linuxerr.EACCES } @@ -461,7 +435,7 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti defer s.mu.Unlock() // "The calling process must have alter permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Write: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Write: true}) { return linuxerr.EACCES } @@ -483,7 +457,7 @@ func (s *Set) GetVal(num int32, creds *auth.Credentials) (int16, error) { defer s.mu.Unlock() // "The calling process must have read permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Read: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { return 0, linuxerr.EACCES } @@ -500,7 +474,7 @@ func (s *Set) GetValAll(creds *auth.Credentials) ([]uint16, error) { defer s.mu.Unlock() // "The calling process must have read permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Read: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { return nil, linuxerr.EACCES } @@ -517,7 +491,7 @@ func (s *Set) GetPID(num int32, creds *auth.Credentials) (int32, error) { defer s.mu.Unlock() // "The calling process must have read permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Read: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { return 0, linuxerr.EACCES } @@ -533,7 +507,7 @@ func (s *Set) countWaiters(num int32, creds *auth.Credentials, pred func(w *wait defer s.mu.Unlock() // The calling process must have read permission on the semaphore set. - if !s.checkPerms(creds, fs.PermMask{Read: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { return 0, linuxerr.EACCES } @@ -589,7 +563,7 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr } } - if !s.checkPerms(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) { return nil, 0, linuxerr.EACCES } @@ -675,38 +649,10 @@ func (s *Set) AbortWait(num int32, ch chan struct{}) { // Waiter may not be found in case it raced with wakeWaiters(). } -func (s *Set) checkCredentials(creds *auth.Credentials) bool { - return s.owner.UID == creds.EffectiveKUID || - s.owner.GID == creds.EffectiveKGID || - s.creator.UID == creds.EffectiveKUID || - s.creator.GID == creds.EffectiveKGID -} - -func (s *Set) checkCapability(creds *auth.Credentials) bool { - return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, s.registry.userNS) && creds.UserNamespace.MapFromKUID(s.owner.UID).Ok() -} - -func (s *Set) checkPerms(creds *auth.Credentials, reqPerms fs.PermMask) bool { - // Are we owner, or in group, or other? - p := s.perms.Other - if s.owner.UID == creds.EffectiveKUID { - p = s.perms.User - } else if creds.InGroup(s.owner.GID) { - p = s.perms.Group - } - - // Are permissions satisfied without capability checks? - if p.SupersetOf(reqPerms) { - return true - } - - return s.checkCapability(creds) -} - -// destroy destroys the set. +// Destroy implements ipc.Mechanism.Destroy. // // Preconditions: Caller must hold 's.mu'. -func (s *Set) destroy() { +func (s *Set) Destroy() { // Notify all waiters. They will fail on the next attempt to execute // operations and return error. s.dead = true diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go index e47acefdf..2e4ab8121 100644 --- a/pkg/sentry/kernel/semaphore/semaphore_test.go +++ b/pkg/sentry/kernel/semaphore/semaphore_test.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" "gvisor.dev/gvisor/pkg/syserror" ) @@ -55,7 +56,7 @@ func signalled(ch chan struct{}) bool { func TestBasic(t *testing.T) { ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} + set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)} ops := []linux.Sembuf{ {SemOp: 1}, } @@ -76,7 +77,7 @@ func TestBasic(t *testing.T) { func TestWaitForZero(t *testing.T) { ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} + set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)} ops := []linux.Sembuf{ {SemOp: 0}, } @@ -115,7 +116,7 @@ func TestWaitForZero(t *testing.T) { func TestNoWait(t *testing.T) { ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} + set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)} ops := []linux.Sembuf{ {SemOp: 1}, } @@ -138,11 +139,12 @@ func TestUnregister(t *testing.T) { ctx := contexttest.Context(t) r := NewRegistry(auth.NewRootUserNamespace()) set, err := r.FindOrCreate(ctx, 123, 2, linux.FileMode(0x600), true, true, true) + if err != nil { t.Fatalf("FindOrCreate() failed, err: %v", err) } - if got := r.FindByID(set.ID); got.ID != set.ID { - t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.ID, got, set) + if got := r.FindByID(set.obj.ID); got.obj.ID != set.obj.ID { + t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.obj.ID, got, set) } ops := []linux.Sembuf{ @@ -155,14 +157,14 @@ func TestUnregister(t *testing.T) { } creds := auth.CredentialsFromContext(ctx) - if err := r.RemoveID(set.ID, creds); err != nil { - t.Fatalf("RemoveID(%d) failed, err: %v", set.ID, err) + if err := r.Remove(set.obj.ID, creds); err != nil { + t.Fatalf("Remove(%d) failed, err: %v", set.obj.ID, err) } if !set.dead { t.Fatalf("set is not dead: %+v", set) } - if got := r.FindByID(set.ID); got != nil { - t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.ID, got) + if got := r.FindByID(set.obj.ID); got != nil { + t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.obj.ID, got) } for i, ch := range chs { if !signalled(ch) { diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index 5b69333fe..4e8deac4c 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -36,6 +36,7 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/ipc", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index f7ac4c2b2..2abf467d7 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -43,6 +43,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -51,12 +52,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) -// Key represents a shm segment key. Analogous to a file name. -type Key int32 - -// ID represents the opaque handle for a shm segment. Analogous to an fd. -type ID int32 - // Registry tracks all shared memory segments in an IPC namespace. The registry // provides the mechanisms for creating and finding segments, and reporting // global shm parameters. @@ -69,50 +64,51 @@ type Registry struct { // mu protects all fields below. mu sync.Mutex `state:"nosave"` - // shms maps segment ids to segments. + // reg defines basic fields and operations needed for all SysV registries. // - // shms holds all referenced segments, which are removed on the last + // Withing reg, there are two maps, Objects and KeysToIDs. + // + // reg.objects holds all referenced segments, which are removed on the last // DecRef. Thus, it cannot itself hold a reference on the Shm. // // Since removal only occurs after the last (unlocked) DecRef, there // exists a short window during which a Shm still exists in Shm, but is // unreferenced. Users must use TryIncRef to determine if the Shm is // still valid. - shms map[ID]*Shm - - // keysToShms maps segment keys to segments. // - // Shms in keysToShms are guaranteed to be referenced, as they are + // keysToIDs maps segment keys to IDs. + // + // Shms in keysToIDs are guaranteed to be referenced, as they are // removed by disassociateKey before the last DecRef. - keysToShms map[Key]*Shm + reg *ipc.Registry // Sum of the sizes of all existing segments rounded up to page size, in // units of page size. totalPages uint64 - - // ID assigned to the last created segment. Used to quickly find the next - // unused ID. - lastIDUsed ID } // NewRegistry creates a new shm registry. func NewRegistry(userNS *auth.UserNamespace) *Registry { return &Registry{ - userNS: userNS, - shms: make(map[ID]*Shm), - keysToShms: make(map[Key]*Shm), + userNS: userNS, + reg: ipc.NewRegistry(userNS), } } // FindByID looks up a segment given an ID. // // FindByID returns a reference on Shm. -func (r *Registry) FindByID(id ID) *Shm { +func (r *Registry) FindByID(id ipc.ID) *Shm { r.mu.Lock() defer r.mu.Unlock() - s := r.shms[id] + mech := r.reg.FindByID(id) + if mech == nil { + return nil + } + s := mech.(*Shm) + // Take a reference on s. If TryIncRef fails, s has reached the last - // DecRef, but hasn't quite been removed from r.shms yet. + // DecRef, but hasn't quite been removed from r.reg.objects yet. if s != nil && s.TryIncRef() { return s } @@ -129,9 +125,9 @@ func (r *Registry) dissociateKey(s *Shm) { defer r.mu.Unlock() s.mu.Lock() defer s.mu.Unlock() - if s.key != linux.IPC_PRIVATE { - delete(r.keysToShms, s.key) - s.key = linux.IPC_PRIVATE + if s.obj.Key != linux.IPC_PRIVATE { + r.reg.DissociateKey(s.obj.Key) + s.obj.Key = linux.IPC_PRIVATE } } @@ -139,7 +135,7 @@ func (r *Registry) dissociateKey(s *Shm) { // analogous to open(2). // // FindOrCreate returns a reference on Shm. -func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size uint64, mode linux.FileMode, private, create, exclusive bool) (*Shm, error) { +func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, size uint64, mode linux.FileMode, private, create, exclusive bool) (*Shm, error) { if (create || private) && (size < linux.SHMMIN || size > linux.SHMMAX) { // "A new segment was to be created and size is less than SHMMIN or // greater than SHMMAX." - man shmget(2) @@ -152,49 +148,29 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui r.mu.Lock() defer r.mu.Unlock() - if len(r.shms) >= linux.SHMMNI { + if r.reg.ObjectCount() >= linux.SHMMNI { // "All possible shared memory IDs have been taken (SHMMNI) ..." // - man shmget(2) return nil, syserror.ENOSPC } if !private { - // Look up an existing segment. - if shm := r.keysToShms[key]; shm != nil { - shm.mu.Lock() - defer shm.mu.Unlock() - - // Check that caller can access the segment. - if !shm.checkPermissions(ctx, fs.PermsFromMode(mode)) { - // "The user does not have permission to access the shared - // memory segment, and does not have the CAP_IPC_OWNER - // capability in the user namespace that governs its IPC - // namespace." - man shmget(2) - return nil, linuxerr.EACCES - } + shm, err := r.reg.Find(ctx, key, mode, create, exclusive) + if err != nil { + return nil, err + } + // Validate shm-specific parameters. + if shm != nil { + shm := shm.(*Shm) if size > shm.size { // "A segment for the given key exists, but size is greater than // the size of that segment." - man shmget(2) return nil, linuxerr.EINVAL } - - if create && exclusive { - // "IPC_CREAT and IPC_EXCL were specified in shmflg, but a - // shared memory segment already exists for key." - // - man shmget(2) - return nil, linuxerr.EEXIST - } - shm.IncRef() return shm, nil } - - if !create { - // "No segment exists for the given key, and IPC_CREAT was not - // specified." - man shmget(2) - return nil, syserror.ENOENT - } } var sizeAligned uint64 @@ -212,9 +188,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui } // Need to create a new segment. - creator := fs.FileOwnerFromContext(ctx) - perms := fs.FilePermsFromMode(mode) - s, err := r.newShm(ctx, pid, key, creator, perms, size) + s, err := r.newShmLocked(ctx, pid, key, fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode), size) if err != nil { return nil, err } @@ -224,10 +198,10 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui return s, nil } -// newShm creates a new segment in the registry. +// newShmLocked creates a new segment in the registry. // // Precondition: Caller must hold r.mu. -func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) { +func (r *Registry) newShmLocked(ctx context.Context, pid int32, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) { mfp := pgalloc.MemoryFileProviderFromContext(ctx) if mfp == nil { panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider)) @@ -242,40 +216,21 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi shm := &Shm{ mfp: mfp, registry: r, - creator: creator, size: size, effectiveSize: effectiveSize, + obj: ipc.NewObject(r.reg.UserNS, ipc.Key(key), creator, creator, perms), fr: fr, - key: key, - perms: perms, - owner: creator, creatorPID: pid, changeTime: ktime.NowFromContext(ctx), } shm.InitRefs() - // Find the next available ID. - for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { - // Handle wrap around. - if id < 0 { - id = 0 - continue - } - if r.shms[id] == nil { - r.lastIDUsed = id - - shm.ID = id - r.shms[id] = shm - r.keysToShms[key] = shm - - r.totalPages += effectiveSize / hostarch.PageSize - - return shm, nil - } + if err := r.reg.Register(shm); err != nil { + return nil, err } + r.totalPages += effectiveSize / hostarch.PageSize - log.Warningf("Shm ids exhuasted, they may be leaking") - return nil, syserror.ENOSPC + return shm, nil } // IPCInfo reports global parameters for sysv shared memory segments on this @@ -297,7 +252,7 @@ func (r *Registry) ShmInfo() *linux.ShmInfo { defer r.mu.Unlock() return &linux.ShmInfo{ - UsedIDs: int32(r.lastIDUsed), + UsedIDs: int32(r.reg.LastIDUsed()), ShmTot: r.totalPages, ShmRss: r.totalPages, // We could probably get a better estimate from memory accounting. ShmSwp: 0, // No reclaim at the moment. @@ -314,11 +269,11 @@ func (r *Registry) remove(s *Shm) { s.mu.Lock() defer s.mu.Unlock() - if s.key != linux.IPC_PRIVATE { + if s.obj.Key != linux.IPC_PRIVATE { panic(fmt.Sprintf("Attempted to remove %s from the registry whose key is still associated", s.debugLocked())) } - delete(r.shms, s.ID) + r.reg.DissociateID(s.obj.ID) r.totalPages -= s.effectiveSize / hostarch.PageSize } @@ -330,13 +285,16 @@ func (r *Registry) Release(ctx context.Context) { // the IPC namespace containing it has no more references. toRelease := make([]*Shm, 0) r.mu.Lock() - for _, s := range r.keysToShms { - s.mu.Lock() - if !s.pendingDestruction { - toRelease = append(toRelease, s) - } - s.mu.Unlock() - } + r.reg.ForAllObjects( + func(o ipc.Mechanism) { + s := o.(*Shm) + s.mu.Lock() + if !s.pendingDestruction { + toRelease = append(toRelease, s) + } + s.mu.Unlock() + }, + ) r.mu.Unlock() for _, s := range toRelease { @@ -374,12 +332,6 @@ type Shm struct { // registry points to the shm registry containing this segment. Immutable. registry *Registry - // ID is the kernel identifier for this segment. Immutable. - ID ID - - // creator is the user that created the segment. Immutable. - creator fs.FileOwner - // size is the requested size of the segment at creation, in // bytes. Immutable. size uint64 @@ -397,14 +349,8 @@ type Shm struct { // mu protects all fields below. mu sync.Mutex `state:"nosave"` - // key is the public identifier for this segment. - key Key - - // perms is the access permissions for the segment. - perms fs.FilePermissions + obj *ipc.Object - // owner of this segment. - owner fs.FileOwner // attachTime is updated on every successful shmat. attachTime ktime.Time // detachTime is updated on every successful shmdt. @@ -426,17 +372,44 @@ type Shm struct { pendingDestruction bool } +// ID returns object's ID. +func (s *Shm) ID() ipc.ID { + return s.obj.ID +} + +// Object implements ipc.Mechanism.Object. +func (s *Shm) Object() *ipc.Object { + return s.obj +} + +// Destroy implements ipc.Mechanism.Destroy. No work is performed on shm.Destroy +// because a different removal mechanism is used in shm. See Shm.MarkDestroyed. +func (s *Shm) Destroy() { +} + +// Lock implements ipc.Mechanism.Lock. +func (s *Shm) Lock() { + s.mu.Lock() +} + +// Unlock implements ipc.mechanism.Unlock. +// +// +checklocksignore +func (s *Shm) Unlock() { + s.mu.Unlock() +} + // Precondition: Caller must hold s.mu. func (s *Shm) debugLocked() string { return fmt.Sprintf("Shm{id: %d, key: %d, size: %d bytes, refs: %d, destroyed: %v}", - s.ID, s.key, s.size, s.ReadRefs(), s.pendingDestruction) + s.obj.ID, s.obj.Key, s.size, s.ReadRefs(), s.pendingDestruction) } // MappedName implements memmap.MappingIdentity.MappedName. func (s *Shm) MappedName(ctx context.Context) string { s.mu.Lock() defer s.mu.Unlock() - return fmt.Sprintf("SYSV%08d", s.key) + return fmt.Sprintf("SYSV%08d", s.obj.Key) } // DeviceID implements memmap.MappingIdentity.DeviceID. @@ -448,7 +421,7 @@ func (s *Shm) DeviceID() uint64 { func (s *Shm) InodeID() uint64 { // "shmid gets reported as "inode#" in /proc/pid/maps. proc-ps tools use // this. Changing this will break them." -- Linux, ipc/shm.c:newseg() - return uint64(s.ID) + return uint64(s.obj.ID) } // DecRef drops a reference on s. @@ -551,7 +524,8 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts Atta return memmap.MMapOpts{}, syserror.EIDRM } - if !s.checkPermissions(ctx, fs.PermMask{ + creds := auth.CredentialsFromContext(ctx) + if !s.obj.CheckPermissions(creds, fs.PermMask{ Read: true, Write: !opts.Readonly, Execute: opts.Execute, @@ -591,7 +565,8 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) { // "The caller must have read permission on the shared memory segment." // - man shmctl(2) - if !s.checkPermissions(ctx, fs.PermMask{Read: true}) { + creds := auth.CredentialsFromContext(ctx) + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { // "IPC_STAT or SHM_STAT is requested and shm_perm.mode does not allow // read access for shmid, and the calling process does not have the // CAP_IPC_OWNER capability in the user namespace that governs its IPC @@ -603,7 +578,6 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) { if s.pendingDestruction { mode |= linux.SHM_DEST } - creds := auth.CredentialsFromContext(ctx) // Use the reference count as a rudimentary count of the number of // attaches. We exclude: @@ -620,12 +594,12 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) { ds := &linux.ShmidDS{ ShmPerm: linux.IPCPerm{ - Key: uint32(s.key), - UID: uint32(creds.UserNamespace.MapFromKUID(s.owner.UID)), - GID: uint32(creds.UserNamespace.MapFromKGID(s.owner.GID)), - CUID: uint32(creds.UserNamespace.MapFromKUID(s.creator.UID)), - CGID: uint32(creds.UserNamespace.MapFromKGID(s.creator.GID)), - Mode: mode | uint16(s.perms.LinuxMode()), + Key: uint32(s.obj.Key), + UID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Owner.UID)), + GID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Owner.GID)), + CUID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Creator.UID)), + CGID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Creator.GID)), + Mode: mode | uint16(s.obj.Perms.LinuxMode()), Seq: 0, // IPC sequences not supported. }, ShmSegsz: s.size, @@ -645,11 +619,11 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error { s.mu.Lock() defer s.mu.Unlock() - if !s.checkOwnership(ctx) { + creds := auth.CredentialsFromContext(ctx) + if !s.obj.CheckOwnership(creds) { return linuxerr.EPERM } - creds := auth.CredentialsFromContext(ctx) uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID)) gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID)) if !uid.Ok() || !gid.Ok() { @@ -659,10 +633,10 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error { // User may only modify the lower 9 bits of the mode. All the other bits are // always 0 for the underlying inode. mode := linux.FileMode(ds.ShmPerm.Mode & 0x1ff) - s.perms = fs.FilePermsFromMode(mode) + s.obj.Perms = fs.FilePermsFromMode(mode) - s.owner.UID = uid - s.owner.GID = gid + s.obj.Owner.UID = uid + s.obj.Owner.GID = gid s.changeTime = ktime.NowFromContext(ctx) return nil @@ -691,40 +665,3 @@ func (s *Shm) MarkDestroyed(ctx context.Context) { s.DecRef(ctx) return } - -// checkOwnership verifies whether a segment may be accessed by ctx as an -// owner. See ipc/util.c:ipcctl_pre_down_nolock() in Linux. -// -// Precondition: Caller must hold s.mu. -func (s *Shm) checkOwnership(ctx context.Context) bool { - creds := auth.CredentialsFromContext(ctx) - if s.owner.UID == creds.EffectiveKUID || s.creator.UID == creds.EffectiveKUID { - return true - } - - // Tasks with CAP_SYS_ADMIN may bypass ownership checks. Strangely, Linux - // doesn't use CAP_IPC_OWNER for this despite CAP_IPC_OWNER being documented - // for use to "override IPC ownership checks". - return creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, s.registry.userNS) -} - -// checkPermissions verifies whether a segment is accessible by ctx for access -// described by req. See ipc/util.c:ipcperms() in Linux. -// -// Precondition: Caller must hold s.mu. -func (s *Shm) checkPermissions(ctx context.Context, req fs.PermMask) bool { - creds := auth.CredentialsFromContext(ctx) - - p := s.perms.Other - if s.owner.UID == creds.EffectiveKUID { - p = s.perms.User - } else if creds.InGroup(s.owner.GID) { - p = s.perms.Group - } - if p.SupersetOf(req) { - return true - } - - // Tasks with CAP_IPC_OWNER may bypass permission checks. - return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, s.registry.userNS) -} diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index 7e1347aa6..da4b77ca2 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -26,140 +26,39 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -// SharingOptions controls what resources are shared by a new task created by -// Task.Clone, or an existing task affected by Task.Unshare. -type SharingOptions struct { - // If NewAddressSpace is true, the task should have an independent virtual - // address space. - NewAddressSpace bool - - // If NewSignalHandlers is true, the task should use an independent set of - // signal handlers. - NewSignalHandlers bool - - // If NewThreadGroup is true, the task should be the leader of its own - // thread group. TerminationSignal is the signal that the thread group - // will send to its parent when it exits. If NewThreadGroup is false, - // TerminationSignal is ignored. - NewThreadGroup bool - TerminationSignal linux.Signal - - // If NewPIDNamespace is true: - // - // - In the context of Task.Clone, the new task should be the init task - // (TID 1) in a new PID namespace. - // - // - In the context of Task.Unshare, the task should create a new PID - // namespace, and all subsequent clones of the task should be members of - // the new PID namespace. - NewPIDNamespace bool - - // If NewUserNamespace is true, the task should have an independent user - // namespace. - NewUserNamespace bool - - // If NewNetworkNamespace is true, the task should have an independent - // network namespace. - NewNetworkNamespace bool - - // If NewFiles is true, the task should use an independent file descriptor - // table. - NewFiles bool - - // If NewFSContext is true, the task should have an independent FSContext. - NewFSContext bool - - // If NewUTSNamespace is true, the task should have an independent UTS - // namespace. - NewUTSNamespace bool - - // If NewIPCNamespace is true, the task should have an independent IPC - // namespace. - NewIPCNamespace bool -} - -// CloneOptions controls the behavior of Task.Clone. -type CloneOptions struct { - // SharingOptions defines the set of resources that the new task will share - // with its parent. - SharingOptions - - // Stack is the initial stack pointer of the new task. If Stack is 0, the - // new task will start with the same stack pointer as its parent. - Stack hostarch.Addr - - // If SetTLS is true, set the new task's TLS (thread-local storage) - // descriptor to TLS. If SetTLS is false, TLS is ignored. - SetTLS bool - TLS hostarch.Addr - - // If ChildClearTID is true, when the child exits, 0 is written to the - // address ChildTID in the child's memory, and if the write is successful a - // futex wake on the same address is performed. - // - // If ChildSetTID is true, the child's thread ID (in the child's PID - // namespace) is written to address ChildTID in the child's memory. (As in - // Linux, failed writes are silently ignored.) - ChildClearTID bool - ChildSetTID bool - ChildTID hostarch.Addr - - // If ParentSetTID is true, the child's thread ID (in the parent's PID - // namespace) is written to address ParentTID in the parent's memory. (As - // in Linux, failed writes are silently ignored.) - // - // Older versions of the clone(2) man page state that CLONE_PARENT_SETTID - // causes the child's thread ID to be written to ptid in both the parent - // and child's memory, but this is a documentation error fixed by - // 87ab04792ced ("clone.2: Fix description of CLONE_PARENT_SETTID"). - ParentSetTID bool - ParentTID hostarch.Addr - - // If Vfork is true, place the parent in vforkStop until the cloned task - // releases its TaskImage. - Vfork bool - - // If Untraced is true, do not report PTRACE_EVENT_CLONE/FORK/VFORK for - // this clone(), and do not ptrace-attach the caller's tracer to the new - // task. (PTRACE_EVENT_VFORK_DONE will still be reported if appropriate). - Untraced bool - - // If InheritTracer is true, ptrace-attach the caller's tracer to the new - // task, even if no PTRACE_EVENT_CLONE/FORK/VFORK event would be reported - // for it. If both Untraced and InheritTracer are true, no event will be - // reported, but tracer inheritance will still occur. - InheritTracer bool -} - // Clone implements the clone(2) syscall and returns the thread ID of the new // task in t's PID namespace. Clone may return both a non-zero thread ID and a // non-nil error. // // Preconditions: The caller must be running Task.doSyscallInvoke on the task // goroutine. -func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { +func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) { // Since signal actions may refer to application signal handlers by virtual // address, any set of signal handlers must refer to the same address // space. - if !opts.NewSignalHandlers && opts.NewAddressSpace { + if args.Flags&(linux.CLONE_SIGHAND|linux.CLONE_VM) == linux.CLONE_SIGHAND { return 0, nil, linuxerr.EINVAL } // In order for the behavior of thread-group-directed signals to be sane, // all tasks in a thread group must share signal handlers. - if !opts.NewThreadGroup && opts.NewSignalHandlers { + if args.Flags&(linux.CLONE_THREAD|linux.CLONE_SIGHAND) == linux.CLONE_THREAD { return 0, nil, linuxerr.EINVAL } // All tasks in a thread group must be in the same PID namespace. - if !opts.NewThreadGroup && (opts.NewPIDNamespace || t.childPIDNamespace != nil) { + if (args.Flags&linux.CLONE_THREAD != 0) && (args.Flags&linux.CLONE_NEWPID != 0 || t.childPIDNamespace != nil) { return 0, nil, linuxerr.EINVAL } // The two different ways of specifying a new PID namespace are // incompatible. - if opts.NewPIDNamespace && t.childPIDNamespace != nil { + if args.Flags&linux.CLONE_NEWPID != 0 && t.childPIDNamespace != nil { return 0, nil, linuxerr.EINVAL } // Thread groups and FS contexts cannot span user namespaces. - if opts.NewUserNamespace && (!opts.NewThreadGroup || !opts.NewFSContext) { + if args.Flags&linux.CLONE_NEWUSER != 0 && args.Flags&(linux.CLONE_THREAD|linux.CLONE_FS) != 0 { + return 0, nil, linuxerr.EINVAL + } + // args.ExitSignal must be a valid signal. + if args.ExitSignal != 0 && !linux.Signal(args.ExitSignal).IsValid() { return 0, nil, linuxerr.EINVAL } @@ -174,7 +73,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { // user_namespaces(7) creds := t.Credentials() userns := creds.UserNamespace - if opts.NewUserNamespace { + if args.Flags&linux.CLONE_NEWUSER != 0 { var err error // "EPERM (since Linux 3.9): CLONE_NEWUSER was specified in flags and // the caller is in a chroot environment (i.e., the caller's root @@ -189,21 +88,19 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { return 0, nil, err } } - if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) { + if args.Flags&(linux.CLONE_NEWPID|linux.CLONE_NEWNET|linux.CLONE_NEWUTS|linux.CLONE_NEWIPC) != 0 && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) { return 0, nil, linuxerr.EPERM } utsns := t.UTSNamespace() - if opts.NewUTSNamespace { + if args.Flags&linux.CLONE_NEWUTS != 0 { // Note that this must happen after NewUserNamespace so we get // the new userns if there is one. utsns = t.UTSNamespace().Clone(userns) } ipcns := t.IPCNamespace() - if opts.NewIPCNamespace { - // Note that "If CLONE_NEWIPC is set, then create the process in a new IPC - // namespace" + if args.Flags&linux.CLONE_NEWIPC != 0 { ipcns = NewIPCNamespace(userns) } else { ipcns.IncRef() @@ -214,7 +111,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { defer cu.Clean() netns := t.NetworkNamespace() - if opts.NewNetworkNamespace { + if args.Flags&linux.CLONE_NEWNET != 0 { netns = inet.NewNamespace(netns) } @@ -227,7 +124,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { }) } - image, err := t.image.Fork(t, t.k, !opts.NewAddressSpace) + image, err := t.image.Fork(t, t.k, args.Flags&linux.CLONE_VM != 0) if err != nil { return 0, nil, err } @@ -236,17 +133,17 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { }) // clone() returns 0 in the child. image.Arch.SetReturn(0) - if opts.Stack != 0 { - image.Arch.SetStack(uintptr(opts.Stack)) + if args.Stack != 0 { + image.Arch.SetStack(uintptr(args.Stack)) } - if opts.SetTLS { - if !image.Arch.SetTLS(uintptr(opts.TLS)) { + if args.Flags&linux.CLONE_SETTLS != 0 { + if !image.Arch.SetTLS(uintptr(args.TLS)) { return 0, nil, linuxerr.EPERM } } var fsContext *FSContext - if opts.NewFSContext { + if args.Flags&linux.CLONE_FS == 0 { fsContext = t.fsContext.Fork() } else { fsContext = t.fsContext @@ -254,7 +151,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } var fdTable *FDTable - if opts.NewFiles { + if args.Flags&linux.CLONE_FILES == 0 { fdTable = t.fdTable.Fork(t) } else { fdTable = t.fdTable @@ -264,22 +161,22 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { pidns := t.tg.pidns if t.childPIDNamespace != nil { pidns = t.childPIDNamespace - } else if opts.NewPIDNamespace { + } else if args.Flags&linux.CLONE_NEWPID != 0 { pidns = pidns.NewChild(userns) } tg := t.tg rseqAddr := hostarch.Addr(0) rseqSignature := uint32(0) - if opts.NewThreadGroup { + if args.Flags&linux.CLONE_THREAD == 0 { if tg.mounts != nil { tg.mounts.IncRef() } sh := t.tg.signalHandlers - if opts.NewSignalHandlers { + if args.Flags&linux.CLONE_SIGHAND == 0 { sh = sh.Fork() } - tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, opts.TerminationSignal, tg.limits.GetCopy()) + tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, linux.Signal(args.ExitSignal), tg.limits.GetCopy()) tg.oomScoreAdj = atomic.LoadInt32(&t.tg.oomScoreAdj) rseqAddr = t.rseqAddr rseqSignature = t.rseqSignature @@ -304,7 +201,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { RSeqSignature: rseqSignature, ContainerID: t.ContainerID(), } - if opts.NewThreadGroup { + if args.Flags&linux.CLONE_THREAD == 0 { cfg.Parent = t } else { cfg.InheritParent = t @@ -322,7 +219,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { // // However kernel/fork.c:copy_process() adds a limitation to this: // "sigaltstack should be cleared when sharing the same VM". - if opts.NewAddressSpace || opts.Vfork { + if args.Flags&linux.CLONE_VM == 0 || args.Flags&linux.CLONE_VFORK != 0 { nt.SetSignalStack(t.SignalStack()) } @@ -347,35 +244,35 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { copiedFilters := append([]bpf.Program(nil), f.([]bpf.Program)...) nt.syscallFilters.Store(copiedFilters) } - if opts.Vfork { + if args.Flags&linux.CLONE_VFORK != 0 { nt.vforkParent = t } - if opts.ChildClearTID { - nt.SetClearTID(opts.ChildTID) + if args.Flags&linux.CLONE_CHILD_CLEARTID != 0 { + nt.SetClearTID(hostarch.Addr(args.ChildTID)) } - if opts.ChildSetTID { + if args.Flags&linux.CLONE_CHILD_SETTID != 0 { ctid := nt.ThreadID() - ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{AddressSpaceActive: false}), opts.ChildTID) + ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{AddressSpaceActive: false}), hostarch.Addr(args.ChildTID)) } ntid := t.tg.pidns.IDOfTask(nt) - if opts.ParentSetTID { - ntid.CopyOut(t, opts.ParentTID) + if args.Flags&linux.CLONE_PARENT_SETTID != 0 { + ntid.CopyOut(t, hostarch.Addr(args.ParentTID)) } kind := ptraceCloneKindClone - if opts.Vfork { + if args.Flags&linux.CLONE_VFORK != 0 { kind = ptraceCloneKindVfork - } else if opts.TerminationSignal == linux.SIGCHLD { + } else if linux.Signal(args.ExitSignal) == linux.SIGCHLD { kind = ptraceCloneKindFork } - if t.ptraceClone(kind, nt, opts) { - if opts.Vfork { + if t.ptraceClone(kind, nt, args) { + if args.Flags&linux.CLONE_VFORK != 0 { return ntid, &SyscallControl{next: &runSyscallAfterPtraceEventClone{vforkChild: nt, vforkChildTID: ntid}}, nil } return ntid, &SyscallControl{next: &runSyscallAfterPtraceEventClone{}}, nil } - if opts.Vfork { + if args.Flags&linux.CLONE_VFORK != 0 { t.maybeBeginVforkStop(nt) return ntid, &SyscallControl{next: &runSyscallAfterVforkStop{childTID: ntid}}, nil } @@ -446,27 +343,35 @@ func (r *runSyscallAfterVforkStop) execute(t *Task) taskRunState { } // Unshare changes the set of resources t shares with other tasks, as specified -// by opts. +// by flags. // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) Unshare(opts *SharingOptions) error { - // In Linux unshare(2), NewThreadGroup implies NewSignalHandlers and - // NewSignalHandlers implies NewAddressSpace. All three flags are no-ops if - // t is the only task using its MM, which due to clone(2)'s rules imply - // that it is also the only task using its signal handlers / in its thread - // group, and cause EINVAL to be returned otherwise. +func (t *Task) Unshare(flags int32) error { + // "CLONE_THREAD, CLONE_SIGHAND, and CLONE_VM can be specified in flags if + // the caller is single threaded (i.e., it is not sharing its address space + // with another process or thread). In this case, these flags have no + // effect. (Note also that specifying CLONE_THREAD automatically implies + // CLONE_VM, and specifying CLONE_VM automatically implies CLONE_SIGHAND.) + // If the process is multithreaded, then the use of these flags results in + // an error." - unshare(2). This is incorrect (cf. + // kernel/fork.c:ksys_unshare()): + // + // - CLONE_THREAD does not imply CLONE_VM. + // + // - CLONE_SIGHAND implies CLONE_THREAD. + // + // - Only CLONE_VM requires that the caller is not sharing its address + // space with another thread. CLONE_SIGHAND requires that the caller is not + // sharing its signal handlers, and CLONE_THREAD requires that the caller + // is the only thread in its thread group. // // Since we don't count the number of tasks using each address space or set - // of signal handlers, we reject NewSignalHandlers and NewAddressSpace - // altogether, and interpret NewThreadGroup as requiring that t be the only - // member of its thread group. This seems to be logically coherent, in the - // sense that clone(2) allows a task to share signal handlers and address - // spaces with tasks in other thread groups. - if opts.NewAddressSpace || opts.NewSignalHandlers { + // of signal handlers, we reject CLONE_VM and CLONE_SIGHAND altogether. + if flags&(linux.CLONE_VM|linux.CLONE_SIGHAND) != 0 { return linuxerr.EINVAL } creds := t.Credentials() - if opts.NewThreadGroup { + if flags&linux.CLONE_THREAD != 0 { t.tg.signalHandlers.mu.Lock() if t.tg.tasksCount != 1 { t.tg.signalHandlers.mu.Unlock() @@ -476,7 +381,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { // This isn't racy because we're the only living task, and therefore // the only task capable of creating new ones, in our thread group. } - if opts.NewUserNamespace { + if flags&linux.CLONE_NEWUSER != 0 { if t.IsChrooted() { return linuxerr.EPERM } @@ -492,7 +397,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { creds = t.Credentials() } haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN) - if opts.NewPIDNamespace { + if flags&linux.CLONE_NEWPID != 0 { if !haveCapSysAdmin { return linuxerr.EPERM } @@ -500,14 +405,14 @@ func (t *Task) Unshare(opts *SharingOptions) error { } t.mu.Lock() // Can't defer unlock: DecRefs must occur without holding t.mu. - if opts.NewNetworkNamespace { + if flags&linux.CLONE_NEWNET != 0 { if !haveCapSysAdmin { t.mu.Unlock() return linuxerr.EPERM } t.netns = inet.NewNamespace(t.netns) } - if opts.NewUTSNamespace { + if flags&linux.CLONE_NEWUTS != 0 { if !haveCapSysAdmin { t.mu.Unlock() return linuxerr.EPERM @@ -516,7 +421,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { // new user namespace is used if there is one. t.utsns = t.utsns.Clone(creds.UserNamespace) } - if opts.NewIPCNamespace { + if flags&linux.CLONE_NEWIPC != 0 { if !haveCapSysAdmin { t.mu.Unlock() return linuxerr.EPERM @@ -527,12 +432,12 @@ func (t *Task) Unshare(opts *SharingOptions) error { t.ipcns = NewIPCNamespace(creds.UserNamespace) } var oldFDTable *FDTable - if opts.NewFiles { + if flags&linux.CLONE_FILES != 0 { oldFDTable = t.fdTable t.fdTable = oldFDTable.Fork(t) } var oldFSContext *FSContext - if opts.NewFSContext { + if flags&linux.CLONE_FS != 0 { oldFSContext = t.fsContext t.fsContext = oldFSContext.Fork() } diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go index 28a613a54..8fd8287b3 100644 --- a/pkg/sentry/platform/kvm/bluepill_fault.go +++ b/pkg/sentry/platform/kvm/bluepill_fault.go @@ -101,7 +101,7 @@ func handleBluepillFault(m *machine, physical uintptr, phyRegions []physicalRegi // Store the physical address in the slot. This is used to // avoid calls to handleBluepillFault in the future (see // machine.mapPhysical). - atomic.StoreUintptr(&m.usedSlots[slot], physical) + atomic.StoreUintptr(&m.usedSlots[slot], physicalStart) // Successfully added region; we can increment nextSlot and // allow another set to proceed here. atomic.StoreUint32(&m.nextSlot, slot+1) diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go index f63ab6aba..0f0c1e73b 100644 --- a/pkg/sentry/platform/kvm/bluepill_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.12 && !go1.18 -// +build go1.12,!go1.18 +//go:build go1.12 +// +build go1.12 -// Check go:linkname function signatures when updating Go version. +// //go:linkname directives type-checked by checklinkname. Any other +// non-linkname assumptions outside the Go 1 compatibility guarantee should +// have an accompanied vet check or version guard build tag. package kvm @@ -28,7 +30,7 @@ import ( ) //go:linkname throw runtime.throw -func throw(string) +func throw(s string) // vCPUPtr returns a CPU for the given address. // diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 1b5d5f66e..e7092a756 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -70,7 +70,7 @@ type machine struct { // tscControl checks whether cpu supports TSC scaling tscControl bool - // usedSlots is the set of used physical addresses (sorted). + // usedSlots is the set of used physical addresses (not sorted). usedSlots []uintptr // nextID is the next vCPU ID. @@ -296,13 +296,20 @@ func newMachine(vm int) (*machine, error) { return m, nil } -// hasSlot returns true iff the given address is mapped. +// hasSlot returns true if the given address is mapped. // // This must be done via a linear scan. // //go:nosplit func (m *machine) hasSlot(physical uintptr) bool { - for i := 0; i < len(m.usedSlots); i++ { + slotLen := int(atomic.LoadUint32(&m.nextSlot)) + // When slots are being updated, nextSlot is ^uint32(0). As this situation + // is less likely happen, we just set the slotLen to m.maxSlots, and scan + // the whole usedSlots array. + if slotLen == int(^uint32(0)) { + slotLen = m.maxSlots + } + for i := 0; i < slotLen; i++ { if p := atomic.LoadUintptr(&m.usedSlots[i]); p == physical { return true } diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go index 35660e827..cc3a1253b 100644 --- a/pkg/sentry/platform/kvm/machine_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_unsafe.go @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.12 && !go1.18 -// +build go1.12,!go1.18 +//go:build go1.12 +// +build go1.12 -// Check go:linkname function signatures when updating Go version. +// //go:linkname directives type-checked by checklinkname. Any other +// non-linkname assumptions outside the Go 1 compatibility guarantee should +// have an accompanied vet check or version guard build tag. package kvm diff --git a/pkg/sentry/platform/ptrace/subprocess_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_unsafe.go index ffd4665f4..304722200 100644 --- a/pkg/sentry/platform/ptrace/subprocess_unsafe.go +++ b/pkg/sentry/platform/ptrace/subprocess_unsafe.go @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.12 && !go1.18 -// +build go1.12,!go1.18 +//go:build go1.12 +// +build go1.12 -// Check go:linkname function signatures when updating Go version. +// //go:linkname directives type-checked by checklinkname. Any other +// non-linkname assumptions outside the Go 1 compatibility guarantee should +// have an accompanied vet check or version guard build tag. package ptrace diff --git a/pkg/sentry/socket/hostinet/socket_unsafe.go b/pkg/sentry/socket/hostinet/socket_unsafe.go index ccf4f534d..587f479eb 100644 --- a/pkg/sentry/socket/hostinet/socket_unsafe.go +++ b/pkg/sentry/socket/hostinet/socket_unsafe.go @@ -67,7 +67,7 @@ func ioctl(ctx context.Context, fd int, io usermem.IO, args arch.SyscallArgument AddressSpaceActive: true, }) return 0, err - case unix.SIOCGIFFLAGS: + case unix.SIOCGIFFLAGS, unix.SIOCGIFCONF: cc := &usermem.IOCopyContext{ Ctx: ctx, IO: io, diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index 7a4e78a5f..61111ac6c 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -309,6 +309,11 @@ func (s *Stack) Interfaces() map[int32]inet.Interface { return interfaces } +// RemoveInterface implements inet.Stack.RemoveInterface. +func (*Stack) RemoveInterface(int32) error { + return linuxerr.EACCES +} + // InterfaceAddrs implements inet.Stack.InterfaceAddrs. func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { addrs := make(map[int32][]inet.InterfaceAddr) @@ -319,12 +324,12 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { } // AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. -func (s *Stack) AddInterfaceAddr(int32, inet.InterfaceAddr) error { +func (*Stack) AddInterfaceAddr(int32, inet.InterfaceAddr) error { return linuxerr.EACCES } // RemoveInterfaceAddr implements inet.Stack.RemoveInterfaceAddr. -func (s *Stack) RemoveInterfaceAddr(int32, inet.InterfaceAddr) error { +func (*Stack) RemoveInterfaceAddr(int32, inet.InterfaceAddr) error { return linuxerr.EACCES } @@ -339,7 +344,7 @@ func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { } // SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize. -func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error { +func (*Stack) SetTCPReceiveBufferSize(inet.TCPBufferSize) error { return linuxerr.EACCES } @@ -349,7 +354,7 @@ func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) { } // SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize. -func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error { +func (*Stack) SetTCPSendBufferSize(inet.TCPBufferSize) error { return linuxerr.EACCES } @@ -359,7 +364,7 @@ func (s *Stack) TCPSACKEnabled() (bool, error) { } // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled. -func (s *Stack) SetTCPSACKEnabled(bool) error { +func (*Stack) SetTCPSACKEnabled(bool) error { return linuxerr.EACCES } @@ -369,7 +374,7 @@ func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) { } // SetTCPRecovery implements inet.Stack.SetTCPRecovery. -func (s *Stack) SetTCPRecovery(inet.TCPLossRecovery) error { +func (*Stack) SetTCPRecovery(inet.TCPLossRecovery) error { return linuxerr.EACCES } @@ -470,19 +475,19 @@ func (s *Stack) RouteTable() []inet.Route { } // Resume implements inet.Stack.Resume. -func (s *Stack) Resume() {} +func (*Stack) Resume() {} // RegisteredEndpoints implements inet.Stack.RegisteredEndpoints. -func (s *Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil } +func (*Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil } // CleanupEndpoints implements inet.Stack.CleanupEndpoints. -func (s *Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil } +func (*Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil } // RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints. -func (s *Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {} +func (*Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {} // SetForwarding implements inet.Stack.SetForwarding. -func (s *Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error { +func (*Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error { return linuxerr.EACCES } @@ -493,6 +498,6 @@ func (*Stack) PortRange() (uint16, uint16) { } // SetPortRange implements inet.Stack.SetPortRange. -func (*Stack) SetPortRange(start uint16, end uint16) error { +func (*Stack) SetPortRange(uint16, uint16) error { return linuxerr.EACCES } diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go index 86f6419dc..d526acb73 100644 --- a/pkg/sentry/socket/netlink/route/protocol.go +++ b/pkg/sentry/socket/netlink/route/protocol.go @@ -161,6 +161,47 @@ func (p *Protocol) getLink(ctx context.Context, msg *netlink.Message, ms *netlin return nil } +// delLink handles RTM_DELLINK requests. +func (p *Protocol) delLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { + stack := inet.StackFromContext(ctx) + if stack == nil { + // No network stack. + return syserr.ErrProtocolNotSupported + } + + var ifinfomsg linux.InterfaceInfoMessage + attrs, ok := msg.GetData(&ifinfomsg) + if !ok { + return syserr.ErrInvalidArgument + } + if ifinfomsg.Index == 0 { + // The index is unspecified, search by the interface name. + ahdr, value, _, ok := attrs.ParseFirst() + if !ok { + return syserr.ErrInvalidArgument + } + switch ahdr.Type { + case linux.IFLA_IFNAME: + if len(value) < 1 { + return syserr.ErrInvalidArgument + } + ifname := string(value[:len(value)-1]) + for idx, ifa := range stack.Interfaces() { + if ifname == ifa.Name { + ifinfomsg.Index = idx + break + } + } + default: + return syserr.ErrInvalidArgument + } + if ifinfomsg.Index == 0 { + return syserr.ErrNoDevice + } + } + return syserr.FromError(stack.RemoveInterface(ifinfomsg.Index)) +} + // addNewLinkMessage appends RTM_NEWLINK message for the given interface into // the message set. func addNewLinkMessage(ms *netlink.MessageSet, idx int32, i inet.Interface) { @@ -537,6 +578,8 @@ func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms switch hdr.Type { case linux.RTM_GETLINK: return p.getLink(ctx, msg, ms) + case linux.RTM_DELLINK: + return p.delLink(ctx, msg, ms) case linux.RTM_GETROUTE: return p.dumpRoutes(ctx, msg, ms) case linux.RTM_NEWADDR: diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 0fd0ad32c..208ab9909 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -71,6 +71,12 @@ func (s *Stack) Interfaces() map[int32]inet.Interface { return is } +// RemoveInterface implements inet.Stack.RemoveInterface. +func (s *Stack) RemoveInterface(idx int32) error { + nic := tcpip.NICID(idx) + return syserr.TranslateNetstackError(s.Stack.RemoveNIC(nic)).ToError() +} + // InterfaceAddrs implements inet.Stack.InterfaceAddrs. func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { nicAddrs := make(map[int32][]inet.InterfaceAddr) diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index a2f612f45..ccccce6a9 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -25,6 +25,7 @@ go_library( "sys_mempolicy.go", "sys_mmap.go", "sys_mount.go", + "sys_msgqueue.go", "sys_pipe.go", "sys_poll.go", "sys_prctl.go", @@ -84,6 +85,7 @@ go_library( "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/eventfd", "//pkg/sentry/kernel/fasync", + "//pkg/sentry/kernel/ipc", "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/sched", "//pkg/sentry/kernel/shm", diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index f1cb5a2c8..6f44d767b 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -121,10 +121,10 @@ var AMD64 = &kernel.SyscallTable{ 65: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil), 66: syscalls.Supported("semctl", Semctl), 67: syscalls.Supported("shmdt", Shmdt), - 68: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 69: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 70: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 71: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 68: syscalls.Supported("msgget", Msgget), + 69: syscalls.ErrorWithEvent("msgsnd", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 70: syscalls.ErrorWithEvent("msgrcv", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 71: syscalls.PartiallySupported("msgctl", Msgctl, "Only supports IPC_RMID option.", []string{"gvisor.dev/issue/135"}), 72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil), 73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil), 74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil), @@ -616,10 +616,10 @@ var ARM64 = &kernel.SyscallTable{ 183: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 184: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 185: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 186: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 187: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 188: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 189: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 186: syscalls.Supported("msgget", Msgget), + 187: syscalls.PartiallySupported("msgctl", Msgctl, "Only supports IPC_RMID option.", []string{"gvisor.dev/issue/135"}), + 188: syscalls.ErrorWithEvent("msgrcv", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 189: syscalls.ErrorWithEvent("msgsnd", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) 190: syscalls.Supported("semget", Semget), 191: syscalls.Supported("semctl", Semctl), 192: syscalls.Supported("semtimedop", Semtimedop), diff --git a/pkg/sentry/syscalls/linux/sys_msgqueue.go b/pkg/sentry/syscalls/linux/sys_msgqueue.go new file mode 100644 index 000000000..3476e218d --- /dev/null +++ b/pkg/sentry/syscalls/linux/sys_msgqueue.go @@ -0,0 +1,57 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" +) + +// Msgget implements msgget(2). +func Msgget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + key := ipc.Key(args[0].Int()) + flag := args[1].Int() + + private := key == linux.IPC_PRIVATE + create := flag&linux.IPC_CREAT == linux.IPC_CREAT + exclusive := flag&linux.IPC_EXCL == linux.IPC_EXCL + mode := linux.FileMode(flag & 0777) + + r := t.IPCNamespace().MsgqueueRegistry() + queue, err := r.FindOrCreate(t, key, mode, private, create, exclusive) + if err != nil { + return 0, nil, err + } + return uintptr(queue.ID()), nil, nil +} + +// Msgctl implements msgctl(2). +func Msgctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + id := ipc.ID(args[0].Int()) + cmd := args[1].Int() + + creds := auth.CredentialsFromContext(t) + + switch cmd { + case linux.IPC_RMID: + return 0, nil, t.IPCNamespace().MsgqueueRegistry().Remove(id, creds) + default: + return 0, nil, linuxerr.EINVAL + } +} diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go index 30919eb2f..f61cc466c 100644 --- a/pkg/sentry/syscalls/linux/sys_sem.go +++ b/pkg/sentry/syscalls/linux/sys_sem.go @@ -26,13 +26,14 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" ) const opsMax = 500 // SEMOPM // Semget handles: semget(key_t key, int nsems, int semflg) func Semget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - key := args[0].Int() + key := ipc.Key(args[0].Int()) nsems := args[1].Int() flag := args[2].Int() @@ -46,7 +47,7 @@ func Semget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, err } - return uintptr(set.ID), nil, nil + return uintptr(set.ID()), nil, nil } // Semtimedop handles: semop(int semid, struct sembuf *sops, size_t nsops, const struct timespec *timeout) @@ -56,7 +57,7 @@ func Semtimedop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return Semop(t, args) } - id := args[0].Int() + id := ipc.ID(args[0].Int()) sembufAddr := args[1].Pointer() nsops := args[2].SizeT() timespecAddr := args[3].Pointer() @@ -91,7 +92,7 @@ func Semtimedop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Semop handles: semop(int semid, struct sembuf *sops, size_t nsops) func Semop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - id := args[0].Int() + id := ipc.ID(args[0].Int()) sembufAddr := args[1].Pointer() nsops := args[2].SizeT() @@ -109,7 +110,7 @@ func Semop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, semTimedOp(t, id, ops, false, time.Second) } -func semTimedOp(t *kernel.Task, id int32, ops []linux.Sembuf, haveTimeout bool, timeout time.Duration) error { +func semTimedOp(t *kernel.Task, id ipc.ID, ops []linux.Sembuf, haveTimeout bool, timeout time.Duration) error { set := t.IPCNamespace().SemaphoreRegistry().FindByID(id) if set == nil { @@ -131,7 +132,7 @@ func semTimedOp(t *kernel.Task, id int32, ops []linux.Sembuf, haveTimeout bool, // Semctl handles: semctl(int semid, int semnum, int cmd, ...) func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - id := args[0].Int() + id := ipc.ID(args[0].Int()) num := args[1].Int() cmd := args[2].Int() @@ -210,7 +211,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case linux.SEM_STAT: arg := args[3].Pointer() // id is an index in SEM_STAT. - semid, ds, err := semStat(t, id) + semid, ds, err := semStat(t, int32(id)) if err != nil { return 0, nil, err } @@ -222,7 +223,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case linux.SEM_STAT_ANY: arg := args[3].Pointer() // id is an index in SEM_STAT. - semid, ds, err := semStatAny(t, id) + semid, ds, err := semStatAny(t, int32(id)) if err != nil { return 0, nil, err } @@ -236,13 +237,13 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } } -func remove(t *kernel.Task, id int32) error { +func remove(t *kernel.Task, id ipc.ID) error { r := t.IPCNamespace().SemaphoreRegistry() creds := auth.CredentialsFromContext(t) - return r.RemoveID(id, creds) + return r.Remove(id, creds) } -func ipcSet(t *kernel.Task, id int32, uid auth.UID, gid auth.GID, perms fs.FilePermissions) error { +func ipcSet(t *kernel.Task, id ipc.ID, uid auth.UID, gid auth.GID, perms fs.FilePermissions) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -262,7 +263,7 @@ func ipcSet(t *kernel.Task, id int32, uid auth.UID, gid auth.GID, perms fs.FileP return set.Change(t, creds, owner, perms) } -func ipcStat(t *kernel.Task, id int32) (*linux.SemidDS, error) { +func ipcStat(t *kernel.Task, id ipc.ID) (*linux.SemidDS, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -283,7 +284,7 @@ func semStat(t *kernel.Task, index int32) (int32, *linux.SemidDS, error) { if err != nil { return 0, ds, err } - return set.ID, ds, nil + return int32(set.ID()), ds, nil } func semStatAny(t *kernel.Task, index int32) (int32, *linux.SemidDS, error) { @@ -296,10 +297,10 @@ func semStatAny(t *kernel.Task, index int32) (int32, *linux.SemidDS, error) { if err != nil { return 0, ds, err } - return set.ID, ds, nil + return int32(set.ID()), ds, nil } -func setVal(t *kernel.Task, id int32, num int32, val int16) error { +func setVal(t *kernel.Task, id ipc.ID, num int32, val int16) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -310,7 +311,7 @@ func setVal(t *kernel.Task, id int32, num int32, val int16) error { return set.SetVal(t, num, val, creds, int32(pid)) } -func setValAll(t *kernel.Task, id int32, array hostarch.Addr) error { +func setValAll(t *kernel.Task, id ipc.ID, array hostarch.Addr) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -325,7 +326,7 @@ func setValAll(t *kernel.Task, id int32, array hostarch.Addr) error { return set.SetValAll(t, vals, creds, int32(pid)) } -func getVal(t *kernel.Task, id int32, num int32) (int16, error) { +func getVal(t *kernel.Task, id ipc.ID, num int32) (int16, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -335,7 +336,7 @@ func getVal(t *kernel.Task, id int32, num int32) (int16, error) { return set.GetVal(num, creds) } -func getValAll(t *kernel.Task, id int32, array hostarch.Addr) error { +func getValAll(t *kernel.Task, id ipc.ID, array hostarch.Addr) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -350,7 +351,7 @@ func getValAll(t *kernel.Task, id int32, array hostarch.Addr) error { return err } -func getPID(t *kernel.Task, id int32, num int32) (int32, error) { +func getPID(t *kernel.Task, id ipc.ID, num int32) (int32, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -369,7 +370,7 @@ func getPID(t *kernel.Task, id int32, num int32) (int32, error) { return int32(tg.ID()), nil } -func getZCnt(t *kernel.Task, id int32, num int32) (uint16, error) { +func getZCnt(t *kernel.Task, id ipc.ID, num int32) (uint16, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -379,7 +380,7 @@ func getZCnt(t *kernel.Task, id int32, num int32) (uint16, error) { return set.CountZeroWaiters(num, creds) } -func getNCnt(t *kernel.Task, id int32, num int32) (uint16, error) { +func getNCnt(t *kernel.Task, id ipc.ID, num int32) (uint16, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { diff --git a/pkg/sentry/syscalls/linux/sys_shm.go b/pkg/sentry/syscalls/linux/sys_shm.go index 3e3a952ce..840540506 100644 --- a/pkg/sentry/syscalls/linux/sys_shm.go +++ b/pkg/sentry/syscalls/linux/sys_shm.go @@ -19,12 +19,13 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" ) // Shmget implements shmget(2). func Shmget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - key := shm.Key(args[0].Int()) + key := ipc.Key(args[0].Int()) size := uint64(args[1].SizeT()) flag := args[2].Int() @@ -40,13 +41,13 @@ func Shmget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } defer segment.DecRef(t) - return uintptr(segment.ID), nil, nil + return uintptr(segment.ID()), nil, nil } // findSegment retrives a shm segment by the given id. // // findSegment returns a reference on Shm. -func findSegment(t *kernel.Task, id shm.ID) (*shm.Shm, error) { +func findSegment(t *kernel.Task, id ipc.ID) (*shm.Shm, error) { r := t.IPCNamespace().ShmRegistry() segment := r.FindByID(id) if segment == nil { @@ -58,7 +59,7 @@ func findSegment(t *kernel.Task, id shm.ID) (*shm.Shm, error) { // Shmat implements shmat(2). func Shmat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - id := shm.ID(args[0].Int()) + id := ipc.ID(args[0].Int()) addr := args[1].Pointer() flag := args[2].Int() @@ -89,7 +90,7 @@ func Shmdt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Shmctl implements shmctl(2). func Shmctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - id := shm.ID(args[0].Int()) + id := ipc.ID(args[0].Int()) cmd := args[1].Int() buf := args[2].Pointer() diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 46145955e..981cdd985 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -31,11 +31,6 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -const ( - // exitSignalMask is the signal mask to be sent at exit. Same as CSIGNAL in linux. - exitSignalMask = 0xff -) - var ( // ExecMaxTotalSize is the maximum length of all argv and envv entries. // @@ -201,33 +196,16 @@ func ExitGroup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // clone is used by Clone, Fork, and VFork. func clone(t *kernel.Task, flags int, stack hostarch.Addr, parentTID hostarch.Addr, childTID hostarch.Addr, tls hostarch.Addr) (uintptr, *kernel.SyscallControl, error) { - opts := kernel.CloneOptions{ - SharingOptions: kernel.SharingOptions{ - NewAddressSpace: flags&linux.CLONE_VM == 0, - NewSignalHandlers: flags&linux.CLONE_SIGHAND == 0, - NewThreadGroup: flags&linux.CLONE_THREAD == 0, - TerminationSignal: linux.Signal(flags & exitSignalMask), - NewPIDNamespace: flags&linux.CLONE_NEWPID == linux.CLONE_NEWPID, - NewUserNamespace: flags&linux.CLONE_NEWUSER == linux.CLONE_NEWUSER, - NewNetworkNamespace: flags&linux.CLONE_NEWNET == linux.CLONE_NEWNET, - NewFiles: flags&linux.CLONE_FILES == 0, - NewFSContext: flags&linux.CLONE_FS == 0, - NewUTSNamespace: flags&linux.CLONE_NEWUTS == linux.CLONE_NEWUTS, - NewIPCNamespace: flags&linux.CLONE_NEWIPC == linux.CLONE_NEWIPC, - }, - Stack: stack, - SetTLS: flags&linux.CLONE_SETTLS == linux.CLONE_SETTLS, - TLS: tls, - ChildClearTID: flags&linux.CLONE_CHILD_CLEARTID == linux.CLONE_CHILD_CLEARTID, - ChildSetTID: flags&linux.CLONE_CHILD_SETTID == linux.CLONE_CHILD_SETTID, - ChildTID: childTID, - ParentSetTID: flags&linux.CLONE_PARENT_SETTID == linux.CLONE_PARENT_SETTID, - ParentTID: parentTID, - Vfork: flags&linux.CLONE_VFORK == linux.CLONE_VFORK, - Untraced: flags&linux.CLONE_UNTRACED == linux.CLONE_UNTRACED, - InheritTracer: flags&linux.CLONE_PTRACE == linux.CLONE_PTRACE, - } - ntid, ctrl, err := t.Clone(&opts) + args := linux.CloneArgs{ + Flags: uint64(uint32(flags) &^ linux.CSIGNAL), + Pidfd: uint64(parentTID), + ChildTID: uint64(childTID), + ParentTID: uint64(parentTID), + ExitSignal: uint64(flags & linux.CSIGNAL), + Stack: uint64(stack), + TLS: uint64(tls), + } + ntid, ctrl, err := t.Clone(&args) return uintptr(ntid), ctrl, err } @@ -460,29 +438,16 @@ func SetTidAddress(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel // Unshare implements linux syscall unshare(2). func Unshare(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { flags := args[0].Int() - opts := kernel.SharingOptions{ - NewAddressSpace: flags&linux.CLONE_VM == linux.CLONE_VM, - NewSignalHandlers: flags&linux.CLONE_SIGHAND == linux.CLONE_SIGHAND, - NewThreadGroup: flags&linux.CLONE_THREAD == linux.CLONE_THREAD, - NewPIDNamespace: flags&linux.CLONE_NEWPID == linux.CLONE_NEWPID, - NewUserNamespace: flags&linux.CLONE_NEWUSER == linux.CLONE_NEWUSER, - NewNetworkNamespace: flags&linux.CLONE_NEWNET == linux.CLONE_NEWNET, - NewFiles: flags&linux.CLONE_FILES == linux.CLONE_FILES, - NewFSContext: flags&linux.CLONE_FS == linux.CLONE_FS, - NewUTSNamespace: flags&linux.CLONE_NEWUTS == linux.CLONE_NEWUTS, - NewIPCNamespace: flags&linux.CLONE_NEWIPC == linux.CLONE_NEWIPC, - } // "CLONE_NEWPID automatically implies CLONE_THREAD as well." - unshare(2) - if opts.NewPIDNamespace { - opts.NewThreadGroup = true + if flags&linux.CLONE_NEWPID != 0 { + flags |= linux.CLONE_THREAD } // "... specifying CLONE_NEWUSER automatically implies CLONE_THREAD. Since // Linux 3.9, CLONE_NEWUSER also automatically implies CLONE_FS." - if opts.NewUserNamespace { - opts.NewThreadGroup = true - opts.NewFSContext = true + if flags&linux.CLONE_NEWUSER != 0 { + flags |= linux.CLONE_THREAD | linux.CLONE_FS } - return 0, nil, t.Unshare(&opts) + return 0, nil, t.Unshare(flags) } // SchedYield implements linux syscall sched_yield(2). diff --git a/pkg/sync/goyield_unsafe.go b/pkg/sync/goyield_unsafe.go index 8639bb64e..757edbaba 100644 --- a/pkg/sync/goyield_unsafe.go +++ b/pkg/sync/goyield_unsafe.go @@ -3,10 +3,12 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build go1.14 && !go1.18 -// +build go1.14,!go1.18 +//go:build go1.14 +// +build go1.14 -// Check go:linkname function signatures when updating Go version. +// //go:linkname directives type-checked by checklinkname. Any other +// non-linkname assumptions outside the Go 1 compatibility guarantee should +// have an accompanied vet check or version guard build tag. package sync diff --git a/pkg/sync/runtime_unsafe.go b/pkg/sync/runtime_unsafe.go index 1d9cf304e..49d4109a9 100644 --- a/pkg/sync/runtime_unsafe.go +++ b/pkg/sync/runtime_unsafe.go @@ -6,8 +6,13 @@ //go:build go1.13 && !go1.18 // +build go1.13,!go1.18 -// Check go:linkname function signatures, type definitions, and constants when -// updating Go version. +// //go:linkname directives type-checked by checklinkname. Any other +// non-linkname assumptions outside the Go 1 compatibility guarantee should +// have an accompanied vet check or version guard build tag. + +// Check type definitions and constants when updating Go version. +// +// TODO(b/165820485): add these checks to checklinkname. package sync @@ -109,10 +114,10 @@ type maptype struct { // These functions are only used within the sync package. //go:linkname semacquire sync.runtime_Semacquire -func semacquire(s *uint32) +func semacquire(addr *uint32) //go:linkname semrelease sync.runtime_Semrelease -func semrelease(s *uint32, handoff bool, skipframes int) +func semrelease(addr *uint32, handoff bool, skipframes int) //go:linkname canSpin sync.runtime_canSpin func canSpin(i int) bool diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index e8e716db0..48356c343 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -56,6 +56,7 @@ import ( // linkDispatcher reads packets from the link FD and dispatches them to the // NetworkDispatcher. type linkDispatcher interface { + stop() dispatch() (bool, tcpip.Error) } @@ -381,16 +382,27 @@ func isSocketFD(fd int) (bool, error) { // Attach launches the goroutine that reads packets from the file descriptor and // dispatches them via the provided dispatcher. func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { - e.dispatcher = dispatcher - // Link endpoints are not savable. When transportation endpoints are - // saved, they stop sending outgoing packets and all incoming packets - // are rejected. - for i := range e.inboundDispatchers { - e.wg.Add(1) - go func(i int) { // S/R-SAFE: See above. - e.dispatchLoop(e.inboundDispatchers[i]) - e.wg.Done() - }(i) + // nil means the NIC is being removed. + if dispatcher == nil && e.dispatcher != nil { + for _, dispatcher := range e.inboundDispatchers { + dispatcher.stop() + } + e.Wait() + e.dispatcher = nil + return + } + if dispatcher != nil && e.dispatcher == nil { + e.dispatcher = dispatcher + // Link endpoints are not savable. When transportation endpoints are + // saved, they stop sending outgoing packets and all incoming packets + // are rejected. + for i := range e.inboundDispatchers { + e.wg.Add(1) + go func(i int) { // S/R-SAFE: See above. + e.dispatchLoop(e.inboundDispatchers[i]) + e.wg.Done() + }(i) + } } } diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go index bfae34ab9..3f516cab5 100644 --- a/pkg/tcpip/link/fdbased/mmap.go +++ b/pkg/tcpip/link/fdbased/mmap.go @@ -114,6 +114,7 @@ func (t tPacketHdr) Payload() []byte { // packetMMapDispatcher uses PACKET_RX_RING's to read/dispatch inbound packets. // See: mmap_amd64_unsafe.go for implementation details. type packetMMapDispatcher struct { + stopFd // fd is the file descriptor used to send and receive packets. fd int @@ -129,18 +130,18 @@ type packetMMapDispatcher struct { ringOffset int } -func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, tcpip.Error) { +func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, bool, tcpip.Error) { hdr := tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:]) for hdr.tpStatus()&tpStatusUser == 0 { - event := rawfile.PollEvent{ - FD: int32(d.fd), - Events: unix.POLLIN | unix.POLLERR, - } - if _, errno := rawfile.BlockingPoll(&event, 1, nil); errno != 0 { + stopped, errno := rawfile.BlockingPollUntilStopped(d.efd, d.fd, unix.POLLIN|unix.POLLERR) + if errno != 0 { if errno == unix.EINTR { continue } - return nil, rawfile.TranslateErrno(errno) + return nil, stopped, rawfile.TranslateErrno(errno) + } + if stopped { + return nil, true, nil } if hdr.tpStatus()&tpStatusCopy != 0 { // This frame is truncated so skip it after flipping the @@ -158,14 +159,14 @@ func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, tcpip.Error) { // Release packet to kernel. hdr.setTPStatus(tpStatusKernel) d.ringOffset = (d.ringOffset + 1) % tpFrameNR - return pkt, nil + return pkt, false, nil } // dispatch reads packets from an mmaped ring buffer and dispatches them to the // network stack. func (d *packetMMapDispatcher) dispatch() (bool, tcpip.Error) { - pkt, err := d.readMMappedPacket() - if err != nil { + pkt, stopped, err := d.readMMappedPacket() + if err != nil || stopped { return false, err } var ( diff --git a/pkg/tcpip/link/fdbased/mmap_unsafe.go b/pkg/tcpip/link/fdbased/mmap_unsafe.go index 58d5dfeef..5b786169a 100644 --- a/pkg/tcpip/link/fdbased/mmap_unsafe.go +++ b/pkg/tcpip/link/fdbased/mmap_unsafe.go @@ -47,9 +47,14 @@ func (t tPacketHdr) setTPStatus(status uint32) { } func newPacketMMapDispatcher(fd int, e *endpoint) (linkDispatcher, error) { + stopFd, err := newStopFd() + if err != nil { + return nil, err + } d := &packetMMapDispatcher{ - fd: fd, - e: e, + stopFd: stopFd, + fd: fd, + e: e, } pageSize := unix.Getpagesize() if tpBlockSize%pageSize != 0 { diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go index ab2855a63..fab34c5fa 100644 --- a/pkg/tcpip/link/fdbased/packet_dispatchers.go +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -18,6 +18,8 @@ package fdbased import ( + "fmt" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -114,9 +116,36 @@ func (b *iovecBuffer) pullViews(n int) buffer.VectorisedView { return buffer.NewVectorisedView(n, views) } +// stopFd is an eventfd used to signal the stop of a dispatcher. +type stopFd struct { + efd int +} + +func newStopFd() (stopFd, error) { + efd, err := unix.Eventfd(0, unix.EFD_NONBLOCK) + if err != nil { + return stopFd{efd: -1}, fmt.Errorf("failed to create eventfd: %w", err) + } + return stopFd{efd: efd}, nil +} + +// stop writes to the eventfd and notifies the dispatcher to stop. It does not +// block. +func (s *stopFd) stop() { + increment := []byte{1, 0, 0, 0, 0, 0, 0, 0} + if n, err := unix.Write(s.efd, increment); n != len(increment) || err != nil { + // There are two possible errors documented in eventfd(2) for writing: + // 1. We are writing 8 bytes and not 0xffffffffffffff, thus no EINVAL. + // 2. stop is only supposed to be called once, it can't reach the limit, + // thus no EAGAIN. + panic(fmt.Sprintf("write(efd) = (%d, %s), want (%d, nil)", n, err, len(increment))) + } +} + // readVDispatcher uses readv() system call to read inbound packets and // dispatches them. type readVDispatcher struct { + stopFd // fd is the file descriptor used to send and receive packets. fd int @@ -128,7 +157,15 @@ type readVDispatcher struct { } func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) { - d := &readVDispatcher{fd: fd, e: e} + stopFd, err := newStopFd() + if err != nil { + return nil, err + } + d := &readVDispatcher{ + stopFd: stopFd, + fd: fd, + e: e, + } skipsVnetHdr := d.e.gsoKind == stack.HWGSOSupported d.buf = newIovecBuffer(BufConfig, skipsVnetHdr) return d, nil @@ -136,8 +173,8 @@ func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) { // dispatch reads one packet from the file descriptor and dispatches it. func (d *readVDispatcher) dispatch() (bool, tcpip.Error) { - n, err := rawfile.BlockingReadv(d.fd, d.buf.nextIovecs()) - if n == 0 || err != nil { + n, err := rawfile.BlockingReadvUntilStopped(d.efd, d.fd, d.buf.nextIovecs()) + if n <= 0 || err != nil { return false, err } @@ -184,6 +221,7 @@ func (d *readVDispatcher) dispatch() (bool, tcpip.Error) { // recvMMsgDispatcher uses the recvmmsg system call to read inbound packets and // dispatches them. type recvMMsgDispatcher struct { + stopFd // fd is the file descriptor used to send and receive packets. fd int @@ -207,7 +245,12 @@ const ( ) func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) { + stopFd, err := newStopFd() + if err != nil { + return nil, err + } d := &recvMMsgDispatcher{ + stopFd: stopFd, fd: fd, e: e, bufs: make([]*iovecBuffer, MaxMsgsPerRecv), @@ -235,8 +278,8 @@ func (d *recvMMsgDispatcher) dispatch() (bool, tcpip.Error) { d.msgHdrs[k].Msg.SetIovlen(iovLen) } - nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs) - if err != nil { + nMsgs, err := rawfile.BlockingRecvMMsgUntilStopped(d.efd, d.fd, d.msgHdrs) + if nMsgs == -1 || err != nil { return false, err } // Process each of received packets. diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go index b1a28491d..40bd5560b 100644 --- a/pkg/tcpip/link/qdisc/fifo/endpoint.go +++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go @@ -115,6 +115,13 @@ func (e *endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protoc // Attach implements stack.LinkEndpoint.Attach. func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { + // nil means the NIC is being removed. + if dispatcher == nil { + e.lower.Attach(nil) + e.Wait() + e.dispatcher = nil + return + } e.dispatcher = dispatcher e.lower.Attach(e) } diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go index da900c24b..0b7b9e3de 100644 --- a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go +++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build ((linux && amd64) || (linux && arm64)) && go1.12 && !go1.18 +//go:build ((linux && amd64) || (linux && arm64)) && go1.12 // +build linux,amd64 linux,arm64 // +build go1.12 -// +build !go1.18 -// Check go:linkname function signatures when updating Go version. +// //go:linkname directives type-checked by checklinkname. Any other +// non-linkname assumptions outside the Go 1 compatibility guarantee should +// have an accompanied vet check or version guard build tag. package rawfile diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go index 53448a641..e76fc55b6 100644 --- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go +++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go @@ -170,46 +170,63 @@ func BlockingRead(fd int, b []byte) (int, tcpip.Error) { } } -// BlockingReadv reads from a file descriptor that is set up as non-blocking and -// stores the data in a list of iovecs buffers. If no data is available, it will -// block in a poll() syscall until the file descriptor becomes readable. -func BlockingReadv(fd int, iovecs []unix.Iovec) (int, tcpip.Error) { +// BlockingReadvUntilStopped reads from a file descriptor that is set up as +// non-blocking and stores the data in a list of iovecs buffers. If no data is +// available, it will block in a poll() syscall until the file descriptor +// becomes readable or stop is signalled (efd becomes readable). Returns -1 in +// the latter case. +func BlockingReadvUntilStopped(efd int, fd int, iovecs []unix.Iovec) (int, tcpip.Error) { for { n, _, e := unix.RawSyscall(unix.SYS_READV, uintptr(fd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(len(iovecs))) if e == 0 { return int(n), nil } - event := PollEvent{ - FD: int32(fd), - Events: 1, // POLLIN + stopped, e := BlockingPollUntilStopped(efd, fd, unix.POLLIN) + if stopped { + return -1, nil } - - _, e = BlockingPoll(&event, 1, nil) if e != 0 && e != unix.EINTR { return 0, TranslateErrno(e) } } } -// BlockingRecvMMsg reads from a file descriptor that is set up as non-blocking -// and stores the received messages in a slice of MMsgHdr structures. If no data -// is available, it will block in a poll() syscall until the file descriptor -// becomes readable. -func BlockingRecvMMsg(fd int, msgHdrs []MMsgHdr) (int, tcpip.Error) { +// BlockingRecvMMsgUntilStopped reads from a file descriptor that is set up as +// non-blocking and stores the received messages in a slice of MMsgHdr +// structures. If no data is available, it will block in a poll() syscall until +// the file descriptor becomes readable or stop is signalled (efd becomes +// readable). Returns -1 in the latter case. +func BlockingRecvMMsgUntilStopped(efd int, fd int, msgHdrs []MMsgHdr) (int, tcpip.Error) { for { n, _, e := unix.RawSyscall6(unix.SYS_RECVMMSG, uintptr(fd), uintptr(unsafe.Pointer(&msgHdrs[0])), uintptr(len(msgHdrs)), unix.MSG_DONTWAIT, 0, 0) if e == 0 { return int(n), nil } - event := PollEvent{ - FD: int32(fd), - Events: 1, // POLLIN + stopped, e := BlockingPollUntilStopped(efd, fd, unix.POLLIN) + if stopped { + return -1, nil } - - if _, e := BlockingPoll(&event, 1, nil); e != 0 && e != unix.EINTR { + if e != 0 && e != unix.EINTR { return 0, TranslateErrno(e) } } } + +// BlockingPollUntilStopped polls for events on fd or until a stop is signalled +// on the event fd efd. Returns true if stopped, i.e., efd has event POLLIN. +func BlockingPollUntilStopped(efd int, fd int, events int16) (bool, unix.Errno) { + pevents := [...]PollEvent{ + { + FD: int32(efd), + Events: unix.POLLIN, + }, + { + FD: int32(fd), + Events: events, + }, + } + _, errno := BlockingPoll(&pevents[0], len(pevents), nil) + return pevents[0].Revents&unix.POLLIN != 0, errno +} diff --git a/pkg/tcpip/link/sniffer/pcap.go b/pkg/tcpip/link/sniffer/pcap.go index 3bb864ed2..d3edede63 100644 --- a/pkg/tcpip/link/sniffer/pcap.go +++ b/pkg/tcpip/link/sniffer/pcap.go @@ -14,7 +14,14 @@ package sniffer -import "time" +import ( + "encoding" + "encoding/binary" + "time" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) type pcapHeader struct { // MagicNumber is the file magic number. @@ -39,25 +46,38 @@ type pcapHeader struct { Network uint32 } -type pcapPacketHeader struct { - // Seconds is the timestamp seconds. - Seconds uint32 - - // Microseconds is the timestamp microseconds. - Microseconds uint32 +var _ encoding.BinaryMarshaler = (*pcapPacket)(nil) - // IncludedLength is the number of octets of packet saved in file. - IncludedLength uint32 - - // OriginalLength is the actual length of packet. - OriginalLength uint32 +type pcapPacket struct { + timestamp time.Time + packet *stack.PacketBuffer + maxCaptureLen int } -func newPCAPPacketHeader(now time.Time, incLen, orgLen uint32) pcapPacketHeader { - return pcapPacketHeader{ - Seconds: uint32(now.Unix()), - Microseconds: uint32(now.Nanosecond() / 1000), - IncludedLength: incLen, - OriginalLength: orgLen, +func (p *pcapPacket) MarshalBinary() ([]byte, error) { + packetSize := p.packet.Size() + captureLen := p.maxCaptureLen + if packetSize < captureLen { + captureLen = packetSize + } + b := make([]byte, 16+captureLen) + binary.BigEndian.PutUint32(b[0:4], uint32(p.timestamp.Unix())) + binary.BigEndian.PutUint32(b[4:8], uint32(p.timestamp.Nanosecond()/1000)) + binary.BigEndian.PutUint32(b[8:12], uint32(captureLen)) + binary.BigEndian.PutUint32(b[12:16], uint32(packetSize)) + w := tcpip.SliceWriter(b[16:]) + for _, v := range p.packet.Views() { + if captureLen == 0 { + break + } + if len(v) > captureLen { + v = v[:captureLen] + } + n, err := w.Write(v) + if err != nil { + panic(err) + } + captureLen -= n } + return b, nil } diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go index 3df826f3c..28a172e71 100644 --- a/pkg/tcpip/link/sniffer/sniffer.go +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -151,33 +151,16 @@ func (e *endpoint) dumpPacket(dir direction, protocol tcpip.NetworkProtocolNumbe logPacket(e.logPrefix, dir, protocol, pkt) } if writer != nil && atomic.LoadUint32(&LogPacketsToPCAP) == 1 { - totalLength := pkt.Size() - length := totalLength - if max := int(e.maxPCAPLen); length > max { - length = max + packet := pcapPacket{ + timestamp: time.Now(), + packet: pkt, + maxCaptureLen: int(e.maxPCAPLen), } - packetHeader := newPCAPPacketHeader(time.Now(), uint32(length), uint32(totalLength)) - packet := make([]byte, binary.Size(packetHeader)+length) - { - writer := tcpip.SliceWriter(packet) - if err := binary.Write(&writer, binary.BigEndian, packetHeader); err != nil { - panic(err) - } - for _, b := range pkt.Views() { - if length == 0 { - break - } - if len(b) > length { - b = b[:length] - } - n, err := writer.Write(b) - if err != nil { - panic(err) - } - length -= n - } + b, err := packet.MarshalBinary() + if err != nil { + panic(err) } - if _, err := writer.Write(packet); err != nil { + if _, err := writer.Write(b); err != nil { panic(err) } } diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD index c90974693..2257f728e 100644 --- a/pkg/tcpip/network/ipv4/BUILD +++ b/pkg/tcpip/network/ipv4/BUILD @@ -39,7 +39,6 @@ go_test( "//pkg/tcpip/faketime", "//pkg/tcpip/header", "//pkg/tcpip/link/channel", - "//pkg/tcpip/link/loopback", "//pkg/tcpip/link/sniffer", "//pkg/tcpip/network/arp", "//pkg/tcpip/network/internal/testutil", diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go index 4a4448cf9..73407be67 100644 --- a/pkg/tcpip/network/ipv4/ipv4_test.go +++ b/pkg/tcpip/network/ipv4/ipv4_test.go @@ -32,7 +32,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/faketime" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/link/channel" - "gvisor.dev/gvisor/pkg/tcpip/link/loopback" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/arp" iptestutil "gvisor.dev/gvisor/pkg/tcpip/network/internal/testutil" @@ -3339,7 +3338,7 @@ func TestCloseLocking(t *testing.T) { defer wg.Done() for i := 0; i < iterations; i++ { - if err := s.CreateNIC(nicID2, loopback.New()); err != nil { + if err := s.CreateNIC(nicID2, stack.LinkEndpoint(channel.New(0, defaultMTU, ""))); err != nil { t.Errorf("CreateNIC(%d, _): %s", nicID2, err) return } diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 81fabe29a..c73890c4c 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -780,6 +780,9 @@ func (s *Stack) removeNICLocked(id tcpip.NICID) tcpip.Error { if !ok { return &tcpip.ErrUnknownNICID{} } + if nic.IsLoopback() { + return &tcpip.ErrNotSupported{} + } delete(s.nics, id) // Remove routes in-place. n tracks the number of routes written. diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 21951d05a..3089c0ef4 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -719,38 +719,59 @@ func TestRemoveUnknownNIC(t *testing.T) { } func TestRemoveNIC(t *testing.T) { - const nicID = 1 + for _, tt := range []struct { + name string + linkep stack.LinkEndpoint + expectErr tcpip.Error + }{ + { + name: "loopback", + linkep: loopback.New(), + expectErr: &tcpip.ErrNotSupported{}, + }, + { + name: "channel", + linkep: channel.New(0, defaultMTU, ""), + expectErr: nil, + }, + } { + t.Run(tt.name, func(t *testing.T) { + const nicID = 1 - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory}, - }) + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory}, + }) - e := linkEPWithMockedAttach{ - LinkEndpoint: loopback.New(), - } - if err := s.CreateNIC(nicID, &e); err != nil { - t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) - } + e := linkEPWithMockedAttach{ + LinkEndpoint: tt.linkep, + } + if err := s.CreateNIC(nicID, &e); err != nil { + t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + } - // NIC should be present in NICInfo and attached to a NetworkDispatcher. - allNICInfo := s.NICInfo() - if _, ok := allNICInfo[nicID]; !ok { - t.Errorf("entry for %d missing from allNICInfo = %+v", nicID, allNICInfo) - } - if !e.isAttached() { - t.Fatal("link endpoint not attached to a network dispatcher") - } + // NIC should be present in NICInfo and attached to a NetworkDispatcher. + allNICInfo := s.NICInfo() + if _, ok := allNICInfo[nicID]; !ok { + t.Errorf("entry for %d missing from allNICInfo = %+v", nicID, allNICInfo) + } + if !e.isAttached() { + t.Fatal("link endpoint not attached to a network dispatcher") + } - // Removing a NIC should remove it from NICInfo and e should be detached from - // the NetworkDispatcher. - if err := s.RemoveNIC(nicID); err != nil { - t.Fatalf("s.RemoveNIC(%d): %s", nicID, err) - } - if nicInfo, ok := s.NICInfo()[nicID]; ok { - t.Errorf("got unexpected NICInfo entry for deleted NIC %d = %+v", nicID, nicInfo) - } - if e.isAttached() { - t.Error("link endpoint for removed NIC still attached to a network dispatcher") + // Removing a NIC should remove it from NICInfo and e should be detached from + // the NetworkDispatcher. + if got, want := s.RemoveNIC(nicID), tt.expectErr; got != want { + t.Fatalf("got s.RemoveNIC(%d) = %s, want %s", nicID, got, want) + } + if tt.expectErr == nil { + if nicInfo, ok := s.NICInfo()[nicID]; ok { + t.Errorf("got unexpected NICInfo entry for deleted NIC %d = %+v", nicID, nicInfo) + } + if e.isAttached() { + t.Error("link endpoint for removed NIC still attached to a network dispatcher") + } + } + }) } } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 33e738efc..703f34827 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -463,6 +463,10 @@ func hostInetFilters() seccomp.SyscallRules { seccomp.MatchAny{}, seccomp.EqualTo(unix.SIOCGIFFLAGS), }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(unix.SIOCGIFCONF), + }, }, unix.SYS_LISTEN: {}, unix.SYS_READV: {}, diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go index 7b11b3367..1fe9c6435 100644 --- a/runsc/cmd/chroot.go +++ b/runsc/cmd/chroot.go @@ -59,6 +59,23 @@ func pivotRoot(root string) error { return nil } +func copyFile(dst, src string) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + + out, err := os.Create(dst) + if err != nil { + return err + } + defer out.Close() + + _, err = out.ReadFrom(in) + return err +} + // setUpChroot creates an empty directory with runsc mounted at /runsc and proc // mounted at /proc. func setUpChroot(pidns bool) error { @@ -78,6 +95,14 @@ func setUpChroot(pidns bool) error { return fmt.Errorf("error mounting tmpfs in choot: %v", err) } + if err := os.Mkdir(filepath.Join(chroot, "etc"), 0755); err != nil { + return fmt.Errorf("error creating /etc in chroot: %v", err) + } + + if err := copyFile(filepath.Join(chroot, "etc/localtime"), "/etc/localtime"); err != nil { + log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err) + } + if pidns { flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY) if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil { diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 20e05f141..2193e9040 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -285,16 +285,22 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { // Prepare tree structure for pivot_root(2). if err := os.Mkdir("/proc/proc", 0755); err != nil { - Fatalf("%v", err) + Fatalf("error creating /proc/proc: %v", err) } if err := os.Mkdir("/proc/root", 0755); err != nil { - Fatalf("%v", err) + Fatalf("error creating /proc/root: %v", err) + } + if err := os.Mkdir("/proc/etc", 0755); err != nil { + Fatalf("error creating /proc/etc: %v", err) } // This cannot use SafeMount because there's no available procfs. But we // know that /proc is an empty tmpfs mount, so this is safe. if err := unix.Mount("runsc-proc", "/proc/proc", "proc", flags|unix.MS_RDONLY, ""); err != nil { Fatalf("error mounting proc: %v", err) } + if err := copyFile("/proc/etc/localtime", "/etc/localtime"); err != nil { + log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err) + } root = "/proc/root" procPath = "/proc/proc" } @@ -409,7 +415,7 @@ func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]sp panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err)) } - opts, err := adjustMountOptions(filepath.Join(root, relDst), m.Options) + opts, err := adjustMountOptions(conf, filepath.Join(root, relDst), m.Options) if err != nil { return nil, err } @@ -475,7 +481,7 @@ func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, erro } // adjustMountOptions adds 'overlayfs_stale_read' if mounting over overlayfs. -func adjustMountOptions(path string, opts []string) ([]string, error) { +func adjustMountOptions(conf *config.Config, path string, opts []string) ([]string, error) { rv := make([]string, len(opts)) copy(rv, opts) diff --git a/runsc/container/container.go b/runsc/container/container.go index 7f066905a..6a9a07afe 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -789,30 +789,31 @@ func (c *Container) stop() error { } func (c *Container) waitForStopped() error { + if c.GoferPid == 0 { + return nil + } + + if c.IsSandboxRunning() { + if err := c.SignalContainer(unix.Signal(0), false); err == nil { + return fmt.Errorf("container is still running") + } + } + + if c.goferIsChild { + // The gofer process is a child of the current process, + // so we can wait it and collect its zombie. + if _, err := unix.Wait4(int(c.GoferPid), nil, 0, nil); err != nil { + return fmt.Errorf("error waiting the gofer process: %v", err) + } + c.GoferPid = 0 + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) op := func() error { - if c.IsSandboxRunning() { - if err := c.SignalContainer(unix.Signal(0), false); err == nil { - return fmt.Errorf("container is still running") - } - } - if c.GoferPid == 0 { - return nil - } - if c.goferIsChild { - // The gofer process is a child of the current process, - // so we can wait it and collect its zombie. - wpid, err := unix.Wait4(int(c.GoferPid), nil, unix.WNOHANG, nil) - if err != nil { - return fmt.Errorf("error waiting the gofer process: %v", err) - } - if wpid == 0 { - return fmt.Errorf("gofer is still running") - } - - } else if err := unix.Kill(c.GoferPid, 0); err == nil { + if err := unix.Kill(c.GoferPid, 0); err == nil { return fmt.Errorf("gofer is still running") } c.GoferPid = 0 diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 48efbb0b8..5fb7dc834 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -1157,27 +1157,26 @@ func (s *Sandbox) destroyContainer(cid string) error { } func (s *Sandbox) waitForStopped() error { + if s.child { + s.statusMu.Lock() + defer s.statusMu.Unlock() + if s.Pid == 0 { + return nil + } + // The sandbox process is a child of the current process, + // so we can wait it and collect its zombie. + if _, err := unix.Wait4(int(s.Pid), &s.status, 0, nil); err != nil { + return fmt.Errorf("error waiting the sandbox process: %v", err) + } + s.Pid = 0 + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) op := func() error { - if s.child { - s.statusMu.Lock() - defer s.statusMu.Unlock() - if s.Pid == 0 { - return nil - } - // The sandbox process is a child of the current process, - // so we can wait it and collect its zombie. - wpid, err := unix.Wait4(int(s.Pid), &s.status, unix.WNOHANG, nil) - if err != nil { - return fmt.Errorf("error waiting the sandbox process: %v", err) - } - if wpid == 0 { - return fmt.Errorf("sandbox is still running") - } - s.Pid = 0 - } else if s.IsRunning() { + if s.IsRunning() { return fmt.Errorf("sandbox is still running") } return nil diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 9e22c9a7d..d41139944 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -742,3 +742,49 @@ func TestUnmount(t *testing.T) { t.Fatalf("docker run failed: %v", err) } } + +func TestDeleteInterface(t *testing.T) { + if testutil.IsRunningWithHostNet() { + t.Skip("not able to remove interfaces on hostnet") + } + + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) + + opts := dockerutil.RunOpts{ + Image: "basic/alpine", + CapAdd: []string{"NET_ADMIN"}, + } + if err := d.Spawn(ctx, opts, "sleep", "1000"); err != nil { + t.Fatalf("docker run failed: %v", err) + } + + // We should be able to remove eth0. + output, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/bin/sh", "-c", "ip link del dev eth0") + if err != nil { + t.Fatalf("failed to remove eth0: %s, output: %s", err, output) + } + // Verify that eth0 is no longer there. + output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/bin/sh", "-c", "ip link show") + if err != nil { + t.Fatalf("docker exec ip link show failed: %s, output: %s", err, output) + } + if strings.Contains(output, "eth0") { + t.Fatalf("failed to remove eth0") + } + + // Loopback device can't be removed. + output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/bin/sh", "-c", "ip link del dev lo") + if err == nil { + t.Fatalf("should not remove the loopback device: %v", output) + } + // Verify that lo is still there. + output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/bin/sh", "-c", "ip link show") + if err != nil { + t.Fatalf("docker exec ip link show failed: %s, output: %s", err, output) + } + if !strings.Contains(output, "lo") { + t.Fatalf("loopback interface is removed") + } +} diff --git a/test/perf/BUILD b/test/perf/BUILD index 75b5003e2..4299f08d5 100644 --- a/test/perf/BUILD +++ b/test/perf/BUILD @@ -70,6 +70,13 @@ syscall_test( ) syscall_test( + size = "large", + add_overlay = True, + debug = False, + test = "//test/perf/linux:dup_benchmark", +) + +syscall_test( debug = False, test = "//test/perf/linux:pipe_benchmark", ) @@ -139,3 +146,17 @@ syscall_test( debug = False, test = "//test/perf/linux:write_benchmark", ) + +syscall_test( + size = "large", + debug = False, + test = "//test/perf/linux:verity_open_benchmark", + vfs1 = False, +) + +syscall_test( + size = "large", + debug = False, + test = "//test/perf/linux:verity_read_benchmark", + vfs1 = False, +) diff --git a/test/perf/linux/BUILD b/test/perf/linux/BUILD index dd1d2438c..8a3216550 100644 --- a/test/perf/linux/BUILD +++ b/test/perf/linux/BUILD @@ -27,10 +27,10 @@ cc_binary( deps = [ gbenchmark, gtest, - "//test/syscalls/linux:socket_test_util", "//test/util:file_descriptor", "//test/util:logging", "//test/util:posix_error", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", @@ -109,6 +109,22 @@ cc_binary( ) cc_binary( + name = "dup_benchmark", + testonly = 1, + srcs = [ + "dup_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:fs_util", + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + ], +) + +cc_binary( name = "read_benchmark", testonly = 1, srcs = [ @@ -370,3 +386,41 @@ cc_binary( "//test/util:test_main", ], ) + +cc_binary( + name = "verity_open_benchmark", + testonly = 1, + srcs = [ + "verity_open_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:capability_util", + "//test/util:fs_util", + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:verity_util", + ], +) + +cc_binary( + name = "verity_read_benchmark", + testonly = 1, + srcs = [ + "verity_read_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:capability_util", + "//test/util:fs_util", + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:verity_util", + ], +) diff --git a/test/perf/linux/dup_benchmark.cc b/test/perf/linux/dup_benchmark.cc new file mode 100644 index 000000000..5d808d225 --- /dev/null +++ b/test/perf/linux/dup_benchmark.cc @@ -0,0 +1,55 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#include <memory> +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/fs_util.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" + +namespace gvisor { + +namespace { + +void BM_Dup(benchmark::State& state) { + const int size = state.range(0); + + for (auto _ : state) { + std::vector<int> v; + for (int i = 0; i < size; i++) { + int fd = dup(2); + TEST_CHECK(fd != -1); + v.push_back(fd); + } + for (int i = 0; i < size; i++) { + int fd = v[i]; + close(fd); + } + } + state.SetItemsProcessed(state.iterations() * size); +} + +BENCHMARK(BM_Dup)->Range(1, 1 << 15)->UseRealTime(); + +} // namespace + +} // namespace gvisor diff --git a/test/perf/linux/send_recv_benchmark.cc b/test/perf/linux/send_recv_benchmark.cc index d73e49523..2d443f54f 100644 --- a/test/perf/linux/send_recv_benchmark.cc +++ b/test/perf/linux/send_recv_benchmark.cc @@ -23,10 +23,10 @@ #include "gtest/gtest.h" #include "absl/synchronization/notification.h" #include "benchmark/benchmark.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/logging.h" #include "test/util/posix_error.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -80,6 +80,9 @@ void BM_Recvmsg(benchmark::State& state) { int64_t bytes_received = 0; for (auto ignored : state) { int n = recvmsg(recv_socket.get(), recv_msg.header(), 0); + if (n == -1 && errno == EINTR) { + continue; + } TEST_CHECK(n > 0); bytes_received += n; } @@ -108,6 +111,9 @@ void BM_Sendmsg(benchmark::State& state) { int64_t bytes_sent = 0; for (auto ignored : state) { int n = sendmsg(send_socket.get(), send_msg.header(), 0); + if (n == -1 && errno == EINTR) { + continue; + } TEST_CHECK(n > 0); bytes_sent += n; } @@ -137,6 +143,9 @@ void BM_Recvfrom(benchmark::State& state) { for (auto ignored : state) { int n = recvfrom(recv_socket.get(), recv_buffer, kMessageSize, 0, nullptr, nullptr); + if (n == -1 && errno == EINTR) { + continue; + } TEST_CHECK(n > 0); bytes_received += n; } @@ -166,6 +175,9 @@ void BM_Sendto(benchmark::State& state) { int64_t bytes_sent = 0; for (auto ignored : state) { int n = sendto(send_socket.get(), send_buffer, kMessageSize, 0, nullptr, 0); + if (n == -1 && errno == EINTR) { + continue; + } TEST_CHECK(n > 0); bytes_sent += n; } @@ -247,6 +259,9 @@ void BM_RecvmsgWithControlBuf(benchmark::State& state) { int64_t bytes_received = 0; for (auto ignored : state) { int n = recvmsg(recv_socket.get(), recv_msg.header(), 0); + if (n == -1 && errno == EINTR) { + continue; + } TEST_CHECK(n > 0); bytes_received += n; } @@ -316,7 +331,11 @@ void BM_SendmsgTCP(benchmark::State& state) { ScopedThread t([&recv_msg, &recv_socket, ¬ification] { while (!notification.HasBeenNotified()) { - TEST_CHECK(recvmsg(recv_socket.get(), recv_msg.header(), 0) >= 0); + int rc = recvmsg(recv_socket.get(), recv_msg.header(), 0); + if (rc == -1 && errno == EINTR) { + continue; + } + TEST_CHECK(rc >= 0); } }); diff --git a/test/perf/linux/verity_open_benchmark.cc b/test/perf/linux/verity_open_benchmark.cc new file mode 100644 index 000000000..026b6f101 --- /dev/null +++ b/test/perf/linux/verity_open_benchmark.cc @@ -0,0 +1,71 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <unistd.h> + +#include <memory> +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/capability_util.h" +#include "test/util/fs_util.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/verity_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_Open(benchmark::State& state) { + const int size = state.range(0); + std::vector<TempPath> cache; + std::vector<EnableTarget> targets; + + // Mount a tmpfs file system to be wrapped by a verity fs. + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TEST_CHECK(mount("", dir.path().c_str(), "tmpfs", 0, "") == 0); + + for (int i = 0; i < size; i++) { + auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + targets.emplace_back( + EnableTarget(std::string(Basename(path.path())), O_RDONLY)); + cache.emplace_back(std::move(path)); + } + + std::string verity_dir = + TEST_CHECK_NO_ERRNO_AND_VALUE(MountVerity(dir.path(), targets)); + + unsigned int seed = 1; + for (auto _ : state) { + const int chosen = rand_r(&seed) % size; + int fd = open(JoinPath(verity_dir, targets[chosen].path).c_str(), O_RDONLY); + TEST_CHECK(fd != -1); + close(fd); + } +} + +BENCHMARK(BM_Open)->Range(1, 128)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/verity_read_benchmark.cc b/test/perf/linux/verity_read_benchmark.cc new file mode 100644 index 000000000..738b5ba45 --- /dev/null +++ b/test/perf/linux/verity_read_benchmark.cc @@ -0,0 +1,69 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <unistd.h> + +#include <memory> +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/capability_util.h" +#include "test/util/fs_util.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/verity_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_VerityRead(benchmark::State& state) { + const int size = state.range(0); + const std::string contents(size, 0); + + // Mount a tmpfs file system to be wrapped by a verity fs. + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TEST_CHECK(mount("", dir.path().c_str(), "tmpfs", 0, "") == 0); + + auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + dir.path(), contents, TempPath::kDefaultFileMode)); + std::string filename = std::string(Basename(path.path())); + + std::string verity_dir = TEST_CHECK_NO_ERRNO_AND_VALUE( + MountVerity(dir.path(), {EnableTarget(filename, O_RDONLY)})); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(JoinPath(verity_dir, filename), O_RDONLY)); + std::vector<char> buf(size); + for (auto _ : state) { + TEST_CHECK(PreadFd(fd.get(), buf.data(), buf.size(), 0) == size); + } + + state.SetBytesProcessed(static_cast<int64_t>(size) * + static_cast<int64_t>(state.iterations())); +} + +BENCHMARK(BM_VerityRead)->Range(1, 1 << 26)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/root/chroot_test.go b/test/root/chroot_test.go index 58fcd6f08..5114a9602 100644 --- a/test/root/chroot_test.go +++ b/test/root/chroot_test.go @@ -68,13 +68,15 @@ func TestChroot(t *testing.T) { if err != nil { t.Fatalf("error listing %q: %v", chroot, err) } - if want, got := 1, len(fi); want != got { + if want, got := 2, len(fi); want != got { t.Fatalf("chroot dir got %d entries, want %d", got, want) } - // chroot dir is prepared by runsc and should contains only /proc. - if fi[0].Name() != "proc" { - t.Errorf("chroot got children %v, want %v", fi[0].Name(), "proc") + // chroot dir is prepared by runsc and should contains only /etc and /proc. + for i, want := range []string{"etc", "proc"} { + if got := fi[i].Name(); got != want { + t.Errorf("chroot got child %v, want %v", got, want) + } } d.CleanUp(ctx) diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index 405e03832..05c75b130 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -135,6 +135,7 @@ def syscall_test( add_overlay = False, add_uds_tree = False, add_hostinet = False, + vfs1 = True, vfs2 = True, fuse = False, debug = True, @@ -148,6 +149,7 @@ def syscall_test( add_overlay: add an overlay test. add_uds_tree: add a UDS test. add_hostinet: add a hostinet test. + vfs1: enable VFS1 tests. Could be false only if vfs2 is true. vfs2: enable VFS2 support. fuse: enable FUSE support. debug: enable debug output. @@ -157,7 +159,7 @@ def syscall_test( if not tags: tags = [] - if vfs2 and not fuse: + if vfs2 and vfs1 and not fuse: # Generate a vfs1 plain test. Most testing will now be # biased towards vfs2, with only a single vfs1 case. _syscall_test( @@ -171,7 +173,7 @@ def syscall_test( **kwargs ) - if not fuse: + if vfs1 and not fuse: # Generate a native test if fuse is not required. _syscall_test( test = test, diff --git a/test/runner/setup_container/BUILD b/test/runner/setup_container/BUILD index 5b99d1de9..f879ef649 100644 --- a/test/runner/setup_container/BUILD +++ b/test/runner/setup_container/BUILD @@ -12,8 +12,8 @@ cc_binary( visibility = ["//test/runner:__subpackages__"], deps = [ "//test/syscalls/linux:socket_netlink_util", - "//test/syscalls/linux:socket_test_util", "//test/util:capability_util", "//test/util:posix_error", + "//test/util:socket_util", ], ) diff --git a/test/runner/setup_container/setup_container.cc b/test/runner/setup_container/setup_container.cc index 9a4e3fb8b..e6fae37e1 100644 --- a/test/runner/setup_container/setup_container.cc +++ b/test/runner/setup_container/setup_container.cc @@ -17,9 +17,9 @@ #include <unistd.h> #include "test/syscalls/linux/socket_netlink_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/posix_error.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 213c7e96c..16c451786 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -331,6 +331,10 @@ syscall_test( ) syscall_test( + test = "//test/syscalls/linux:msgqueue_test", +) + +syscall_test( size = "medium", test = "//test/syscalls/linux:msync_test", ) diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index fa2a080f1..3383495d0 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "gbenchmark", "gtest", "select_arch", "select_system") +load("//tools:defs.bzl", "cc_binary", "cc_library", "gbenchmark", "gtest", "select_arch", "select_system") package( default_visibility = ["//:sandbox"], @@ -128,9 +128,9 @@ cc_library( srcs = ["socket_netlink_util.cc"], hdrs = ["socket_netlink_util.h"], deps = [ - ":socket_test_util", "//test/util:file_descriptor", "//test/util:posix_error", + "//test/util:socket_util", "@com_google_absl//absl/strings", ], ) @@ -146,36 +146,12 @@ cc_library( ) cc_library( - name = "socket_test_util", - testonly = 1, - srcs = [ - "socket_test_util.cc", - "socket_test_util_impl.cc", - ], - hdrs = ["socket_test_util.h"], - defines = select_system(), - deps = default_net_util() + [ - gtest, - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:optional", - "//test/util:file_descriptor", - "//test/util:posix_error", - "//test/util:temp_path", - "//test/util:test_util", - "//test/util:thread_util", - ], -) - -cc_library( name = "unix_domain_socket_test_util", testonly = 1, srcs = ["unix_domain_socket_test_util.cc"], hdrs = ["unix_domain_socket_test_util.h"], deps = [ - ":socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/strings", gtest, "//test/util:test_util", @@ -188,7 +164,7 @@ cc_library( srcs = ["ip_socket_test_util.cc"], hdrs = ["ip_socket_test_util.h"], deps = [ - ":socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/strings", ], ) @@ -233,9 +209,9 @@ cc_binary( srcs = ["accept_bind.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -248,9 +224,9 @@ cc_binary( srcs = ["accept_bind_stream.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -349,8 +325,8 @@ cc_binary( srcs = ["bind.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -363,7 +339,7 @@ cc_binary( srcs = ["socket.cc"], linkstatic = 1, deps = [ - ":socket_test_util", + "//test/util:socket_util", gtest, "//test/util:file_descriptor", "//test/util:temp_umask", @@ -378,9 +354,9 @@ cc_binary( srcs = ["socket_capability.cc"], linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -540,9 +516,9 @@ cc_binary( srcs = ["connect_external.cc"], linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:file_descriptor", "//test/util:fs_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -584,6 +560,7 @@ cc_binary( deps = [ "//test/util:eventfd_util", "//test/util:file_descriptor", + "@com_google_absl//absl/memory", gtest, "//test/util:fs_util", "//test/util:posix_error", @@ -755,10 +732,10 @@ cc_binary( linkstatic = 1, deps = [ ":file_base", - ":socket_test_util", "//test/util:cleanup", "//test/util:eventfd_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", gtest, @@ -801,12 +778,12 @@ cc_binary( srcs = ["fcntl.cc"], linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:capability_util", "//test/util:cleanup", "//test/util:eventfd_util", "//test/util:file_descriptor", "//test/util:fs_util", + "//test/util:socket_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/memory", @@ -834,8 +811,8 @@ cc_binary( ], linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", gtest, @@ -1021,9 +998,9 @@ cc_binary( linkstatic = 1, deps = [ ":ip_socket_test_util", - ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:signal_util", "//test/util:test_main", @@ -1065,9 +1042,9 @@ cc_binary( linkstatic = 1, deps = [ ":iptables_types", - ":socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -1083,9 +1060,9 @@ cc_binary( linkstatic = 1, deps = [ ":iptables_types", - ":socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -1464,10 +1441,10 @@ cc_binary( defines = select_system(), linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:endian", gtest, @@ -1482,10 +1459,10 @@ cc_binary( srcs = ["packet_socket.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:endian", gtest, @@ -1541,9 +1518,9 @@ cc_binary( srcs = select_system(linux = ["partial_bad_buffer.cc"]), linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:file_descriptor", "//test/util:fs_util", + "//test/util:socket_util", "@com_google_absl//absl/time", gtest, "//test/util:posix_error", @@ -1576,8 +1553,8 @@ cc_binary( linkstatic = 1, deps = [ ":ip_socket_test_util", - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", @@ -1774,6 +1751,7 @@ cc_binary( "//test/util:mount_util", "@com_google_absl//absl/container:node_hash_set", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", gtest, @@ -1795,10 +1773,10 @@ cc_binary( srcs = ["proc_net.cc"], linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "//test/util:socket_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", gtest, @@ -1942,10 +1920,10 @@ cc_binary( srcs = ["raw_socket_hdrincl.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:endian", gtest, @@ -1961,10 +1939,10 @@ cc_binary( defines = select_system(), linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/base:core_headers", gtest, "//test/util:test_main", @@ -1978,10 +1956,10 @@ cc_binary( srcs = ["raw_socket_icmp.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/base:core_headers", gtest, "//test/util:test_main", @@ -2011,8 +1989,8 @@ cc_binary( srcs = ["readahead.cc"], linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:temp_path", "//test/util:test_main", @@ -2211,8 +2189,8 @@ cc_binary( srcs = ["sendfile_socket.cc"], linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/strings", gtest, ":ip_socket_test_util", @@ -2390,8 +2368,8 @@ cc_library( "socket_generic.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", gtest, @@ -2411,8 +2389,8 @@ cc_binary( deps = [ gtest, ":ip_socket_test_util", - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", @@ -2427,8 +2405,8 @@ cc_library( srcs = ["socket_unix_dgram.cc"], hdrs = ["socket_unix_dgram.h"], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/time", gtest, "//test/util:test_util", @@ -2442,8 +2420,8 @@ cc_library( srcs = ["socket_unix_seqpacket.cc"], hdrs = ["socket_unix_seqpacket.h"], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/time", gtest, "//test/util:test_util", @@ -2461,7 +2439,7 @@ cc_library( "socket_ip_tcp_generic.h", ], deps = [ - ":socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/memory", "@com_google_absl//absl/time", gtest, @@ -2482,8 +2460,8 @@ cc_library( "socket_non_blocking.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_util", ], @@ -2500,8 +2478,8 @@ cc_library( "socket_unix_non_stream.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:memory_util", "//test/util:test_util", @@ -2520,8 +2498,8 @@ cc_library( ], deps = [ ":ip_socket_test_util", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_util", ], @@ -2539,7 +2517,7 @@ cc_library( ], deps = [ ":ip_socket_test_util", - ":socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_util", ], @@ -2557,7 +2535,7 @@ cc_library( ], deps = [ ":ip_socket_test_util", - ":socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/memory", gtest, "//test/util:posix_error", @@ -2578,7 +2556,7 @@ cc_library( ], deps = [ ":ip_socket_test_util", - ":socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/memory", gtest, "//test/util:posix_error", @@ -2599,9 +2577,9 @@ cc_library( ], deps = [ ":socket_netlink_route_util", - ":socket_test_util", "//test/util:capability_util", "//test/util:cleanup", + "//test/util:socket_util", gtest, ], alwayslink = 1, @@ -2618,8 +2596,8 @@ cc_library( ], deps = [ ":socket_netlink_route_util", - ":socket_test_util", "//test/util:capability_util", + "//test/util:socket_util", gtest, ], alwayslink = 1, @@ -2636,7 +2614,7 @@ cc_library( ], deps = [ ":ip_socket_test_util", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_util", ], alwayslink = 1, @@ -2683,10 +2661,10 @@ cc_binary( linkstatic = 1, deps = [ ":socket_generic_test_cases", - ":socket_test_util", ":socket_unix_cmsg_test_cases", ":socket_unix_test_cases", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2701,8 +2679,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_non_blocking_test_cases", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2715,10 +2693,10 @@ cc_binary( linkstatic = 1, deps = [ ":socket_non_stream_test_cases", - ":socket_test_util", ":socket_unix_dgram_test_cases", ":socket_unix_non_stream_test_cases", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2730,8 +2708,8 @@ cc_binary( srcs = ["socket_unix_dgram_non_blocking.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -2747,10 +2725,10 @@ cc_binary( linkstatic = 1, deps = [ ":socket_non_stream_test_cases", - ":socket_test_util", ":socket_unix_non_stream_test_cases", ":socket_unix_seqpacket_test_cases", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2762,8 +2740,8 @@ cc_binary( srcs = ["socket_unix_stream.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/time", gtest, "//test/util:test_main", @@ -2781,7 +2759,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_ip_tcp_generic_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2796,7 +2774,7 @@ cc_binary( linkstatic = 1, deps = [ ":ip_socket_test_util", - ":socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -2813,7 +2791,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_generic_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2829,7 +2807,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_non_blocking_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2847,7 +2825,7 @@ cc_binary( ":socket_generic_test_cases", ":socket_ip_udp_test_cases", ":socket_non_stream_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2863,7 +2841,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_ipv4_udp_unbound_external_networking_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2879,7 +2857,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_ipv6_udp_unbound_external_networking_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2895,8 +2873,8 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_bind_to_device_util", - ":socket_test_util", "//test/util:capability_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -2914,8 +2892,8 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_bind_to_device_util", - ":socket_test_util", "//test/util:capability_util", + "//test/util:socket_util", "@com_google_absl//absl/container:node_hash_map", gtest, "//test/util:test_main", @@ -2934,8 +2912,8 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_bind_to_device_util", - ":socket_test_util", "//test/util:capability_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -2953,7 +2931,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_non_blocking_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2969,7 +2947,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_ipv4_udp_unbound_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -2985,7 +2963,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_ipv6_udp_unbound_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3000,7 +2978,7 @@ cc_binary( linkstatic = 1, deps = [ ":ip_socket_test_util", - ":socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3018,7 +2996,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_ipv4_udp_unbound_netlink_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3034,7 +3012,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_ipv6_udp_unbound_netlink_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3049,7 +3027,7 @@ cc_binary( linkstatic = 1, deps = [ ":ip_socket_test_util", - ":socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3066,8 +3044,8 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_netlink_route_util", - ":socket_test_util", "//test/util:capability_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3083,8 +3061,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_generic_test_cases", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3099,8 +3077,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_non_blocking_test_cases", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3115,10 +3093,10 @@ cc_binary( linkstatic = 1, deps = [ ":socket_generic_test_cases", - ":socket_test_util", ":socket_unix_cmsg_test_cases", ":socket_unix_test_cases", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3133,8 +3111,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_non_blocking_test_cases", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3145,7 +3123,7 @@ cc_library( testonly = 1, hdrs = ["socket_inet_loopback_test_params.h"], deps = [ - ":socket_test_util", + "//test/util:socket_util", gtest, ], ) @@ -3158,8 +3136,8 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_inet_loopback_test_params", - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", @@ -3180,8 +3158,8 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_inet_loopback_test_params", - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/strings", gtest, "//test/util:posix_error", @@ -3199,7 +3177,7 @@ cc_binary( deps = [ ":socket_inet_loopback_test_params", ":socket_netlink_util", - ":socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3213,8 +3191,8 @@ cc_binary( srcs = ["socket_netlink.cc"], linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3229,10 +3207,10 @@ cc_binary( deps = [ ":socket_netlink_route_util", ":socket_netlink_util", - ":socket_test_util", "//test/util:capability_util", "//test/util:cleanup", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/strings:str_format", gtest, "//test/util:test_main", @@ -3247,8 +3225,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_netlink_util", - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3267,8 +3245,8 @@ cc_library( "socket_stream.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/time", gtest, "//test/util:test_util", @@ -3286,8 +3264,8 @@ cc_library( "socket_blocking.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/time", gtest, "//test/util:test_util", @@ -3307,8 +3285,8 @@ cc_library( "socket_unix.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/strings", gtest, "//test/util:test_util", @@ -3327,8 +3305,8 @@ cc_library( "socket_unix_cmsg.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/strings", gtest, "//test/util:test_util", @@ -3347,8 +3325,8 @@ cc_library( "socket_stream_blocking.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/time", gtest, "//test/util:test_util", @@ -3368,8 +3346,8 @@ cc_library( "socket_stream_nonblock.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_util", ], @@ -3386,8 +3364,8 @@ cc_library( "socket_non_stream_blocking.h", ], deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/time", gtest, "//test/util:test_util", @@ -3422,8 +3400,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_stream_test_cases", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3438,8 +3416,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_stream_blocking_test_cases", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3455,7 +3433,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_stream_blocking_test_cases", - ":socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3470,8 +3448,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_stream_nonblocking_test_cases", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "//test/util:test_main", "//test/util:test_util", ], @@ -3483,8 +3461,8 @@ cc_binary( srcs = ["socket_unix_unbound_dgram.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3497,8 +3475,8 @@ cc_binary( srcs = ["socket_unix_unbound_abstract.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3511,8 +3489,8 @@ cc_binary( srcs = ["socket_unix_unbound_filesystem.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:file_descriptor", "//test/util:test_main", @@ -3529,8 +3507,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_blocking_test_cases", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3547,7 +3525,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_blocking_test_cases", - ":socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3563,8 +3541,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_non_stream_blocking_test_cases", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3581,7 +3559,7 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_non_stream_blocking_test_cases", - ":socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3596,10 +3574,10 @@ cc_binary( ], linkstatic = 1, deps = [ - ":socket_test_util", ":socket_unix_cmsg_test_cases", ":socket_unix_test_cases", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3612,8 +3590,8 @@ cc_binary( srcs = ["socket_unix_unbound_seqpacket.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3626,8 +3604,8 @@ cc_binary( srcs = ["socket_unix_unbound_stream.cc"], linkstatic = 1, deps = [ - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -3641,8 +3619,8 @@ cc_binary( linkstatic = 1, deps = [ ":socket_netlink_util", - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/base:endian", gtest, "//test/util:test_main", @@ -3799,8 +3777,8 @@ cc_binary( defines = select_system(), linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", "@com_google_absl//absl/time", gtest, "//test/util:posix_error", @@ -3910,7 +3888,7 @@ cc_binary( srcs = ["tuntap.cc"], linkstatic = 1, deps = [ - ":socket_test_util", + "//test/util:socket_util", gtest, ":socket_netlink_route_util", "//test/util:capability_util", @@ -3943,8 +3921,8 @@ cc_binary( linkstatic = 1, deps = [ ":ip_socket_test_util", - ":socket_test_util", ":unix_domain_socket_test_util", + "//test/util:socket_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", @@ -3963,8 +3941,8 @@ cc_binary( srcs = ["udp_bind.cc"], linkstatic = 1, deps = [ - ":socket_test_util", "//test/util:file_descriptor", + "//test/util:socket_util", gtest, "//test/util:test_main", "//test/util:test_util", @@ -4144,7 +4122,7 @@ cc_binary( srcs = ["network_namespace.cc"], linkstatic = 1, deps = [ - ":socket_test_util", + "//test/util:socket_util", gtest, "//test/util:capability_util", "//test/util:posix_error", @@ -4189,6 +4167,18 @@ cc_binary( ) cc_binary( + name = "msgqueue_test", + testonly = 1, + srcs = ["msgqueue.cc"], + linkstatic = 1, + deps = [ + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( name = "fadvise64_test", testonly = 1, srcs = ["fadvise64.cc"], diff --git a/test/syscalls/linux/accept_bind.cc b/test/syscalls/linux/accept_bind.cc index ba3747290..0d16d1d83 100644 --- a/test/syscalls/linux/accept_bind.cc +++ b/test/syscalls/linux/accept_bind.cc @@ -20,9 +20,9 @@ #include <vector> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/accept_bind_stream.cc b/test/syscalls/linux/accept_bind_stream.cc index 4857f160b..5f2b07105 100644 --- a/test/syscalls/linux/accept_bind_stream.cc +++ b/test/syscalls/linux/accept_bind_stream.cc @@ -19,9 +19,9 @@ #include <vector> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/bind.cc b/test/syscalls/linux/bind.cc index 9547c4ab2..8e1d00619 100644 --- a/test/syscalls/linux/bind.cc +++ b/test/syscalls/linux/bind.cc @@ -17,8 +17,8 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/connect_external.cc b/test/syscalls/linux/connect_external.cc index 1edb50e47..fb2476da4 100644 --- a/test/syscalls/linux/connect_external.cc +++ b/test/syscalls/linux/connect_external.cc @@ -22,9 +22,9 @@ #include <tuple> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // This file contains tests specific to connecting to host UDS managed outside diff --git a/test/syscalls/linux/dup.cc b/test/syscalls/linux/dup.cc index ba4e13fb9..8f0974f45 100644 --- a/test/syscalls/linux/dup.cc +++ b/test/syscalls/linux/dup.cc @@ -13,9 +13,11 @@ // limitations under the License. #include <fcntl.h> +#include <sys/resource.h> #include <unistd.h> #include "gtest/gtest.h" +#include "absl/memory/memory.h" #include "test/util/eventfd_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" @@ -98,6 +100,45 @@ TEST(DupTest, Dup2) { ASSERT_NO_ERRNO(CheckSameFile(fd, nfd2)); } +TEST(DupTest, Rlimit) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + struct rlimit rl = {}; + EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + + ASSERT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + + constexpr int kFDLimit = 101; + // Create a file descriptor that will be above the limit. + FileDescriptor aboveLimitFD = + ASSERT_NO_ERRNO_AND_VALUE(Dup2(fd, kFDLimit * 2 - 1)); + + rl.rlim_cur = kFDLimit; + ASSERT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + ASSERT_THAT(dup3(fd.get(), kFDLimit, 0), SyscallFails()); + + std::vector<std::unique_ptr<FileDescriptor>> fds; + int prev_fd = fd.get(); + int used_fds = 0; + for (int i = 0; i < kFDLimit; ++i) { + int new_fd = dup(fd.get()); + if (new_fd == -1) { + break; + } + auto f = absl::make_unique<FileDescriptor>(new_fd); + EXPECT_LT(new_fd, kFDLimit); + EXPECT_GT(new_fd, prev_fd); + // Check that all fds in (prev_fd, new_fd) are used. + for (int j = prev_fd + 1; j < new_fd; ++j) { + if (fcntl(j, F_GETFD) != -1) used_fds++; + } + prev_fd = new_fd; + fds.push_back(std::move(f)); + } + EXPECT_EQ(fds.size() + used_fds, kFDLimit - fd.get() - 1); +} + TEST(DupTest, Dup2SameFD) { auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc index 5c839447e..5f1b4d5e5 100644 --- a/test/syscalls/linux/fallocate.cc +++ b/test/syscalls/linux/fallocate.cc @@ -31,11 +31,11 @@ #include "absl/strings/str_cat.h" #include "absl/time/time.h" #include "test/syscalls/linux/file_base.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/cleanup.h" #include "test/util/eventfd_util.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" +#include "test/util/socket_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 91526572b..0e78a4d4a 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -35,7 +35,6 @@ #include "absl/strings/str_cat.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/cleanup.h" #include "test/util/eventfd_util.h" @@ -46,6 +45,7 @@ #include "test/util/posix_error.h" #include "test/util/save_util.h" #include "test/util/signal_util.h" +#include "test/util/socket_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc index 10dad042f..686b779be 100644 --- a/test/syscalls/linux/flock.cc +++ b/test/syscalls/linux/flock.cc @@ -21,8 +21,8 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/syscalls/linux/file_base.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/ioctl.cc b/test/syscalls/linux/ioctl.cc index 9b16d1558..88056ef2e 100644 --- a/test/syscalls/linux/ioctl.cc +++ b/test/syscalls/linux/ioctl.cc @@ -26,10 +26,10 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/signal_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/ip6tables.cc b/test/syscalls/linux/ip6tables.cc index e0e146067..d11b45d4a 100644 --- a/test/syscalls/linux/ip6tables.cc +++ b/test/syscalls/linux/ip6tables.cc @@ -17,9 +17,9 @@ #include "gtest/gtest.h" #include "test/syscalls/linux/iptables.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h index bde481f7e..8f26f1cd0 100644 --- a/test/syscalls/linux/ip_socket_test_util.h +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -21,7 +21,7 @@ #include <string> -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc index ac113e6da..9fb04eae6 100644 --- a/test/syscalls/linux/itimer.cc +++ b/test/syscalls/linux/itimer.cc @@ -197,9 +197,9 @@ int TestSIGALRMToMainThread() { // (but don't guarantee it), so we expect to see most samples on the main // thread. // - // The number of SIGALRMs delivered to a worker should not exceed 20% + // The number of SIGALRMs delivered to a worker should not exceed 40% // of the number of total signals expected (this is somewhat arbitrary). - const int worker_threshold = result.expected_total / 5; + const int worker_threshold = result.expected_total / 5 * 2; // // Linux only guarantees timers will never expire before the requested time. @@ -230,7 +230,8 @@ TEST(ItimerTest, DeliversSIGALRMToMainThread) { // Not required anymore. kill.Release(); - EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status; + EXPECT_EQ(WIFEXITED(status) && WEXITSTATUS(status), 0) + << WIFEXITED(status) << " " << WEXITSTATUS(status); } // Signals are delivered to threads fairly. diff --git a/test/syscalls/linux/msgqueue.cc b/test/syscalls/linux/msgqueue.cc new file mode 100644 index 000000000..2409de7e8 --- /dev/null +++ b/test/syscalls/linux/msgqueue.cc @@ -0,0 +1,87 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/ipc.h> +#include <sys/msg.h> +#include <sys/types.h> + +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// Queue is a RAII class used to automatically clean message queues. +class Queue { + public: + explicit Queue(int id) : id_(id) {} + + ~Queue() { + if (id_ >= 0) { + EXPECT_THAT(msgctl(id_, IPC_RMID, nullptr), SyscallSucceeds()); + } + } + + int release() { + int old = id_; + id_ = -1; + return old; + } + + int get() { return id_; } + + private: + int id_ = -1; +}; + +// Test simple creation and retrieval for msgget(2). +TEST(MsgqueueTest, MsgGet) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + ASSERT_THAT(key, SyscallSucceeds()); + + Queue queue(msgget(key, IPC_CREAT)); + ASSERT_THAT(queue.get(), SyscallSucceeds()); + EXPECT_THAT(msgget(key, 0), SyscallSucceedsWithValue(queue.get())); +} + +// Test simple failure scenarios for msgget(2). +TEST(MsgqueueTest, MsgGetFail) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + ASSERT_THAT(key, SyscallSucceeds()); + + EXPECT_THAT(msgget(key, 0), SyscallFailsWithErrno(ENOENT)); + + Queue queue(msgget(key, IPC_CREAT)); + ASSERT_THAT(queue.get(), SyscallSucceeds()); + + EXPECT_THAT(msgget(key, IPC_CREAT | IPC_EXCL), SyscallFailsWithErrno(EEXIST)); +} + +// Test using msgget(2) with IPC_PRIVATE option. +TEST(MsgqueueTest, MsgGetIpcPrivate) { + Queue queue1(msgget(IPC_PRIVATE, 0)); + ASSERT_THAT(queue1.get(), SyscallSucceeds()); + + Queue queue2(msgget(IPC_PRIVATE, 0)); + ASSERT_THAT(queue2.get(), SyscallSucceeds()); + + EXPECT_NE(queue1.get(), queue2.get()); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/network_namespace.cc b/test/syscalls/linux/network_namespace.cc index 133fdecf0..1984feedd 100644 --- a/test/syscalls/linux/network_namespace.cc +++ b/test/syscalls/linux/network_namespace.cc @@ -20,9 +20,9 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/posix_error.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc index 1e246c421..98339277b 100644 --- a/test/syscalls/linux/packet_socket.cc +++ b/test/syscalls/linux/packet_socket.cc @@ -29,10 +29,10 @@ #include "gtest/gtest.h" #include "absl/base/internal/endian.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // Some of these tests involve sending packets via AF_PACKET sockets and the diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc index 7e439466e..07beb8ba0 100644 --- a/test/syscalls/linux/packet_socket_raw.cc +++ b/test/syscalls/linux/packet_socket_raw.cc @@ -30,10 +30,10 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/internal/endian.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // Some of these tests involve sending packets via AF_PACKET sockets and the diff --git a/test/syscalls/linux/partial_bad_buffer.cc b/test/syscalls/linux/partial_bad_buffer.cc index 223ddc0c8..1bdfcbbe3 100644 --- a/test/syscalls/linux/partial_bad_buffer.cc +++ b/test/syscalls/linux/partial_bad_buffer.cc @@ -26,10 +26,10 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" #include "test/util/posix_error.h" +#include "test/util/socket_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/ping_socket.cc b/test/syscalls/linux/ping_socket.cc index 8268e91da..7ec1938bf 100644 --- a/test/syscalls/linux/ping_socket.cc +++ b/test/syscalls/linux/ping_socket.cc @@ -29,8 +29,8 @@ #include "absl/strings/str_join.h" #include "absl/types/optional.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // Note: These tests require /proc/sys/net/ipv4/ping_group_range to be diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 78aa73edc..8a4025fed 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -54,6 +54,8 @@ #include "absl/strings/match.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" @@ -88,6 +90,7 @@ using ::testing::Gt; using ::testing::HasSubstr; using ::testing::IsSupersetOf; using ::testing::Pair; +using ::testing::StartsWith; using ::testing::UnorderedElementsAre; using ::testing::UnorderedElementsAreArray; @@ -1622,10 +1625,41 @@ TEST(ProcPidStatusTest, HasBasicFields) { ASSERT_FALSE(status_str.empty()); const auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); - EXPECT_THAT(status, IsSupersetOf({Pair("Name", thread_name), - Pair("Tgid", absl::StrCat(tgid)), - Pair("Pid", absl::StrCat(tid)), - Pair("PPid", absl::StrCat(getppid()))})); + EXPECT_THAT(status, IsSupersetOf({ + Pair("Name", thread_name), + Pair("Tgid", absl::StrCat(tgid)), + Pair("Pid", absl::StrCat(tid)), + Pair("PPid", absl::StrCat(getppid())), + })); + + if (!IsRunningWithVFS1()) { + uid_t ruid, euid, suid; + ASSERT_THAT(getresuid(&ruid, &euid, &suid), SyscallSucceeds()); + gid_t rgid, egid, sgid; + ASSERT_THAT(getresgid(&rgid, &egid, &sgid), SyscallSucceeds()); + std::vector<gid_t> supplementary_gids; + int ngids = getgroups(0, nullptr); + supplementary_gids.resize(ngids); + ASSERT_THAT(getgroups(ngids, supplementary_gids.data()), + SyscallSucceeds()); + + EXPECT_THAT( + status, + IsSupersetOf(std::vector< + ::testing::Matcher<std::pair<std::string, std::string>>>{ + // gVisor doesn't support fsuid/gid, and even if it did there is + // no getfsuid/getfsgid(). + Pair("Uid", StartsWith(absl::StrFormat("%d\t%d\t%d\t", ruid, euid, + suid))), + Pair("Gid", StartsWith(absl::StrFormat("%d\t%d\t%d\t", rgid, egid, + sgid))), + // ParseProcStatus strips leading whitespace for each value, + // so if the Groups line is empty then the trailing space is + // stripped. + Pair("Groups", + StartsWith(absl::StrJoin(supplementary_gids, " "))), + })); + } }); } diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc index 3b8a71ab4..4cbe30fc1 100644 --- a/test/syscalls/linux/proc_net.cc +++ b/test/syscalls/linux/proc_net.cc @@ -28,10 +28,10 @@ #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" #include "absl/time/clock.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc index f8798bc76..e19fe8f6b 100644 --- a/test/syscalls/linux/raw_socket.cc +++ b/test/syscalls/linux/raw_socket.cc @@ -27,10 +27,10 @@ #include <algorithm> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // Note: in order to run these tests, /proc/sys/net/ipv4/ping_group_range will diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc index 4611b6283..f1d8fd295 100644 --- a/test/syscalls/linux/raw_socket_hdrincl.cc +++ b/test/syscalls/linux/raw_socket_hdrincl.cc @@ -27,10 +27,10 @@ #include "gtest/gtest.h" #include "absl/base/internal/endian.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/raw_socket_icmp.cc b/test/syscalls/linux/raw_socket_icmp.cc index 275996bd3..27d3fffee 100644 --- a/test/syscalls/linux/raw_socket_icmp.cc +++ b/test/syscalls/linux/raw_socket_icmp.cc @@ -24,10 +24,10 @@ #include <cstdint> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/readahead.cc b/test/syscalls/linux/readahead.cc index 71073bb3c..04104c912 100644 --- a/test/syscalls/linux/readahead.cc +++ b/test/syscalls/linux/readahead.cc @@ -16,8 +16,8 @@ #include <fcntl.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/semaphore.cc b/test/syscalls/linux/semaphore.cc index f72957f89..87b66aa98 100644 --- a/test/syscalls/linux/semaphore.cc +++ b/test/syscalls/linux/semaphore.cc @@ -1019,6 +1019,17 @@ TEST(SemaphoreTest, SemInfo) { EXPECT_EQ(info.semvmx, kSemVmx); } +TEST(SempahoreTest, RemoveNonExistentSemaphore) { + EXPECT_THAT(semctl(-1, 0, IPC_RMID), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SempahoreTest, RemoveDeletedSemaphore) { + int id; + EXPECT_THAT(id = semget(IPC_PRIVATE, 1, 0), SyscallSucceeds()); + EXPECT_THAT(semctl(id, 0, IPC_RMID), SyscallSucceeds()); + EXPECT_THAT(semctl(id, 0, IPC_RMID), SyscallFailsWithErrno(EINVAL)); +} + } // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/sendfile_socket.cc b/test/syscalls/linux/sendfile_socket.cc index c101fe9d2..ac6e89e91 100644 --- a/test/syscalls/linux/sendfile_socket.cc +++ b/test/syscalls/linux/sendfile_socket.cc @@ -24,8 +24,8 @@ #include "gtest/gtest.h" #include "absl/strings/string_view.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/socket.cc b/test/syscalls/linux/socket.cc index 2742d19be..d2762b6e9 100644 --- a/test/syscalls/linux/socket.cc +++ b/test/syscalls/linux/socket.cc @@ -20,8 +20,8 @@ #include <unistd.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/temp_umask.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_abstract.cc b/test/syscalls/linux/socket_abstract.cc index 00999f192..d450fad14 100644 --- a/test/syscalls/linux/socket_abstract.cc +++ b/test/syscalls/linux/socket_abstract.cc @@ -15,10 +15,10 @@ #include <vector> #include "test/syscalls/linux/socket_generic.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/socket_unix.h" #include "test/syscalls/linux/socket_unix_cmsg.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_bind_to_device.cc b/test/syscalls/linux/socket_bind_to_device.cc index 6b27f6eab..dac31a90c 100644 --- a/test/syscalls/linux/socket_bind_to_device.cc +++ b/test/syscalls/linux/socket_bind_to_device.cc @@ -34,8 +34,8 @@ #include "gtest/gtest.h" #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_bind_to_device_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/socket_bind_to_device_distribution.cc b/test/syscalls/linux/socket_bind_to_device_distribution.cc index 70b0b2742..4cddb875a 100644 --- a/test/syscalls/linux/socket_bind_to_device_distribution.cc +++ b/test/syscalls/linux/socket_bind_to_device_distribution.cc @@ -35,8 +35,8 @@ #include "gtest/gtest.h" #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_bind_to_device_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc index d3cc71dbf..334b46730 100644 --- a/test/syscalls/linux/socket_bind_to_device_sequence.cc +++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc @@ -36,8 +36,8 @@ #include "absl/container/node_hash_map.h" #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_bind_to_device_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/socket_blocking.cc b/test/syscalls/linux/socket_blocking.cc index 7e88aa2d9..5262e9ed9 100644 --- a/test/syscalls/linux/socket_blocking.cc +++ b/test/syscalls/linux/socket_blocking.cc @@ -23,8 +23,8 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" #include "test/util/timer_util.h" diff --git a/test/syscalls/linux/socket_blocking.h b/test/syscalls/linux/socket_blocking.h index db26e5ef5..89134ec30 100644 --- a/test/syscalls/linux/socket_blocking.h +++ b/test/syscalls/linux/socket_blocking.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_BLOCKING_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_BLOCKING_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_capability.cc b/test/syscalls/linux/socket_capability.cc index f75482aba..95cf1f6b4 100644 --- a/test/syscalls/linux/socket_capability.cc +++ b/test/syscalls/linux/socket_capability.cc @@ -16,9 +16,9 @@ // headers). #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_filesystem.cc b/test/syscalls/linux/socket_filesystem.cc index 287359363..a611e9f4e 100644 --- a/test/syscalls/linux/socket_filesystem.cc +++ b/test/syscalls/linux/socket_filesystem.cc @@ -15,10 +15,10 @@ #include <vector> #include "test/syscalls/linux/socket_generic.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/socket_unix.h" #include "test/syscalls/linux/socket_unix_cmsg.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_generic.h b/test/syscalls/linux/socket_generic.h index 00ae7bfc3..a13262355 100644 --- a/test/syscalls/linux/socket_generic.h +++ b/test/syscalls/linux/socket_generic.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_generic_stress.cc b/test/syscalls/linux/socket_generic_stress.cc index 778c32a8e..9ff385b41 100644 --- a/test/syscalls/linux/socket_generic_stress.cc +++ b/test/syscalls/linux/socket_generic_stress.cc @@ -29,8 +29,8 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/socket_generic_test_cases.cc b/test/syscalls/linux/socket_generic_test_cases.cc index fe5171bc8..c509d54e2 100644 --- a/test/syscalls/linux/socket_generic_test_cases.cc +++ b/test/syscalls/linux/socket_generic_test_cases.cc @@ -25,9 +25,9 @@ #include "gtest/gtest.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/capability_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // This file is a generic socket test file. It must be built with another file diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 9ae0cc59d..13a83a1b3 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -35,10 +35,10 @@ #include "absl/time/time.h" #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_inet_loopback_test_params.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" #include "test/util/save_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -308,7 +308,7 @@ TEST_P(SocketInetLoopbackTest, TCPListenShutdownListen) { sockaddr_storage conn_addr = connector.addr; ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); - // TODO(b/157236388): Remove Disable save after bug is fixed. S/R test can + // TODO(b/153489135): Remove Disable save after bug is fixed. S/R test can // fail because the last socket may not be delivered to the accept queue // by the time connect returns. DisableSave ds; @@ -751,7 +751,7 @@ TEST_P(SocketInetLoopbackTest, TCPNonBlockingConnectClose) { } } -// TODO(b/157236388): Remove once bug is fixed. Test fails w/ +// TODO(b/153489135): Remove once bug is fixed. Test fails w/ // random save as established connections which can't be delivered to the accept // queue because the queue is full are not correctly delivered after restore // causing the last accept to timeout on the restore. @@ -801,7 +801,7 @@ TEST_P(SocketInetLoopbackTest, TCPAcceptBacklogSizes) { } } -// TODO(b/157236388): Remove once bug is fixed. Test fails w/ +// TODO(b/153489135): Remove once bug is fixed. Test fails w/ // random save as established connections which can't be delivered to the accept // queue because the queue is full are not correctly delivered after restore // causing the last accept to timeout on the restore. @@ -892,7 +892,7 @@ TEST_P(SocketInetLoopbackTest, TCPBacklog) { ASSERT_GE(client_conns, accepted_conns); } -// TODO(b/157236388): Remove once bug is fixed. Test fails w/ +// TODO(b/153489135): Remove once bug is fixed. Test fails w/ // random save as established connections which can't be delivered to the accept // queue because the queue is full are not correctly delivered after restore // causing the last accept to timeout on the restore. @@ -1136,7 +1136,7 @@ TEST_P(SocketInetLoopbackTest, TCPAcceptAfterReset) { sockaddr_storage conn_addr = connector.addr; ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); - // TODO(b/157236388): Reenable Cooperative S/R once bug is fixed. + // TODO(b/153489135): Reenable Cooperative S/R once bug is fixed. DisableSave ds; ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), AsSockAddr(&conn_addr), connector.addr_len), diff --git a/test/syscalls/linux/socket_inet_loopback_isolated.cc b/test/syscalls/linux/socket_inet_loopback_isolated.cc index ab2259b55..182d20a9e 100644 --- a/test/syscalls/linux/socket_inet_loopback_isolated.cc +++ b/test/syscalls/linux/socket_inet_loopback_isolated.cc @@ -18,7 +18,7 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/syscalls/linux/socket_inet_loopback_test_params.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // Unit tests in this file will run in their own network namespace. diff --git a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc index cc2773af1..479162487 100644 --- a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc +++ b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc @@ -28,10 +28,10 @@ #include "absl/strings/str_cat.h" #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_inet_loopback_test_params.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" #include "test/util/save_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_inet_loopback_test_params.h b/test/syscalls/linux/socket_inet_loopback_test_params.h index 42b48eb8a..163e595a8 100644 --- a/test/syscalls/linux/socket_inet_loopback_test_params.h +++ b/test/syscalls/linux/socket_inet_loopback_test_params.h @@ -16,7 +16,7 @@ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_INET_LOOPBACK_TEST_PARAMS_H_ #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_ip_loopback_blocking.cc b/test/syscalls/linux/socket_ip_loopback_blocking.cc index fda252dd7..caa5c0c63 100644 --- a/test/syscalls/linux/socket_ip_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_loopback_blocking.cc @@ -18,7 +18,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index 2f5743cda..3271263c8 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -28,7 +28,7 @@ #include "absl/memory/memory.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/socket_ip_tcp_generic.h b/test/syscalls/linux/socket_ip_tcp_generic.h index a3eff3c73..e9e60ef4c 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.h +++ b/test/syscalls/linux/socket_ip_tcp_generic.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc index 4e79d21f4..3406874b8 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc @@ -18,7 +18,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_ip_tcp_generic.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_tcp_loopback.cc b/test/syscalls/linux/socket_ip_tcp_loopback.cc index 9db3037bc..0796b8634 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback.cc @@ -16,7 +16,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_generic.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc index f996b93d2..533ccc3ae 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc @@ -18,7 +18,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_stream_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc index ffa377210..05fe2a738 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc @@ -18,7 +18,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_non_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_tcp_udp_generic.cc b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc index f178f1af9..88adb5b1b 100644 --- a/test/syscalls/linux/socket_ip_tcp_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc @@ -23,7 +23,7 @@ #include "gtest/gtest.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index 1694e188a..8a87f2667 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -28,7 +28,7 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_udp_generic.h b/test/syscalls/linux/socket_ip_udp_generic.h index 106c54e9f..a3a66c768 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.h +++ b/test/syscalls/linux/socket_ip_udp_generic.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_GENERIC_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_GENERIC_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_ip_udp_loopback.cc b/test/syscalls/linux/socket_ip_udp_loopback.cc index c7fa44884..6d06bd580 100644 --- a/test/syscalls/linux/socket_ip_udp_loopback.cc +++ b/test/syscalls/linux/socket_ip_udp_loopback.cc @@ -18,7 +18,7 @@ #include "test/syscalls/linux/socket_generic.h" #include "test/syscalls/linux/socket_ip_udp_generic.h" #include "test/syscalls/linux/socket_non_stream.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc index d6925a8df..60d02e079 100644 --- a/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc @@ -16,7 +16,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_non_stream_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc index d675eddc6..c011e3658 100644 --- a/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc +++ b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc @@ -16,7 +16,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_non_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_udp_unbound_external_networking.cc b/test/syscalls/linux/socket_ip_udp_unbound_external_networking.cc index fdbb2216b..af2459a2f 100644 --- a/test/syscalls/linux/socket_ip_udp_unbound_external_networking.cc +++ b/test/syscalls/linux/socket_ip_udp_unbound_external_networking.cc @@ -14,7 +14,7 @@ #include "test/syscalls/linux/socket_ip_udp_unbound_external_networking.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_udp_unbound_external_networking.h b/test/syscalls/linux/socket_ip_udp_unbound_external_networking.h index e5287addb..2e8aab129 100644 --- a/test/syscalls/linux/socket_ip_udp_unbound_external_networking.h +++ b/test/syscalls/linux/socket_ip_udp_unbound_external_networking.h @@ -16,7 +16,7 @@ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_EXTERNAL_NETWORKING_H_ #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc index 029f1e872..930f19e59 100644 --- a/test/syscalls/linux/socket_ip_unbound.cc +++ b/test/syscalls/linux/socket_ip_unbound.cc @@ -24,7 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ip_unbound_netlink.cc b/test/syscalls/linux/socket_ip_unbound_netlink.cc index b02222999..803a3b30b 100644 --- a/test/syscalls/linux/socket_ip_unbound_netlink.cc +++ b/test/syscalls/linux/socket_ip_unbound_netlink.cc @@ -25,8 +25,8 @@ #include "gtest/gtest.h" #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_netlink_route_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index 18be4dcc7..816d1181c 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -26,9 +26,9 @@ #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/posix_error.h" #include "test/util/save_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.h b/test/syscalls/linux/socket_ipv4_udp_unbound.h index f64c57645..3818a3490 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.h +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc index f6e64c157..ebf2185f2 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc @@ -17,7 +17,7 @@ #include <vector> #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc index f121c044d..00930c544 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc @@ -16,7 +16,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_ipv4_udp_unbound.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc index 8052bf404..f90a48630 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc @@ -16,7 +16,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_nogotsan.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_nogotsan.cc index a2c6d4491..5a7bca658 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_nogotsan.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_nogotsan.cc @@ -18,7 +18,7 @@ #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h index 73e7836d5..17c8e2b84 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound.cc b/test/syscalls/linux/socket_ipv6_udp_unbound.cc index a4e3371f4..612fd531c 100644 --- a/test/syscalls/linux/socket_ipv6_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv6_udp_unbound.cc @@ -31,9 +31,9 @@ #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/posix_error.h" #include "test/util/save_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound.h b/test/syscalls/linux/socket_ipv6_udp_unbound.h index 71e160f73..060343eaf 100644 --- a/test/syscalls/linux/socket_ipv6_udp_unbound.h +++ b/test/syscalls/linux/socket_ipv6_udp_unbound.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_external_networking_test.cc index 5c764b8fd..ff12eafc3 100644 --- a/test/syscalls/linux/socket_ipv6_udp_unbound_external_networking_test.cc +++ b/test/syscalls/linux/socket_ipv6_udp_unbound_external_networking_test.cc @@ -17,7 +17,7 @@ #include <vector> #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_loopback.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_loopback.cc index 058336ecc..f11f444a2 100644 --- a/test/syscalls/linux/socket_ipv6_udp_unbound_loopback.cc +++ b/test/syscalls/linux/socket_ipv6_udp_unbound_loopback.cc @@ -16,7 +16,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_ipv6_udp_unbound.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc index 17021ff82..565f9bc2e 100644 --- a/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc +++ b/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc @@ -16,7 +16,7 @@ #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h index 88098be82..f017a4c89 100644 --- a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h +++ b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc index 8d214a2b7..c95c5305d 100644 --- a/test/syscalls/linux/socket_netdevice.cc +++ b/test/syscalls/linux/socket_netdevice.cc @@ -22,8 +22,8 @@ #include "gtest/gtest.h" #include "absl/base/internal/endian.h" #include "test/syscalls/linux/socket_netlink_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // Tests for netdevice queries. diff --git a/test/syscalls/linux/socket_netlink.cc b/test/syscalls/linux/socket_netlink.cc index 4ec0fd4fa..c78529a14 100644 --- a/test/syscalls/linux/socket_netlink.cc +++ b/test/syscalls/linux/socket_netlink.cc @@ -18,8 +18,8 @@ #include <unistd.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // Tests for all netlink socket protocols. diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index ee3c08770..d5e1ce0cc 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -29,10 +29,10 @@ #include "absl/strings/str_format.h" #include "test/syscalls/linux/socket_netlink_route_util.h" #include "test/syscalls/linux/socket_netlink_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/cleanup.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // Tests for NETLINK_ROUTE sockets. @@ -44,6 +44,7 @@ namespace { constexpr uint32_t kSeq = 12345; +using ::testing::_; using ::testing::AnyOf; using ::testing::Eq; @@ -244,7 +245,7 @@ TEST(NetlinkRouteTest, GetLinkByIndexNotFound) { req.ifm.ifi_index = 1234590; EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), - PosixErrorIs(ENODEV, ::testing::_)); + PosixErrorIs(ENODEV, _)); } TEST(NetlinkRouteTest, GetLinkByNameNotFound) { @@ -273,7 +274,112 @@ TEST(NetlinkRouteTest, GetLinkByNameNotFound) { NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), - PosixErrorIs(ENODEV, ::testing::_)); + PosixErrorIs(ENODEV, _)); +} + +TEST(NetlinkRouteTest, RemoveLoopbackByName) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + struct rtattr rtattr; + char ifname[IFNAMSIZ]; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_type = RTM_DELLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.rtattr.rta_type = IFLA_IFNAME; + req.rtattr.rta_len = RTA_LENGTH(loopback_link.name.size() + 1); + strncpy(req.ifname, loopback_link.name.c_str(), sizeof(req.ifname)); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENOTSUP, _)); +} + +TEST(NetlinkRouteTest, RemoveLoopbackByIndex) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_DELLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.ifm.ifi_index = loopback_link.index; + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENOTSUP, _)); +} + +TEST(NetlinkRouteTest, RemoveLinkByIndexNotFound) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.ifm.ifi_index = 1234590; + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENODEV, _)); +} + +TEST(NetlinkRouteTest, RemoveLinkByNameNotFound) { + const std::string name = "nodevice?!"; + + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + struct rtattr rtattr; + char ifname[IFNAMSIZ]; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_type = RTM_DELLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.rtattr.rta_type = IFLA_IFNAME; + req.rtattr.rta_len = RTA_LENGTH(name.size() + 1); + strncpy(req.ifname, name.c_str(), sizeof(req.ifname)); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENODEV, _)); } TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { @@ -295,7 +401,7 @@ TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { req.ifm.ifi_family = AF_UNSPEC; EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), - PosixErrorIs(EOPNOTSUPP, ::testing::_)); + PosixErrorIs(EOPNOTSUPP, _)); } TEST(NetlinkRouteTest, MsgHdrMsgTrunc) { @@ -536,7 +642,7 @@ TEST(NetlinkRouteTest, AddAndRemoveAddr) { // Second delete should fail, as address no longer exists. EXPECT_THAT(LinkDelLocalAddr(loopback_link.index, AF_INET, /*prefixlen=*/24, &addr, sizeof(addr)), - PosixErrorIs(EADDRNOTAVAIL, ::testing::_)); + PosixErrorIs(EADDRNOTAVAIL, _)); }); // Replace an existing address should succeed. @@ -546,7 +652,7 @@ TEST(NetlinkRouteTest, AddAndRemoveAddr) { // Create exclusive should fail, as we created the address above. EXPECT_THAT(LinkAddExclusiveLocalAddr(loopback_link.index, AF_INET, /*prefixlen=*/24, &addr, sizeof(addr)), - PosixErrorIs(EEXIST, ::testing::_)); + PosixErrorIs(EEXIST, _)); } // GetRouteDump tests a RTM_GETROUTE + NLM_F_DUMP request. diff --git a/test/syscalls/linux/socket_netlink_uevent.cc b/test/syscalls/linux/socket_netlink_uevent.cc index da425bed4..9e025911b 100644 --- a/test/syscalls/linux/socket_netlink_uevent.cc +++ b/test/syscalls/linux/socket_netlink_uevent.cc @@ -20,8 +20,8 @@ #include "gtest/gtest.h" #include "test/syscalls/linux/socket_netlink_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" // Tests for NETLINK_KOBJECT_UEVENT sockets. diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc index bdebea321..c1bff3c65 100644 --- a/test/syscalls/linux/socket_netlink_util.cc +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -22,7 +22,7 @@ #include <vector> #include "absl/strings/str_cat.h" -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_non_blocking.cc b/test/syscalls/linux/socket_non_blocking.cc index c3520cadd..3d09485d3 100644 --- a/test/syscalls/linux/socket_non_blocking.cc +++ b/test/syscalls/linux/socket_non_blocking.cc @@ -20,8 +20,8 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_non_blocking.h b/test/syscalls/linux/socket_non_blocking.h index bd3e02fd2..604206cfb 100644 --- a/test/syscalls/linux/socket_non_blocking.h +++ b/test/syscalls/linux/socket_non_blocking.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_non_stream.cc b/test/syscalls/linux/socket_non_stream.cc index c61817f14..7c3310909 100644 --- a/test/syscalls/linux/socket_non_stream.cc +++ b/test/syscalls/linux/socket_non_stream.cc @@ -20,8 +20,8 @@ #include "gtest/gtest.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_non_stream.h b/test/syscalls/linux/socket_non_stream.h index 469fbe6a2..4876730f9 100644 --- a/test/syscalls/linux/socket_non_stream.h +++ b/test/syscalls/linux/socket_non_stream.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_non_stream_blocking.cc b/test/syscalls/linux/socket_non_stream_blocking.cc index b052f6e61..ac33407f8 100644 --- a/test/syscalls/linux/socket_non_stream_blocking.cc +++ b/test/syscalls/linux/socket_non_stream_blocking.cc @@ -22,8 +22,8 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/socket_non_stream_blocking.h b/test/syscalls/linux/socket_non_stream_blocking.h index 6e205a039..71520bb37 100644 --- a/test/syscalls/linux/socket_non_stream_blocking.h +++ b/test/syscalls/linux/socket_non_stream_blocking.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_stream.cc b/test/syscalls/linux/socket_stream.cc index 6522b2e01..11903f28b 100644 --- a/test/syscalls/linux/socket_stream.cc +++ b/test/syscalls/linux/socket_stream.cc @@ -21,8 +21,8 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_stream.h b/test/syscalls/linux/socket_stream.h index b837b8f8c..dc6fb2f98 100644 --- a/test/syscalls/linux/socket_stream.h +++ b/test/syscalls/linux/socket_stream.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_stream_blocking.cc b/test/syscalls/linux/socket_stream_blocking.cc index 0743322ac..e168f79ff 100644 --- a/test/syscalls/linux/socket_stream_blocking.cc +++ b/test/syscalls/linux/socket_stream_blocking.cc @@ -22,8 +22,8 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" #include "test/util/timer_util.h" diff --git a/test/syscalls/linux/socket_stream_blocking.h b/test/syscalls/linux/socket_stream_blocking.h index 9fd19ff90..f760188f6 100644 --- a/test/syscalls/linux/socket_stream_blocking.h +++ b/test/syscalls/linux/socket_stream_blocking.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_stream_nonblock.cc b/test/syscalls/linux/socket_stream_nonblock.cc index 74d608741..788fae906 100644 --- a/test/syscalls/linux/socket_stream_nonblock.cc +++ b/test/syscalls/linux/socket_stream_nonblock.cc @@ -20,8 +20,8 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_stream_nonblock.h b/test/syscalls/linux/socket_stream_nonblock.h index c3b7fad91..d1adaa95c 100644 --- a/test/syscalls/linux/socket_stream_nonblock.h +++ b/test/syscalls/linux/socket_stream_nonblock.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc index 591cab3fd..cf96b2075 100644 --- a/test/syscalls/linux/socket_unix.cc +++ b/test/syscalls/linux/socket_unix.cc @@ -26,8 +26,8 @@ #include "gtest/gtest.h" #include "absl/strings/string_view.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/socket_unix.h b/test/syscalls/linux/socket_unix.h index 3625cc404..f6405f399 100644 --- a/test/syscalls/linux/socket_unix.h +++ b/test/syscalls/linux/socket_unix.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_unix_abstract_nonblock.cc b/test/syscalls/linux/socket_unix_abstract_nonblock.cc index 8bef76b67..617c5bfe5 100644 --- a/test/syscalls/linux/socket_unix_abstract_nonblock.cc +++ b/test/syscalls/linux/socket_unix_abstract_nonblock.cc @@ -15,8 +15,8 @@ #include <vector> #include "test/syscalls/linux/socket_non_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_blocking_local.cc b/test/syscalls/linux/socket_unix_blocking_local.cc index 77cb8c6d6..ba34320bc 100644 --- a/test/syscalls/linux/socket_unix_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_blocking_local.cc @@ -15,8 +15,8 @@ #include <vector> #include "test/syscalls/linux/socket_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_cmsg.cc b/test/syscalls/linux/socket_unix_cmsg.cc index 22a4ee0d1..6191b1448 100644 --- a/test/syscalls/linux/socket_unix_cmsg.cc +++ b/test/syscalls/linux/socket_unix_cmsg.cc @@ -26,8 +26,8 @@ #include "gtest/gtest.h" #include "absl/strings/string_view.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/socket_unix_cmsg.h b/test/syscalls/linux/socket_unix_cmsg.h index 431606903..f5a276155 100644 --- a/test/syscalls/linux/socket_unix_cmsg.h +++ b/test/syscalls/linux/socket_unix_cmsg.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_CMSG_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_CMSG_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_unix_dgram.cc b/test/syscalls/linux/socket_unix_dgram.cc index 5b0844493..d43c83d71 100644 --- a/test/syscalls/linux/socket_unix_dgram.cc +++ b/test/syscalls/linux/socket_unix_dgram.cc @@ -20,8 +20,8 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_dgram.h b/test/syscalls/linux/socket_unix_dgram.h index 0764ef85b..e9b8373a5 100644 --- a/test/syscalls/linux/socket_unix_dgram.h +++ b/test/syscalls/linux/socket_unix_dgram.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_unix_dgram_local.cc b/test/syscalls/linux/socket_unix_dgram_local.cc index 31d2d5216..4760630b9 100644 --- a/test/syscalls/linux/socket_unix_dgram_local.cc +++ b/test/syscalls/linux/socket_unix_dgram_local.cc @@ -15,10 +15,10 @@ #include <vector> #include "test/syscalls/linux/socket_non_stream.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/socket_unix_dgram.h" #include "test/syscalls/linux/socket_unix_non_stream.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc index 2db8b68d3..ca277122e 100644 --- a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc +++ b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc @@ -16,8 +16,8 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_domain.cc b/test/syscalls/linux/socket_unix_domain.cc index f7dff8b4d..d8cb5b892 100644 --- a/test/syscalls/linux/socket_unix_domain.cc +++ b/test/syscalls/linux/socket_unix_domain.cc @@ -15,8 +15,8 @@ #include <vector> #include "test/syscalls/linux/socket_generic.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_filesystem_nonblock.cc b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc index 6700b4d90..cdd38f681 100644 --- a/test/syscalls/linux/socket_unix_filesystem_nonblock.cc +++ b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc @@ -15,8 +15,8 @@ #include <vector> #include "test/syscalls/linux/socket_non_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_non_stream.cc b/test/syscalls/linux/socket_unix_non_stream.cc index 9425e87a6..d18f9e7b0 100644 --- a/test/syscalls/linux/socket_unix_non_stream.cc +++ b/test/syscalls/linux/socket_unix_non_stream.cc @@ -19,9 +19,9 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/memory_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_non_stream.h b/test/syscalls/linux/socket_unix_non_stream.h index 7478ab172..44d1c0033 100644 --- a/test/syscalls/linux/socket_unix_non_stream.h +++ b/test/syscalls/linux/socket_unix_non_stream.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc index fddcdf1c5..92f40f5e5 100644 --- a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc @@ -15,8 +15,8 @@ #include <vector> #include "test/syscalls/linux/socket_non_stream_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_pair.cc b/test/syscalls/linux/socket_unix_pair.cc index 85999db04..28a437339 100644 --- a/test/syscalls/linux/socket_unix_pair.cc +++ b/test/syscalls/linux/socket_unix_pair.cc @@ -14,10 +14,10 @@ #include <vector> -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/socket_unix.h" #include "test/syscalls/linux/socket_unix_cmsg.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_pair_nonblock.cc b/test/syscalls/linux/socket_unix_pair_nonblock.cc index 281410a9a..39360896b 100644 --- a/test/syscalls/linux/socket_unix_pair_nonblock.cc +++ b/test/syscalls/linux/socket_unix_pair_nonblock.cc @@ -15,8 +15,8 @@ #include <vector> #include "test/syscalls/linux/socket_non_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_seqpacket.cc b/test/syscalls/linux/socket_unix_seqpacket.cc index d6e7031c0..2a2741eb9 100644 --- a/test/syscalls/linux/socket_unix_seqpacket.cc +++ b/test/syscalls/linux/socket_unix_seqpacket.cc @@ -20,8 +20,8 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_seqpacket.h b/test/syscalls/linux/socket_unix_seqpacket.h index 30d9b9edf..5bb36af92 100644 --- a/test/syscalls/linux/socket_unix_seqpacket.h +++ b/test/syscalls/linux/socket_unix_seqpacket.h @@ -15,7 +15,7 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_ #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_ -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/socket_unix_seqpacket_local.cc b/test/syscalls/linux/socket_unix_seqpacket_local.cc index 69a5f150d..492416a77 100644 --- a/test/syscalls/linux/socket_unix_seqpacket_local.cc +++ b/test/syscalls/linux/socket_unix_seqpacket_local.cc @@ -15,10 +15,10 @@ #include <vector> #include "test/syscalls/linux/socket_non_stream.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/socket_unix_non_stream.h" #include "test/syscalls/linux/socket_unix_seqpacket.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_stream.cc b/test/syscalls/linux/socket_unix_stream.cc index 3ff810914..6e9f70f8c 100644 --- a/test/syscalls/linux/socket_unix_stream.cc +++ b/test/syscalls/linux/socket_unix_stream.cc @@ -19,8 +19,8 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_stream_blocking_local.cc index 8429bd429..97a6bb327 100644 --- a/test/syscalls/linux/socket_unix_stream_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_stream_blocking_local.cc @@ -15,8 +15,8 @@ #include <vector> #include "test/syscalls/linux/socket_stream_blocking.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_stream_local.cc b/test/syscalls/linux/socket_unix_stream_local.cc index a7e3449a9..4b267ccae 100644 --- a/test/syscalls/linux/socket_unix_stream_local.cc +++ b/test/syscalls/linux/socket_unix_stream_local.cc @@ -15,8 +15,8 @@ #include <vector> #include "test/syscalls/linux/socket_stream.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc index 4b763c8e2..d7bf7747b 100644 --- a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc +++ b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc @@ -14,8 +14,8 @@ #include <vector> #include "test/syscalls/linux/socket_stream_nonblock.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_unbound_abstract.cc b/test/syscalls/linux/socket_unix_unbound_abstract.cc index dd3d25450..0f6864266 100644 --- a/test/syscalls/linux/socket_unix_unbound_abstract.cc +++ b/test/syscalls/linux/socket_unix_unbound_abstract.cc @@ -16,8 +16,8 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_unbound_dgram.cc b/test/syscalls/linux/socket_unix_unbound_dgram.cc index 907dca0f1..ccf2c94a1 100644 --- a/test/syscalls/linux/socket_unix_unbound_dgram.cc +++ b/test/syscalls/linux/socket_unix_unbound_dgram.cc @@ -17,8 +17,8 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_unbound_filesystem.cc b/test/syscalls/linux/socket_unix_unbound_filesystem.cc index a035fb095..811fe12a1 100644 --- a/test/syscalls/linux/socket_unix_unbound_filesystem.cc +++ b/test/syscalls/linux/socket_unix_unbound_filesystem.cc @@ -17,9 +17,9 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc index cb99030f5..e22018890 100644 --- a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc +++ b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc @@ -16,8 +16,8 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_unbound_stream.cc b/test/syscalls/linux/socket_unix_unbound_stream.cc index f185dded3..b10062bc2 100644 --- a/test/syscalls/linux/socket_unix_unbound_stream.cc +++ b/test/syscalls/linux/socket_unix_unbound_stream.cc @@ -16,8 +16,8 @@ #include <sys/un.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 183819faf..cb77986c2 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -29,9 +29,9 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc index 1c74b9724..7c9c5c870 100644 --- a/test/syscalls/linux/tuntap.cc +++ b/test/syscalls/linux/tuntap.cc @@ -31,11 +31,11 @@ #include "absl/strings/ascii.h" #include "absl/strings/str_split.h" #include "test/syscalls/linux/socket_netlink_route_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" #include "test/util/posix_error.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/udp_bind.cc b/test/syscalls/linux/udp_bind.cc index f68d78aa2..5ed115a14 100644 --- a/test/syscalls/linux/udp_bind.cc +++ b/test/syscalls/linux/udp_bind.cc @@ -17,8 +17,8 @@ #include <sys/types.h> #include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/udp_socket.cc b/test/syscalls/linux/udp_socket.cc index b40598767..3353e58cb 100644 --- a/test/syscalls/linux/udp_socket.cc +++ b/test/syscalls/linux/udp_socket.cc @@ -39,10 +39,10 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" +#include "test/util/socket_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/syscalls/linux/unix_domain_socket_test_util.h b/test/syscalls/linux/unix_domain_socket_test_util.h index b8073db17..4240bd5f6 100644 --- a/test/syscalls/linux/unix_domain_socket_test_util.h +++ b/test/syscalls/linux/unix_domain_socket_test_util.h @@ -17,7 +17,7 @@ #include <string> -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/verity_getdents.cc b/test/syscalls/linux/verity_getdents.cc index 2eafc3dd3..822a75254 100644 --- a/test/syscalls/linux/verity_getdents.cc +++ b/test/syscalls/linux/verity_getdents.cc @@ -59,7 +59,7 @@ class GetDentsTest : public ::testing::Test { TEST_F(GetDentsTest, GetDents) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); std::vector<std::string> expect = {".", "..", filename_}; EXPECT_NO_ERRNO(DirContains(verity_dir, expect, /*exclude=*/{})); @@ -67,7 +67,7 @@ TEST_F(GetDentsTest, GetDents) { TEST_F(GetDentsTest, Deleted) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); EXPECT_THAT(unlink(JoinPath(tmpfs_dir_.path(), filename_).c_str()), SyscallSucceeds()); @@ -78,7 +78,7 @@ TEST_F(GetDentsTest, Deleted) { TEST_F(GetDentsTest, Renamed) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); std::string new_file_name = "renamed-" + filename_; EXPECT_THAT(rename(JoinPath(tmpfs_dir_.path(), filename_).c_str(), diff --git a/test/syscalls/linux/verity_ioctl.cc b/test/syscalls/linux/verity_ioctl.cc index e7e4fa64b..45650809c 100644 --- a/test/syscalls/linux/verity_ioctl.cc +++ b/test/syscalls/linux/verity_ioctl.cc @@ -106,7 +106,7 @@ TEST_F(IoctlTest, Measure) { TEST_F(IoctlTest, Mount) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); // Make sure the file can be open and read in the mounted verity fs. auto const verity_fd = ASSERT_NO_ERRNO_AND_VALUE( @@ -118,7 +118,7 @@ TEST_F(IoctlTest, Mount) { TEST_F(IoctlTest, NonExistingFile) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); // Confirm that opening a non-existing file in the verity-enabled directory // triggers the expected error instead of verification failure. @@ -129,7 +129,7 @@ TEST_F(IoctlTest, NonExistingFile) { TEST_F(IoctlTest, ModifiedFile) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); // Modify the file and check verification failure upon reading from it. auto const fd = ASSERT_NO_ERRNO_AND_VALUE( @@ -144,7 +144,7 @@ TEST_F(IoctlTest, ModifiedFile) { TEST_F(IoctlTest, ModifiedMerkle) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); // Modify the Merkle file and check verification failure upon opening the // corresponding file. @@ -159,7 +159,7 @@ TEST_F(IoctlTest, ModifiedMerkle) { TEST_F(IoctlTest, ModifiedDirMerkle) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); // Modify the Merkle file for the parent directory and check verification // failure upon opening the corresponding file. @@ -174,7 +174,7 @@ TEST_F(IoctlTest, ModifiedDirMerkle) { TEST_F(IoctlTest, Stat) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); struct stat st; EXPECT_THAT(stat(JoinPath(verity_dir, filename_).c_str(), &st), @@ -183,7 +183,7 @@ TEST_F(IoctlTest, Stat) { TEST_F(IoctlTest, ModifiedStat) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); EXPECT_THAT(chmod(JoinPath(tmpfs_dir_.path(), filename_).c_str(), 0644), SyscallSucceeds()); @@ -194,7 +194,7 @@ TEST_F(IoctlTest, ModifiedStat) { TEST_F(IoctlTest, DeleteFile) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); EXPECT_THAT(unlink(JoinPath(tmpfs_dir_.path(), filename_).c_str()), SyscallSucceeds()); @@ -204,7 +204,7 @@ TEST_F(IoctlTest, DeleteFile) { TEST_F(IoctlTest, DeleteMerkle) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); EXPECT_THAT( unlink(MerklePath(JoinPath(tmpfs_dir_.path(), filename_)).c_str()), @@ -215,7 +215,7 @@ TEST_F(IoctlTest, DeleteMerkle) { TEST_F(IoctlTest, RenameFile) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); std::string new_file_name = "renamed-" + filename_; EXPECT_THAT(rename(JoinPath(tmpfs_dir_.path(), filename_).c_str(), @@ -227,7 +227,7 @@ TEST_F(IoctlTest, RenameFile) { TEST_F(IoctlTest, RenameMerkle) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); std::string new_file_name = "renamed-" + filename_; EXPECT_THAT( diff --git a/test/syscalls/linux/verity_mmap.cc b/test/syscalls/linux/verity_mmap.cc index 09ced6eb3..2bfd43b16 100644 --- a/test/syscalls/linux/verity_mmap.cc +++ b/test/syscalls/linux/verity_mmap.cc @@ -58,7 +58,7 @@ class MmapTest : public ::testing::Test { TEST_F(MmapTest, MmapRead) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); // Make sure the file can be open and mmapped in the mounted verity fs. auto const verity_fd = ASSERT_NO_ERRNO_AND_VALUE( @@ -72,7 +72,7 @@ TEST_F(MmapTest, MmapRead) { TEST_F(MmapTest, ModifiedBeforeMmap) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); // Modify the file and check verification failure upon mmapping. auto const fd = ASSERT_NO_ERRNO_AND_VALUE( @@ -91,7 +91,7 @@ TEST_F(MmapTest, ModifiedBeforeMmap) { TEST_F(MmapTest, ModifiedAfterMmap) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); auto const verity_fd = ASSERT_NO_ERRNO_AND_VALUE( Open(JoinPath(verity_dir, filename_), O_RDONLY, 0777)); @@ -127,7 +127,7 @@ INSTANTIATE_TEST_SUITE_P( TEST_P(MmapParamTest, Mmap) { std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, /*targets=*/{})); + MountVerity(tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY)})); // Make sure the file can be open and mmapped in the mounted verity fs. auto const verity_fd = ASSERT_NO_ERRNO_AND_VALUE( diff --git a/test/syscalls/linux/verity_symlink.cc b/test/syscalls/linux/verity_symlink.cc index bbf5375cb..c6fce8ead 100644 --- a/test/syscalls/linux/verity_symlink.cc +++ b/test/syscalls/linux/verity_symlink.cc @@ -62,9 +62,9 @@ class SymlinkTest : public ::testing::Test { }; TEST_F(SymlinkTest, Success) { - std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, - {EnableTarget(kSymlink, O_RDONLY | O_NOFOLLOW)})); + std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE(MountVerity( + tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY), + EnableTarget(kSymlink, O_RDONLY | O_NOFOLLOW)})); char buf[256]; EXPECT_THAT( @@ -77,9 +77,9 @@ TEST_F(SymlinkTest, Success) { } TEST_F(SymlinkTest, DeleteLink) { - std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, - {EnableTarget(kSymlink, O_RDONLY | O_NOFOLLOW)})); + std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE(MountVerity( + tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY), + EnableTarget(kSymlink, O_RDONLY | O_NOFOLLOW)})); ASSERT_THAT(unlink(JoinPath(tmpfs_dir_.path(), kSymlink).c_str()), SyscallSucceeds()); @@ -92,9 +92,9 @@ TEST_F(SymlinkTest, DeleteLink) { } TEST_F(SymlinkTest, ModifyLink) { - std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE( - MountVerity(tmpfs_dir_.path(), filename_, - {EnableTarget(kSymlink, O_RDONLY | O_NOFOLLOW)})); + std::string verity_dir = ASSERT_NO_ERRNO_AND_VALUE(MountVerity( + tmpfs_dir_.path(), {EnableTarget(filename_, O_RDONLY), + EnableTarget(kSymlink, O_RDONLY | O_NOFOLLOW)})); ASSERT_THAT(unlink(JoinPath(tmpfs_dir_.path(), kSymlink).c_str()), SyscallSucceeds()); diff --git a/test/util/BUILD b/test/util/BUILD index cc83221ea..4a4401ba8 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_library", "cc_test", "coreutil", "gbenchmark", "gtest", "select_system") +load("//tools:defs.bzl", "cc_library", "cc_test", "coreutil", "default_net_util", "gbenchmark", "gtest", "select_system") package( default_visibility = ["//:sandbox"], @@ -414,3 +414,27 @@ cc_library( ":temp_path", ], ) + +cc_library( + name = "socket_util", + testonly = 1, + srcs = [ + "socket_util.cc", + "socket_util_impl.cc", + ], + hdrs = ["socket_util.h"], + defines = select_system(), + deps = default_net_util() + [ + gtest, + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + "//test/util:thread_util", + ], +) diff --git a/test/syscalls/linux/socket_test_util.cc b/test/util/socket_util.cc index c1cded834..f2360b732 100644 --- a/test/syscalls/linux/socket_test_util.cc +++ b/test/util/socket_util.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" #include <arpa/inet.h> #include <netinet/in.h> diff --git a/test/syscalls/linux/socket_test_util.h b/test/util/socket_util.h index 0e2be63cc..0e2be63cc 100644 --- a/test/syscalls/linux/socket_test_util.h +++ b/test/util/socket_util.h diff --git a/test/syscalls/linux/socket_test_util_impl.cc b/test/util/socket_util_impl.cc index ef661a0e3..04550ad7c 100644 --- a/test/syscalls/linux/socket_test_util_impl.cc +++ b/test/util/socket_util_impl.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/socket_util.h" namespace gvisor { namespace testing { diff --git a/test/util/verity_util.cc b/test/util/verity_util.cc index 501d7c2cf..b7d1cb212 100644 --- a/test/util/verity_util.cc +++ b/test/util/verity_util.cc @@ -54,20 +54,14 @@ PosixError FlipRandomBit(int fd, int size) { return NoError(); } -PosixErrorOr<std::string> MountVerity(std::string tmpfs_dir, - std::string filename, +PosixErrorOr<std::string> MountVerity(std::string lower_dir, std::vector<EnableTarget> targets) { - // Mount a verity fs on the existing tmpfs mount. - std::string mount_opts = "lower_path=" + tmpfs_dir; + // Mount a verity fs on the existing mount. + std::string mount_opts = "lower_path=" + lower_dir; ASSIGN_OR_RETURN_ERRNO(TempPath verity_dir, TempPath::CreateDir()); RETURN_ERROR_IF_SYSCALL_FAIL( mount("", verity_dir.path().c_str(), "verity", 0, mount_opts.c_str())); - // Enable the file, symlink(if provided) and the directory. - ASSIGN_OR_RETURN_ERRNO( - auto fd, Open(JoinPath(verity_dir.path(), filename), O_RDONLY, 0777)); - RETURN_ERROR_IF_SYSCALL_FAIL(ioctl(fd.get(), FS_IOC_ENABLE_VERITY)); - for (const EnableTarget& target : targets) { ASSIGN_OR_RETURN_ERRNO( auto target_fd, @@ -92,6 +86,7 @@ PosixErrorOr<std::string> MountVerity(std::string tmpfs_dir, ASSIGN_OR_RETURN_ERRNO(TempPath verity_with_hash_dir, TempPath::CreateDir()); RETURN_ERROR_IF_SYSCALL_FAIL(mount("", verity_with_hash_dir.path().c_str(), "verity", 0, mount_opts.c_str())); + // Verity directories should not be deleted. Release the TempPath objects to // prevent those directories from being deleted by the destructor. verity_dir.release(); diff --git a/test/util/verity_util.h b/test/util/verity_util.h index 44863f322..ebb78b4bb 100644 --- a/test/util/verity_util.h +++ b/test/util/verity_util.h @@ -76,7 +76,6 @@ PosixError FlipRandomBit(int fd, int size); // Mount a verity on the tmpfs and enable both the file and the direcotry. Then // mount a new verity with measured root hash. PosixErrorOr<std::string> MountVerity(std::string tmpfs_dir, - std::string filename, std::vector<EnableTarget> targets); } // namespace testing diff --git a/tools/checklinkname/BUILD b/tools/checklinkname/BUILD new file mode 100644 index 000000000..0f1b07e24 --- /dev/null +++ b/tools/checklinkname/BUILD @@ -0,0 +1,16 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "checklinkname", + srcs = [ + "check_linkname.go", + "known.go", + ], + nogo = False, + visibility = ["//tools/nogo:__subpackages__"], + deps = [ + "@org_golang_x_tools//go/analysis:go_default_library", + ], +) diff --git a/tools/checklinkname/README.md b/tools/checklinkname/README.md new file mode 100644 index 000000000..06b3c302d --- /dev/null +++ b/tools/checklinkname/README.md @@ -0,0 +1,54 @@ +# `checklinkname` Analyzer + +`checklinkname` is an analyzer to provide rudimentary type-checking for +`//go:linkname` directives. Since `//go:linkname` only affects linker behavior, +there is no built-in type safety and it is the programmer's responsibility to +ensure the types on either side are compatible. + +`checklinkname` helps with this by checking that uses match expectations, as +defined in this package. + +`known.go` contains the set of known linkname targets. For most functions, we +expect identical types on both sides of the linkname. In a few cases, the types +may be slightly different (e.g., local redefinition of internal type). It is +still the responsibility of the programmer to ensure the signatures in +`known.go` are compatible and safe. + +## Findings + +Here are the most common findings from this package, and how to resolve them. + +### `runtime.foo signature got "BAR" want "BAZ"; stdlib type changed?` + +The definition of `runtime.foo` in the standard library does not match the +expected type in `known.go`. This means that the function signature in the +standard library changed. + +Addressing this will require creating a new linkname directive in a new Go +version build-tagged in any packages using this symbol. Be sure to also check to +ensure use with the new version is safe, as function constraints may have +changed in addition to the signature. + +<!-- TODO(b/165820485): This isn't yet explicitly supported. --> + +`known.go` will also need to be updated to accept the new signature for the new +version of Go. + +### `Cannot find known symbol "runtime.foo"` + +The standard library has removed runtime.foo entirely. Handling is similar to +above, except existing code must transition away from the symbol entirely (note +that is may simply be renamed). + +### `linkname to unknown symbol "mypkg.foo"; add this symbol to checklinkname.knownLinknames type-check against the remote type` + +A package has added a new linkname directive for a symbol not listed in +`known.go`. Address this by adding a new entry for the target symbol. The +`local` field should be the expected type in your package, while `remote` should +be expected type in the remote package (e.g., in the standard library). These +are typically identical, in which case `remote` can be omitted. + +### `usage: //go:linkname localname [linkname]` + +Malformed `//go:linkname` directive. This should be accompanied by a build +failure in the package. diff --git a/tools/checklinkname/check_linkname.go b/tools/checklinkname/check_linkname.go new file mode 100644 index 000000000..5373dd762 --- /dev/null +++ b/tools/checklinkname/check_linkname.go @@ -0,0 +1,229 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package checklinkname ensures that linkname declarations match their source. +package checklinkname + +import ( + "fmt" + "go/ast" + "go/token" + "go/types" + "strings" + + "golang.org/x/tools/go/analysis" +) + +// Analyzer implements the checklinkname analyzer. +var Analyzer = &analysis.Analyzer{ + Name: "checklinkname", + Doc: "verifies that linkname declarations match their source", + Run: run, +} + +// go:linkname can be rather confusing. https://pkg.go.dev/cmd/compile says: +// +// //go:linkname localname [importpath.name] +// +// This special directive does not apply to the Go code that follows it. +// Instead, the //go:linkname directive instructs the compiler to use +// “importpath.name” as the object file symbol name for the variable or +// function declared as “localname” in the source code. If the +// “importpath.name” argument is omitted, the directive uses the symbol's +// default object file symbol name and only has the effect of making the symbol +// accessible to other packages. Because this directive can subvert the type +// system and package modularity, it is only enabled in files that have +// imported "unsafe". +// +// In this package we use the term "local" to refer to the symbol name in the +// same package as the //go:linkname directive, whose name will be changed by +// the linker. We use the term "remote" to refer to the symbol name that we are +// changing to. +// +// In the general case, the local symbol is a function declaration, and the +// remote symbol is a real function in the standard library. + +// linknameSignatures describes a the type signatures of the symbols in a +// //go:linkname directive. +type linknameSignatures struct { + local string + remote string // equivalent to local if "". +} + +func (l *linknameSignatures) Remote() string { + if l.remote == "" { + return l.local + } + return l.remote +} + +// linknameSymbols describes the symbol namess in a single //go:linkname +// directive. +type linknameSymbols struct { + pos token.Pos + local string + remote string +} + +func findLinknames(pass *analysis.Pass, f *ast.File) []linknameSymbols { + var names []linknameSymbols + + for _, cg := range f.Comments { + for _, c := range cg.List { + if len(c.Text) <= 2 || !strings.HasPrefix(c.Text[2:], "go:linkname ") { + continue + } + + f := strings.Fields(c.Text) + if len(f) < 2 || len(f) > 3 { + // Malformed linkname. This is the same error the compiler emits. + pass.Reportf(c.Slash, "usage: //go:linkname localname [linkname]") + } + + if len(f) == 2 { + // "If the “importpath.name” argument is + // omitted, the directive uses the symbol's + // default object file symbol name and only has + // the effect of making the symbol accessible + // to other packages." + // -https://golang.org/cmd/compile + // + // There is no type-checking to be done here. + continue + } + + names = append(names, linknameSymbols{ + pos: c.Slash, + local: f[1], + remote: f[2], + }) + } + } + + return names +} + +func splitSymbol(pkg *types.Package, symbol string) (packagePath, name string) { + // Note that some runtime symbols can have multiple dots. e.g., + // runtime..init_task. + s := strings.SplitN(symbol, ".", 2) + + switch len(s) { + case 1: + // Package name omitted, use current package. + return pkg.Path(), symbol + case 2: + return s[0], s[1] + default: + panic("unreachable") + } +} + +func findObject(pkg *types.Package, symbol string) (types.Object, error) { + packagePath, symbolName := splitSymbol(pkg, symbol) + return findPackageObject(pkg, packagePath, symbolName) +} + +func findPackageObject(pkg *types.Package, packagePath, symbolName string) (types.Object, error) { + if pkg.Path() == packagePath { + o := pkg.Scope().Lookup(symbolName) + if o == nil { + return nil, fmt.Errorf("%q not found in %q (names: %+v)", symbolName, packagePath, pkg.Scope().Names()) + } + return o, nil + } + + for _, p := range pkg.Imports() { + if o, err := findPackageObject(p, packagePath, symbolName); err == nil { + return o, nil + } + } + + return nil, fmt.Errorf("package %q not found", packagePath) +} + +// checkOneLinkname verifies that the type of sym.local matches the type from +// knownLinknames. +func checkOneLinkname(pass *analysis.Pass, f *ast.File, sym linknameSymbols) { + remotePackage, remoteName := splitSymbol(pass.Pkg, sym.remote) + + m, ok := knownLinknames[remotePackage] + if !ok { + pass.Reportf(sym.pos, "linkname to unknown symbol %q; add this symbol to checklinkname.knownLinknames type-check against the remote type", sym.remote) + return + } + + linkname, ok := m[remoteName] + if !ok { + pass.Reportf(sym.pos, "linkname to unknown symbol %q; add this symbol to checklinkname.knownLinknames type-check against the remote type", sym.remote) + return + } + + local, err := findObject(pass.Pkg, sym.local) + if err != nil { + pass.Reportf(sym.pos, "Unable to find symbol %q: %v", sym.local, err) + return + } + + localSig, ok := local.Type().(*types.Signature) + if !ok { + pass.Reportf(local.Pos(), "%q object is not a signature: %+#v", sym.local, local) + return + } + + if linkname.local != localSig.String() { + pass.Reportf(local.Pos(), "%q signature got %q want %q; mismatched types?", sym.local, localSig.String(), linkname.local) + return + } +} + +// checkOneRemote verifies that the type of sym matches wantSig. +func checkOneRemote(pass *analysis.Pass, sym, wantSig string) { + o := pass.Pkg.Scope().Lookup(sym) + if o == nil { + pass.Reportf(pass.Files[0].Package, "Cannot find known symbol %q", sym) + return + } + + sig, ok := o.Type().(*types.Signature) + if !ok { + pass.Reportf(o.Pos(), "%q object is not a signature: %+#v", sym, o) + return + } + + if sig.String() != wantSig { + pass.Reportf(o.Pos(), "%q signature got %q want %q; stdlib type changed?", sym, sig.String(), wantSig) + return + } +} + +func run(pass *analysis.Pass) (interface{}, error) { + // First, check if any remote symbols are in this package. + p, ok := knownLinknames[pass.Pkg.Path()] + if ok { + for sym, l := range p { + checkOneRemote(pass, sym, l.Remote()) + } + } + + // Then check for local //go:linkname directives in this package. + for _, f := range pass.Files { + names := findLinknames(pass, f) + for _, n := range names { + checkOneLinkname(pass, f, n) + } + } + + return nil, nil +} diff --git a/tools/checklinkname/known.go b/tools/checklinkname/known.go new file mode 100644 index 000000000..54e5155fc --- /dev/null +++ b/tools/checklinkname/known.go @@ -0,0 +1,110 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package checklinkname + +// knownLinknames is the set of the symbols for which we can do a rudimentary +// type-check on. +// +// When analyzing the remote package (e.g., runtime), we verify the symbol +// signature matches 'remote'. When analyzing local packages with //go:linkname +// directives, we verify the symbol signature matches 'local'. +// +// Usually these are identical, but may differ slightly if equivalent +// replacement types are used in the local packages, such as a copy of a struct +// or uintptr instead of a pointer type. +// +// NOTE: It is the responsibility of the developer to verify the safety of the +// signatures used here! This analyzer only checks that types match this map; +// it does not verify compatibility of the entries themselves. +// +// //go:linkname directives with no corresponding entry here will trigger a +// finding. +// +// We preform only rudimentary string-based type-checking due to limitations in +// the analysis framework. Ideally, from the local package we'd lookup the +// remote symbol's types.Object and perform robust type-checking. +// Unfortunately, remote symbols are typically loaded from the remote package's +// gcexportdata. Since //go:linkname targets are usually not exported symbols, +// they are no included in gcexportdata and we cannot load their types.Object. +// +// TODO(b/165820485): Add option to specific per-version signatures. +var knownLinknames = map[string]map[string]linknameSignatures{ + "runtime": map[string]linknameSignatures{ + "entersyscall": linknameSignatures{ + local: "func()", + }, + "entersyscallblock": linknameSignatures{ + local: "func()", + }, + "exitsyscall": linknameSignatures{ + local: "func()", + }, + "fastrand": linknameSignatures{ + local: "func() uint32", + }, + "gopark": linknameSignatures{ + // TODO(b/165820485): add verification of waitReason + // size and reason and traceEv values. + local: "func(unlockf func(uintptr, unsafe.Pointer) bool, lock unsafe.Pointer, reason uint8, traceEv byte, traceskip int)", + remote: "func(unlockf func(*runtime.g, unsafe.Pointer) bool, lock unsafe.Pointer, reason runtime.waitReason, traceEv byte, traceskip int)", + }, + "goready": linknameSignatures{ + local: "func(gp uintptr, traceskip int)", + remote: "func(gp *runtime.g, traceskip int)", + }, + "goyield": linknameSignatures{ + local: "func()", + }, + "memmove": linknameSignatures{ + local: "func(to unsafe.Pointer, from unsafe.Pointer, n uintptr)", + }, + "throw": linknameSignatures{ + local: "func(s string)", + }, + }, + "sync": map[string]linknameSignatures{ + "runtime_canSpin": linknameSignatures{ + local: "func(i int) bool", + }, + "runtime_doSpin": linknameSignatures{ + local: "func()", + }, + "runtime_Semacquire": linknameSignatures{ + // The only difference here is the parameter names. We + // can't just change our local use to match remote, as + // the stdlib runtime and sync packages also disagree + // on the name, and the analyzer checks that use as + // well. + local: "func(addr *uint32)", + remote: "func(s *uint32)", + }, + "runtime_Semrelease": linknameSignatures{ + // See above. + local: "func(addr *uint32, handoff bool, skipframes int)", + remote: "func(s *uint32, handoff bool, skipframes int)", + }, + }, + "syscall": map[string]linknameSignatures{ + "runtime_BeforeFork": linknameSignatures{ + local: "func()", + }, + "runtime_AfterFork": linknameSignatures{ + local: "func()", + }, + "runtime_AfterForkInChild": linknameSignatures{ + local: "func()", + }, + }, +} diff --git a/tools/checklinkname/test/BUILD b/tools/checklinkname/test/BUILD new file mode 100644 index 000000000..b29bd84f2 --- /dev/null +++ b/tools/checklinkname/test/BUILD @@ -0,0 +1,9 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "test", + testonly = 1, + srcs = ["test_unsafe.go"], +) diff --git a/pkg/sentry/fsimpl/ext/disklayout/test_utils.go b/tools/checklinkname/test/test_unsafe.go index a4bc08411..a7504591c 100644 --- a/pkg/sentry/fsimpl/ext/disklayout/test_utils.go +++ b/tools/checklinkname/test/test_unsafe.go @@ -1,4 +1,4 @@ -// Copyright 2019 The gVisor Authors. +// Copyright 2021 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,19 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -package disklayout +// Package test provides linkname test targets. +package test import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/marshal" + _ "unsafe" // for go:linkname. ) -func assertSize(t *testing.T, v marshal.Marshallable, want int) { - t.Helper() +//go:linkname DetachedLinkname runtime.fastrand + +//go:linkname attachedLinkname runtime.entersyscall +func attachedLinkname() - if got := v.SizeBytes(); got != want { - t.Errorf("struct %s should be exactly %d bytes but is %d bytes", reflect.TypeOf(v).Name(), want, got) - } +// AttachedLinkname reexports attachedLinkname because go vet doesn't like an +// exported go:linkname without a comment starting with "// AttachedLinkname". +func AttachedLinkname() { + attachedLinkname() } + +// DetachedLinkname has a linkname elsewhere in the file. +func DetachedLinkname() uint32 diff --git a/tools/constraintutil/BUILD b/tools/constraintutil/BUILD new file mode 100644 index 000000000..004b708c4 --- /dev/null +++ b/tools/constraintutil/BUILD @@ -0,0 +1,18 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "constraintutil", + srcs = ["constraintutil.go"], + marshal = False, + stateify = False, + visibility = ["//tools:__subpackages__"], +) + +go_test( + name = "constraintutil_test", + size = "small", + srcs = ["constraintutil_test.go"], + library = ":constraintutil", +) diff --git a/tools/constraintutil/constraintutil.go b/tools/constraintutil/constraintutil.go new file mode 100644 index 000000000..fb3fbe5c2 --- /dev/null +++ b/tools/constraintutil/constraintutil.go @@ -0,0 +1,169 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package constraintutil provides utilities for working with Go build +// constraints. +package constraintutil + +import ( + "bufio" + "bytes" + "fmt" + "go/build/constraint" + "io" + "os" + "strings" +) + +// FromReader extracts the build constraint from the Go source or assembly file +// whose contents are read by r. +func FromReader(r io.Reader) (constraint.Expr, error) { + // See go/build.parseFileHeader() for the "official" logic that this is + // derived from. + const ( + slashStar = "/*" + starSlash = "*/" + gobuildPrefix = "//go:build" + ) + s := bufio.NewScanner(r) + var ( + inSlashStar = false // between /* and */ + haveGobuild = false + e constraint.Expr + ) +Lines: + for s.Scan() { + line := bytes.TrimSpace(s.Bytes()) + if !inSlashStar && constraint.IsGoBuild(string(line)) { + if haveGobuild { + return nil, fmt.Errorf("multiple go:build directives") + } + haveGobuild = true + var err error + e, err = constraint.Parse(string(line)) + if err != nil { + return nil, err + } + } + ThisLine: + for len(line) > 0 { + if inSlashStar { + if i := bytes.Index(line, []byte(starSlash)); i >= 0 { + inSlashStar = false + line = bytes.TrimSpace(line[i+len(starSlash):]) + continue ThisLine + } + continue Lines + } + if bytes.HasPrefix(line, []byte("//")) { + continue Lines + } + // Note that if /* appears in the line, but not at the beginning, + // then the line is still non-empty, so skipping this and + // terminating below is correct. + if bytes.HasPrefix(line, []byte(slashStar)) { + inSlashStar = true + line = bytes.TrimSpace(line[len(slashStar):]) + continue ThisLine + } + // A non-empty non-comment line terminates scanning for go:build. + break Lines + } + } + return e, s.Err() +} + +// FromString extracts the build constraint from the Go source or assembly file +// containing the given data. If no build constraint applies to the file, it +// returns nil. +func FromString(str string) (constraint.Expr, error) { + return FromReader(strings.NewReader(str)) +} + +// FromFile extracts the build constraint from the Go source or assembly file +// at the given path. If no build constraint applies to the file, it returns +// nil. +func FromFile(path string) (constraint.Expr, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + return FromReader(f) +} + +// Combine returns a constraint.Expr that evaluates to true iff all expressions +// in es evaluate to true. If es is empty, Combine returns nil. +// +// Preconditions: All constraint.Exprs in es are non-nil. +func Combine(es []constraint.Expr) constraint.Expr { + switch len(es) { + case 0: + return nil + case 1: + return es[0] + default: + a := &constraint.AndExpr{es[0], es[1]} + for i := 2; i < len(es); i++ { + a = &constraint.AndExpr{a, es[i]} + } + return a + } +} + +// CombineFromFiles returns a build constraint expression that evaluates to +// true iff the build constraints from all of the given Go source or assembly +// files evaluate to true. If no build constraints apply to any of the given +// files, it returns nil. +func CombineFromFiles(paths []string) (constraint.Expr, error) { + var es []constraint.Expr + for _, path := range paths { + e, err := FromFile(path) + if err != nil { + return nil, fmt.Errorf("failed to read build constraints from %q: %v", path, err) + } + if e != nil { + es = append(es, e) + } + } + return Combine(es), nil +} + +// Lines returns a string containing build constraint directives for the given +// constraint.Expr, including two trailing newlines, as appropriate for a Go +// source or assembly file. At least a go:build directive will be emitted; if +// the constraint is expressible using +build directives as well, then +build +// directives will also be emitted. +// +// If e is nil, Lines returns the empty string. +func Lines(e constraint.Expr) string { + if e == nil { + return "" + } + + var b strings.Builder + b.WriteString("//go:build ") + b.WriteString(e.String()) + b.WriteByte('\n') + + if pblines, err := constraint.PlusBuildLines(e); err == nil { + for _, line := range pblines { + b.WriteString(line) + b.WriteByte('\n') + } + } + + b.WriteByte('\n') + return b.String() +} diff --git a/tools/constraintutil/constraintutil_test.go b/tools/constraintutil/constraintutil_test.go new file mode 100644 index 000000000..eeabd8dcf --- /dev/null +++ b/tools/constraintutil/constraintutil_test.go @@ -0,0 +1,138 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package constraintutil + +import ( + "go/build/constraint" + "testing" +) + +func TestFileParsing(t *testing.T) { + for _, test := range []struct { + name string + data string + expr string + }{ + { + name: "Empty", + }, + { + name: "NoConstraint", + data: "// copyright header\n\npackage main", + }, + { + name: "ConstraintOnFirstLine", + data: "//go:build amd64\n#include \"textflag.h\"", + expr: "amd64", + }, + { + name: "ConstraintAfterSlashSlashComment", + data: "// copyright header\n\n//go:build linux\n\npackage newlib", + expr: "linux", + }, + { + name: "ConstraintAfterSlashStarComment", + data: "/*\ncopyright header\n*/\n\n//go:build !race\n\npackage oldlib", + expr: "!race", + }, + { + name: "ConstraintInSlashSlashComment", + data: "// blah blah //go:build windows", + }, + { + name: "ConstraintInSlashStarComment", + data: "/*\n//go:build windows\n*/", + }, + { + name: "ConstraintAfterPackageClause", + data: "package oops\n//go:build race", + }, + { + name: "ConstraintAfterCppInclude", + data: "#include \"textflag.h\"\n//go:build arm64", + }, + } { + t.Run(test.name, func(t *testing.T) { + e, err := FromString(test.data) + if err != nil { + t.Fatalf("FromString(%q) failed: %v", test.data, err) + } + if e == nil { + if len(test.expr) != 0 { + t.Errorf("FromString(%q): got no constraint, wanted %q", test.data, test.expr) + } + } else { + got := e.String() + if len(test.expr) == 0 { + t.Errorf("FromString(%q): got %q, wanted no constraint", test.data, got) + } else if got != test.expr { + t.Errorf("FromString(%q): got %q, wanted %q", test.data, got, test.expr) + } + } + }) + } +} + +func TestCombine(t *testing.T) { + for _, test := range []struct { + name string + in []string + out string + }{ + { + name: "0", + }, + { + name: "1", + in: []string{"amd64 || arm64"}, + out: "amd64 || arm64", + }, + { + name: "2", + in: []string{"amd64", "amd64 && linux"}, + out: "amd64 && amd64 && linux", + }, + { + name: "3", + in: []string{"amd64", "amd64 || arm64", "amd64 || riscv64"}, + out: "amd64 && (amd64 || arm64) && (amd64 || riscv64)", + }, + } { + t.Run(test.name, func(t *testing.T) { + inexprs := make([]constraint.Expr, 0, len(test.in)) + for _, estr := range test.in { + line := "//go:build " + estr + e, err := constraint.Parse(line) + if err != nil { + t.Fatalf("constraint.Parse(%q) failed: %v", line, err) + } + inexprs = append(inexprs, e) + } + outexpr := Combine(inexprs) + if outexpr == nil { + if len(test.out) != 0 { + t.Errorf("Combine(%v): got no constraint, wanted %q", test.in, test.out) + } + } else { + got := outexpr.String() + if len(test.out) == 0 { + t.Errorf("Combine(%v): got %q, wanted no constraint", test.in, got) + } else if got != test.out { + t.Errorf("Combine(%v): got %q, wanted %q", test.in, got, test.out) + } + } + }) + } +} diff --git a/tools/go_generics/go_merge/BUILD b/tools/go_generics/go_merge/BUILD index 5e0487e93..211e6b3ed 100644 --- a/tools/go_generics/go_merge/BUILD +++ b/tools/go_generics/go_merge/BUILD @@ -7,6 +7,6 @@ go_binary( srcs = ["main.go"], visibility = ["//:sandbox"], deps = [ - "//tools/tags", + "//tools/constraintutil", ], ) diff --git a/tools/go_generics/go_merge/main.go b/tools/go_generics/go_merge/main.go index 801f2354f..81394ddce 100644 --- a/tools/go_generics/go_merge/main.go +++ b/tools/go_generics/go_merge/main.go @@ -25,9 +25,8 @@ import ( "os" "path/filepath" "strconv" - "strings" - "gvisor.dev/gvisor/tools/tags" + "gvisor.dev/gvisor/tools/constraintutil" ) var ( @@ -131,6 +130,12 @@ func main() { } f.Decls = newDecls + // Infer build constraints for the output file. + bcexpr, err := constraintutil.CombineFromFiles(flag.Args()) + if err != nil { + fatalf("Failed to read build constraints: %v\n", err) + } + // Write the output file. var buf bytes.Buffer if err := format.Node(&buf, fset, f); err != nil { @@ -141,9 +146,7 @@ func main() { fatalf("opening output: %v\n", err) } defer outf.Close() - if t := tags.Aggregate(flag.Args()); len(t) > 0 { - fmt.Fprintf(outf, "%s\n\n", strings.Join(t.Lines(), "\n")) - } + outf.WriteString(constraintutil.Lines(bcexpr)) if _, err := outf.Write(buf.Bytes()); err != nil { fatalf("write: %v\n", err) } diff --git a/tools/go_marshal/gomarshal/BUILD b/tools/go_marshal/gomarshal/BUILD index c2747d94c..aaa203115 100644 --- a/tools/go_marshal/gomarshal/BUILD +++ b/tools/go_marshal/gomarshal/BUILD @@ -18,5 +18,5 @@ go_library( visibility = [ "//:sandbox", ], - deps = ["//tools/tags"], + deps = ["//tools/constraintutil"], ) diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go index 00961c90d..4c23637c0 100644 --- a/tools/go_marshal/gomarshal/generator.go +++ b/tools/go_marshal/gomarshal/generator.go @@ -25,7 +25,7 @@ import ( "sort" "strings" - "gvisor.dev/gvisor/tools/tags" + "gvisor.dev/gvisor/tools/constraintutil" ) // List of identifiers we use in generated code that may conflict with a @@ -123,16 +123,18 @@ func (g *Generator) writeHeader() error { var b sourceBuffer b.emit("// Automatically generated marshal implementation. See tools/go_marshal.\n\n") - // Emit build tags. - b.emit("// If there are issues with build tag aggregation, see\n") - b.emit("// tools/go_marshal/gomarshal/generator.go:writeHeader(). The build tags here\n") - b.emit("// come from the input set of files used to generate this file. This input set\n") - b.emit("// is filtered based on pre-defined file suffixes related to build tags, see \n") - b.emit("// tools/defs.bzl:calculate_sets().\n\n") - - if t := tags.Aggregate(g.inputs); len(t) > 0 { - b.emit(strings.Join(t.Lines(), "\n")) - b.emit("\n\n") + bcexpr, err := constraintutil.CombineFromFiles(g.inputs) + if err != nil { + return err + } + if bcexpr != nil { + // Emit build constraints. + b.emit("// If there are issues with build constraint aggregation, see\n") + b.emit("// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here\n") + b.emit("// come from the input set of files used to generate this file. This input set\n") + b.emit("// is filtered based on pre-defined file suffixes related to build constraints,\n") + b.emit("// see tools/defs.bzl:calculate_sets().\n\n") + b.emit(constraintutil.Lines(bcexpr)) } // Package header. @@ -553,11 +555,12 @@ func (g *Generator) writeTests(ts []*testGenerator) error { b.reset() b.emit("// Automatically generated marshal tests. See tools/go_marshal.\n\n") - // Emit build tags. - if t := tags.Aggregate(g.inputs); len(t) > 0 { - b.emit(strings.Join(t.Lines(), "\n")) - b.emit("\n\n") + // Emit build constraints. + bcexpr, err := constraintutil.CombineFromFiles(g.inputs) + if err != nil { + return err } + b.emit(constraintutil.Lines(bcexpr)) b.emit("package %s\n\n", g.pkg) if err := b.write(g.outputTest); err != nil { diff --git a/tools/go_stateify/BUILD b/tools/go_stateify/BUILD index 913558b4e..ad66981c7 100644 --- a/tools/go_stateify/BUILD +++ b/tools/go_stateify/BUILD @@ -6,7 +6,7 @@ go_binary( name = "stateify", srcs = ["main.go"], visibility = ["//:sandbox"], - deps = ["//tools/tags"], + deps = ["//tools/constraintutil"], ) bzl_library( diff --git a/tools/go_stateify/main.go b/tools/go_stateify/main.go index 93022f504..7216388a0 100644 --- a/tools/go_stateify/main.go +++ b/tools/go_stateify/main.go @@ -28,7 +28,7 @@ import ( "strings" "sync" - "gvisor.dev/gvisor/tools/tags" + "gvisor.dev/gvisor/tools/constraintutil" ) var ( @@ -214,10 +214,13 @@ func main() { // Automated warning. fmt.Fprint(outputFile, "// automatically generated by stateify.\n\n") - // Emit build tags. - if t := tags.Aggregate(flag.Args()); len(t) > 0 { - fmt.Fprintf(outputFile, "%s\n\n", strings.Join(t.Lines(), "\n")) + // Emit build constraints. + bcexpr, err := constraintutil.CombineFromFiles(flag.Args()) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to infer build constraints: %v", err) + os.Exit(1) } + outputFile.WriteString(constraintutil.Lines(bcexpr)) // Emit the package name. _, pkg := filepath.Split(*fullPkg) diff --git a/tools/nogo/BUILD b/tools/nogo/BUILD index a7e280b32..d72821377 100644 --- a/tools/nogo/BUILD +++ b/tools/nogo/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "bzl_library", "go_library", "select_goarch", "select_goos") +load("//tools:defs.bzl", "bzl_library", "go_library", "go_test", "select_goarch", "select_goos") load("//tools/nogo:defs.bzl", "nogo_objdump_tool", "nogo_stdlib", "nogo_target") package(licenses = ["notice"]) @@ -35,6 +35,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//tools/checkescape", + "//tools/checklinkname", "//tools/checklocks", "//tools/checkunsafe", "//tools/nogo/objdump", @@ -73,6 +74,12 @@ go_library( ], ) +go_test( + name = "nogo_test", + srcs = ["config_test.go"], + library = ":nogo", +) + bzl_library( name = "defs_bzl", srcs = ["defs.bzl"], diff --git a/tools/nogo/analyzers.go b/tools/nogo/analyzers.go index 2b3c03fec..6705fc905 100644 --- a/tools/nogo/analyzers.go +++ b/tools/nogo/analyzers.go @@ -47,6 +47,7 @@ import ( "honnef.co/go/tools/stylecheck" "gvisor.dev/gvisor/tools/checkescape" + "gvisor.dev/gvisor/tools/checklinkname" "gvisor.dev/gvisor/tools/checklocks" "gvisor.dev/gvisor/tools/checkunsafe" ) @@ -80,6 +81,7 @@ var AllAnalyzers = []*analysis.Analyzer{ unusedresult.Analyzer, checkescape.Analyzer, checkunsafe.Analyzer, + checklinkname.Analyzer, checklocks.Analyzer, } diff --git a/tools/nogo/config.go b/tools/nogo/config.go index 6436f9d34..ee2533610 100644 --- a/tools/nogo/config.go +++ b/tools/nogo/config.go @@ -186,16 +186,19 @@ func (a AnalyzerConfig) merge(other AnalyzerConfig) { } } -func (a AnalyzerConfig) shouldReport(groupConfig *Group, fullPos, msg string) bool { +// shouldReport returns whether the finding should be reported or suppressed. +// It returns !ok if there is no configuration sufficient to decide one way or +// another. +func (a AnalyzerConfig) shouldReport(groupConfig *Group, fullPos, msg string) (report, ok bool) { gc, ok := a[groupConfig.Name] if !ok { - return groupConfig.Default + return false, false } // Note that if a section appears for a particular group // for a particular analyzer, then it will now be enabled, // and the group default no longer applies. - return gc.shouldReport(fullPos, msg) + return gc.shouldReport(fullPos, msg), true } // Config is a nogo configuration. @@ -298,7 +301,8 @@ func (c *Config) ShouldReport(finding Finding) bool { } // Suppress via global rule? - if !c.Global.shouldReport(groupConfig, fullPos, finding.Message) { + report, ok := c.Global.shouldReport(groupConfig, fullPos, finding.Message) + if ok && !report { return false } @@ -307,5 +311,9 @@ func (c *Config) ShouldReport(finding Finding) bool { if !ok { return groupConfig.Default } - return ac.shouldReport(groupConfig, fullPos, finding.Message) + report, ok = ac.shouldReport(groupConfig, fullPos, finding.Message) + if !ok { + return groupConfig.Default + } + return report } diff --git a/tools/nogo/config_test.go b/tools/nogo/config_test.go new file mode 100644 index 000000000..685cffbec --- /dev/null +++ b/tools/nogo/config_test.go @@ -0,0 +1,301 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.package nogo +package nogo + +import ( + "go/token" + "testing" +) + +// TestShouldReport validates the suppression behavior of Config.ShouldReport. +func TestShouldReport(t *testing.T) { + config := &Config{ + Groups: []Group{ + { + Name: "default-enabled", + Regex: "^default-enabled/", + Default: true, + }, + { + Name: "default-disabled", + Regex: "^default-disabled/", + Default: false, + }, + { + Name: "default-disabled-omitted-from-global", + Regex: "^default-disabled-omitted-from-global/", + Default: false, + }, + }, + Global: AnalyzerConfig{ + "default-enabled": &ItemConfig{ + Exclude: []string{"excluded.go"}, + Suppress: []string{"suppressed"}, + }, + "default-disabled": &ItemConfig{ + Exclude: []string{"excluded.go"}, + Suppress: []string{"suppressed"}, + }, + // Omitting default-disabled-omitted-from-global here + // has no effect on configuration below. + }, + Analyzers: map[AnalyzerName]AnalyzerConfig{ + "analyzer-suppressions": AnalyzerConfig{ + // Suppress some. + "default-enabled": &ItemConfig{ + Exclude: []string{"limited-exclude.go"}, + Suppress: []string{"limited suppress"}, + }, + // Enable all. + "default-disabled": nil, + }, + "enabled-for-default-disabled": AnalyzerConfig{ + "default-disabled": nil, + "default-disabled-omitted-from-global": nil, + }, + }, + } + + if err := config.Compile(); err != nil { + t.Fatalf("Compile(%+v) = %v, want nil", config, err) + } + + cases := []struct { + name string + finding Finding + want bool + }{ + { + name: "enabled", + finding: Finding{ + Category: "foo", + Position: token.Position{ + Filename: "default-enabled/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: true, + }, + { + name: "ungrouped", + finding: Finding{ + Category: "foo", + Position: token.Position{ + Filename: "ungrouped/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: true, + }, + { + name: "suppressed", + finding: Finding{ + Category: "foo", + Position: token.Position{ + Filename: "default-enabled/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message suppressed", + }, + want: false, + }, + { + name: "excluded", + finding: Finding{ + Category: "foo", + Position: token.Position{ + Filename: "default-enabled/excluded.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: false, + }, + { + name: "disabled", + finding: Finding{ + Category: "foo", + Position: token.Position{ + Filename: "default-disabled/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: false, + }, + { + name: "analyzer suppressed", + finding: Finding{ + Category: "analyzer-suppressions", + Position: token.Position{ + Filename: "default-enabled/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message limited suppress", + }, + want: false, + }, + { + name: "analyzer suppressed not global", + finding: Finding{ + // Doesn't apply outside of analyzer-suppressions. + Category: "foo", + Position: token.Position{ + Filename: "default-enabled/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message limited suppress", + }, + want: true, + }, + { + name: "analyzer suppressed grouped", + finding: Finding{ + Category: "analyzer-suppressions", + Position: token.Position{ + // Doesn't apply outside of default-enabled. + Filename: "default-disabled/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message limited suppress", + }, + want: true, + }, + { + name: "analyzer excluded", + finding: Finding{ + Category: "analyzer-suppressions", + Position: token.Position{ + Filename: "default-enabled/limited-exclude.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: false, + }, + { + name: "analyzer excluded not global", + finding: Finding{ + // Doesn't apply outside of analyzer-suppressions. + Category: "foo", + Position: token.Position{ + Filename: "default-enabled/limited-exclude.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: true, + }, + { + name: "analyzer excluded grouped", + finding: Finding{ + Category: "analyzer-suppressions", + Position: token.Position{ + // Doesn't apply outside of default-enabled. + Filename: "default-disabled/limited-exclude.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: true, + }, + { + name: "disabled-omitted", + finding: Finding{ + Category: "foo", + Position: token.Position{ + Filename: "default-disabled-omitted-from-global/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: false, + }, + { + name: "default enabled applies to customized analyzer", + finding: Finding{ + Category: "enabled-for-default-disabled", + Position: token.Position{ + Filename: "default-enabled/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: true, + }, + { + name: "default overridden in customized analyzer", + finding: Finding{ + Category: "enabled-for-default-disabled", + Position: token.Position{ + Filename: "default-disabled/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: true, + }, + { + name: "default overridden in customized analyzer even when omitted from global", + finding: Finding{ + Category: "enabled-for-default-disabled", + Position: token.Position{ + Filename: "default-disabled-omitted-from-global/file.go", + Offset: 0, + Line: 1, + Column: 1, + }, + Message: "message", + }, + want: true, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := config.ShouldReport(tc.finding); got != tc.want { + t.Errorf("ShouldReport(%+v) = %v, want %v", tc.finding, got, tc.want) + } + }) + } +} diff --git a/tools/nogo/filter/main.go b/tools/nogo/filter/main.go index d50336b9b..4a925d03c 100644 --- a/tools/nogo/filter/main.go +++ b/tools/nogo/filter/main.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Binary check is the nogo entrypoint. +// Binary filter is the filters and reports nogo findings. package main import ( diff --git a/tools/nogo/findings.go b/tools/nogo/findings.go index 329a7062e..a73bf1a09 100644 --- a/tools/nogo/findings.go +++ b/tools/nogo/findings.go @@ -109,7 +109,7 @@ func ExtractFindingsFromFile(filename string, asJSON bool) (FindingSet, error) { return ExtractFindingsFrom(r, asJSON) } -// ExtractFindingsFromBytes loads findings from bytes. +// ExtractFindingsFrom loads findings from an io.Reader. func ExtractFindingsFrom(r io.Reader, asJSON bool) (findings FindingSet, err error) { if asJSON { dec := json.NewDecoder(r) diff --git a/tools/tags/BUILD b/tools/tags/BUILD deleted file mode 100644 index 1c02e2c89..000000000 --- a/tools/tags/BUILD +++ /dev/null @@ -1,11 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "tags", - srcs = ["tags.go"], - marshal = False, - stateify = False, - visibility = ["//tools:__subpackages__"], -) diff --git a/tools/tags/tags.go b/tools/tags/tags.go deleted file mode 100644 index f35904e0a..000000000 --- a/tools/tags/tags.go +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package tags is a utility for parsing build tags. -package tags - -import ( - "fmt" - "io/ioutil" - "strings" -) - -// OrSet is a set of tags on a single line. -// -// Note that tags may include ",", and we don't distinguish this case in the -// logic below. Ideally, this constraints can be split into separate top-level -// build tags in order to resolve any issues. -type OrSet []string - -// Line returns the line for this or. -func (or OrSet) Line() string { - return fmt.Sprintf("// +build %s", strings.Join([]string(or), " ")) -} - -// AndSet is the set of all OrSets. -type AndSet []OrSet - -// Lines returns the lines to be printed. -func (and AndSet) Lines() (ls []string) { - for _, or := range and { - ls = append(ls, or.Line()) - } - return -} - -// Join joins this AndSet with another. -func (and AndSet) Join(other AndSet) AndSet { - return append(and, other...) -} - -// Tags returns the unique set of +build tags. -// -// Derived form the runtime's canBuild. -func Tags(file string) (tags AndSet) { - data, err := ioutil.ReadFile(file) - if err != nil { - return nil - } - // Check file contents for // +build lines. - for _, p := range strings.Split(string(data), "\n") { - p = strings.TrimSpace(p) - if p == "" { - continue - } - if !strings.HasPrefix(p, "//") { - break - } - if !strings.Contains(p, "+build") { - continue - } - fields := strings.Fields(p[2:]) - if len(fields) < 1 || fields[0] != "+build" { - continue - } - tags = append(tags, OrSet(fields[1:])) - } - return tags -} - -// Aggregate aggregates all tags from a set of files. -// -// Note that these may be in conflict, in which case the build will fail. -func Aggregate(files []string) (tags AndSet) { - for _, file := range files { - tags = tags.Join(Tags(file)) - } - return tags -} |