diff options
184 files changed, 6753 insertions, 2267 deletions
@@ -114,7 +114,7 @@ runsc: ## Builds the runsc binary. .PHONY: runsc debian: ## Builds the debian packages. - @$(call submake,build OPTIONS="-c opt" TARGETS="//runsc:runsc-debian") + @$(call submake,build OPTIONS="-c opt" TARGETS="//debian:debian") .PHONY: debian smoke-tests: ## Runs a simple smoke test after build runsc. @@ -301,8 +301,10 @@ $(RELEASE_KEY): release: $(RELEASE_KEY) ## Builds a release. @mkdir -p $(RELEASE_ROOT) @T=$$(mktemp -d /tmp/release.XXXXXX); \ - $(call submake,copy TARGETS="runsc" DESTINATION=$$T) && \ - $(call submake,copy TARGETS="runsc:runsc-debian" DESTINATION=$$T) && \ + $(call submake,copy TARGETS="//runsc:runsc" DESTINATION=$$T) && \ + $(call submake,copy TARGETS="//shim/v1:gvisor-containerd-shim" DESTINATION=$$T) && \ + $(call submake,copy TARGETS="//shim/v2:containerd-shim-runsc-v1" DESTINATION=$$T) && \ + $(call submake,copy TARGETS="//debian:debian" DESTINATION=$$T) && \ NIGHTLY=$(RELEASE_NIGHTLY) tools/make_release.sh $(RELEASE_KEY) $(RELEASE_ROOT) $$T/*; \ rc=$$?; rm -rf $$T; exit $$rc .PHONY: release diff --git a/debian/BUILD b/debian/BUILD new file mode 100644 index 000000000..331f44a5c --- /dev/null +++ b/debian/BUILD @@ -0,0 +1,59 @@ +load("//tools:defs.bzl", "pkg_deb", "pkg_tar") + +package(licenses = ["notice"]) + +pkg_tar( + name = "debian-bin", + srcs = [ + "//runsc", + "//shim/v1:gvisor-containerd-shim", + "//shim/v2:containerd-shim-runsc-v1", + ], + mode = "0755", + package_dir = "/usr/bin", +) + +pkg_tar( + name = "debian-data", + extension = "tar.gz", + deps = [ + ":debian-bin", + "//shim:config", + ], +) + +genrule( + name = "debian-version", + # Note that runsc must appear in the srcs parameter and not the tools + # parameter, otherwise it will not be stamped. This is reasonable, as tools + # may be encoded differently in the build graph (cached more aggressively + # because they are assumes to be hermetic). + srcs = ["//runsc"], + outs = ["version.txt"], + # Note that the little dance here is necessary because files in the $(SRCS) + # attribute are not executable by default, and we can't touch in place. + cmd = "cp $(location //runsc:runsc) $(@D)/runsc && \ + chmod a+x $(@D)/runsc && \ + $(@D)/runsc -version | grep version | sed 's/^[^0-9]*//' > $@ && \ + rm -f $(@D)/runsc", + stamp = 1, +) + +pkg_deb( + name = "debian", + architecture = "amd64", + data = ":debian-data", + # Note that the description_file will be flatten (all newlines removed), + # and therefore it is kept to a simple one-line description. The expected + # format for debian packages is "short summary\nLonger explanation of + # tool." and this is impossible with the flattening. + description_file = "description", + homepage = "https://gvisor.dev/", + maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>", + package = "runsc", + postinst = "postinst.sh", + version_file = ":version.txt", + visibility = [ + "//visibility:public", + ], +) diff --git a/runsc/debian/description b/debian/description index 9e8e08805..9e8e08805 100644 --- a/runsc/debian/description +++ b/debian/description diff --git a/runsc/debian/postinst.sh b/debian/postinst.sh index d1e28e17b..6a326f823 100755 --- a/runsc/debian/postinst.sh +++ b/debian/postinst.sh @@ -21,7 +21,7 @@ fi # Update docker configuration. if [ -f /etc/docker/daemon.json ]; then runsc install - if systemctl status docker 2>/dev/null; then + if systemctl is-active -q docker; then systemctl restart docker || echo "unable to restart docker; you must do so manually." >&2 fi fi diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go index 158d2db5b..2b1ef0d4e 100644 --- a/pkg/abi/linux/fs.go +++ b/pkg/abi/linux/fs.go @@ -44,17 +44,18 @@ type Statfs struct { // Type is one of the filesystem magic values, defined above. Type uint64 - // BlockSize is the data block size. + // BlockSize is the optimal transfer block size in bytes. BlockSize int64 - // Blocks is the number of data blocks in use. + // Blocks is the maximum number of data blocks the filesystem may store, in + // units of BlockSize. Blocks uint64 - // BlocksFree is the number of free blocks. + // BlocksFree is the number of free data blocks, in units of BlockSize. BlocksFree uint64 - // BlocksAvailable is the number of blocks free for use by - // unprivileged users. + // BlocksAvailable is the number of data blocks free for use by + // unprivileged users, in units of BlockSize. BlocksAvailable uint64 // Files is the number of used file nodes on the filesystem. diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go index 2c5e56ae5..d6dbedc3e 100644 --- a/pkg/abi/linux/ioctl.go +++ b/pkg/abi/linux/ioctl.go @@ -117,3 +117,24 @@ const ( func IOC(dir, typ, nr, size uint32) uint32 { return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT } + +// Kcov ioctls from kernel/kcov.h. +var ( + KCOV_INIT_TRACE = IOC(_IOC_READ, 'c', 1, 8) + KCOV_ENABLE = IOC(_IOC_NONE, 'c', 100, 0) + KCOV_DISABLE = IOC(_IOC_NONE, 'c', 101, 0) +) + +// Kcov trace types from kernel/kcov.h. +const ( + KCOV_TRACE_PC = 0 + KCOV_TRACE_CMP = 1 +) + +// Kcov state constants from kernel/kcov.h. +const ( + KCOV_MODE_DISABLED = 0 + KCOV_MODE_INIT = 1 + KCOV_MODE_TRACE_PC = 2 + KCOV_MODE_TRACE_CMP = 3 +) diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go index 8ac02aee8..e640969a6 100644 --- a/pkg/abi/linux/tty.go +++ b/pkg/abi/linux/tty.go @@ -23,6 +23,8 @@ const ( ) // Winsize is struct winsize, defined in uapi/asm-generic/termios.h. +// +// +marshal type Winsize struct { Row uint16 Col uint16 @@ -31,6 +33,8 @@ type Winsize struct { } // Termios is struct termios, defined in uapi/asm-generic/termbits.h. +// +// +marshal type Termios struct { InputFlags uint32 OutputFlags uint32 diff --git a/pkg/abi/linux/xattr.go b/pkg/abi/linux/xattr.go index 99180b208..8ef837f27 100644 --- a/pkg/abi/linux/xattr.go +++ b/pkg/abi/linux/xattr.go @@ -23,6 +23,9 @@ const ( XATTR_CREATE = 1 XATTR_REPLACE = 2 + XATTR_TRUSTED_PREFIX = "trusted." + XATTR_TRUSTED_PREFIX_LEN = len(XATTR_TRUSTED_PREFIX) + XATTR_USER_PREFIX = "user." XATTR_USER_PREFIX_LEN = len(XATTR_USER_PREFIX) ) diff --git a/pkg/coverage/BUILD b/pkg/coverage/BUILD new file mode 100644 index 000000000..a198e8028 --- /dev/null +++ b/pkg/coverage/BUILD @@ -0,0 +1,14 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "coverage", + srcs = ["coverage.go"], + visibility = ["//:sandbox"], + deps = [ + "//pkg/sync", + "//pkg/usermem", + "@io_bazel_rules_go//go/tools/coverdata", + ], +) diff --git a/pkg/coverage/coverage.go b/pkg/coverage/coverage.go new file mode 100644 index 000000000..6831adcce --- /dev/null +++ b/pkg/coverage/coverage.go @@ -0,0 +1,175 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package coverage provides an interface through which Go coverage data can +// be collected, converted to kcov format, and exposed to userspace. +// +// Coverage can be enabled by calling bazel {build,test} with +// --collect_coverage_data and --instrumentation_filter with the desired +// coverage surface. This causes bazel to use the Go cover tool manually to +// generate instrumented files. It injects a hook that registers all coverage +// data with the coverdata package. +package coverage + +import ( + "fmt" + "io" + "sort" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" + + "github.com/bazelbuild/rules_go/go/tools/coverdata" +) + +// KcovAvailable returns whether the kcov coverage interface is available. It is +// available as long as coverage is enabled for some files. +func KcovAvailable() bool { + return len(coverdata.Cover.Blocks) > 0 +} + +// coverageMu must be held while accessing coverdata.Cover. This prevents +// concurrent reads/writes from multiple threads collecting coverage data. +var coverageMu sync.RWMutex + +// once ensures that globalData is only initialized once. +var once sync.Once + +var globalData struct { + // files is the set of covered files sorted by filename. It is calculated at + // startup. + files []string + + // syntheticPCs are a set of PCs calculated at startup, where the PC + // at syntheticPCs[i][j] corresponds to file i, block j. + syntheticPCs [][]uint64 +} + +// ClearCoverageData clears existing coverage data. +func ClearCoverageData() { + coverageMu.Lock() + defer coverageMu.Unlock() + for _, counters := range coverdata.Cover.Counters { + for index := 0; index < len(counters); index++ { + atomic.StoreUint32(&counters[index], 0) + } + } +} + +var coveragePool = sync.Pool{ + New: func() interface{} { + return make([]byte, 0) + }, +} + +// ConsumeCoverageData builds and writes the collection of covered PCs. It +// returns the number of bytes written. +// +// In Linux, a kernel configuration is set that compiles the kernel with a +// custom function that is called at the beginning of every basic block, which +// updates the memory-mapped coverage information. The Go coverage tool does not +// allow us to inject arbitrary instructions into basic blocks, but it does +// provide data that we can convert to a kcov-like format and transfer them to +// userspace through a memory mapping. +// +// Note that this is not a strict implementation of kcov, which is especially +// tricky to do because we do not have the same coverage tools available in Go +// that that are available for the actual Linux kernel. In Linux, a kernel +// configuration is set that compiles the kernel with a custom function that is +// called at the beginning of every basic block to write program counters to the +// kcov memory mapping. In Go, however, coverage tools only give us a count of +// basic blocks as they are executed. Every time we return to userspace, we +// collect the coverage information and write out PCs for each block that was +// executed, providing userspace with the illusion that the kcov data is always +// up to date. For convenience, we also generate a unique synthetic PC for each +// block instead of using actual PCs. Finally, we do not provide thread-specific +// coverage data (each kcov instance only contains PCs executed by the thread +// owning it); instead, we will supply data for any file specified by -- +// instrumentation_filter. +// +// Note that we "consume", i.e. clear, coverdata when this function is run, to +// ensure that each event is only reported once. +// +// TODO(b/160639712): evaluate whether it is ok to reset the global coverage +// data every time this function is run. We could technically have each thread +// store a local snapshot against which we compare the most recent coverdata so +// that separate threads do not affect each other's view of the data. +func ConsumeCoverageData(w io.Writer) int { + once.Do(initCoverageData) + + coverageMu.Lock() + defer coverageMu.Unlock() + + total := 0 + var pcBuffer [8]byte + for fileIndex, file := range globalData.files { + counters := coverdata.Cover.Counters[file] + for index := 0; index < len(counters); index++ { + val := atomic.SwapUint32(&counters[index], 0) + if val != 0 { + // Calculate the synthetic PC. + pc := globalData.syntheticPCs[fileIndex][index] + + usermem.ByteOrder.PutUint64(pcBuffer[:], pc) + n, err := w.Write(pcBuffer[:]) + if err != nil { + if err == io.EOF { + // Simply stop writing if we encounter EOF; it's ok if we attempted to + // write more than we can hold. + return total + n + } + panic(fmt.Sprintf("Internal error writing PCs to kcov area: %v", err)) + } + total += n + } + } + } + + if total == 0 { + // An empty profile indicates that coverage is not enabled, in which case + // there shouldn't be any task work registered. + panic("kcov task work is registered, but no coverage data was found") + } + return total +} + +// initCoverageData initializes globalData. It should only be called once, +// before any kcov data is written. +func initCoverageData() { + // First, order all files. Then calculate synthetic PCs for every block + // (using the well-defined ordering for files as well). + for file := range coverdata.Cover.Blocks { + globalData.files = append(globalData.files, file) + } + sort.Strings(globalData.files) + + // nextSyntheticPC is the first PC that we generate for a block. + // + // This uses a standard-looking kernel range for simplicity. + // + // FIXME(b/160639712): This is only necessary because syzkaller requires + // addresses in the kernel range. If we can remove this constraint, then we + // should be able to use the actual addresses. + var nextSyntheticPC uint64 = 0xffffffff80000000 + for _, file := range globalData.files { + blocks := coverdata.Cover.Blocks[file] + thisFile := make([]uint64, 0, len(blocks)) + for range blocks { + thisFile = append(thisFile, nextSyntheticPC) + nextSyntheticPC++ // Advance. + } + globalData.syntheticPCs = append(globalData.syntheticPCs, thisFile) + } +} diff --git a/pkg/cpuid/cpuid_parse_x86_test.go b/pkg/cpuid/cpuid_parse_x86_test.go index c9bd40e1b..e4ae0d689 100644 --- a/pkg/cpuid/cpuid_parse_x86_test.go +++ b/pkg/cpuid/cpuid_parse_x86_test.go @@ -32,27 +32,27 @@ func kernelVersion() (int, int, error) { return 0, 0, err } - var r string + var sb strings.Builder for _, b := range u.Release { if b == 0 { break } - r += string(b) + sb.WriteByte(byte(b)) } - s := strings.Split(r, ".") + s := strings.Split(sb.String(), ".") if len(s) < 2 { - return 0, 0, fmt.Errorf("kernel release missing major and minor component: %s", r) + return 0, 0, fmt.Errorf("kernel release missing major and minor component: %s", sb.String()) } major, err := strconv.Atoi(s[0]) if err != nil { - return 0, 0, fmt.Errorf("error parsing major version %q in %q: %v", s[0], r, err) + return 0, 0, fmt.Errorf("error parsing major version %q in %q: %w", s[0], sb.String(), err) } minor, err := strconv.Atoi(s[1]) if err != nil { - return 0, 0, fmt.Errorf("error parsing minor version %q in %q: %v", s[1], r, err) + return 0, 0, fmt.Errorf("error parsing minor version %q in %q: %w", s[1], sb.String(), err) } return major, minor, nil diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go index 955c9c473..1a0477c6a 100644 --- a/pkg/merkletree/merkletree.go +++ b/pkg/merkletree/merkletree.go @@ -45,12 +45,25 @@ type Layout struct { // InitLayout initializes and returns a new Layout object describing the structure // of a tree. dataSize specifies the size of input data in bytes. -func InitLayout(dataSize int64) Layout { +func InitLayout(dataSize int64, dataAndTreeInSameFile bool) Layout { layout := Layout{ blockSize: usermem.PageSize, // TODO(b/156980949): Allow config other hash methods (SHA384/SHA512). digestSize: sha256DigestSize, } + + // treeStart is the offset (in bytes) of the first level of the tree in + // the file. If data and tree are in different files, treeStart should + // be zero. If data is in the same file as the tree, treeStart points + // to the block after the last data block (which may be zero-padded). + var treeStart int64 + if dataAndTreeInSameFile { + treeStart = dataSize + if dataSize%layout.blockSize != 0 { + treeStart += layout.blockSize - dataSize%layout.blockSize + } + } + numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize level := 0 offset := int64(0) @@ -60,14 +73,15 @@ func InitLayout(dataSize int64) Layout { // contain the hashes of the data blocks, while level numLevels - 1 is // the root. for numBlocks > 1 { - layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize) + layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize) // Round numBlocks up to fill up a block. numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock() offset += numBlocks / layout.hashesPerBlock() numBlocks = numBlocks / layout.hashesPerBlock() level++ } - layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize) + layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize) + return layout } @@ -107,11 +121,44 @@ func (layout Layout) blockOffset(level int, index int64) int64 { // written to treeWriter. The treeReader should be able to read the tree after // it has been written. That is, treeWriter and treeReader should point to the // same underlying data but have separate cursors. -func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter io.Writer) ([]byte, error) { - layout := InitLayout(dataSize) +// Generate will modify the cursor for data, but always restores it to its +// original position upon exit. The cursor for tree is modified and not +// restored. +func Generate(data io.ReadSeeker, dataSize int64, treeReader io.ReadSeeker, treeWriter io.WriteSeeker, dataAndTreeInSameFile bool) ([]byte, error) { + layout := InitLayout(dataSize, dataAndTreeInSameFile) numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize + // If the data is in the same file as the tree, zero pad the last data + // block. + bytesInLastBlock := dataSize % layout.blockSize + if dataAndTreeInSameFile && bytesInLastBlock != 0 { + zeroBuf := make([]byte, layout.blockSize-bytesInLastBlock) + if _, err := treeWriter.Seek(0, io.SeekEnd); err != nil && err != io.EOF { + return nil, err + } + if _, err := treeWriter.Write(zeroBuf); err != nil { + return nil, err + } + } + + // Store the current offset, so we can set it back once verification + // finishes. + origOffset, err := data.Seek(0, io.SeekCurrent) + if err != nil { + return nil, err + } + defer data.Seek(origOffset, io.SeekStart) + + // Read from the beginning of both data and treeReader. + if _, err := data.Seek(0, io.SeekStart); err != nil && err != io.EOF { + return nil, err + } + + if _, err := treeReader.Seek(0, io.SeekStart); err != nil && err != io.EOF { + return nil, err + } + var root []byte for level := 0; level < layout.numLevels(); level++ { for i := int64(0); i < numBlocks; i++ { @@ -172,11 +219,11 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i // Verify will modify the cursor for data, but always restores it to its // original position upon exit. The cursor for tree is modified and not // restored. -func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte) error { +func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte, dataAndTreeInSameFile bool) error { if readSize <= 0 { return fmt.Errorf("Unexpected read size: %d", readSize) } - layout := InitLayout(int64(dataSize)) + layout := InitLayout(int64(dataSize), dataAndTreeInSameFile) // Calculate the index of blocks that includes the target range in input // data. diff --git a/pkg/merkletree/merkletree_test.go b/pkg/merkletree/merkletree_test.go index 911f61df9..ad50ba5f6 100644 --- a/pkg/merkletree/merkletree_test.go +++ b/pkg/merkletree/merkletree_test.go @@ -27,80 +27,58 @@ import ( func TestLayout(t *testing.T) { testCases := []struct { - dataSize int64 - expectedLevelOffset []int64 + dataSize int64 + dataAndTreeInSameFile bool + expectedLevelOffset []int64 }{ { - dataSize: 100, - expectedLevelOffset: []int64{0}, + dataSize: 100, + dataAndTreeInSameFile: false, + expectedLevelOffset: []int64{0}, }, { - dataSize: 1000000, - expectedLevelOffset: []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize}, + dataSize: 100, + dataAndTreeInSameFile: true, + expectedLevelOffset: []int64{usermem.PageSize}, }, { - dataSize: 4096 * int64(usermem.PageSize), - expectedLevelOffset: []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize}, + dataSize: 1000000, + dataAndTreeInSameFile: false, + expectedLevelOffset: []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize}, }, - } - - for _, tc := range testCases { - t.Run(fmt.Sprintf("%d", tc.dataSize), func(t *testing.T) { - p := InitLayout(tc.dataSize) - if p.blockSize != int64(usermem.PageSize) { - t.Errorf("got blockSize %d, want %d", p.blockSize, usermem.PageSize) - } - if p.digestSize != sha256DigestSize { - t.Errorf("got digestSize %d, want %d", p.digestSize, sha256DigestSize) - } - if p.numLevels() != len(tc.expectedLevelOffset) { - t.Errorf("got levels %d, want %d", p.numLevels(), len(tc.expectedLevelOffset)) - } - for i := 0; i < p.numLevels() && i < len(tc.expectedLevelOffset); i++ { - if p.levelOffset[i] != tc.expectedLevelOffset[i] { - t.Errorf("got levelStart[%d] %d, want %d", i, p.levelOffset[i], tc.expectedLevelOffset[i]) - } - } - }) - } -} - -func TestGenerate(t *testing.T) { - // The input data has size dataSize. It starts with the data in startWith, - // and all other bytes are zeroes. - testCases := []struct { - data []byte - expectedRoot []byte - }{ { - data: bytes.Repeat([]byte{0}, usermem.PageSize), - expectedRoot: []byte{173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167}, - }, - { - data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1), - expectedRoot: []byte{62, 93, 40, 92, 161, 241, 30, 223, 202, 99, 39, 2, 132, 113, 240, 139, 117, 99, 79, 243, 54, 18, 100, 184, 141, 121, 238, 46, 149, 202, 203, 132}, + dataSize: 1000000, + dataAndTreeInSameFile: true, + expectedLevelOffset: []int64{245 * usermem.PageSize, 247 * usermem.PageSize, 248 * usermem.PageSize}, }, { - data: []byte{'a'}, - expectedRoot: []byte{52, 75, 204, 142, 172, 129, 37, 14, 145, 137, 103, 203, 11, 162, 209, 205, 30, 169, 213, 72, 20, 28, 243, 24, 242, 2, 92, 43, 169, 59, 110, 210}, + dataSize: 4096 * int64(usermem.PageSize), + dataAndTreeInSameFile: false, + expectedLevelOffset: []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize}, }, { - data: bytes.Repeat([]byte{'a'}, usermem.PageSize), - expectedRoot: []byte{201, 62, 238, 45, 13, 176, 47, 16, 172, 199, 70, 13, 149, 118, 225, 34, 220, 248, 205, 83, 196, 191, 141, 252, 174, 27, 62, 116, 235, 207, 255, 90}, + dataSize: 4096 * int64(usermem.PageSize), + dataAndTreeInSameFile: true, + expectedLevelOffset: []int64{4096 * usermem.PageSize, 4128 * usermem.PageSize, 4129 * usermem.PageSize}, }, } for _, tc := range testCases { - t.Run(fmt.Sprintf("%d:%v", len(tc.data), tc.data[0]), func(t *testing.T) { - var tree bytes.Buffer - - root, err := Generate(bytes.NewBuffer(tc.data), int64(len(tc.data)), &tree, &tree) - if err != nil { - t.Fatalf("Generate failed: %v", err) + t.Run(fmt.Sprintf("%d", tc.dataSize), func(t *testing.T) { + l := InitLayout(tc.dataSize, tc.dataAndTreeInSameFile) + if l.blockSize != int64(usermem.PageSize) { + t.Errorf("got blockSize %d, want %d", l.blockSize, usermem.PageSize) } - - if !bytes.Equal(root, tc.expectedRoot) { - t.Errorf("Unexpected root") + if l.digestSize != sha256DigestSize { + t.Errorf("got digestSize %d, want %d", l.digestSize, sha256DigestSize) + } + if l.numLevels() != len(tc.expectedLevelOffset) { + t.Errorf("got levels %d, want %d", l.numLevels(), len(tc.expectedLevelOffset)) + } + for i := 0; i < l.numLevels() && i < len(tc.expectedLevelOffset); i++ { + if l.levelOffset[i] != tc.expectedLevelOffset[i] { + t.Errorf("got levelStart[%d] %d, want %d", i, l.levelOffset[i], tc.expectedLevelOffset[i]) + } } }) } @@ -151,6 +129,57 @@ func (brw *bytesReadWriter) Seek(offset int64, whence int) (int64, error) { return off, nil } +func TestGenerate(t *testing.T) { + // The input data has size dataSize. It starts with the data in startWith, + // and all other bytes are zeroes. + testCases := []struct { + data []byte + expectedRoot []byte + }{ + { + data: bytes.Repeat([]byte{0}, usermem.PageSize), + expectedRoot: []byte{173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167}, + }, + { + data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1), + expectedRoot: []byte{62, 93, 40, 92, 161, 241, 30, 223, 202, 99, 39, 2, 132, 113, 240, 139, 117, 99, 79, 243, 54, 18, 100, 184, 141, 121, 238, 46, 149, 202, 203, 132}, + }, + { + data: []byte{'a'}, + expectedRoot: []byte{52, 75, 204, 142, 172, 129, 37, 14, 145, 137, 103, 203, 11, 162, 209, 205, 30, 169, 213, 72, 20, 28, 243, 24, 242, 2, 92, 43, 169, 59, 110, 210}, + }, + { + data: bytes.Repeat([]byte{'a'}, usermem.PageSize), + expectedRoot: []byte{201, 62, 238, 45, 13, 176, 47, 16, 172, 199, 70, 13, 149, 118, 225, 34, 220, 248, 205, 83, 196, 191, 141, 252, 174, 27, 62, 116, 235, 207, 255, 90}, + }, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("%d:%v", len(tc.data), tc.data[0]), func(t *testing.T) { + for _, dataAndTreeInSameFile := range []bool{false, true} { + var tree bytesReadWriter + var root []byte + var err error + if dataAndTreeInSameFile { + tree.Write(tc.data) + root, err = Generate(&tree, int64(len(tc.data)), &tree, &tree, dataAndTreeInSameFile) + } else { + root, err = Generate(&bytesReadWriter{ + bytes: tc.data, + }, int64(len(tc.data)), &tree, &tree, dataAndTreeInSameFile) + } + if err != nil { + t.Fatalf("got err: %v, want nil", err) + } + + if !bytes.Equal(root, tc.expectedRoot) { + t.Errorf("got root: %v, want %v", root, tc.expectedRoot) + } + } + }) + } +} + func TestVerify(t *testing.T) { // The input data has size dataSize. The portion to be verified ranges from // verifyStart with verifySize. A bit is flipped in outOfRangeByteIndex to @@ -284,26 +313,37 @@ func TestVerify(t *testing.T) { data := make([]byte, tc.dataSize) // Generate random bytes in data. rand.Read(data) - var tree bytesReadWriter - - root, err := Generate(bytes.NewBuffer(data), int64(tc.dataSize), &tree, &tree) - if err != nil { - t.Fatalf("Generate failed: %v", err) - } - // Flip a bit in data and checks Verify results. - var buf bytes.Buffer - data[tc.modifyByte] ^= 1 - if tc.shouldSucceed { - if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err != nil && err != io.EOF { - t.Errorf("Verification failed when expected to succeed: %v", err) + for _, dataAndTreeInSameFile := range []bool{false, true} { + var tree bytesReadWriter + var root []byte + var err error + if dataAndTreeInSameFile { + tree.Write(data) + root, err = Generate(&tree, int64(len(data)), &tree, &tree, dataAndTreeInSameFile) + } else { + root, err = Generate(&bytesReadWriter{ + bytes: data, + }, int64(tc.dataSize), &tree, &tree, false /* dataAndTreeInSameFile */) } - if int64(buf.Len()) != tc.verifySize || !bytes.Equal(data[tc.verifyStart:tc.verifyStart+tc.verifySize], buf.Bytes()) { - t.Errorf("Incorrect output from Verify") + if err != nil { + t.Fatalf("Generate failed: %v", err) } - } else { - if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err == nil { - t.Errorf("Verification succeeded when expected to fail") + + // Flip a bit in data and checks Verify results. + var buf bytes.Buffer + data[tc.modifyByte] ^= 1 + if tc.shouldSucceed { + if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root, dataAndTreeInSameFile); err != nil && err != io.EOF { + t.Errorf("Verification failed when expected to succeed: %v", err) + } + if int64(buf.Len()) != tc.verifySize || !bytes.Equal(data[tc.verifyStart:tc.verifyStart+tc.verifySize], buf.Bytes()) { + t.Errorf("Incorrect output from Verify") + } + } else { + if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root, dataAndTreeInSameFile); err == nil { + t.Errorf("Verification succeeded when expected to fail") + } } } }) @@ -318,36 +358,47 @@ func TestVerifyRandom(t *testing.T) { data := make([]byte, dataSize) // Generate random bytes in data. rand.Read(data) - var tree bytesReadWriter - root, err := Generate(bytes.NewBuffer(data), int64(dataSize), &tree, &tree) - if err != nil { - t.Fatalf("Generate failed: %v", err) - } + for _, dataAndTreeInSameFile := range []bool{false, true} { + var tree bytesReadWriter + var root []byte + var err error + if dataAndTreeInSameFile { + tree.Write(data) + root, err = Generate(&tree, int64(len(data)), &tree, &tree, dataAndTreeInSameFile) + } else { + root, err = Generate(&bytesReadWriter{ + bytes: data, + }, int64(dataSize), &tree, &tree, dataAndTreeInSameFile) + } + if err != nil { + t.Fatalf("Generate failed: %v", err) + } - // Pick a random portion of data. - start := rand.Int63n(dataSize - 1) - size := rand.Int63n(dataSize) + 1 + // Pick a random portion of data. + start := rand.Int63n(dataSize - 1) + size := rand.Int63n(dataSize) + 1 - var buf bytes.Buffer - // Checks that the random portion of data from the original data is - // verified successfully. - if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err != nil && err != io.EOF { - t.Errorf("Verification failed for correct data: %v", err) - } - if size > dataSize-start { - size = dataSize - start - } - if int64(buf.Len()) != size || !bytes.Equal(data[start:start+size], buf.Bytes()) { - t.Errorf("Incorrect output from Verify") - } + var buf bytes.Buffer + // Checks that the random portion of data from the original data is + // verified successfully. + if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root, dataAndTreeInSameFile); err != nil && err != io.EOF { + t.Errorf("Verification failed for correct data: %v", err) + } + if size > dataSize-start { + size = dataSize - start + } + if int64(buf.Len()) != size || !bytes.Equal(data[start:start+size], buf.Bytes()) { + t.Errorf("Incorrect output from Verify") + } - buf.Reset() - // Flip a random bit in randPortion, and check that verification fails. - randBytePos := rand.Int63n(size) - data[start+randBytePos] ^= 1 + buf.Reset() + // Flip a random bit in randPortion, and check that verification fails. + randBytePos := rand.Int63n(size) + data[start+randBytePos] ^= 1 - if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err == nil { - t.Errorf("Verification succeeded for modified data") + if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root, dataAndTreeInSameFile); err == nil { + t.Errorf("Verification succeeded for modified data") + } } } diff --git a/pkg/procid/procid_amd64.s b/pkg/procid/procid_amd64.s index 7c622e5d7..a45920040 100644 --- a/pkg/procid/procid_amd64.s +++ b/pkg/procid/procid_amd64.s @@ -14,7 +14,7 @@ // +build amd64 // +build go1.8 -// +build !go1.16 +// +build !go1.17 #include "textflag.h" diff --git a/pkg/procid/procid_arm64.s b/pkg/procid/procid_arm64.s index 48ebb5fd1..9d3b0666d 100644 --- a/pkg/procid/procid_arm64.s +++ b/pkg/procid/procid_arm64.s @@ -14,7 +14,7 @@ // +build arm64 // +build go1.8 -// +build !go1.16 +// +build !go1.17 #include "textflag.h" diff --git a/pkg/refs_vfs2/BUILD b/pkg/refs_vfs2/BUILD index 7b3e10683..577b827a5 100644 --- a/pkg/refs_vfs2/BUILD +++ b/pkg/refs_vfs2/BUILD @@ -11,7 +11,7 @@ go_template( types = [ "T", ], - visibility = ["//pkg/sentry:internal"], + visibility = ["//:sandbox"], deps = [ "//pkg/log", "//pkg/refs", diff --git a/pkg/refs_vfs2/refs_template.go b/pkg/refs_vfs2/refs_template.go index 99c43c065..d9b552896 100644 --- a/pkg/refs_vfs2/refs_template.go +++ b/pkg/refs_vfs2/refs_template.go @@ -12,11 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package refs_template defines a template that can be used by reference counted -// objects. +// Package refs_template defines a template that can be used by reference +// counted objects. The "owner" template parameter is used in log messages to +// indicate the type of reference-counted object that exhibited a reference +// leak. As a result, structs that are embedded in other structs should not use +// this template, since it will make tracking down leaks more difficult. package refs_template import ( + "fmt" "runtime" "sync/atomic" @@ -38,6 +42,11 @@ var ownerType *T // Note that the number of references is actually refCount + 1 so that a default // zero-value Refs object contains one reference. // +// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in +// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount. +// This will allow us to add stack trace information to the leak messages +// without growing the size of Refs. +// // +stateify savable type Refs struct { // refCount is composed of two fields: @@ -82,7 +91,7 @@ func (r *Refs) ReadRefs() int64 { //go:nosplit func (r *Refs) IncRef() { if v := atomic.AddInt64(&r.refCount, 1); v <= 0 { - panic("Incrementing non-positive ref count") + panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, ownerType)) } } @@ -122,7 +131,7 @@ func (r *Refs) TryIncRef() bool { func (r *Refs) DecRef(destroy func()) { switch v := atomic.AddInt64(&r.refCount, -1); { case v < -1: - panic("Decrementing non-positive ref count") + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, ownerType)) case v == -1: // Call the destructor. diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index 29aeaab8c..bdef7762c 100644 --- a/pkg/seccomp/BUILD +++ b/pkg/seccomp/BUILD @@ -10,6 +10,7 @@ go_binary( "seccomp_test_victim_amd64.go", "seccomp_test_victim_arm64.go", ], + nogo = False, deps = [":seccomp"], ) diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index d41d23a43..42a6c41c2 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -55,6 +55,7 @@ go_library( "//pkg/unet", "//pkg/usermem", "//pkg/waiter", + "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index e29ae00f2..67a807f9d 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange @@ -123,6 +124,11 @@ func (t *TTYFileOperations) Release(ctx context.Context) { // Ioctl implements fs.FileOperations.Ioctl. func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + task := kernel.TaskFromContext(ctx) + if task == nil { + return 0, syserror.ENOTTY + } + // Ignore arg[0]. This is the real FD: fd := t.fileOperations.iops.fileState.FD() ioctl := args[1].Uint64() @@ -132,9 +138,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO if err != nil { return 0, err } - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err = termios.CopyOut(task, args[2].Pointer()) return 0, err case linux.TCSETS, linux.TCSETSW, linux.TCSETSF: @@ -146,9 +150,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO } var termios linux.Termios - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := termios.CopyIn(task, args[2].Pointer()); err != nil { return 0, err } err := ioctlSetTermios(fd, ioctl, &termios) @@ -173,10 +175,8 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO // Map the ProcessGroup into a ProcessGroupID in the task's PID // namespace. - pgID := pidns.IDOfProcessGroup(t.fgProcessGroup) - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ - AddressSpaceActive: true, - }) + pgID := primitive.Int32(pidns.IDOfProcessGroup(t.fgProcessGroup)) + _, err := pgID.CopyOut(task, args[2].Pointer()) return 0, err case linux.TIOCSPGRP: @@ -184,11 +184,6 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO // Equivalent to tcsetpgrp(fd, *argp). // Set the foreground process group ID of this terminal. - task := kernel.TaskFromContext(ctx) - if task == nil { - return 0, syserror.ENOTTY - } - t.mu.Lock() defer t.mu.Unlock() @@ -208,12 +203,11 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO return 0, syserror.ENOTTY } - var pgID kernel.ProcessGroupID - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + var pgIDP primitive.Int32 + if _, err := pgIDP.CopyIn(task, args[2].Pointer()); err != nil { return 0, err } + pgID := kernel.ProcessGroupID(pgIDP) // pgID must be non-negative. if pgID < 0 { @@ -242,9 +236,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO if err != nil { return 0, err } - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err = winsize.CopyOut(task, args[2].Pointer()) return 0, err case linux.TIOCSWINSZ: @@ -255,9 +247,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO // background ones) can set the winsize. var winsize linux.Winsize - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := winsize.CopyIn(task, args[2].Pointer()); err != nil { return 0, err } err := ioctlSetWinsize(fd, &winsize) diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index b095312fe..998b697ca 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -16,6 +16,8 @@ package tmpfs import ( + "math" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -32,9 +34,15 @@ import ( var fsInfo = fs.Info{ Type: linux.TMPFS_MAGIC, + // tmpfs currently does not support configurable size limits. In Linux, + // such a tmpfs mount will return f_blocks == f_bfree == f_bavail == 0 from + // statfs(2). However, many applications treat this as having a size limit + // of 0. To work around this, claim to have a very large but non-zero size, + // chosen to ensure that BlockSize * Blocks does not overflow int64 (which + // applications may also handle incorrectly). // TODO(b/29637826): allow configuring a tmpfs size and enforce it. - TotalBlocks: 0, - FreeBlocks: 0, + TotalBlocks: math.MaxInt64 / usermem.PageSize, + FreeBlocks: math.MaxInt64 / usermem.PageSize, } // rename implements fs.InodeOperations.Rename for tmpfs nodes. diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD index 93512c9b6..3f64fab3a 100644 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -1,7 +1,19 @@ load("//tools:defs.bzl", "go_library", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") licenses(["notice"]) +go_template_instance( + name = "root_inode_refs", + out = "root_inode_refs.go", + package = "devpts", + prefix = "rootInode", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "rootInode", + }, +) + go_library( name = "devpts", srcs = [ @@ -9,6 +21,7 @@ go_library( "line_discipline.go", "master.go", "queue.go", + "root_inode_refs.go", "slave.go", "terminal.go", ], @@ -16,6 +29,8 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/log", + "//pkg/refs", "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/fs/lock", diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index 3f3a099bd..0eaff9087 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -83,6 +83,7 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds } root.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 1, linux.ModeDirectory|0555) root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) + root.EnableLeakCheck() root.dentry.Init(root) // Construct the pts master inode and dentry. Linux always uses inode @@ -110,6 +111,7 @@ func (fs *filesystem) Release(ctx context.Context) { // rootInode is the root directory inode for the devpts mounts. type rootInode struct { + rootInodeRefs kernfs.AlwaysValid kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren @@ -233,3 +235,8 @@ func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, } return offset, nil } + +// DecRef implements kernfs.Inode. +func (i *rootInode) DecRef(context.Context) { + i.rootInodeRefs.DecRef(i.Destroy) +} diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD index 999111deb..53a4f3012 100644 --- a/pkg/sentry/fsimpl/fuse/BUILD +++ b/pkg/sentry/fsimpl/fuse/BUILD @@ -15,6 +15,17 @@ go_template_instance( }, ) +go_template_instance( + name = "inode_refs", + out = "inode_refs.go", + package = "fuse", + prefix = "inode", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "inode", + }, +) + go_library( name = "fuse", srcs = [ @@ -22,6 +33,7 @@ go_library( "dev.go", "fusefs.go", "init.go", + "inode_refs.go", "register.go", "request_list.go", ], @@ -30,6 +42,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/log", + "//pkg/refs", "//pkg/sentry/fsimpl/devtmpfs", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel", diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index 44021ee4b..9717c0e15 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -198,6 +198,7 @@ func (fs *filesystem) Release(ctx context.Context) { // inode implements kernfs.Inode. type inode struct { + inodeRefs kernfs.InodeAttrs kernfs.InodeNoDynamicLookup kernfs.InodeNotSymlink @@ -213,6 +214,7 @@ func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *ke i := &inode{} i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755) i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) + i.EnableLeakCheck() i.dentry.Init(i) return &i.dentry @@ -324,3 +326,8 @@ func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptio return statFromFUSEAttr(out.Attr, opts.Mask, fusefs.devMinor), nil } + +// DecRef implements kernfs.Inode. +func (i *inode) DecRef(context.Context) { + i.inodeRefs.DecRef(i.Destroy) +} diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index c6696b9d8..81d34cfe3 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -703,6 +703,13 @@ type dentry struct { locks vfs.FileLocks // Inotify watches for this dentry. + // + // Note that inotify may behave unexpectedly in the presence of hard links, + // because dentries corresponding to the same file have separate inotify + // watches when they should share the same set. This is the case because it is + // impossible for us to know for sure whether two dentries correspond to the + // same underlying file (see the gofer filesystem section fo vfs/inotify.md for + // a more in-depth discussion on this matter). watches vfs.Watches } diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 7e1cbf065..a2e9342d5 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -56,10 +56,16 @@ func (fd *regularFileFD) OnClose(ctx context.Context) error { if !fd.vfsfd.IsWritable() { return nil } - // Skip flushing if writes may be buffered by the client, since (as with - // the VFS1 client) we don't flush buffered writes on close anyway. + // Skip flushing if there are client-buffered writes, since (as with the + // VFS1 client) we don't flush buffered writes on close anyway. d := fd.dentry() - if d.fs.opts.interop == InteropModeExclusive { + if d.fs.opts.interop != InteropModeExclusive { + return nil + } + d.dataMu.RLock() + haveDirtyPages := !d.dirty.IsEmpty() + d.dataMu.RUnlock() + if haveDirtyPages { return nil } d.handleMu.RLock() @@ -117,6 +123,10 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs return 0, io.EOF } + var ( + n int64 + readErr error + ) if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { // Lock d.metadataMu for the rest of the read to prevent d.size from // changing. @@ -127,20 +137,25 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs if err := d.writeback(ctx, offset, dst.NumBytes()); err != nil { return 0, err } - } - - rw := getDentryReadWriter(ctx, d, offset) - if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { + rw := getDentryReadWriter(ctx, d, offset) // Require the read to go to the remote file. rw.direct = true + n, readErr = dst.CopyOutFrom(ctx, rw) + putDentryReadWriter(rw) + if d.fs.opts.interop != InteropModeShared { + // Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed(). + d.touchAtimeLocked(fd.vfsfd.Mount()) + } + } else { + rw := getDentryReadWriter(ctx, d, offset) + n, readErr = dst.CopyOutFrom(ctx, rw) + putDentryReadWriter(rw) + if d.fs.opts.interop != InteropModeShared { + // Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed(). + d.touchAtime(fd.vfsfd.Mount()) + } } - n, err := dst.CopyOutFrom(ctx, rw) - putDentryReadWriter(rw) - if d.fs.opts.interop != InteropModeShared { - // Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed(). - d.touchAtime(fd.vfsfd.Mount()) - } - return n, err + return n, readErr } // Read implements vfs.FileDescriptionImpl.Read. diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go index 98733253d..7e825caae 100644 --- a/pkg/sentry/fsimpl/gofer/time.go +++ b/pkg/sentry/fsimpl/gofer/time.go @@ -52,6 +52,20 @@ func (d *dentry) touchAtime(mnt *vfs.Mount) { mnt.EndWrite() } +// Preconditions: d.metadataMu is locked. d.cachedMetadataAuthoritative() == true. +func (d *dentry) touchAtimeLocked(mnt *vfs.Mount) { + if mnt.Flags.NoATime || mnt.ReadOnly() { + return + } + if err := mnt.CheckBeginWrite(); err != nil { + return + } + now := d.fs.clock.Now().Nanoseconds() + atomic.StoreInt64(&d.atime, now) + atomic.StoreUint32(&d.atimeDirty, 1) + mnt.EndWrite() +} + // Preconditions: // * d.cachedMetadataAuthoritative() == true. // * The caller has successfully called vfs.Mount.CheckBeginWrite(). diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 090ae0804..be1c88c82 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -72,6 +72,7 @@ go_library( "//pkg/unet", "//pkg/usermem", "//pkg/waiter", + "//tools/go_marshal/primitive", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go index 27cbd3059..7a9be4b97 100644 --- a/pkg/sentry/fsimpl/host/tty.go +++ b/pkg/sentry/fsimpl/host/tty.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // TTYFileDescription implements vfs.FileDescriptionImpl for a host file @@ -143,6 +144,11 @@ func (t *TTYFileDescription) Write(ctx context.Context, src usermem.IOSequence, // Ioctl implements vfs.FileDescriptionImpl. func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + task := kernel.TaskFromContext(ctx) + if task == nil { + return 0, syserror.ENOTTY + } + // Ignore arg[0]. This is the real FD: fd := t.inode.hostFD ioctl := args[1].Uint64() @@ -152,9 +158,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch if err != nil { return 0, err } - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err = termios.CopyOut(task, args[2].Pointer()) return 0, err case linux.TCSETS, linux.TCSETSW, linux.TCSETSF: @@ -166,9 +170,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch } var termios linux.Termios - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := termios.CopyIn(task, args[2].Pointer()); err != nil { return 0, err } err := ioctlSetTermios(fd, ioctl, &termios) @@ -192,10 +194,8 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch defer t.mu.Unlock() // Map the ProcessGroup into a ProcessGroupID in the task's PID namespace. - pgID := pidns.IDOfProcessGroup(t.fgProcessGroup) - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ - AddressSpaceActive: true, - }) + pgID := primitive.Int32(pidns.IDOfProcessGroup(t.fgProcessGroup)) + _, err := pgID.CopyOut(task, args[2].Pointer()) return 0, err case linux.TIOCSPGRP: @@ -203,11 +203,6 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch // Equivalent to tcsetpgrp(fd, *argp). // Set the foreground process group ID of this terminal. - task := kernel.TaskFromContext(ctx) - if task == nil { - return 0, syserror.ENOTTY - } - t.mu.Lock() defer t.mu.Unlock() @@ -226,12 +221,11 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch return 0, syserror.ENOTTY } - var pgID kernel.ProcessGroupID - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + var pgIDP primitive.Int32 + if _, err := pgIDP.CopyIn(task, args[2].Pointer()); err != nil { return 0, err } + pgID := kernel.ProcessGroupID(pgIDP) // pgID must be non-negative. if pgID < 0 { @@ -260,9 +254,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch if err != nil { return 0, err } - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err = winsize.CopyOut(task, args[2].Pointer()) return 0, err case linux.TIOCSWINSZ: @@ -273,9 +265,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch // set the winsize. var winsize linux.Winsize - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := winsize.CopyIn(task, args[2].Pointer()); err != nil { return 0, err } err := ioctlSetWinsize(fd, &winsize) diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index 3835557fe..637dca70c 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -26,9 +26,54 @@ go_template_instance( }, ) +go_template_instance( + name = "dentry_refs", + out = "dentry_refs.go", + package = "kernfs", + prefix = "Dentry", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "Dentry", + }, +) + +go_template_instance( + name = "static_directory_refs", + out = "static_directory_refs.go", + package = "kernfs", + prefix = "StaticDirectory", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "StaticDirectory", + }, +) + +go_template_instance( + name = "dir_refs", + out = "dir_refs.go", + package = "kernfs_test", + prefix = "dir", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "dir", + }, +) + +go_template_instance( + name = "readonly_dir_refs", + out = "readonly_dir_refs.go", + package = "kernfs_test", + prefix = "readonlyDir", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "readonlyDir", + }, +) + go_library( name = "kernfs", srcs = [ + "dentry_refs.go", "dynamic_bytes_file.go", "fd_impl_util.go", "filesystem.go", @@ -36,6 +81,7 @@ go_library( "inode_impl_util.go", "kernfs.go", "slot_list.go", + "static_directory_refs.go", "symlink.go", ], visibility = ["//pkg/sentry:internal"], @@ -59,11 +105,17 @@ go_library( go_test( name = "kernfs_test", size = "small", - srcs = ["kernfs_test.go"], + srcs = [ + "dir_refs.go", + "kernfs_test.go", + "readonly_dir_refs.go", + ], deps = [ ":kernfs", "//pkg/abi/linux", "//pkg/context", + "//pkg/log", + "//pkg/refs", "//pkg/sentry/contexttest", "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 885856868..f442a5606 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -20,7 +20,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" @@ -344,8 +343,6 @@ type OrderedChildrenOptions struct { // // Must be initialize with Init before first use. type OrderedChildren struct { - refs.AtomicRefCount - // Can children be modified by user syscalls? It set to false, interface // methods that would modify the children return EPERM. Immutable. writable bool @@ -361,14 +358,14 @@ func (o *OrderedChildren) Init(opts OrderedChildrenOptions) { o.set = make(map[string]*slot) } -// DecRef implements Inode.DecRef. -func (o *OrderedChildren) DecRef(ctx context.Context) { - o.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) { - o.mu.Lock() - defer o.mu.Unlock() - o.order.Reset() - o.set = nil - }) +// Destroy clears the children stored in o. It should be called by structs +// embedding OrderedChildren upon destruction, i.e. when their reference count +// reaches zero. +func (o *OrderedChildren) Destroy() { + o.mu.Lock() + defer o.mu.Unlock() + o.order.Reset() + o.set = nil } // Populate inserts children into this OrderedChildren, and d's dentry @@ -549,6 +546,7 @@ func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.D // // +stateify savable type StaticDirectory struct { + StaticDirectoryRefs InodeNotSymlink InodeDirectoryNoNewChildren InodeAttrs @@ -594,11 +592,16 @@ func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd return fd.VFSFileDescription(), nil } -// SetStat implements Inode.SetStat not allowing inode attributes to be changed. +// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { return syserror.EPERM } +// DecRef implements kernfs.Inode. +func (s *StaticDirectory) DecRef(context.Context) { + s.StaticDirectoryRefs.DecRef(s.Destroy) +} + // AlwaysValid partially implements kernfs.inodeDynamicLookup. type AlwaysValid struct{} diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 51dbc050c..ca3685800 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -57,7 +57,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" @@ -161,9 +160,9 @@ const ( // // Must be initialized by Init prior to first use. type Dentry struct { - vfsd vfs.Dentry + DentryRefs - refs.AtomicRefCount + vfsd vfs.Dentry // flags caches useful information about the dentry from the inode. See the // dflags* consts above. Must be accessed by atomic ops. @@ -194,6 +193,7 @@ func (d *Dentry) Init(inode Inode) { if ftype == linux.ModeSymlink { d.flags |= dflagsIsSymlink } + d.EnableLeakCheck() } // VFSDentry returns the generic vfs dentry for this kernfs dentry. @@ -213,16 +213,14 @@ func (d *Dentry) isSymlink() bool { // DecRef implements vfs.DentryImpl.DecRef. func (d *Dentry) DecRef(ctx context.Context) { - d.AtomicRefCount.DecRefWithDestructor(ctx, d.destroy) -} - -// Precondition: Dentry must be removed from VFS' dentry cache. -func (d *Dentry) destroy(ctx context.Context) { - d.inode.DecRef(ctx) // IncRef from Init. - d.inode = nil - if d.parent != nil { - d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild. - } + // Before the destructor is called, Dentry must be removed from VFS' dentry cache. + d.DentryRefs.DecRef(func() { + d.inode.DecRef(ctx) // IncRef from Init. + d.inode = nil + if d.parent != nil { + d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild. + } + }) } // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index e5c28c0e4..e376d1736 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -96,6 +96,7 @@ func (*attrs) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.S } type readonlyDir struct { + readonlyDirRefs attrs kernfs.InodeNotSymlink kernfs.InodeNoDynamicLookup @@ -111,6 +112,7 @@ func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMod dir := &readonlyDir{} dir.attrs.Init(creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), linux.ModeDirectory|mode) dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) + dir.EnableLeakCheck() dir.dentry.Init(dir) dir.IncLinks(dir.OrderedChildren.Populate(&dir.dentry, contents)) @@ -128,7 +130,12 @@ func (d *readonlyDir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs return fd.VFSFileDescription(), nil } +func (d *readonlyDir) DecRef(context.Context) { + d.readonlyDirRefs.DecRef(d.Destroy) +} + type dir struct { + dirRefs attrs kernfs.InodeNotSymlink kernfs.InodeNoDynamicLookup @@ -145,6 +152,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte dir.fs = fs dir.attrs.Init(creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), linux.ModeDirectory|mode) dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true}) + dir.EnableLeakCheck() dir.dentry.Init(dir) dir.IncLinks(dir.OrderedChildren.Populate(&dir.dentry, contents)) @@ -162,6 +170,10 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, return fd.VFSFileDescription(), nil } +func (d *dir) DecRef(context.Context) { + d.dirRefs.DecRef(d.Destroy) +} + func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) { creds := auth.CredentialsFromContext(ctx) dir := d.fs.newDir(creds, opts.Mode, nil) diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index a3cee4047..e720bfb0b 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -30,7 +30,7 @@ import ( // _OVL_XATTR_OPAQUE is an extended attribute key whose value is set to "y" for // opaque directories. // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_OPAQUE -const _OVL_XATTR_OPAQUE = "trusted.overlay.opaque" +const _OVL_XATTR_OPAQUE = linux.XATTR_TRUSTED_PREFIX + "overlay.opaque" func isWhiteout(stat *linux.Statx) bool { return stat.Mode&linux.S_IFMT == linux.S_IFCHR && stat.RdevMajor == 0 && stat.RdevMinor == 0 diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/non_directory.go index d3060a481..268b32537 100644 --- a/pkg/sentry/fsimpl/overlay/non_directory.go +++ b/pkg/sentry/fsimpl/overlay/non_directory.go @@ -121,7 +121,6 @@ func (fd *nonDirectoryFD) OnClose(ctx context.Context) error { fd.cachedFlags = statusFlags } wrappedFD := fd.cachedFD - defer wrappedFD.IncRef() fd.mu.Unlock() return wrappedFD.OnClose(ctx) } diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go index 4b3dfbc01..00562667f 100644 --- a/pkg/sentry/fsimpl/overlay/overlay.go +++ b/pkg/sentry/fsimpl/overlay/overlay.go @@ -315,7 +315,11 @@ func clonePrivateMount(vfsObj *vfs.VirtualFilesystem, vd vfs.VirtualDentry, forc if err != nil { return vfs.VirtualDentry{}, err } - return vfs.MakeVirtualDentry(newmnt, vd.Dentry()), nil + // Take a reference on the dentry which will be owned by the returned + // VirtualDentry. + d := vd.Dentry() + d.IncRef() + return vfs.MakeVirtualDentry(newmnt, d), nil } // Release implements vfs.FilesystemImpl.Release. diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 14ecfd300..a45b44440 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -1,18 +1,79 @@ load("//tools:defs.bzl", "go_library", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") licenses(["notice"]) +go_template_instance( + name = "fd_dir_inode_refs", + out = "fd_dir_inode_refs.go", + package = "proc", + prefix = "fdDirInode", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "fdDirInode", + }, +) + +go_template_instance( + name = "fd_info_dir_inode_refs", + out = "fd_info_dir_inode_refs.go", + package = "proc", + prefix = "fdInfoDirInode", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "fdInfoDirInode", + }, +) + +go_template_instance( + name = "subtasks_inode_refs", + out = "subtasks_inode_refs.go", + package = "proc", + prefix = "subtasksInode", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "subtasksInode", + }, +) + +go_template_instance( + name = "task_inode_refs", + out = "task_inode_refs.go", + package = "proc", + prefix = "taskInode", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "taskInode", + }, +) + +go_template_instance( + name = "tasks_inode_refs", + out = "tasks_inode_refs.go", + package = "proc", + prefix = "tasksInode", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "tasksInode", + }, +) + go_library( name = "proc", srcs = [ + "fd_dir_inode_refs.go", + "fd_info_dir_inode_refs.go", "filesystem.go", "subtasks.go", + "subtasks_inode_refs.go", "task.go", "task_fds.go", "task_files.go", + "task_inode_refs.go", "task_net.go", "tasks.go", "tasks_files.go", + "tasks_inode_refs.go", "tasks_sys.go", ], visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index f25747da3..01c0efb3a 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -31,6 +31,7 @@ import ( // // +stateify savable type subtasksInode struct { + subtasksInodeRefs kernfs.InodeNotSymlink kernfs.InodeDirectoryNoNewChildren kernfs.InodeAttrs @@ -57,6 +58,7 @@ func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, // Note: credentials are overridden by taskOwnedInode. subInode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) subInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) + subInode.EnableLeakCheck() inode := &taskOwnedInode{Inode: subInode, owner: task} dentry := &kernfs.Dentry{} @@ -182,3 +184,8 @@ func (i *subtasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { return syserror.EPERM } + +// DecRef implements kernfs.Inode. +func (i *subtasksInode) DecRef(context.Context) { + i.subtasksInodeRefs.DecRef(i.Destroy) +} diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 109b31b4c..66b557abd 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -32,6 +32,7 @@ import ( // // +stateify savable type taskInode struct { + taskInodeRefs kernfs.InodeNotSymlink kernfs.InodeDirectoryNoNewChildren kernfs.InodeNoDynamicLookup @@ -84,6 +85,7 @@ func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace taskInode := &taskInode{task: task} // Note: credentials are overridden by taskOwnedInode. taskInode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) + taskInode.EnableLeakCheck() inode := &taskOwnedInode{Inode: taskInode, owner: task} dentry := &kernfs.Dentry{} @@ -119,6 +121,11 @@ func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, v return syserror.EPERM } +// DecRef implements kernfs.Inode. +func (i *taskInode) DecRef(context.Context) { + i.taskInodeRefs.DecRef(i.Destroy) +} + // taskOwnedInode implements kernfs.Inode and overrides inode owner with task // effective user and group. type taskOwnedInode struct { @@ -147,6 +154,7 @@ func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux. dir.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, kernfs.GenericDirectoryFDOptions{ SeekEnd: kernfs.SeekEndZero, }) + dir.EnableLeakCheck() inode := &taskOwnedInode{Inode: dir, owner: task} d := &kernfs.Dentry{} diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go index e8fcb9aa1..0527b2de8 100644 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ b/pkg/sentry/fsimpl/proc/task_fds.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -101,6 +100,7 @@ func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, off // // +stateify savable type fdDirInode struct { + fdDirInodeRefs kernfs.InodeNotSymlink kernfs.InodeDirectoryNoNewChildren kernfs.InodeAttrs @@ -120,6 +120,7 @@ func (fs *filesystem) newFDDirInode(task *kernel.Task) *kernfs.Dentry { }, } inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) + inode.EnableLeakCheck() dentry := &kernfs.Dentry{} dentry.Init(inode) @@ -175,6 +176,11 @@ func (i *fdDirInode) CheckPermissions(ctx context.Context, creds *auth.Credentia return err } +// DecRef implements kernfs.Inode. +func (i *fdDirInode) DecRef(context.Context) { + i.fdDirInodeRefs.DecRef(i.Destroy) +} + // fdSymlink is an symlink for the /proc/[pid]/fd/[fd] file. // // +stateify savable @@ -227,6 +233,7 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen // // +stateify savable type fdInfoDirInode struct { + fdInfoDirInodeRefs kernfs.InodeNotSymlink kernfs.InodeDirectoryNoNewChildren kernfs.InodeAttrs @@ -245,6 +252,7 @@ func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) *kernfs.Dentry { }, } inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) + inode.EnableLeakCheck() dentry := &kernfs.Dentry{} dentry.Init(inode) @@ -282,12 +290,16 @@ func (i *fdInfoDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd * return fd.VFSFileDescription(), nil } +// DecRef implements kernfs.Inode. +func (i *fdInfoDirInode) DecRef(context.Context) { + i.fdInfoDirInodeRefs.DecRef(i.Destroy) +} + // fdInfoData implements vfs.DynamicBytesSource for /proc/[pid]/fdinfo/[fd]. // // +stateify savable type fdInfoData struct { kernfs.DynamicBytesFile - refs.AtomicRefCount task *kernel.Task fd int32 diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go index a4c884bf9..4e69782c7 100644 --- a/pkg/sentry/fsimpl/proc/task_net.go +++ b/pkg/sentry/fsimpl/proc/task_net.go @@ -262,7 +262,7 @@ func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error { // For now, we always redact this pointer. fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %8d", (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. - s.Refs()-1, // RefCount, don't count our own ref. + s.ReadRefs()-1, // RefCount, don't count our own ref. 0, // Protocol, always 0 for UDS. sockFlags, // Flags. sops.Endpoint().Type(), // Type. @@ -430,7 +430,7 @@ func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, // Field: refcount. Don't count the ref we obtain while deferencing // the weakref to this socket. - fmt.Fprintf(buf, "%d ", s.Refs()-1) + fmt.Fprintf(buf, "%d ", s.ReadRefs()-1) // Field: Socket struct address. Redacted due to the same reason as // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. @@ -589,7 +589,7 @@ func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error { // Field: ref; reference count on the socket inode. Don't count the ref // we obtain while deferencing the weakref to this socket. - fmt.Fprintf(buf, "%d ", s.Refs()-1) + fmt.Fprintf(buf, "%d ", s.ReadRefs()-1) // Field: Socket struct address. Redacted due to the same reason as // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index 1391992b7..863c4467e 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -37,6 +37,7 @@ const ( // // +stateify savable type tasksInode struct { + tasksInodeRefs kernfs.InodeNotSymlink kernfs.InodeDirectoryNoNewChildren kernfs.InodeAttrs @@ -84,6 +85,7 @@ func (fs *filesystem) newTasksInode(k *kernel.Kernel, pidns *kernel.PIDNamespace cgroupControllers: cgroupControllers, } inode.InodeAttrs.Init(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) + inode.EnableLeakCheck() dentry := &kernfs.Dentry{} dentry.Init(inode) @@ -226,6 +228,11 @@ func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.St return stat, nil } +// DecRef implements kernfs.Inode. +func (i *tasksInode) DecRef(context.Context) { + i.tasksInodeRefs.DecRef(i.Destroy) +} + // staticFileSetStat implements a special static file that allows inode // attributes to be set. This is to support /proc files that are readonly, but // allow attributes to be set. diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index 1b548ccd4..906cd52cb 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -1,21 +1,41 @@ load("//tools:defs.bzl", "go_library", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") licenses(["notice"]) +go_template_instance( + name = "dir_refs", + out = "dir_refs.go", + package = "sys", + prefix = "dir", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "dir", + }, +) + go_library( name = "sys", srcs = [ + "dir_refs.go", + "kcov.go", "sys.go", ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/coverage", + "//pkg/log", + "//pkg/refs", + "//pkg/sentry/arch", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/memmap", "//pkg/sentry/vfs", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go new file mode 100644 index 000000000..92710d877 --- /dev/null +++ b/pkg/sentry/fsimpl/sys/kcov.go @@ -0,0 +1,116 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sys + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials) *kernfs.Dentry { + k := &kcovInode{} + k.InodeAttrs.Init(creds, 0, 0, fs.NextIno(), linux.S_IFREG|0600) + d := &kernfs.Dentry{} + d.Init(k) + return d +} + +// kcovInode implements kernfs.Inode. +type kcovInode struct { + kernfs.InodeAttrs + kernfs.InodeNoopRefCount + kernfs.InodeNotSymlink + kernfs.InodeNotDirectory +} + +func (i *kcovInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + k := kernel.KernelFromContext(ctx) + if k == nil { + panic("KernelFromContext returned nil") + } + fd := &kcovFD{ + inode: i, + kcov: k.NewKcov(), + } + + if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{ + DenyPRead: true, + DenyPWrite: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil +} + +type kcovFD struct { + vfs.FileDescriptionDefaultImpl + vfs.NoLockFD + + vfsfd vfs.FileDescription + inode *kcovInode + kcov *kernel.Kcov +} + +// Ioctl implements vfs.FileDescriptionImpl.Ioctl. +func (fd *kcovFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { + cmd := uint32(args[1].Int()) + arg := args[2].Uint64() + switch uint32(cmd) { + case linux.KCOV_INIT_TRACE: + return 0, fd.kcov.InitTrace(arg) + case linux.KCOV_ENABLE: + return 0, fd.kcov.EnableTrace(ctx, uint8(arg)) + case linux.KCOV_DISABLE: + if arg != 0 { + // This arg is unused; it should be 0. + return 0, syserror.EINVAL + } + return 0, fd.kcov.DisableTrace(ctx) + default: + return 0, syserror.ENOTTY + } +} + +// ConfigureMmap implements vfs.FileDescriptionImpl.ConfigureMmap. +func (fd *kcovFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + return fd.kcov.ConfigureMMap(ctx, opts) +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *kcovFD) Release(ctx context.Context) { + // kcov instances have reference counts in Linux, but this seems sufficient + // for our purposes. + fd.kcov.Reset() +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (fd *kcovFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + creds := auth.CredentialsFromContext(ctx) + fs := fd.vfsfd.VirtualDentry().Mount().Filesystem() + return fd.inode.SetStat(ctx, fs, creds, opts) +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (fd *kcovFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + return fd.inode.Stat(ctx, fd.vfsfd.Mount().Filesystem(), opts) +} diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index 393feb802..ea30a4ec2 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/coverage" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -73,7 +74,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt }), "firmware": fs.newDir(creds, defaultSysDirMode, nil), "fs": fs.newDir(creds, defaultSysDirMode, nil), - "kernel": fs.newDir(creds, defaultSysDirMode, nil), + "kernel": kernelDir(ctx, fs, creds), "module": fs.newDir(creds, defaultSysDirMode, nil), "power": fs.newDir(creds, defaultSysDirMode, nil), }) @@ -94,6 +95,21 @@ func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernf return fs.newDir(creds, defaultSysDirMode, children) } +func kernelDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernfs.Dentry { + // If kcov is available, set up /sys/kernel/debug/kcov. Technically, debugfs + // should be mounted at debug/, but for our purposes, it is sufficient to + // keep it in sys. + var children map[string]*kernfs.Dentry + if coverage.KcovAvailable() { + children = map[string]*kernfs.Dentry{ + "debug": fs.newDir(creds, linux.FileMode(0700), map[string]*kernfs.Dentry{ + "kcov": fs.newKcovFile(ctx, creds), + }), + } + } + return fs.newDir(creds, defaultSysDirMode, children) +} + // Release implements vfs.FilesystemImpl.Release. func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) @@ -102,6 +118,7 @@ func (fs *filesystem) Release(ctx context.Context) { // dir implements kernfs.Inode. type dir struct { + dirRefs kernfs.InodeAttrs kernfs.InodeNoDynamicLookup kernfs.InodeNotSymlink @@ -117,6 +134,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte d := &dir{} d.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755) d.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) + d.EnableLeakCheck() d.dentry.Init(d) d.IncLinks(d.OrderedChildren.Populate(&d.dentry, contents)) @@ -124,7 +142,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte return &d.dentry } -// SetStat implements Inode.SetStat not allowing inode attributes to be changed. +// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { return syserror.EPERM } @@ -140,6 +158,11 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, return fd.VFSFileDescription(), nil } +// DecRef implements kernfs.Inode.DecRef. +func (d *dir) DecRef(context.Context) { + d.dirRefs.DecRef(d.Destroy) +} + // cpuFile implements kernfs.Inode. type cpuFile struct { kernfs.DynamicBytesFile diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 7924a0911..eddfeab76 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // Sync implements vfs.FilesystemImpl.Sync. @@ -706,16 +705,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu if _, err := resolveLocked(ctx, rp); err != nil { return linux.Statfs{}, err } - statfs := linux.Statfs{ - Type: linux.TMPFS_MAGIC, - BlockSize: usermem.PageSize, - FragmentSize: usermem.PageSize, - NameLength: linux.NAME_MAX, - // TODO(b/29637826): Allow configuring a tmpfs size and enforce it. - Blocks: 0, - BlocksFree: 0, - } - return statfs, nil + return globalStatfs, nil } // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 428f62aaa..c4cec4130 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -201,6 +201,25 @@ func (fs *filesystem) Release(ctx context.Context) { fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) } +// immutable +var globalStatfs = linux.Statfs{ + Type: linux.TMPFS_MAGIC, + BlockSize: usermem.PageSize, + FragmentSize: usermem.PageSize, + NameLength: linux.NAME_MAX, + + // tmpfs currently does not support configurable size limits. In Linux, + // such a tmpfs mount will return f_blocks == f_bfree == f_bavail == 0 from + // statfs(2). However, many applications treat this as having a size limit + // of 0. To work around this, claim to have a very large but non-zero size, + // chosen to ensure that BlockSize * Blocks does not overflow int64 (which + // applications may also handle incorrectly). + // TODO(b/29637826): allow configuring a tmpfs size and enforce it. + Blocks: math.MaxInt64 / usermem.PageSize, + BlocksFree: math.MaxInt64 / usermem.PageSize, + BlocksAvailable: math.MaxInt64 / usermem.PageSize, +} + // dentry implements vfs.DentryImpl. type dentry struct { vfsd vfs.Dentry @@ -612,49 +631,65 @@ func (i *inode) listxattr(size uint64) ([]string, error) { } func (i *inode) getxattr(creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) { - if err := i.checkPermissions(creds, vfs.MayRead); err != nil { + if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil { return "", err } - if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { - return "", syserror.EOPNOTSUPP - } - if !i.userXattrSupported() { - return "", syserror.ENODATA - } return i.xattrs.Getxattr(opts) } func (i *inode) setxattr(creds *auth.Credentials, opts *vfs.SetxattrOptions) error { - if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil { return err } - if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { - return syserror.EOPNOTSUPP - } - if !i.userXattrSupported() { - return syserror.EPERM - } return i.xattrs.Setxattr(opts) } func (i *inode) removexattr(creds *auth.Credentials, name string) error { - if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + if err := i.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil { return err } - if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { - return syserror.EOPNOTSUPP - } - if !i.userXattrSupported() { - return syserror.EPERM - } return i.xattrs.Removexattr(name) } -// Extended attributes in the user.* namespace are only supported for regular -// files and directories. -func (i *inode) userXattrSupported() bool { - filetype := linux.S_IFMT & atomic.LoadUint32(&i.mode) - return filetype == linux.S_IFREG || filetype == linux.S_IFDIR +func (i *inode) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error { + switch { + case ats&vfs.MayRead == vfs.MayRead: + if err := i.checkPermissions(creds, vfs.MayRead); err != nil { + return err + } + case ats&vfs.MayWrite == vfs.MayWrite: + if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + default: + panic(fmt.Sprintf("checkXattrPermissions called with impossible AccessTypes: %v", ats)) + } + + switch { + case strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX): + // The trusted.* namespace can only be accessed by privileged + // users. + if creds.HasCapability(linux.CAP_SYS_ADMIN) { + return nil + } + if ats&vfs.MayWrite == vfs.MayWrite { + return syserror.EPERM + } + return syserror.ENODATA + case strings.HasPrefix(name, linux.XATTR_USER_PREFIX): + // Extended attributes in the user.* namespace are only + // supported for regular files and directories. + filetype := linux.S_IFMT & atomic.LoadUint32(&i.mode) + if filetype == linux.S_IFREG || filetype == linux.S_IFDIR { + return nil + } + if ats&vfs.MayWrite == vfs.MayWrite { + return syserror.EPERM + } + return syserror.ENODATA + + } + return syserror.EOPNOTSUPP } // fileDescription is embedded by tmpfs implementations of @@ -698,6 +733,11 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) return nil } +// StatFS implements vfs.FileDescriptionImpl.StatFS. +func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { + return globalStatfs, nil +} + // Listxattr implements vfs.FileDescriptionImpl.Listxattr. func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) { return fd.inode().listxattr(size) diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 5416a310d..d436daab4 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -74,6 +74,50 @@ go_template_instance( }, ) +go_template_instance( + name = "fd_table_refs", + out = "fd_table_refs.go", + package = "kernel", + prefix = "FDTable", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "FDTable", + }, +) + +go_template_instance( + name = "fs_context_refs", + out = "fs_context_refs.go", + package = "kernel", + prefix = "FSContext", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "FSContext", + }, +) + +go_template_instance( + name = "process_group_refs", + out = "process_group_refs.go", + package = "kernel", + prefix = "ProcessGroup", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "ProcessGroup", + }, +) + +go_template_instance( + name = "session_refs", + out = "session_refs.go", + package = "kernel", + prefix = "Session", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "Session", + }, +) + proto_library( name = "uncaught_signal", srcs = ["uncaught_signal.proto"], @@ -88,9 +132,13 @@ go_library( "aio.go", "context.go", "fd_table.go", + "fd_table_refs.go", "fd_table_unsafe.go", "fs_context.go", + "fs_context_refs.go", "ipc_namespace.go", + "kcov.go", + "kcov_unsafe.go", "kernel.go", "kernel_opts.go", "kernel_state.go", @@ -99,6 +147,7 @@ go_library( "pending_signals_state.go", "posixtimer.go", "process_group_list.go", + "process_group_refs.go", "ptrace.go", "ptrace_amd64.go", "ptrace_arm64.go", @@ -106,6 +155,7 @@ go_library( "seccomp.go", "seqatomic_taskgoroutineschedinfo_unsafe.go", "session_list.go", + "session_refs.go", "sessions.go", "signal.go", "signal_handlers.go", @@ -157,6 +207,7 @@ go_library( "//pkg/bits", "//pkg/bpf", "//pkg/context", + "//pkg/coverage", "//pkg/cpuid", "//pkg/eventchannel", "//pkg/fspath", diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index ce53af69b..5773244ac 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/limits" @@ -78,7 +77,8 @@ type descriptor struct { // // +stateify savable type FDTable struct { - refs.AtomicRefCount + FDTableRefs + k *Kernel // mu protects below. @@ -176,16 +176,15 @@ func (k *Kernel) NewFDTable() *FDTable { return f } -// destroy removes all of the file descriptors from the map. -func (f *FDTable) destroy(ctx context.Context) { - f.RemoveIf(ctx, func(*fs.File, *vfs.FileDescription, FDFlags) bool { - return true - }) -} - -// DecRef implements RefCounter.DecRef with destructor f.destroy. +// DecRef implements RefCounter.DecRef. +// +// If f reaches zero references, all of its file descriptors are removed. func (f *FDTable) DecRef(ctx context.Context) { - f.DecRefWithDestructor(ctx, f.destroy) + f.FDTableRefs.DecRef(func() { + f.RemoveIf(ctx, func(*fs.File, *vfs.FileDescription, FDFlags) bool { + return true + }) + }) } // Size returns the number of file descriptor slots currently allocated. diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go index 7fd97dc53..6b8feb107 100644 --- a/pkg/sentry/kernel/fd_table_unsafe.go +++ b/pkg/sentry/kernel/fd_table_unsafe.go @@ -31,6 +31,8 @@ type descriptorTable struct { } // init initializes the table. +// +// TODO(gvisor.dev/1486): Enable leak check for FDTable. func (f *FDTable) init() { var slice []unsafe.Pointer // Empty slice. atomic.StorePointer(&f.slice, unsafe.Pointer(&slice)) diff --git a/pkg/sentry/kernel/fs_context.go b/pkg/sentry/kernel/fs_context.go index 8f2d36d5a..d46d1e1c1 100644 --- a/pkg/sentry/kernel/fs_context.go +++ b/pkg/sentry/kernel/fs_context.go @@ -18,7 +18,6 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" @@ -30,7 +29,7 @@ import ( // // +stateify savable type FSContext struct { - refs.AtomicRefCount + FSContextRefs // mu protects below. mu sync.Mutex `state:"nosave"` @@ -64,7 +63,7 @@ func newFSContext(root, cwd *fs.Dirent, umask uint) *FSContext { cwd: cwd, umask: umask, } - f.EnableLeakCheck("kernel.FSContext") + f.EnableLeakCheck() return &f } @@ -77,54 +76,56 @@ func NewFSContextVFS2(root, cwd vfs.VirtualDentry, umask uint) *FSContext { cwdVFS2: cwd, umask: umask, } - f.EnableLeakCheck("kernel.FSContext") + f.EnableLeakCheck() return &f } -// destroy is the destructor for an FSContext. +// DecRef implements RefCounter.DecRef. // -// This will call DecRef on both root and cwd Dirents. If either call to -// DecRef returns an error, then it will be propagated. If both calls to -// DecRef return an error, then the one from root.DecRef will be propagated. +// When f reaches zero references, DecRef will be called on both root and cwd +// Dirents. // // Note that there may still be calls to WorkingDirectory() or RootDirectory() // (that return nil). This is because valid references may still be held via // proc files or other mechanisms. -func (f *FSContext) destroy(ctx context.Context) { - // Hold f.mu so that we don't race with RootDirectory() and - // WorkingDirectory(). - f.mu.Lock() - defer f.mu.Unlock() - - if VFS2Enabled { - f.rootVFS2.DecRef(ctx) - f.rootVFS2 = vfs.VirtualDentry{} - f.cwdVFS2.DecRef(ctx) - f.cwdVFS2 = vfs.VirtualDentry{} - } else { - f.root.DecRef(ctx) - f.root = nil - f.cwd.DecRef(ctx) - f.cwd = nil - } -} - -// DecRef implements RefCounter.DecRef with destructor f.destroy. func (f *FSContext) DecRef(ctx context.Context) { - f.DecRefWithDestructor(ctx, f.destroy) + f.FSContextRefs.DecRef(func() { + // Hold f.mu so that we don't race with RootDirectory() and + // WorkingDirectory(). + f.mu.Lock() + defer f.mu.Unlock() + + if VFS2Enabled { + f.rootVFS2.DecRef(ctx) + f.rootVFS2 = vfs.VirtualDentry{} + f.cwdVFS2.DecRef(ctx) + f.cwdVFS2 = vfs.VirtualDentry{} + } else { + f.root.DecRef(ctx) + f.root = nil + f.cwd.DecRef(ctx) + f.cwd = nil + } + }) } // Fork forks this FSContext. // -// This is not a valid call after destroy. +// This is not a valid call after f is destroyed. func (f *FSContext) Fork() *FSContext { f.mu.Lock() defer f.mu.Unlock() if VFS2Enabled { + if !f.cwdVFS2.Ok() { + panic("FSContext.Fork() called after destroy") + } f.cwdVFS2.IncRef() f.rootVFS2.IncRef() } else { + if f.cwd == nil { + panic("FSContext.Fork() called after destroy") + } f.cwd.IncRef() f.root.IncRef() } @@ -140,8 +141,8 @@ func (f *FSContext) Fork() *FSContext { // WorkingDirectory returns the current working directory. // -// This will return nil if called after destroy(), otherwise it will return a -// Dirent with a reference taken. +// This will return nil if called after f is destroyed, otherwise it will return +// a Dirent with a reference taken. func (f *FSContext) WorkingDirectory() *fs.Dirent { f.mu.Lock() defer f.mu.Unlock() @@ -152,8 +153,8 @@ func (f *FSContext) WorkingDirectory() *fs.Dirent { // WorkingDirectoryVFS2 returns the current working directory. // -// This will return nil if called after destroy(), otherwise it will return a -// Dirent with a reference taken. +// This will return nil if called after f is destroyed, otherwise it will return +// a Dirent with a reference taken. func (f *FSContext) WorkingDirectoryVFS2() vfs.VirtualDentry { f.mu.Lock() defer f.mu.Unlock() @@ -165,7 +166,7 @@ func (f *FSContext) WorkingDirectoryVFS2() vfs.VirtualDentry { // SetWorkingDirectory sets the current working directory. // This will take an extra reference on the Dirent. // -// This is not a valid call after destroy. +// This is not a valid call after f is destroyed. func (f *FSContext) SetWorkingDirectory(ctx context.Context, d *fs.Dirent) { if d == nil { panic("FSContext.SetWorkingDirectory called with nil dirent") @@ -187,11 +188,15 @@ func (f *FSContext) SetWorkingDirectory(ctx context.Context, d *fs.Dirent) { // SetWorkingDirectoryVFS2 sets the current working directory. // This will take an extra reference on the VirtualDentry. // -// This is not a valid call after destroy. +// This is not a valid call after f is destroyed. func (f *FSContext) SetWorkingDirectoryVFS2(ctx context.Context, d vfs.VirtualDentry) { f.mu.Lock() defer f.mu.Unlock() + if !f.cwdVFS2.Ok() { + panic(fmt.Sprintf("FSContext.SetWorkingDirectoryVFS2(%v)) called after destroy", d)) + } + old := f.cwdVFS2 f.cwdVFS2 = d d.IncRef() @@ -200,8 +205,8 @@ func (f *FSContext) SetWorkingDirectoryVFS2(ctx context.Context, d vfs.VirtualDe // RootDirectory returns the current filesystem root. // -// This will return nil if called after destroy(), otherwise it will return a -// Dirent with a reference taken. +// This will return nil if called after f is destroyed, otherwise it will return +// a Dirent with a reference taken. func (f *FSContext) RootDirectory() *fs.Dirent { f.mu.Lock() defer f.mu.Unlock() @@ -213,8 +218,8 @@ func (f *FSContext) RootDirectory() *fs.Dirent { // RootDirectoryVFS2 returns the current filesystem root. // -// This will return nil if called after destroy(), otherwise it will return a -// Dirent with a reference taken. +// This will return nil if called after f is destroyed, otherwise it will return +// a Dirent with a reference taken. func (f *FSContext) RootDirectoryVFS2() vfs.VirtualDentry { f.mu.Lock() defer f.mu.Unlock() @@ -226,7 +231,7 @@ func (f *FSContext) RootDirectoryVFS2() vfs.VirtualDentry { // SetRootDirectory sets the root directory. // This will take an extra reference on the Dirent. // -// This is not a valid call after free. +// This is not a valid call after f is destroyed. func (f *FSContext) SetRootDirectory(ctx context.Context, d *fs.Dirent) { if d == nil { panic("FSContext.SetRootDirectory called with nil dirent") @@ -247,7 +252,7 @@ func (f *FSContext) SetRootDirectory(ctx context.Context, d *fs.Dirent) { // SetRootDirectoryVFS2 sets the root directory. It takes a reference on vd. // -// This is not a valid call after free. +// This is not a valid call after f is destroyed. func (f *FSContext) SetRootDirectoryVFS2(ctx context.Context, vd vfs.VirtualDentry) { if !vd.Ok() { panic("FSContext.SetRootDirectoryVFS2 called with zero-value VirtualDentry") diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go new file mode 100644 index 000000000..aad63aa99 --- /dev/null +++ b/pkg/sentry/kernel/kcov.go @@ -0,0 +1,321 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import ( + "fmt" + "io" + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/coverage" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// kcovAreaSizeMax is the maximum number of uint64 entries allowed in the kcov +// area. On Linux, the maximum is INT_MAX / 8. +const kcovAreaSizeMax = 10 * 1024 * 1024 + +// Kcov provides kernel coverage data to userspace through a memory-mapped +// region, as kcov does in Linux. +// +// To give the illusion that the data is always up to date, we update the shared +// memory every time before we return to userspace. +type Kcov struct { + // mfp provides application memory. It is immutable after creation. + mfp pgalloc.MemoryFileProvider + + // mu protects all of the fields below. + mu sync.RWMutex + + // mode is the current kcov mode. + mode uint8 + + // size is the size of the mapping through which the kernel conveys coverage + // information to userspace. + size uint64 + + // owningTask is the task that currently owns coverage data on the system. The + // interface for kcov essentially requires that coverage is only going to a + // single task. Note that kcov should only generate coverage data for the + // owning task, but we currently generate global coverage. + owningTask *Task + + // count is a locally cached version of the first uint64 in the kcov data, + // which is the number of subsequent entries representing PCs. + // + // It is used with kcovInode.countBlock(), to copy in/out the first element of + // the actual data in an efficient manner, avoid boilerplate, and prevent + // accidental garbage escapes by the temporary counts. + count uint64 + + mappable *mm.SpecialMappable +} + +// NewKcov creates and returns a Kcov instance. +func (k *Kernel) NewKcov() *Kcov { + return &Kcov{ + mfp: k, + } +} + +var coveragePool = sync.Pool{ + New: func() interface{} { + return make([]byte, 0) + }, +} + +// TaskWork implements TaskWorker.TaskWork. +func (kcov *Kcov) TaskWork(t *Task) { + kcov.mu.Lock() + defer kcov.mu.Unlock() + + rw := &kcovReadWriter{ + mf: kcov.mfp.MemoryFile(), + fr: kcov.mappable.FileRange(), + } + + // Read in the PC count. + if _, err := safemem.ReadFullToBlocks(rw, kcov.countBlock()); err != nil { + panic(fmt.Sprintf("Internal error reading count from kcov area: %v", err)) + } + + rw.off = 8 * (1 + kcov.count) + n := coverage.ConsumeCoverageData(&kcovIOWriter{rw}) + + // Update the pc count, based on the number of entries written. Note that if + // we reached the end of the kcov area, we may not have written everything in + // output. + kcov.count += uint64(n / 8) + rw.off = 0 + if _, err := safemem.WriteFullFromBlocks(rw, kcov.countBlock()); err != nil { + panic(fmt.Sprintf("Internal error writing count to kcov area: %v", err)) + } + + // Re-register for future work. + t.RegisterWork(kcov) +} + +// InitTrace performs the KCOV_INIT_TRACE ioctl. +func (kcov *Kcov) InitTrace(size uint64) error { + kcov.mu.Lock() + defer kcov.mu.Unlock() + + if kcov.mode != linux.KCOV_MODE_DISABLED { + return syserror.EBUSY + } + + // To simplify all the logic around mapping, we require that the length of the + // shared region is a multiple of the system page size. + if (8*size)&(usermem.PageSize-1) != 0 { + return syserror.EINVAL + } + + // We need space for at least two uint64s to hold current position and a + // single PC. + if size < 2 || size > kcovAreaSizeMax { + return syserror.EINVAL + } + + kcov.size = size + kcov.mode = linux.KCOV_MODE_INIT + return nil +} + +// EnableTrace performs the KCOV_ENABLE_TRACE ioctl. +func (kcov *Kcov) EnableTrace(ctx context.Context, traceMode uint8) error { + t := TaskFromContext(ctx) + if t == nil { + panic("kcovInode.EnableTrace() cannot be used outside of a task goroutine") + } + + kcov.mu.Lock() + defer kcov.mu.Unlock() + + // KCOV_ENABLE must be preceded by KCOV_INIT_TRACE and an mmap call. + if kcov.mode != linux.KCOV_MODE_INIT || kcov.mappable == nil { + return syserror.EINVAL + } + + switch traceMode { + case linux.KCOV_TRACE_PC: + kcov.mode = traceMode + case linux.KCOV_TRACE_CMP: + // We do not support KCOV_MODE_TRACE_CMP. + return syserror.ENOTSUP + default: + return syserror.EINVAL + } + + if kcov.owningTask != nil && kcov.owningTask != t { + return syserror.EBUSY + } + + kcov.owningTask = t + t.RegisterWork(kcov) + + // Clear existing coverage data; the task expects to read only coverage data + // from the time it is activated. + coverage.ClearCoverageData() + return nil +} + +// DisableTrace performs the KCOV_DISABLE_TRACE ioctl. +func (kcov *Kcov) DisableTrace(ctx context.Context) error { + kcov.mu.Lock() + defer kcov.mu.Unlock() + + t := TaskFromContext(ctx) + if t == nil { + panic("kcovInode.EnableTrace() cannot be used outside of a task goroutine") + } + + if t != kcov.owningTask { + return syserror.EINVAL + } + kcov.owningTask = nil + kcov.mode = linux.KCOV_MODE_INIT + kcov.resetLocked() + return nil +} + +// Reset is called when the owning task exits. +func (kcov *Kcov) Reset() { + kcov.mu.Lock() + kcov.resetLocked() + kcov.mu.Unlock() +} + +// The kcov instance is reset when the owning task exits or when tracing is +// disabled. +func (kcov *Kcov) resetLocked() { + kcov.owningTask = nil + if kcov.mappable != nil { + kcov.mappable = nil + } +} + +// ConfigureMMap is called by the vfs.FileDescription for this kcov instance to +// implement vfs.FileDescription.ConfigureMMap. +func (kcov *Kcov) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + kcov.mu.Lock() + defer kcov.mu.Unlock() + + if kcov.mode != linux.KCOV_MODE_INIT { + return syserror.EINVAL + } + + if kcov.mappable == nil { + // Set up the kcov area. + fr, err := kcov.mfp.MemoryFile().Allocate(kcov.size*8, usage.Anonymous) + if err != nil { + return err + } + + // Get the thread id for the mmap name. + t := TaskFromContext(ctx) + if t == nil { + panic("ThreadFromContext returned nil") + } + // For convenience, a special mappable is used here. Note that these mappings + // will look different under /proc/[pid]/maps than they do on Linux. + kcov.mappable = mm.NewSpecialMappable(fmt.Sprintf("[kcov:%d]", t.ThreadID()), kcov.mfp, fr) + } + opts.Mappable = kcov.mappable + opts.MappingIdentity = kcov.mappable + return nil +} + +// kcovReadWriter implements safemem.Reader and safemem.Writer. +type kcovReadWriter struct { + off uint64 + mf *pgalloc.MemoryFile + fr memmap.FileRange +} + +// ReadToBlocks implements safemem.Reader.ReadToBlocks. +func (rw *kcovReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { + if dsts.IsEmpty() { + return 0, nil + } + + // Limit the read to the kcov range and check for overflow. + if rw.fr.Length() <= rw.off { + return 0, io.EOF + } + start := rw.fr.Start + rw.off + end := rw.fr.Start + rw.fr.Length() + if rend := start + dsts.NumBytes(); rend < end { + end = rend + } + + // Get internal mappings. + bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Read) + if err != nil { + return 0, err + } + + // Copy from internal mappings. + n, err := safemem.CopySeq(dsts, bs) + rw.off += n + return n, err +} + +// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. +func (rw *kcovReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { + if srcs.IsEmpty() { + return 0, nil + } + + // Limit the write to the kcov area and check for overflow. + if rw.fr.Length() <= rw.off { + return 0, io.EOF + } + start := rw.fr.Start + rw.off + end := rw.fr.Start + rw.fr.Length() + if wend := start + srcs.NumBytes(); wend < end { + end = wend + } + + // Get internal mapping. + bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Write) + if err != nil { + return 0, err + } + + // Copy to internal mapping. + n, err := safemem.CopySeq(bs, srcs) + rw.off += n + return n, err +} + +// kcovIOWriter implements io.Writer as a basic wrapper over kcovReadWriter. +type kcovIOWriter struct { + rw *kcovReadWriter +} + +// Write implements io.Writer.Write. +func (w *kcovIOWriter) Write(p []byte) (int, error) { + bs := safemem.BlockSeqOf(safemem.BlockFromSafeSlice(p)) + n, err := safemem.WriteFullFromBlocks(w.rw, bs) + return int(n), err +} diff --git a/tools/nogo/data/data.go b/pkg/sentry/kernel/kcov_unsafe.go index eb84d0d27..6f64022eb 100644 --- a/tools/nogo/data/data.go +++ b/pkg/sentry/kernel/kcov_unsafe.go @@ -1,4 +1,4 @@ -// Copyright 2019 The gVisor Authors. +// Copyright 2020 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,10 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package data contains shared data for nogo analysis. -// -// This is used to break a dependency cycle. -package data +package kernel + +import ( + "unsafe" -// Objdump is the dumped binary under analysis. -var Objdump string + "gvisor.dev/gvisor/pkg/safemem" +) + +// countBlock provides a safemem.BlockSeq for k.count. +// +// Like k.count, the block returned is protected by k.mu. +func (k *Kcov) countBlock() safemem.BlockSeq { + return safemem.BlockSeqOf(safemem.BlockFromSafePointer(unsafe.Pointer(&k.count), int(unsafe.Sizeof(k.count)))) +} diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 2e0175e36..402aa1718 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -248,7 +248,7 @@ type Kernel struct { // SpecialOpts contains special kernel options. SpecialOpts - // VFS keeps the filesystem state used across the kernel. + // vfs keeps the filesystem state used across the kernel. vfs vfs.VirtualFilesystem // hostMount is the Mount used for file descriptors that were imported diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go index 5c4c622c2..df5c8421b 100644 --- a/pkg/sentry/kernel/sessions.go +++ b/pkg/sentry/kernel/sessions.go @@ -16,8 +16,6 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/syserror" ) @@ -32,7 +30,7 @@ type ProcessGroupID ThreadID // // +stateify savable type Session struct { - refs refs.AtomicRefCount + SessionRefs // leader is the originator of the Session. // @@ -62,16 +60,11 @@ type Session struct { sessionEntry } -// incRef grabs a reference. -func (s *Session) incRef() { - s.refs.IncRef() -} - -// decRef drops a reference. +// DecRef drops a reference. // // Precondition: callers must hold TaskSet.mu for writing. -func (s *Session) decRef() { - s.refs.DecRefWithDestructor(nil, func(context.Context) { +func (s *Session) DecRef() { + s.SessionRefs.DecRef(func() { // Remove translations from the leader. for ns := s.leader.pidns; ns != nil; ns = ns.parent { id := ns.sids[s] @@ -88,7 +81,7 @@ func (s *Session) decRef() { // // +stateify savable type ProcessGroup struct { - refs refs.AtomicRefCount // not exported. + refs ProcessGroupRefs // originator is the originator of the group. // @@ -163,7 +156,7 @@ func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) { } alive := true - pg.refs.DecRefWithDestructor(nil, func(context.Context) { + pg.refs.DecRef(func() { alive = false // don't bother with handleOrphan. // Remove translations from the originator. @@ -175,7 +168,7 @@ func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) { // Remove the list of process groups. pg.session.processGroups.Remove(pg) - pg.session.decRef() + pg.session.DecRef() }) if alive { pg.handleOrphan() @@ -302,7 +295,7 @@ func (tg *ThreadGroup) createSession() error { id: SessionID(id), leader: tg, } - s.refs.EnableLeakCheck("kernel.Session") + s.EnableLeakCheck() // Create a new ProcessGroup, belonging to that Session. // This also has a single reference (assigned below). @@ -316,7 +309,7 @@ func (tg *ThreadGroup) createSession() error { session: s, ancestors: 0, } - pg.refs.EnableLeakCheck("kernel.ProcessGroup") + pg.refs.EnableLeakCheck() // Tie them and return the result. s.processGroups.PushBack(pg) @@ -396,13 +389,13 @@ func (tg *ThreadGroup) CreateProcessGroup() error { // // We manually adjust the ancestors if the parent is in the same // session. - tg.processGroup.session.incRef() + tg.processGroup.session.IncRef() pg := ProcessGroup{ id: ProcessGroupID(id), originator: tg, session: tg.processGroup.session, } - pg.refs.EnableLeakCheck("kernel.ProcessGroup") + pg.refs.EnableLeakCheck() if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session { pg.ancestors++ diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index c211fc8d0..b7e4b480d 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -1,12 +1,25 @@ load("//tools:defs.bzl", "go_library") +load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) +go_template_instance( + name = "shm_refs", + out = "shm_refs.go", + package = "shm", + prefix = "Shm", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "Shm", + }, +) + go_library( name = "shm", srcs = [ "device.go", "shm.go", + "shm_refs.go", ], visibility = ["//pkg/sentry:internal"], deps = [ diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index 13ec7afe0..00c03585e 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -39,7 +39,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -252,7 +251,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi creatorPID: pid, changeTime: ktime.NowFromContext(ctx), } - shm.EnableLeakCheck("kernel.Shm") + shm.EnableLeakCheck() // Find the next available ID. for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { @@ -337,14 +336,14 @@ func (r *Registry) remove(s *Shm) { // // +stateify savable type Shm struct { - // AtomicRefCount tracks the number of references to this segment. + // ShmRefs tracks the number of references to this segment. // // A segment holds a reference to itself until it is marked for // destruction. // // In addition to direct users, the MemoryManager will hold references // via MappingIdentity. - refs.AtomicRefCount + ShmRefs mfp pgalloc.MemoryFileProvider @@ -428,11 +427,14 @@ func (s *Shm) InodeID() uint64 { return uint64(s.ID) } -// DecRef overrides refs.RefCount.DecRef with a destructor. +// DecRef drops a reference on s. // // Precondition: Caller must not hold s.mu. func (s *Shm) DecRef(ctx context.Context) { - s.DecRefWithDestructor(ctx, s.destroy) + s.ShmRefs.DecRef(func() { + s.mfp.MemoryFile().DecRef(s.fr) + s.registry.remove(s) + }) } // Msync implements memmap.MappingIdentity.Msync. Msync is a no-op for shm @@ -642,11 +644,6 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error { return nil } -func (s *Shm) destroy(context.Context) { - s.mfp.MemoryFile().DecRef(s.fr) - s.registry.remove(s) -} - // MarkDestroyed marks a segment for destruction. The segment is actually // destroyed once it has no references. MarkDestroyed may be called multiple // times, and is safe to call after a segment has already been destroyed. See diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 5aee699e7..a436610c9 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -574,6 +574,11 @@ type Task struct { // // startTime is protected by mu. startTime ktime.Time + + // kcov is the kcov instance providing code coverage owned by this task. + // + // kcov is exclusive to the task goroutine. + kcov *Kcov } func (t *Task) savePtraceTracer() *Task { @@ -903,3 +908,16 @@ func (t *Task) UID() uint32 { func (t *Task) GID() uint32 { return uint32(t.Credentials().EffectiveKGID) } + +// SetKcov sets the kcov instance associated with t. +func (t *Task) SetKcov(k *Kcov) { + t.kcov = k +} + +// ResetKcov clears the kcov instance associated with t. +func (t *Task) ResetKcov() { + if t.kcov != nil { + t.kcov.Reset() + t.kcov = nil + } +} diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index c165d6cb1..b76f7f503 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -239,6 +239,8 @@ func (*runExitMain) execute(t *Task) taskRunState { t.traceExitEvent() lastExiter := t.exitThreadGroup() + t.ResetKcov() + // If the task has a cleartid, and the thread group wasn't killed by a // signal, handle that before releasing the MM. if t.cleartid != 0 { diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index f9d0837a1..b4a47ccca 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -73,12 +73,35 @@ go_template_instance( }, ) +go_template_instance( + name = "aio_mappable_refs", + out = "aio_mappable_refs.go", + package = "mm", + prefix = "aioMappable", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "aioMappable", + }, +) + +go_template_instance( + name = "special_mappable_refs", + out = "special_mappable_refs.go", + package = "mm", + prefix = "SpecialMappable", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "SpecialMappable", + }, +) + go_library( name = "mm", srcs = [ "address_space.go", "aio_context.go", "aio_context_state.go", + "aio_mappable_refs.go", "debug.go", "file_refcount_set.go", "io.go", @@ -92,6 +115,7 @@ go_library( "save_restore.go", "shm.go", "special_mappable.go", + "special_mappable_refs.go", "syscalls.go", "vma.go", "vma_set.go", diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 16fea53c4..7bf48cb2c 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -17,7 +17,6 @@ package mm import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" @@ -239,7 +238,7 @@ func (ctx *AIOContext) Drain() { // // +stateify savable type aioMappable struct { - refs.AtomicRefCount + aioMappableRefs mfp pgalloc.MemoryFileProvider fr memmap.FileRange @@ -253,13 +252,13 @@ func newAIOMappable(mfp pgalloc.MemoryFileProvider) (*aioMappable, error) { return nil, err } m := aioMappable{mfp: mfp, fr: fr} - m.EnableLeakCheck("mm.aioMappable") + m.EnableLeakCheck() return &m, nil } // DecRef implements refs.RefCounter.DecRef. func (m *aioMappable) DecRef(ctx context.Context) { - m.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) { + m.aioMappableRefs.DecRef(func() { m.mfp.MemoryFile().DecRef(m.fr) }) } diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index 4cdb52eb6..f4c93baeb 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -16,7 +16,6 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" @@ -31,7 +30,7 @@ import ( // // +stateify savable type SpecialMappable struct { - refs.AtomicRefCount + SpecialMappableRefs mfp pgalloc.MemoryFileProvider fr memmap.FileRange @@ -45,13 +44,13 @@ type SpecialMappable struct { // Preconditions: fr.Length() != 0. func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *SpecialMappable { m := SpecialMappable{mfp: mfp, fr: fr, name: name} - m.EnableLeakCheck("mm.SpecialMappable") + m.EnableLeakCheck() return &m } // DecRef implements refs.RefCounter.DecRef. func (m *SpecialMappable) DecRef(ctx context.Context) { - m.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) { + m.SpecialMappableRefs.DecRef(func() { m.mfp.MemoryFile().DecRef(m.fr) }) } diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go index e34f46aeb..a182e4f22 100644 --- a/pkg/sentry/platform/kvm/bluepill_fault.go +++ b/pkg/sentry/platform/kvm/bluepill_fault.go @@ -98,6 +98,10 @@ func handleBluepillFault(m *machine, physical uintptr, phyRegions []physicalRegi } errno := m.setMemoryRegion(int(slot), physicalStart, length, virtualStart, flags) if errno == 0 { + // Store the physical address in the slot. This is used to + // avoid calls to handleBluepillFault in the future (see + // machine.mapPhysical). + atomic.StoreUintptr(&m.usedSlots[slot], physical) // Successfully added region; we can increment nextSlot and // allow another set to proceed here. atomic.StoreUint32(&m.nextSlot, slot+1) diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go index bf357de1a..979be5d89 100644 --- a/pkg/sentry/platform/kvm/bluepill_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go @@ -13,7 +13,7 @@ // limitations under the License. // +build go1.12 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go index 3bf918446..5c4b18899 100644 --- a/pkg/sentry/platform/kvm/kvm_const.go +++ b/pkg/sentry/platform/kvm/kvm_const.go @@ -56,6 +56,7 @@ const ( // KVM capability options. const ( + _KVM_CAP_MAX_MEMSLOTS = 0x0a _KVM_CAP_MAX_VCPUS = 0x42 _KVM_CAP_ARM_VM_IPA_SIZE = 0xa5 _KVM_CAP_VCPU_EVENTS = 0x29 @@ -64,6 +65,7 @@ const ( // KVM limits. const ( + _KVM_NR_MEMSLOTS = 0x100 _KVM_NR_VCPUS = 0xff _KVM_NR_INTERRUPTS = 0x100 _KVM_NR_CPUID_ENTRIES = 0x100 diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 6c54712d1..372a4cbd7 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -43,9 +43,6 @@ type machine struct { // kernel is the set of global structures. kernel ring0.Kernel - // mappingCache is used for mapPhysical. - mappingCache sync.Map - // mu protects vCPUs. mu sync.RWMutex @@ -63,6 +60,12 @@ type machine struct { // maxVCPUs is the maximum number of vCPUs supported by the machine. maxVCPUs int + // maxSlots is the maximum number of memory slots supported by the machine. + maxSlots int + + // usedSlots is the set of used physical addresses (sorted). + usedSlots []uintptr + // nextID is the next vCPU ID. nextID uint32 } @@ -184,6 +187,7 @@ func newMachine(vm int) (*machine, error) { PageTables: pagetables.New(newAllocator()), }) + // Pull the maximum vCPUs. maxVCPUs, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(m.fd), _KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS) if errno != 0 { m.maxVCPUs = _KVM_NR_VCPUS @@ -191,11 +195,19 @@ func newMachine(vm int) (*machine, error) { m.maxVCPUs = int(maxVCPUs) } log.Debugf("The maximum number of vCPUs is %d.", m.maxVCPUs) - - // Create the vCPUs map/slices. m.vCPUsByTID = make(map[uint64]*vCPU) m.vCPUsByID = make([]*vCPU, m.maxVCPUs) + // Pull the maximum slots. + maxSlots, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(m.fd), _KVM_CHECK_EXTENSION, _KVM_CAP_MAX_MEMSLOTS) + if errno != 0 { + m.maxSlots = _KVM_NR_MEMSLOTS + } else { + m.maxSlots = int(maxSlots) + } + log.Debugf("The maximum number of slots is %d.", m.maxSlots) + m.usedSlots = make([]uintptr, m.maxSlots) + // Apply the physical mappings. Note that these mappings may point to // guest physical addresses that are not actually available. These // physical pages are mapped on demand, see kernel_unsafe.go. @@ -272,6 +284,20 @@ func newMachine(vm int) (*machine, error) { return m, nil } +// hasSlot returns true iff the given address is mapped. +// +// This must be done via a linear scan. +// +//go:nosplit +func (m *machine) hasSlot(physical uintptr) bool { + for i := 0; i < len(m.usedSlots); i++ { + if p := atomic.LoadUintptr(&m.usedSlots[i]); p == physical { + return true + } + } + return false +} + // mapPhysical checks for the mapping of a physical range, and installs one if // not available. This attempts to be efficient for calls in the hot path. // @@ -286,8 +312,8 @@ func (m *machine) mapPhysical(physical, length uintptr, phyRegions []physicalReg panic("mapPhysical on unknown physical address") } - if _, ok := m.mappingCache.LoadOrStore(physicalStart, true); !ok { - // Not present in the cache; requires setting the slot. + // Is this already mapped? Check the usedSlots. + if !m.hasSlot(physicalStart) { if _, ok := handleBluepillFault(m, physical, phyRegions, flags); !ok { panic("handleBluepillFault failed") } diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go index 9f86f6a7a..607c82156 100644 --- a/pkg/sentry/platform/kvm/machine_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_unsafe.go @@ -13,7 +13,7 @@ // limitations under the License. // +build go1.12 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sentry/platform/ptrace/subprocess_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_unsafe.go index 0bee995e4..7ee20d89a 100644 --- a/pkg/sentry/platform/ptrace/subprocess_unsafe.go +++ b/pkg/sentry/platform/ptrace/subprocess_unsafe.go @@ -13,7 +13,7 @@ // limitations under the License. // +build go1.12 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 4d0e33696..9e2ebc7d4 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -479,8 +479,35 @@ func (s *socketOpsCommon) fetchReadView() *syserr.Error { } // Release implements fs.FileOperations.Release. -func (s *socketOpsCommon) Release(context.Context) { +func (s *socketOpsCommon) Release(ctx context.Context) { + e, ch := waiter.NewChannelEntry(nil) + s.EventRegister(&e, waiter.EventHUp|waiter.EventErr) + defer s.EventUnregister(&e) + s.Endpoint.Close() + + // SO_LINGER option is valid only for TCP. For other socket types + // return after endpoint close. + if family, skType, _ := s.Type(); skType != linux.SOCK_STREAM || (family != linux.AF_INET && family != linux.AF_INET6) { + return + } + + var v tcpip.LingerOption + if err := s.Endpoint.GetSockOpt(&v); err != nil { + return + } + + // The case for zero timeout is handled in tcp endpoint close function. + // Close is blocked until either: + // 1. The endpoint state is not in any of the states: FIN-WAIT1, + // CLOSING and LAST_ACK. + // 2. Timeout is reached. + if v.Enabled && v.Timeout != 0 { + t := kernel.TaskFromContext(ctx) + start := t.Kernel().MonotonicClock().Now() + deadline := start.Add(v.Timeout) + t.BlockWithDeadline(ch, true, deadline) + } } // Read implements fs.FileOperations.Read. @@ -956,53 +983,12 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us return &val, nil } - if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP { - switch name { - case linux.IPT_SO_GET_INFO: - if outLen < linux.SizeOfIPTGetinfo { - return nil, syserr.ErrInvalidArgument - } - if s.family != linux.AF_INET { - return nil, syserr.ErrInvalidArgument - } - - stack := inet.StackFromContext(t) - if stack == nil { - return nil, syserr.ErrNoDevice - } - info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr) - if err != nil { - return nil, err - } - return &info, nil - - case linux.IPT_SO_GET_ENTRIES: - if outLen < linux.SizeOfIPTGetEntries { - return nil, syserr.ErrInvalidArgument - } - if s.family != linux.AF_INET { - return nil, syserr.ErrInvalidArgument - } - - stack := inet.StackFromContext(t) - if stack == nil { - return nil, syserr.ErrNoDevice - } - entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen) - if err != nil { - return nil, err - } - return &entries, nil - - } - } - - return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outLen) + return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outPtr, outLen) } // GetSockOpt can be used to implement the linux syscall getsockopt(2) for // sockets backed by a commonEndpoint. -func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name, outLen int) (marshal.Marshallable, *syserr.Error) { +func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { switch level { case linux.SOL_SOCKET: return getSockOptSocket(t, s, ep, family, skType, name, outLen) @@ -1014,7 +1000,7 @@ func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family in return getSockOptIPv6(t, ep, name, outLen) case linux.SOL_IP: - return getSockOptIP(t, ep, name, outLen, family) + return getSockOptIP(t, s, ep, name, outPtr, outLen, family) case linux.SOL_UDP, linux.SOL_ICMPV6, @@ -1195,7 +1181,16 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam return nil, syserr.ErrInvalidArgument } - linger := linux.Linger{} + var v tcpip.LingerOption + var linger linux.Linger + if err := ep.GetSockOpt(&v); err != nil { + return &linger, nil + } + + if v.Enabled { + linger.OnOff = 1 + } + linger.Linger = int32(v.Timeout.Seconds()) return &linger, nil case linux.SO_SNDTIMEO: @@ -1409,8 +1404,12 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal if err := ep.GetSockOpt(&v); err != nil { return nil, syserr.TranslateNetstackError(err) } - - lingerTimeout := primitive.Int32(time.Duration(v) / time.Second) + var lingerTimeout primitive.Int32 + if v >= 0 { + lingerTimeout = primitive.Int32(time.Duration(v) / time.Second) + } else { + lingerTimeout = -1 + } return &lingerTimeout, nil case linux.TCP_DEFER_ACCEPT: @@ -1520,7 +1519,7 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marsha } // getSockOptIP implements GetSockOpt when level is SOL_IP. -func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family int) (marshal.Marshallable, *syserr.Error) { +func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int, family int) (marshal.Marshallable, *syserr.Error) { switch name { case linux.IP_TTL: if outLen < sizeOfInt32 { @@ -1636,6 +1635,46 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in a, _ := ConvertAddress(linux.AF_INET, tcpip.FullAddress(v)) return a.(*linux.SockAddrInet), nil + case linux.IPT_SO_GET_INFO: + if outLen < linux.SizeOfIPTGetinfo { + return nil, syserr.ErrInvalidArgument + } + + // Only valid for raw IPv4 sockets. + if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW { + return nil, syserr.ErrProtocolNotAvailable + } + + stack := inet.StackFromContext(t) + if stack == nil { + return nil, syserr.ErrNoDevice + } + info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr) + if err != nil { + return nil, err + } + return &info, nil + + case linux.IPT_SO_GET_ENTRIES: + if outLen < linux.SizeOfIPTGetEntries { + return nil, syserr.ErrInvalidArgument + } + + // Only valid for raw IPv4 sockets. + if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW { + return nil, syserr.ErrProtocolNotAvailable + } + + stack := inet.StackFromContext(t) + if stack == nil { + return nil, syserr.ErrNoDevice + } + entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen) + if err != nil { + return nil, err + } + return &entries, nil + default: emitUnimplementedEventIP(t, name) } @@ -1669,29 +1708,6 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa return nil } - if s.skType == linux.SOCK_RAW && level == linux.SOL_IP { - switch name { - case linux.IPT_SO_SET_REPLACE: - if len(optVal) < linux.SizeOfIPTReplace { - return syserr.ErrInvalidArgument - } - if s.family != linux.AF_INET { - return syserr.ErrInvalidArgument - } - - stack := inet.StackFromContext(t) - if stack == nil { - return syserr.ErrNoDevice - } - // Stack must be a netstack stack. - return netfilter.SetEntries(stack.(*Stack).Stack, optVal) - - case linux.IPT_SO_SET_ADD_COUNTERS: - // TODO(gvisor.dev/issue/170): Counter support. - return nil - } - } - return SetSockOpt(t, s, s.Endpoint, level, name, optVal) } @@ -1709,7 +1725,7 @@ func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int return setSockOptIPv6(t, ep, name, optVal) case linux.SOL_IP: - return setSockOptIP(t, ep, name, optVal) + return setSockOptIP(t, s, ep, name, optVal) case linux.SOL_UDP, linux.SOL_ICMPV6, @@ -1861,7 +1877,10 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam socket.SetSockOptEmitUnimplementedEvent(t, name) } - return nil + return syserr.TranslateNetstackError( + ep.SetSockOpt(tcpip.LingerOption{ + Enabled: v.OnOff != 0, + Timeout: time.Second * time.Duration(v.Linger)})) case linux.SO_DETACH_FILTER: // optval is ignored. @@ -1967,7 +1986,7 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := int32(usermem.ByteOrder.Uint32(optVal)) return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v)))) case linux.TCP_DEFER_ACCEPT: @@ -2117,7 +2136,7 @@ func parseIntOrChar(buf []byte) (int32, *syserr.Error) { } // setSockOptIP implements SetSockOpt when level is SOL_IP. -func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error { +func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error { switch name { case linux.IP_MULTICAST_TTL: v, err := parseIntOrChar(optVal) @@ -2237,6 +2256,27 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s } return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.IPHdrIncludedOption, v != 0)) + case linux.IPT_SO_SET_REPLACE: + if len(optVal) < linux.SizeOfIPTReplace { + return syserr.ErrInvalidArgument + } + + // Only valid for raw IPv4 sockets. + if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW { + return syserr.ErrProtocolNotAvailable + } + + stack := inet.StackFromContext(t) + if stack == nil { + return syserr.ErrNoDevice + } + // Stack must be a netstack stack. + return netfilter.SetEntries(stack.(*Stack).Stack, optVal) + + case linux.IPT_SO_SET_ADD_COUNTERS: + // TODO(gvisor.dev/issue/170): Counter support. + return nil + case linux.IP_ADD_SOURCE_MEMBERSHIP, linux.IP_BIND_ADDRESS_NO_PORT, linux.IP_BLOCK_SOURCE, diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index 1db8ae491..59fa4c58f 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -21,10 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" - "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" @@ -233,48 +231,7 @@ func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem. return &val, nil } - if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP { - switch name { - case linux.IPT_SO_GET_INFO: - if outLen < linux.SizeOfIPTGetinfo { - return nil, syserr.ErrInvalidArgument - } - if s.family != linux.AF_INET { - return nil, syserr.ErrInvalidArgument - } - - stack := inet.StackFromContext(t) - if stack == nil { - return nil, syserr.ErrNoDevice - } - info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr) - if err != nil { - return nil, err - } - return &info, nil - - case linux.IPT_SO_GET_ENTRIES: - if outLen < linux.SizeOfIPTGetEntries { - return nil, syserr.ErrInvalidArgument - } - if s.family != linux.AF_INET { - return nil, syserr.ErrInvalidArgument - } - - stack := inet.StackFromContext(t) - if stack == nil { - return nil, syserr.ErrNoDevice - } - entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen) - if err != nil { - return nil, err - } - return &entries, nil - - } - } - - return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outLen) + return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outPtr, outLen) } // SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by @@ -304,29 +261,6 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by return nil } - if s.skType == linux.SOCK_RAW && level == linux.SOL_IP { - switch name { - case linux.IPT_SO_SET_REPLACE: - if len(optVal) < linux.SizeOfIPTReplace { - return syserr.ErrInvalidArgument - } - if s.family != linux.AF_INET { - return syserr.ErrInvalidArgument - } - - stack := inet.StackFromContext(t) - if stack == nil { - return syserr.ErrNoDevice - } - // Stack must be a netstack stack. - return netfilter.SetEntries(stack.(*Stack).Stack, optVal) - - case linux.IPT_SO_SET_ADD_COUNTERS: - // TODO(gvisor.dev/issue/170): Counter support. - return nil - } - } - return SetSockOpt(t, s, s.Endpoint, level, name, optVal) } diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD index c708b6030..26c3a51b9 100644 --- a/pkg/sentry/socket/unix/transport/BUILD +++ b/pkg/sentry/socket/unix/transport/BUILD @@ -15,6 +15,17 @@ go_template_instance( }, ) +go_template_instance( + name = "queue_refs", + out = "queue_refs.go", + package = "transport", + prefix = "queue", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "queue", + }, +) + go_library( name = "transport", srcs = [ @@ -22,6 +33,7 @@ go_library( "connectioned_state.go", "connectionless.go", "queue.go", + "queue_refs.go", "transport_message_list.go", "unix.go", ], diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go index c67b602f0..e3a75b519 100644 --- a/pkg/sentry/socket/unix/transport/connectioned.go +++ b/pkg/sentry/socket/unix/transport/connectioned.go @@ -142,9 +142,9 @@ func NewPair(ctx context.Context, stype linux.SockType, uid UniqueIDProvider) (E } q1 := &queue{ReaderQueue: a.Queue, WriterQueue: b.Queue, limit: initialLimit} - q1.EnableLeakCheck("transport.queue") + q1.EnableLeakCheck() q2 := &queue{ReaderQueue: b.Queue, WriterQueue: a.Queue, limit: initialLimit} - q2.EnableLeakCheck("transport.queue") + q2.EnableLeakCheck() if stype == linux.SOCK_STREAM { a.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{q1}} @@ -300,14 +300,14 @@ func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce Conn } readQueue := &queue{ReaderQueue: ce.WaiterQueue(), WriterQueue: ne.Queue, limit: initialLimit} - readQueue.EnableLeakCheck("transport.queue") + readQueue.EnableLeakCheck() ne.connected = &connectedEndpoint{ endpoint: ce, writeQueue: readQueue, } writeQueue := &queue{ReaderQueue: ne.Queue, WriterQueue: ce.WaiterQueue(), limit: initialLimit} - writeQueue.EnableLeakCheck("transport.queue") + writeQueue.EnableLeakCheck() if e.stype == linux.SOCK_STREAM { ne.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{readQueue: writeQueue}} } else { diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go index 70ee8f9b8..4751b2fd8 100644 --- a/pkg/sentry/socket/unix/transport/connectionless.go +++ b/pkg/sentry/socket/unix/transport/connectionless.go @@ -42,7 +42,7 @@ var ( func NewConnectionless(ctx context.Context) Endpoint { ep := &connectionlessEndpoint{baseEndpoint{Queue: &waiter.Queue{}}} q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: initialLimit} - q.EnableLeakCheck("transport.queue") + q.EnableLeakCheck() ep.receiver = &queueReceiver{readQueue: &q} return ep } diff --git a/pkg/sentry/socket/unix/transport/queue.go b/pkg/sentry/socket/unix/transport/queue.go index ef6043e19..342def28f 100644 --- a/pkg/sentry/socket/unix/transport/queue.go +++ b/pkg/sentry/socket/unix/transport/queue.go @@ -16,7 +16,6 @@ package transport import ( "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" @@ -28,7 +27,7 @@ import ( // // +stateify savable type queue struct { - refs.AtomicRefCount + queueRefs ReaderQueue *waiter.Queue WriterQueue *waiter.Queue @@ -68,11 +67,13 @@ func (q *queue) Reset(ctx context.Context) { q.mu.Unlock() } -// DecRef implements RefCounter.DecRef with destructor q.Reset. +// DecRef implements RefCounter.DecRef. func (q *queue) DecRef(ctx context.Context) { - q.DecRefWithDestructor(ctx, q.Reset) - // We don't need to notify after resetting because no one cares about - // this queue after all references have been dropped. + q.queueRefs.DecRef(func() { + // We don't need to notify after resetting because no one cares about + // this queue after all references have been dropped. + q.Reset(ctx) + }) } // IsReadable determines if q is currently readable. diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go index 475d7177e..ab7bab5cd 100644 --- a/pkg/sentry/socket/unix/transport/unix.go +++ b/pkg/sentry/socket/unix/transport/unix.go @@ -942,7 +942,7 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { // GetSockOpt implements tcpip.Endpoint.GetSockOpt. func (e *baseEndpoint) GetSockOpt(opt interface{}) *tcpip.Error { switch opt.(type) { - case tcpip.ErrorOption: + case tcpip.ErrorOption, *tcpip.LingerOption: return nil default: diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index b7e8e4325..0a7a26495 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -194,7 +194,7 @@ func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by // a transport.Endpoint. func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { - return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outLen) + return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen) } // Listen implements the linux syscall listen(2) for sockets backed by diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index d066ef8ab..65a285b8f 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -91,7 +91,7 @@ func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint3 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by // a transport.Endpoint. func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { - return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outLen) + return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen) } // blockingAccept implements a blocking version of accept(2), that is, if no diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 80c65164a..da6bd85e1 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -305,9 +305,9 @@ var AMD64 = &kernel.SyscallTable{ 250: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil), 251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) 252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) - 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil), - 254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil), - 255: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil), + 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil), + 254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil), + 255: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil), 256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil), 257: syscalls.Supported("openat", Openat), 258: syscalls.Supported("mkdirat", Mkdirat), @@ -346,7 +346,7 @@ var AMD64 = &kernel.SyscallTable{ 291: syscalls.Supported("epoll_create1", EpollCreate1), 292: syscalls.Supported("dup3", Dup3), 293: syscalls.Supported("pipe2", Pipe2), - 294: syscalls.Supported("inotify_init1", InotifyInit1), + 294: syscalls.PartiallySupported("inotify_init1", InotifyInit1, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil), 295: syscalls.Supported("preadv", Preadv), 296: syscalls.Supported("pwritev", Pwritev), 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), @@ -454,9 +454,9 @@ var ARM64 = &kernel.SyscallTable{ 23: syscalls.Supported("dup", Dup), 24: syscalls.Supported("dup3", Dup3), 25: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil), - 26: syscalls.Supported("inotify_init1", InotifyInit1), - 27: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil), - 28: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil), + 26: syscalls.PartiallySupported("inotify_init1", InotifyInit1, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil), + 27: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil), + 28: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil), 29: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil), 30: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) 31: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 642769e7c..8093ca55c 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -27,6 +27,39 @@ go_template_instance( }, ) +go_template_instance( + name = "file_description_refs", + out = "file_description_refs.go", + package = "vfs", + prefix = "FileDescription", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "FileDescription", + }, +) + +go_template_instance( + name = "mount_namespace_refs", + out = "mount_namespace_refs.go", + package = "vfs", + prefix = "MountNamespace", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "MountNamespace", + }, +) + +go_template_instance( + name = "filesystem_refs", + out = "filesystem_refs.go", + package = "vfs", + prefix = "Filesystem", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "Filesystem", + }, +) + go_library( name = "vfs", srcs = [ @@ -40,12 +73,15 @@ go_library( "event_list.go", "file_description.go", "file_description_impl_util.go", + "file_description_refs.go", "filesystem.go", "filesystem_impl_util.go", + "filesystem_refs.go", "filesystem_type.go", "inotify.go", "lock.go", "mount.go", + "mount_namespace_refs.go", "mount_unsafe.go", "options.go", "pathname.go", @@ -63,6 +99,7 @@ go_library( "//pkg/fspath", "//pkg/gohacks", "//pkg/log", + "//pkg/refs", "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/fs", diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md index 4b9faf2ea..5aad31b78 100644 --- a/pkg/sentry/vfs/README.md +++ b/pkg/sentry/vfs/README.md @@ -184,12 +184,3 @@ This construction, which is essentially a type-safe analogue to Linux's - File locking - `O_ASYNC` - -- Reference counts in the `vfs` package do not use the `refs` package since - `refs.AtomicRefCount` adds 64 bytes of overhead to each 8-byte reference - count, resulting in considerable cache bloat. 24 bytes of this overhead is - for weak reference support, which have poor performance and will not be used - by VFS2. The remaining 40 bytes is to store a descriptive string and stack - trace for reference leak checking; we can support reference leak checking - without incurring this space overhead by including the applicable - information directly in finalizers for applicable types. diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 33910e095..22a54fa48 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -38,9 +38,7 @@ import ( // // FileDescription is analogous to Linux's struct file. type FileDescription struct { - // refs is the reference count. refs is accessed using atomic memory - // operations. - refs int64 + FileDescriptionRefs // flagsMu protects statusFlags and asyncHandler below. flagsMu sync.Mutex @@ -131,7 +129,7 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou } } - fd.refs = 1 + fd.EnableLeakCheck() // Remove "file creation flags" to mirror the behavior from file.f_flags in // fs/open.c:do_dentry_open. @@ -149,30 +147,9 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou return nil } -// IncRef increments fd's reference count. -func (fd *FileDescription) IncRef() { - atomic.AddInt64(&fd.refs, 1) -} - -// TryIncRef increments fd's reference count and returns true. If fd's -// reference count is already zero, TryIncRef does nothing and returns false. -// -// TryIncRef does not require that a reference is held on fd. -func (fd *FileDescription) TryIncRef() bool { - for { - refs := atomic.LoadInt64(&fd.refs) - if refs <= 0 { - return false - } - if atomic.CompareAndSwapInt64(&fd.refs, refs, refs+1) { - return true - } - } -} - // DecRef decrements fd's reference count. func (fd *FileDescription) DecRef(ctx context.Context) { - if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 { + fd.FileDescriptionRefs.DecRef(func() { // Unregister fd from all epoll instances. fd.epollMu.Lock() epolls := fd.epolls @@ -208,15 +185,7 @@ func (fd *FileDescription) DecRef(ctx context.Context) { } fd.asyncHandler = nil fd.flagsMu.Unlock() - } else if refs < 0 { - panic("FileDescription.DecRef() called without holding a reference") - } -} - -// Refs returns the current number of references. The returned count -// is inherently racy and is unsafe to use without external synchronization. -func (fd *FileDescription) Refs() int64 { - return atomic.LoadInt64(&fd.refs) + }) } // Mount returns the mount on which fd was opened. It does not take a reference @@ -851,7 +820,7 @@ func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) FileAsyn // FileReadWriteSeeker is a helper struct to pass a FileDescription as // io.Reader/io.Writer/io.ReadSeeker/etc. type FileReadWriteSeeker struct { - Fd *FileDescription + FD *FileDescription Ctx context.Context ROpts ReadOptions WOpts WriteOptions @@ -860,18 +829,18 @@ type FileReadWriteSeeker struct { // Read implements io.ReadWriteSeeker.Read. func (f *FileReadWriteSeeker) Read(p []byte) (int, error) { dst := usermem.BytesIOSequence(p) - ret, err := f.Fd.Read(f.Ctx, dst, f.ROpts) + ret, err := f.FD.Read(f.Ctx, dst, f.ROpts) return int(ret), err } // Seek implements io.ReadWriteSeeker.Seek. func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) { - return f.Fd.Seek(f.Ctx, offset, int32(whence)) + return f.FD.Seek(f.Ctx, offset, int32(whence)) } // Write implements io.ReadWriteSeeker.Write. func (f *FileReadWriteSeeker) Write(p []byte) (int, error) { buf := usermem.BytesIOSequence(p) - ret, err := f.Fd.Write(f.Ctx, buf, f.WOpts) + ret, err := f.FD.Write(f.Ctx, buf, f.WOpts) return int(ret), err } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index 2c60cfab2..46851f638 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -15,8 +15,6 @@ package vfs import ( - "sync/atomic" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" @@ -34,9 +32,7 @@ import ( // // +stateify savable type Filesystem struct { - // refs is the reference count. refs is accessed using atomic memory - // operations. - refs int64 + FilesystemRefs // vfs is the VirtualFilesystem that uses this Filesystem. vfs is // immutable. @@ -52,7 +48,7 @@ type Filesystem struct { // Init must be called before first use of fs. func (fs *Filesystem) Init(vfsObj *VirtualFilesystem, fsType FilesystemType, impl FilesystemImpl) { - fs.refs = 1 + fs.EnableLeakCheck() fs.vfs = vfsObj fs.fsType = fsType fs.impl = impl @@ -76,39 +72,14 @@ func (fs *Filesystem) Impl() FilesystemImpl { return fs.impl } -// IncRef increments fs' reference count. -func (fs *Filesystem) IncRef() { - if atomic.AddInt64(&fs.refs, 1) <= 1 { - panic("Filesystem.IncRef() called without holding a reference") - } -} - -// TryIncRef increments fs' reference count and returns true. If fs' reference -// count is zero, TryIncRef does nothing and returns false. -// -// TryIncRef does not require that a reference is held on fs. -func (fs *Filesystem) TryIncRef() bool { - for { - refs := atomic.LoadInt64(&fs.refs) - if refs <= 0 { - return false - } - if atomic.CompareAndSwapInt64(&fs.refs, refs, refs+1) { - return true - } - } -} - // DecRef decrements fs' reference count. func (fs *Filesystem) DecRef(ctx context.Context) { - if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 { + fs.FilesystemRefs.DecRef(func() { fs.vfs.filesystemsMu.Lock() delete(fs.vfs.filesystems, fs) fs.vfs.filesystemsMu.Unlock() fs.impl.Release(ctx) - } else if refs < 0 { - panic("Filesystem.decRef() called without holding a reference") - } + }) } // FilesystemImpl contains implementation details for a Filesystem. diff --git a/pkg/sentry/vfs/g3doc/inotify.md b/pkg/sentry/vfs/g3doc/inotify.md index e7da49faa..833db213f 100644 --- a/pkg/sentry/vfs/g3doc/inotify.md +++ b/pkg/sentry/vfs/g3doc/inotify.md @@ -28,9 +28,9 @@ The set of all watches held on a single file (i.e., the watch target) is stored in vfs.Watches. Each watch will belong to a different inotify instance (an instance can only have one watch on any watch target). The watches are stored in a map indexed by their vfs.Inotify owner’s id. Hard links and file descriptions -to a single file will all share the same vfs.Watches. Activity on the target -causes its vfs.Watches to generate notifications on its watches’ inotify -instances. +to a single file will all share the same vfs.Watches (with the exception of the +gofer filesystem, described in a later section). Activity on the target causes +its vfs.Watches to generate notifications on its watches’ inotify instances. ### vfs.Watch @@ -103,12 +103,12 @@ inotify: unopened p9 file (and possibly an open FID), through which the Sentry interacts with the gofer. * *Solution:* Because there is no inode structure stored in the sandbox, - inotify watches must be held on the dentry. This would be an issue in - the presence of hard links, where multiple dentries would need to share - the same set of watches, but in VFS2, we do not support the internal - creation of hard links on gofer fs. As a result, we make the assumption - that every dentry corresponds to a unique inode. However, the next point - raises an issue with this assumption: + inotify watches must be held on the dentry. For the purposes of inotify, + we assume that every dentry corresponds to a unique inode, which may + cause unexpected behavior in the presence of hard links, where multiple + dentries should share the same set of watches. Indeed, it is impossible + for us to be absolutely sure whether dentries correspond to the same + file or not, due to the following point: * **The Sentry cannot always be aware of hard links on the remote filesystem.** There is no way for us to confirm whether two files on the remote filesystem are actually links to the same inode. QIDs and inodes are diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index cd5456eef..db5fb3bb1 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -128,16 +128,14 @@ func (mnt *Mount) Options() MountOptions { // // +stateify savable type MountNamespace struct { + MountNamespaceRefs + // Owner is the usernamespace that owns this mount namespace. Owner *auth.UserNamespace // root is the MountNamespace's root mount. root is immutable. root *Mount - // refs is the reference count. refs is accessed using atomic memory - // operations. - refs int64 - // mountpoints maps all Dentries which are mount points in this namespace // to the number of Mounts for which they are mount points. mountpoints is // protected by VirtualFilesystem.mountMu. @@ -168,9 +166,9 @@ func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth } mntns := &MountNamespace{ Owner: creds.UserNamespace, - refs: 1, mountpoints: make(map[*Dentry]uint32), } + mntns.EnableLeakCheck() mntns.root = newMount(vfs, fs, root, mntns, &MountOptions{}) return mntns, nil } @@ -509,17 +507,10 @@ func (mnt *Mount) DecRef(ctx context.Context) { } } -// IncRef increments mntns' reference count. -func (mntns *MountNamespace) IncRef() { - if atomic.AddInt64(&mntns.refs, 1) <= 1 { - panic("MountNamespace.IncRef() called without holding a reference") - } -} - // DecRef decrements mntns' reference count. func (mntns *MountNamespace) DecRef(ctx context.Context) { vfs := mntns.root.fs.VirtualFilesystem() - if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 { + mntns.MountNamespaceRefs.DecRef(func() { vfs.mountMu.Lock() vfs.mounts.seq.BeginWrite() vdsToDecRef, mountsToDecRef := vfs.umountRecursiveLocked(mntns.root, &umountRecursiveOptions{ @@ -533,9 +524,7 @@ func (mntns *MountNamespace) DecRef(ctx context.Context) { for _, mnt := range mountsToDecRef { mnt.DecRef(ctx) } - } else if refs < 0 { - panic("MountNamespace.DecRef() called without holding a reference") - } + }) } // getMountAt returns the last Mount in the stack mounted at (mnt, d). It takes diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go index 777d631cb..da2a2e9c4 100644 --- a/pkg/sentry/vfs/mount_unsafe.go +++ b/pkg/sentry/vfs/mount_unsafe.go @@ -13,7 +13,7 @@ // limitations under the License. // +build go1.12 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sleep/sleep_unsafe.go b/pkg/sleep/sleep_unsafe.go index 118805492..19bce2afb 100644 --- a/pkg/sleep/sleep_unsafe.go +++ b/pkg/sleep/sleep_unsafe.go @@ -13,7 +13,7 @@ // limitations under the License. // +build go1.11 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sync/memmove_unsafe.go b/pkg/sync/memmove_unsafe.go index 1d7780695..f5e630009 100644 --- a/pkg/sync/memmove_unsafe.go +++ b/pkg/sync/memmove_unsafe.go @@ -4,7 +4,7 @@ // license that can be found in the LICENSE file. // +build go1.12 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sync/mutex_unsafe.go b/pkg/sync/mutex_unsafe.go index dc034d561..f4c2e9642 100644 --- a/pkg/sync/mutex_unsafe.go +++ b/pkg/sync/mutex_unsafe.go @@ -4,7 +4,7 @@ // license that can be found in the LICENSE file. // +build go1.13 -// +build !go1.16 +// +build !go1.17 // When updating the build constraint (above), check that syncMutex matches the // standard library sync.Mutex definition. diff --git a/pkg/sync/rwmutex_unsafe.go b/pkg/sync/rwmutex_unsafe.go index 995c0346e..b3b4dee78 100644 --- a/pkg/sync/rwmutex_unsafe.go +++ b/pkg/sync/rwmutex_unsafe.go @@ -4,7 +4,7 @@ // license that can be found in the LICENSE file. // +build go1.13 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/syncevent/waiter_unsafe.go b/pkg/syncevent/waiter_unsafe.go index ad271e1a0..518f18479 100644 --- a/pkg/syncevent/waiter_unsafe.go +++ b/pkg/syncevent/waiter_unsafe.go @@ -13,7 +13,7 @@ // limitations under the License. // +build go1.11 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go index 99313ee25..5db4bf12b 100644 --- a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go +++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go @@ -14,7 +14,7 @@ // +build linux,amd64 linux,arm64 // +build go1.12 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index 6c137f693..0243424f6 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -1,18 +1,32 @@ load("//tools:defs.bzl", "go_library") +load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) +go_template_instance( + name = "tun_endpoint_refs", + out = "tun_endpoint_refs.go", + package = "tun", + prefix = "tunEndpoint", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "tunEndpoint", + }, +) + go_library( name = "tun", srcs = [ "device.go", "protocol.go", + "tun_endpoint_refs.go", "tun_unsafe.go", ], visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/log", "//pkg/refs", "//pkg/sync", "//pkg/syserror", diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index 3b1510a33..b6ddbe81e 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -19,7 +19,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -135,6 +134,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE // 2. Creating a new NIC. id := tcpip.NICID(s.UniqueID()) + // TODO(gvisor.dev/1486): enable leak check for tunEndpoint. endpoint := &tunEndpoint{ Endpoint: channel.New(defaultDevOutQueueLen, defaultDevMtu, ""), stack: s, @@ -331,19 +331,18 @@ func (d *Device) WriteNotify() { // It is ref-counted as multiple opening files can attach to the same NIC. // The last owner is responsible for deleting the NIC. type tunEndpoint struct { + tunEndpointRefs *channel.Endpoint - refs.AtomicRefCount - stack *stack.Stack nicID tcpip.NICID name string isTap bool } -// DecRef decrements refcount of e, removes NIC if refcount goes to 0. +// DecRef decrements refcount of e, removing NIC if it reaches 0. func (e *tunEndpoint) DecRef(ctx context.Context) { - e.DecRefWithDestructor(ctx, func(context.Context) { + e.tunEndpointRefs.DecRef(func() { e.stack.RemoveNIC(e.nicID) }) } diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD index eddf7b725..82c073e32 100644 --- a/pkg/tcpip/network/arp/BUILD +++ b/pkg/tcpip/network/arp/BUILD @@ -28,5 +28,6 @@ go_test( "//pkg/tcpip/network/ipv4", "//pkg/tcpip/stack", "//pkg/tcpip/transport/icmp", + "@com_github_google_go_cmp//cmp:go_default_library", ], ) diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go index 920872c3f..cbbe5b77f 100644 --- a/pkg/tcpip/network/arp/arp.go +++ b/pkg/tcpip/network/arp/arp.go @@ -46,6 +46,7 @@ type endpoint struct { nicID tcpip.NICID linkEP stack.LinkEndpoint linkAddrCache stack.LinkAddressCache + nud stack.NUDHandler } // DefaultTTL is unused for ARP. It implements stack.NetworkEndpoint. @@ -78,7 +79,7 @@ func (e *endpoint) WritePacket(*stack.Route, *stack.GSO, stack.NetworkHeaderPara // NetworkProtocolNumber implements stack.NetworkEndpoint.NetworkProtocolNumber. func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { - return e.protocol.Number() + return ProtocolNumber } // WritePackets implements stack.NetworkEndpoint.WritePackets. @@ -99,9 +100,25 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) { switch h.Op() { case header.ARPRequest: localAddr := tcpip.Address(h.ProtocolAddressTarget()) - if e.linkAddrCache.CheckLocalAddress(e.nicID, header.IPv4ProtocolNumber, localAddr) == 0 { - return // we have no useful answer, ignore the request + + if e.nud == nil { + if e.linkAddrCache.CheckLocalAddress(e.nicID, header.IPv4ProtocolNumber, localAddr) == 0 { + return // we have no useful answer, ignore the request + } + + addr := tcpip.Address(h.ProtocolAddressSender()) + linkAddr := tcpip.LinkAddress(h.HardwareAddressSender()) + e.linkAddrCache.AddLinkAddress(e.nicID, addr, linkAddr) + } else { + if r.Stack().CheckLocalAddress(e.nicID, header.IPv4ProtocolNumber, localAddr) == 0 { + return // we have no useful answer, ignore the request + } + + remoteAddr := tcpip.Address(h.ProtocolAddressSender()) + remoteLinkAddr := tcpip.LinkAddress(h.HardwareAddressSender()) + e.nud.HandleProbe(remoteAddr, localAddr, ProtocolNumber, remoteLinkAddr, e.protocol) } + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ ReserveHeaderBytes: int(e.linkEP.MaxHeaderLength()) + header.ARPSize, }) @@ -113,11 +130,28 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) { copy(packet.HardwareAddressTarget(), h.HardwareAddressSender()) copy(packet.ProtocolAddressTarget(), h.ProtocolAddressSender()) _ = e.linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, pkt) - fallthrough // also fill the cache from requests + case header.ARPReply: addr := tcpip.Address(h.ProtocolAddressSender()) linkAddr := tcpip.LinkAddress(h.HardwareAddressSender()) - e.linkAddrCache.AddLinkAddress(e.nicID, addr, linkAddr) + + if e.nud == nil { + e.linkAddrCache.AddLinkAddress(e.nicID, addr, linkAddr) + return + } + + // The solicited, override, and isRouter flags are not available for ARP; + // they are only available for IPv6 Neighbor Advertisements. + e.nud.HandleConfirmation(addr, linkAddr, stack.ReachabilityConfirmationFlags{ + // Solicited and unsolicited (also referred to as gratuitous) ARP Replies + // are handled equivalently to a solicited Neighbor Advertisement. + Solicited: true, + // If a different link address is received than the one cached, the entry + // should always go to Stale. + Override: false, + // ARP does not distinguish between router and non-router hosts. + IsRouter: false, + }) } } @@ -134,12 +168,13 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) { return tcpip.Address(h.ProtocolAddressSender()), ProtocolAddress } -func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, sender stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint { +func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher, sender stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint { return &endpoint{ protocol: p, nicID: nicID, linkEP: sender, linkAddrCache: linkAddrCache, + nud: nud, } } diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go index c2c3e6891..9c9a859e3 100644 --- a/pkg/tcpip/network/arp/arp_test.go +++ b/pkg/tcpip/network/arp/arp_test.go @@ -16,10 +16,12 @@ package arp_test import ( "context" + "fmt" "strconv" "testing" "time" + "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -32,57 +34,192 @@ import ( ) const ( - stackLinkAddr1 = tcpip.LinkAddress("\x0a\x0a\x0b\x0b\x0c\x0c") - stackLinkAddr2 = tcpip.LinkAddress("\x0b\x0b\x0c\x0c\x0d\x0d") - stackAddr1 = tcpip.Address("\x0a\x00\x00\x01") - stackAddr2 = tcpip.Address("\x0a\x00\x00\x02") - stackAddrBad = tcpip.Address("\x0a\x00\x00\x03") + nicID = 1 + + stackAddr = tcpip.Address("\x0a\x00\x00\x01") + stackLinkAddr = tcpip.LinkAddress("\x0a\x0a\x0b\x0b\x0c\x0c") + + remoteAddr = tcpip.Address("\x0a\x00\x00\x02") + remoteLinkAddr = tcpip.LinkAddress("\x01\x02\x03\x04\x05\x06") + + unknownAddr = tcpip.Address("\x0a\x00\x00\x03") defaultChannelSize = 1 defaultMTU = 65536 + + // eventChanSize defines the size of event channels used by the neighbor + // cache's event dispatcher. The size chosen here needs to be sufficient to + // queue all the events received during tests before consumption. + // If eventChanSize is too small, the tests may deadlock. + eventChanSize = 32 +) + +type eventType uint8 + +const ( + entryAdded eventType = iota + entryChanged + entryRemoved ) +func (t eventType) String() string { + switch t { + case entryAdded: + return "add" + case entryChanged: + return "change" + case entryRemoved: + return "remove" + default: + return fmt.Sprintf("unknown (%d)", t) + } +} + +type eventInfo struct { + eventType eventType + nicID tcpip.NICID + addr tcpip.Address + linkAddr tcpip.LinkAddress + state stack.NeighborState +} + +func (e eventInfo) String() string { + return fmt.Sprintf("%s event for NIC #%d, addr=%q, linkAddr=%q, state=%q", e.eventType, e.nicID, e.addr, e.linkAddr, e.state) +} + +// arpDispatcher implements NUDDispatcher to validate the dispatching of +// events upon certain NUD state machine events. +type arpDispatcher struct { + // C is where events are queued + C chan eventInfo +} + +var _ stack.NUDDispatcher = (*arpDispatcher)(nil) + +func (d *arpDispatcher) OnNeighborAdded(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) { + e := eventInfo{ + eventType: entryAdded, + nicID: nicID, + addr: addr, + linkAddr: linkAddr, + state: state, + } + d.C <- e +} + +func (d *arpDispatcher) OnNeighborChanged(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) { + e := eventInfo{ + eventType: entryChanged, + nicID: nicID, + addr: addr, + linkAddr: linkAddr, + state: state, + } + d.C <- e +} + +func (d *arpDispatcher) OnNeighborRemoved(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) { + e := eventInfo{ + eventType: entryRemoved, + nicID: nicID, + addr: addr, + linkAddr: linkAddr, + state: state, + } + d.C <- e +} + +func (d *arpDispatcher) waitForEvent(ctx context.Context, want eventInfo) error { + select { + case got := <-d.C: + if diff := cmp.Diff(got, want, cmp.AllowUnexported(got)); diff != "" { + return fmt.Errorf("got invalid event (-got +want):\n%s", diff) + } + case <-ctx.Done(): + return fmt.Errorf("%s for %s", ctx.Err(), want) + } + return nil +} + +func (d *arpDispatcher) waitForEventWithTimeout(want eventInfo, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + return d.waitForEvent(ctx, want) +} + +func (d *arpDispatcher) nextEvent() (eventInfo, bool) { + select { + case event := <-d.C: + return event, true + default: + return eventInfo{}, false + } +} + type testContext struct { - t *testing.T - linkEP *channel.Endpoint - s *stack.Stack + s *stack.Stack + linkEP *channel.Endpoint + nudDisp *arpDispatcher } -func newTestContext(t *testing.T) *testContext { +func newTestContext(t *testing.T, useNeighborCache bool) *testContext { + c := stack.DefaultNUDConfigurations() + // Transition from Reachable to Stale almost immediately to test if receiving + // probes refreshes positive reachability. + c.BaseReachableTime = time.Microsecond + + d := arpDispatcher{ + // Create an event channel large enough so the neighbor cache doesn't block + // while dispatching events. Blocking could interfere with the timing of + // NUD transitions. + C: make(chan eventInfo, eventChanSize), + } + s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), arp.NewProtocol()}, TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol4()}, + NUDConfigs: c, + NUDDisp: &d, + UseNeighborCache: useNeighborCache, }) - ep := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr1) + ep := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr) + ep.LinkEPCapabilities |= stack.CapabilityResolutionRequired + wep := stack.LinkEndpoint(ep) if testing.Verbose() { wep = sniffer.New(ep) } - if err := s.CreateNIC(1, wep); err != nil { + if err := s.CreateNIC(nicID, wep); err != nil { t.Fatalf("CreateNIC failed: %v", err) } - if err := s.AddAddress(1, ipv4.ProtocolNumber, stackAddr1); err != nil { + if err := s.AddAddress(nicID, ipv4.ProtocolNumber, stackAddr); err != nil { t.Fatalf("AddAddress for ipv4 failed: %v", err) } - if err := s.AddAddress(1, ipv4.ProtocolNumber, stackAddr2); err != nil { - t.Fatalf("AddAddress for ipv4 failed: %v", err) + if !useNeighborCache { + // The remote address needs to be assigned to the NIC so we can receive and + // verify outgoing ARP packets. The neighbor cache isn't concerned with + // this; the tests that use linkAddrCache expect the ARP responses to be + // received by the same NIC. + if err := s.AddAddress(nicID, ipv4.ProtocolNumber, remoteAddr); err != nil { + t.Fatalf("AddAddress for ipv4 failed: %v", err) + } } - if err := s.AddAddress(1, arp.ProtocolNumber, arp.ProtocolAddress); err != nil { + if err := s.AddAddress(nicID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil { t.Fatalf("AddAddress for arp failed: %v", err) } s.SetRouteTable([]tcpip.Route{{ Destination: header.IPv4EmptySubnet, - NIC: 1, + NIC: nicID, }}) return &testContext{ - t: t, - s: s, - linkEP: ep, + s: s, + linkEP: ep, + nudDisp: &d, } } @@ -91,7 +228,7 @@ func (c *testContext) cleanup() { } func TestDirectRequest(t *testing.T) { - c := newTestContext(t) + c := newTestContext(t, false /* useNeighborCache */) defer c.cleanup() const senderMAC = "\x01\x02\x03\x04\x05\x06" @@ -111,7 +248,7 @@ func TestDirectRequest(t *testing.T) { })) } - for i, address := range []tcpip.Address{stackAddr1, stackAddr2} { + for i, address := range []tcpip.Address{stackAddr, remoteAddr} { t.Run(strconv.Itoa(i), func(t *testing.T) { inject(address) pi, _ := c.linkEP.ReadContext(context.Background()) @@ -122,7 +259,7 @@ func TestDirectRequest(t *testing.T) { if !rep.IsValid() { t.Fatalf("invalid ARP response: len = %d; response = %x", len(rep), rep) } - if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr1; got != want { + if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr; got != want { t.Errorf("got HardwareAddressSender = %s, want = %s", got, want) } if got, want := tcpip.Address(rep.ProtocolAddressSender()), tcpip.Address(h.ProtocolAddressTarget()); got != want { @@ -137,7 +274,7 @@ func TestDirectRequest(t *testing.T) { }) } - inject(stackAddrBad) + inject(unknownAddr) // Sleep tests are gross, but this will only potentially flake // if there's a bug. If there is no bug this will reliably // succeed. @@ -148,6 +285,144 @@ func TestDirectRequest(t *testing.T) { } } +func TestDirectRequestWithNeighborCache(t *testing.T) { + c := newTestContext(t, true /* useNeighborCache */) + defer c.cleanup() + + tests := []struct { + name string + senderAddr tcpip.Address + senderLinkAddr tcpip.LinkAddress + targetAddr tcpip.Address + isValid bool + }{ + { + name: "Loopback", + senderAddr: stackAddr, + senderLinkAddr: stackLinkAddr, + targetAddr: stackAddr, + isValid: true, + }, + { + name: "Remote", + senderAddr: remoteAddr, + senderLinkAddr: remoteLinkAddr, + targetAddr: stackAddr, + isValid: true, + }, + { + name: "RemoteInvalidTarget", + senderAddr: remoteAddr, + senderLinkAddr: remoteLinkAddr, + targetAddr: unknownAddr, + isValid: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Inject an incoming ARP request. + v := make(buffer.View, header.ARPSize) + h := header.ARP(v) + h.SetIPv4OverEthernet() + h.SetOp(header.ARPRequest) + copy(h.HardwareAddressSender(), test.senderLinkAddr) + copy(h.ProtocolAddressSender(), test.senderAddr) + copy(h.ProtocolAddressTarget(), test.targetAddr) + c.linkEP.InjectInbound(arp.ProtocolNumber, &stack.PacketBuffer{ + Data: v.ToVectorisedView(), + }) + + if !test.isValid { + // No packets should be sent after receiving an invalid ARP request. + // There is no need to perform a blocking read here, since packets are + // sent in the same function that handles ARP requests. + if pkt, ok := c.linkEP.Read(); ok { + t.Errorf("unexpected packet sent with network protocol number %d", pkt.Proto) + } + return + } + + // Verify an ARP response was sent. + pi, ok := c.linkEP.Read() + if !ok { + t.Fatal("expected ARP response to be sent, got none") + } + + if pi.Proto != arp.ProtocolNumber { + t.Fatalf("expected ARP response, got network protocol number %d", pi.Proto) + } + rep := header.ARP(pi.Pkt.NetworkHeader().View()) + if !rep.IsValid() { + t.Fatalf("invalid ARP response: len = %d; response = %x", len(rep), rep) + } + if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr; got != want { + t.Errorf("got HardwareAddressSender() = %s, want = %s", got, want) + } + if got, want := tcpip.Address(rep.ProtocolAddressSender()), tcpip.Address(h.ProtocolAddressTarget()); got != want { + t.Errorf("got ProtocolAddressSender() = %s, want = %s", got, want) + } + if got, want := tcpip.LinkAddress(rep.HardwareAddressTarget()), tcpip.LinkAddress(h.HardwareAddressSender()); got != want { + t.Errorf("got HardwareAddressTarget() = %s, want = %s", got, want) + } + if got, want := tcpip.Address(rep.ProtocolAddressTarget()), tcpip.Address(h.ProtocolAddressSender()); got != want { + t.Errorf("got ProtocolAddressTarget() = %s, want = %s", got, want) + } + + // Verify the sender was saved in the neighbor cache. + wantEvent := eventInfo{ + eventType: entryAdded, + nicID: nicID, + addr: test.senderAddr, + linkAddr: tcpip.LinkAddress(test.senderLinkAddr), + state: stack.Stale, + } + if err := c.nudDisp.waitForEventWithTimeout(wantEvent, time.Second); err != nil { + t.Fatal(err) + } + + neighbors, err := c.s.Neighbors(nicID) + if err != nil { + t.Fatalf("c.s.Neighbors(%d): %s", nicID, err) + } + + neighborByAddr := make(map[tcpip.Address]stack.NeighborEntry) + for _, n := range neighbors { + if existing, ok := neighborByAddr[n.Addr]; ok { + if diff := cmp.Diff(existing, n); diff != "" { + t.Fatalf("duplicate neighbor entry found (-existing +got):\n%s", diff) + } + t.Fatalf("exact neighbor entry duplicate found for addr=%s", n.Addr) + } + neighborByAddr[n.Addr] = n + } + + neigh, ok := neighborByAddr[test.senderAddr] + if !ok { + t.Fatalf("expected neighbor entry with Addr = %s", test.senderAddr) + } + if got, want := neigh.LinkAddr, test.senderLinkAddr; got != want { + t.Errorf("got neighbor LinkAddr = %s, want = %s", got, want) + } + if got, want := neigh.LocalAddr, stackAddr; got != want { + t.Errorf("got neighbor LocalAddr = %s, want = %s", got, want) + } + if got, want := neigh.State, stack.Stale; got != want { + t.Errorf("got neighbor State = %s, want = %s", got, want) + } + + // No more events should be dispatched + for { + event, ok := c.nudDisp.nextEvent() + if !ok { + break + } + t.Errorf("unexpected %s", event) + } + }) + } +} + func TestLinkAddressRequest(t *testing.T) { tests := []struct { name string @@ -156,8 +431,8 @@ func TestLinkAddressRequest(t *testing.T) { }{ { name: "Unicast", - remoteLinkAddr: stackLinkAddr2, - expectLinkAddr: stackLinkAddr2, + remoteLinkAddr: remoteLinkAddr, + expectLinkAddr: remoteLinkAddr, }, { name: "Multicast", @@ -173,9 +448,9 @@ func TestLinkAddressRequest(t *testing.T) { t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver") } - linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr1) - if err := linkRes.LinkAddressRequest(stackAddr1, stackAddr2, test.remoteLinkAddr, linkEP); err != nil { - t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", stackAddr1, stackAddr2, test.remoteLinkAddr, err) + linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr) + if err := linkRes.LinkAddressRequest(stackAddr, remoteAddr, test.remoteLinkAddr, linkEP); err != nil { + t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", stackAddr, remoteAddr, test.remoteLinkAddr, err) } pkt, ok := linkEP.Read() diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go index 9007346fe..e45dd17f8 100644 --- a/pkg/tcpip/network/ip_test.go +++ b/pkg/tcpip/network/ip_test.go @@ -250,7 +250,7 @@ func buildDummyStack(t *testing.T) *stack.Stack { func TestIPv4Send(t *testing.T) { o := testObject{t: t, v4: true} proto := ipv4.NewProtocol() - ep := proto.NewEndpoint(nicID, nil, nil, &o, buildDummyStack(t)) + ep := proto.NewEndpoint(nicID, nil, nil, nil, &o, buildDummyStack(t)) defer ep.Close() // Allocate and initialize the payload view. @@ -287,7 +287,7 @@ func TestIPv4Send(t *testing.T) { func TestIPv4Receive(t *testing.T) { o := testObject{t: t, v4: true} proto := ipv4.NewProtocol() - ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t)) + ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t)) defer ep.Close() totalLen := header.IPv4MinimumSize + 30 @@ -357,7 +357,7 @@ func TestIPv4ReceiveControl(t *testing.T) { t.Run(c.name, func(t *testing.T) { o := testObject{t: t} proto := ipv4.NewProtocol() - ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t)) + ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t)) defer ep.Close() const dataOffset = header.IPv4MinimumSize*2 + header.ICMPv4MinimumSize @@ -418,7 +418,7 @@ func TestIPv4ReceiveControl(t *testing.T) { func TestIPv4FragmentationReceive(t *testing.T) { o := testObject{t: t, v4: true} proto := ipv4.NewProtocol() - ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t)) + ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t)) defer ep.Close() totalLen := header.IPv4MinimumSize + 24 @@ -495,7 +495,7 @@ func TestIPv4FragmentationReceive(t *testing.T) { func TestIPv6Send(t *testing.T) { o := testObject{t: t} proto := ipv6.NewProtocol() - ep := proto.NewEndpoint(nicID, nil, &o, channel.New(0, 1280, ""), buildDummyStack(t)) + ep := proto.NewEndpoint(nicID, nil, nil, &o, channel.New(0, 1280, ""), buildDummyStack(t)) defer ep.Close() // Allocate and initialize the payload view. @@ -532,7 +532,7 @@ func TestIPv6Send(t *testing.T) { func TestIPv6Receive(t *testing.T) { o := testObject{t: t} proto := ipv6.NewProtocol() - ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t)) + ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t)) defer ep.Close() totalLen := header.IPv6MinimumSize + 30 @@ -611,7 +611,7 @@ func TestIPv6ReceiveControl(t *testing.T) { t.Run(c.name, func(t *testing.T) { o := testObject{t: t} proto := ipv6.NewProtocol() - ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t)) + ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t)) defer ep.Close() dataOffset := header.IPv6MinimumSize*2 + header.ICMPv6MinimumSize diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 63ffb3660..55ca94268 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -59,7 +59,7 @@ type endpoint struct { } // NewEndpoint creates a new ipv4 endpoint. -func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint { +func (p *protocol) NewEndpoint(nicID tcpip.NICID, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint { return &endpoint{ nicID: nicID, linkEP: linkEP, diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go index 66d3a953a..2b83c421e 100644 --- a/pkg/tcpip/network/ipv6/icmp.go +++ b/pkg/tcpip/network/ipv6/icmp.go @@ -15,8 +15,6 @@ package ipv6 import ( - "fmt" - "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -71,6 +69,59 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack e.dispatcher.DeliverTransportControlPacket(src, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt) } +// getLinkAddrOption searches NDP options for a given link address option using +// the provided getAddr function as a filter. Returns the link address if +// found; otherwise, returns the zero link address value. Also returns true if +// the options are valid as per the wire format, false otherwise. +func getLinkAddrOption(it header.NDPOptionIterator, getAddr func(header.NDPOption) tcpip.LinkAddress) (tcpip.LinkAddress, bool) { + var linkAddr tcpip.LinkAddress + for { + opt, done, err := it.Next() + if err != nil { + return "", false + } + if done { + break + } + if addr := getAddr(opt); len(addr) != 0 { + // No RFCs define what to do when an NDP message has multiple Link-Layer + // Address options. Since no interface can have multiple link-layer + // addresses, we consider such messages invalid. + if len(linkAddr) != 0 { + return "", false + } + linkAddr = addr + } + } + return linkAddr, true +} + +// getSourceLinkAddr searches NDP options for the source link address option. +// Returns the link address if found; otherwise, returns the zero link address +// value. Also returns true if the options are valid as per the wire format, +// false otherwise. +func getSourceLinkAddr(it header.NDPOptionIterator) (tcpip.LinkAddress, bool) { + return getLinkAddrOption(it, func(opt header.NDPOption) tcpip.LinkAddress { + if src, ok := opt.(header.NDPSourceLinkLayerAddressOption); ok { + return src.EthernetAddress() + } + return "" + }) +} + +// getTargetLinkAddr searches NDP options for the target link address option. +// Returns the link address if found; otherwise, returns the zero link address +// value. Also returns true if the options are valid as per the wire format, +// false otherwise. +func getTargetLinkAddr(it header.NDPOptionIterator) (tcpip.LinkAddress, bool) { + return getLinkAddrOption(it, func(opt header.NDPOption) tcpip.LinkAddress { + if dst, ok := opt.(header.NDPTargetLinkLayerAddressOption); ok { + return dst.EthernetAddress() + } + return "" + }) +} + func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragmentHeader bool) { stats := r.Stats().ICMP sent := stats.V6PacketsSent @@ -137,7 +188,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme case header.ICMPv6NeighborSolicit: received.NeighborSolicit.Increment() - if pkt.Data.Size() < header.ICMPv6NeighborSolicitMinimumSize || !isNDPValid() { + if !isNDPValid() || pkt.Data.Size() < header.ICMPv6NeighborSolicitMinimumSize { received.Invalid.Increment() return } @@ -147,14 +198,15 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme // NDP messages cannot be fragmented. Also note that in the common case NDP // datagrams are very small and ToView() will not incur allocations. ns := header.NDPNeighborSolicit(payload.ToView()) - it, err := ns.Options().Iter(true) - if err != nil { - // If we have a malformed NDP NS option, drop the packet. + targetAddr := ns.TargetAddress() + + // As per RFC 4861 section 4.3, the Target Address MUST NOT be a multicast + // address. + if header.IsV6MulticastAddress(targetAddr) { received.Invalid.Increment() return } - targetAddr := ns.TargetAddress() s := r.Stack() if isTentative, err := s.IsAddrTentative(e.nicID, targetAddr); err != nil { // We will only get an error if the NIC is unrecognized, which should not @@ -187,39 +239,22 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme // so the packet is processed as defined in RFC 4861, as per RFC 4862 // section 5.4.3. - // Is the NS targetting us? - if e.linkAddrCache.CheckLocalAddress(e.nicID, ProtocolNumber, targetAddr) == 0 { + // Is the NS targeting us? + if s.CheckLocalAddress(e.nicID, ProtocolNumber, targetAddr) == 0 { return } - // If the NS message contains the Source Link-Layer Address option, update - // the link address cache with the value of the option. - // - // TODO(b/148429853): Properly process the NS message and do Neighbor - // Unreachability Detection. - var sourceLinkAddr tcpip.LinkAddress - for { - opt, done, err := it.Next() - if err != nil { - // This should never happen as Iter(true) above did not return an error. - panic(fmt.Sprintf("unexpected error when iterating over NDP options: %s", err)) - } - if done { - break - } + it, err := ns.Options().Iter(false /* check */) + if err != nil { + // Options are not valid as per the wire format, silently drop the packet. + received.Invalid.Increment() + return + } - switch opt := opt.(type) { - case header.NDPSourceLinkLayerAddressOption: - // No RFCs define what to do when an NS message has multiple Source - // Link-Layer Address options. Since no interface can have multiple - // link-layer addresses, we consider such messages invalid. - if len(sourceLinkAddr) != 0 { - received.Invalid.Increment() - return - } - - sourceLinkAddr = opt.EthernetAddress() - } + sourceLinkAddr, ok := getSourceLinkAddr(it) + if !ok { + received.Invalid.Increment() + return } unspecifiedSource := r.RemoteAddress == header.IPv6Any @@ -237,6 +272,8 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme } else if unspecifiedSource { received.Invalid.Increment() return + } else if e.nud != nil { + e.nud.HandleProbe(r.RemoteAddress, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol) } else { e.linkAddrCache.AddLinkAddress(e.nicID, r.RemoteAddress, sourceLinkAddr) } @@ -304,7 +341,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme case header.ICMPv6NeighborAdvert: received.NeighborAdvert.Increment() - if pkt.Data.Size() < header.ICMPv6NeighborAdvertSize || !isNDPValid() { + if !isNDPValid() || pkt.Data.Size() < header.ICMPv6NeighborAdvertSize { received.Invalid.Increment() return } @@ -314,17 +351,10 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme // 5, NDP messages cannot be fragmented. Also note that in the common case // NDP datagrams are very small and ToView() will not incur allocations. na := header.NDPNeighborAdvert(payload.ToView()) - it, err := na.Options().Iter(true) - if err != nil { - // If we have a malformed NDP NA option, drop the packet. - received.Invalid.Increment() - return - } - targetAddr := na.TargetAddress() - stack := r.Stack() + s := r.Stack() - if isTentative, err := stack.IsAddrTentative(e.nicID, targetAddr); err != nil { + if isTentative, err := s.IsAddrTentative(e.nicID, targetAddr); err != nil { // We will only get an error if the NIC is unrecognized, which should not // happen. For now short-circuit this packet. // @@ -335,7 +365,14 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme // DAD on, implying the address is not unique. In this case we let the // stack know so it can handle such a scenario and do nothing furthur with // the NDP NA. - stack.DupTentativeAddrDetected(e.nicID, targetAddr) + s.DupTentativeAddrDetected(e.nicID, targetAddr) + return + } + + it, err := na.Options().Iter(false /* check */) + if err != nil { + // If we have a malformed NDP NA option, drop the packet. + received.Invalid.Increment() return } @@ -348,39 +385,25 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme // TODO(b/143147598): Handle the scenario described above. Also inform the // netstack integration that a duplicate address was detected outside of // DAD. + targetLinkAddr, ok := getTargetLinkAddr(it) + if !ok { + received.Invalid.Increment() + return + } // If the NA message has the target link layer option, update the link // address cache with the link address for the target of the message. - // - // TODO(b/148429853): Properly process the NA message and do Neighbor - // Unreachability Detection. - var targetLinkAddr tcpip.LinkAddress - for { - opt, done, err := it.Next() - if err != nil { - // This should never happen as Iter(true) above did not return an error. - panic(fmt.Sprintf("unexpected error when iterating over NDP options: %s", err)) - } - if done { - break + if len(targetLinkAddr) != 0 { + if e.nud == nil { + e.linkAddrCache.AddLinkAddress(e.nicID, targetAddr, targetLinkAddr) + return } - switch opt := opt.(type) { - case header.NDPTargetLinkLayerAddressOption: - // No RFCs define what to do when an NA message has multiple Target - // Link-Layer Address options. Since no interface can have multiple - // link-layer addresses, we consider such messages invalid. - if len(targetLinkAddr) != 0 { - received.Invalid.Increment() - return - } - - targetLinkAddr = opt.EthernetAddress() - } - } - - if len(targetLinkAddr) != 0 { - e.linkAddrCache.AddLinkAddress(e.nicID, targetAddr, targetLinkAddr) + e.nud.HandleConfirmation(targetAddr, targetLinkAddr, stack.ReachabilityConfirmationFlags{ + Solicited: na.SolicitedFlag(), + Override: na.OverrideFlag(), + IsRouter: na.RouterFlag(), + }) } case header.ICMPv6EchoRequest: @@ -440,27 +463,75 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme case header.ICMPv6RouterSolicit: received.RouterSolicit.Increment() - if !isNDPValid() { + + // + // Validate the RS as per RFC 4861 section 6.1.1. + // + + // Is the NDP payload of sufficient size to hold a Router Solictation? + if !isNDPValid() || pkt.Data.Size()-header.ICMPv6HeaderSize < header.NDPRSMinimumSize { received.Invalid.Increment() return } - case header.ICMPv6RouterAdvert: - received.RouterAdvert.Increment() + stack := r.Stack() - // Is the NDP payload of sufficient size to hold a Router - // Advertisement? - if pkt.Data.Size()-header.ICMPv6HeaderSize < header.NDPRAMinimumSize || !isNDPValid() { + // Is the networking stack operating as a router? + if !stack.Forwarding() { + // ... No, silently drop the packet. + received.RouterOnlyPacketsDroppedByHost.Increment() + return + } + + // Note that in the common case NDP datagrams are very small and ToView() + // will not incur allocations. + rs := header.NDPRouterSolicit(payload.ToView()) + it, err := rs.Options().Iter(false /* check */) + if err != nil { + // Options are not valid as per the wire format, silently drop the packet. received.Invalid.Increment() return } - routerAddr := iph.SourceAddress() + sourceLinkAddr, ok := getSourceLinkAddr(it) + if !ok { + received.Invalid.Increment() + return + } + + // If the RS message has the source link layer option, update the link + // address cache with the link address for the source of the message. + if len(sourceLinkAddr) != 0 { + // As per RFC 4861 section 4.1, the Source Link-Layer Address Option MUST + // NOT be included when the source IP address is the unspecified address. + // Otherwise, it SHOULD be included on link layers that have addresses. + if r.RemoteAddress == header.IPv6Any { + received.Invalid.Increment() + return + } + + if e.nud != nil { + // A RS with a specified source IP address modifies the NUD state + // machine in the same way a reachability probe would. + e.nud.HandleProbe(r.RemoteAddress, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol) + } + } + + case header.ICMPv6RouterAdvert: + received.RouterAdvert.Increment() // // Validate the RA as per RFC 4861 section 6.1.2. // + // Is the NDP payload of sufficient size to hold a Router Advertisement? + if !isNDPValid() || pkt.Data.Size()-header.ICMPv6HeaderSize < header.NDPRAMinimumSize { + received.Invalid.Increment() + return + } + + routerAddr := iph.SourceAddress() + // Is the IP Source Address a link-local address? if !header.IsV6LinkLocalAddress(routerAddr) { // ...No, silently drop the packet. @@ -468,16 +539,18 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme return } - // The remainder of payload must be only the router advertisement, so - // payload.ToView() always returns the advertisement. Per RFC 6980 section - // 5, NDP messages cannot be fragmented. Also note that in the common case - // NDP datagrams are very small and ToView() will not incur allocations. + // Note that in the common case NDP datagrams are very small and ToView() + // will not incur allocations. ra := header.NDPRouterAdvert(payload.ToView()) - opts := ra.Options() + it, err := ra.Options().Iter(false /* check */) + if err != nil { + // Options are not valid as per the wire format, silently drop the packet. + received.Invalid.Increment() + return + } - // Are options valid as per the wire format? - if _, err := opts.Iter(true); err != nil { - // ...No, silently drop the packet. + sourceLinkAddr, ok := getSourceLinkAddr(it) + if !ok { received.Invalid.Increment() return } @@ -487,12 +560,33 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme // as RFC 4861 section 6.1.2 is concerned. // + // If the RA has the source link layer option, update the link address + // cache with the link address for the advertised router. + if len(sourceLinkAddr) != 0 && e.nud != nil { + e.nud.HandleProbe(routerAddr, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol) + } + // Tell the NIC to handle the RA. stack := r.Stack() - rxNICID := r.NICID() - stack.HandleNDPRA(rxNICID, routerAddr, ra) + stack.HandleNDPRA(e.nicID, routerAddr, ra) case header.ICMPv6RedirectMsg: + // TODO(gvisor.dev/issue/2285): Call `e.nud.HandleProbe` after validating + // this redirect message, as per RFC 4871 section 7.3.3: + // + // "A Neighbor Cache entry enters the STALE state when created as a + // result of receiving packets other than solicited Neighbor + // Advertisements (i.e., Router Solicitations, Router Advertisements, + // Redirects, and Neighbor Solicitations). These packets contain the + // link-layer address of either the sender or, in the case of Redirect, + // the redirection target. However, receipt of these link-layer + // addresses does not confirm reachability of the forward-direction path + // to that node. Placing a newly created Neighbor Cache entry for which + // the link-layer address is known in the STALE state provides assurance + // that path failures are detected quickly. In addition, should a cached + // link-layer address be modified due to receiving one of the above + // messages, the state SHOULD also be set to STALE to provide prompt + // verification that the path to the new link-layer address is working." received.RedirectMsg.Increment() if !isNDPValid() { received.Invalid.Increment() diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go index 9e4eeea77..8112ed051 100644 --- a/pkg/tcpip/network/ipv6/icmp_test.go +++ b/pkg/tcpip/network/ipv6/icmp_test.go @@ -31,6 +31,8 @@ import ( ) const ( + nicID = 1 + linkAddr0 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06") linkAddr1 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e") linkAddr2 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0f") @@ -49,7 +51,10 @@ type stubLinkEndpoint struct { } func (*stubLinkEndpoint) Capabilities() stack.LinkEndpointCapabilities { - return 0 + // Indicate that resolution for link layer addresses is required to send + // packets over this link. This is needed so the NIC knows to allocate a + // neighbor table. + return stack.CapabilityResolutionRequired } func (*stubLinkEndpoint) MaxHeaderLength() uint16 { @@ -84,16 +89,184 @@ func (*stubLinkAddressCache) CheckLocalAddress(tcpip.NICID, tcpip.NetworkProtoco func (*stubLinkAddressCache) AddLinkAddress(tcpip.NICID, tcpip.Address, tcpip.LinkAddress) { } +type stubNUDHandler struct{} + +var _ stack.NUDHandler = (*stubNUDHandler)(nil) + +func (*stubNUDHandler) HandleProbe(remoteAddr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes stack.LinkAddressResolver) { +} + +func (*stubNUDHandler) HandleConfirmation(addr tcpip.Address, linkAddr tcpip.LinkAddress, flags stack.ReachabilityConfirmationFlags) { +} + +func (*stubNUDHandler) HandleUpperLevelConfirmation(addr tcpip.Address) { +} + func TestICMPCounts(t *testing.T) { + tests := []struct { + name string + useNeighborCache bool + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + }, + { + name: "neighborCache", + useNeighborCache: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, + TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()}, + UseNeighborCache: test.useNeighborCache, + }) + { + if err := s.CreateNIC(nicID, &stubLinkEndpoint{}); err != nil { + t.Fatalf("CreateNIC(_, _) = %s", err) + } + if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil { + t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err) + } + } + { + subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1)))) + if err != nil { + t.Fatal(err) + } + s.SetRouteTable( + []tcpip.Route{{ + Destination: subnet, + NIC: nicID, + }}, + ) + } + + netProto := s.NetworkProtocolInstance(ProtocolNumber) + if netProto == nil { + t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber) + } + ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{}, nil, s) + defer ep.Close() + + r, err := s.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */) + if err != nil { + t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err) + } + defer r.Release() + + var tllData [header.NDPLinkLayerAddressSize]byte + header.NDPOptions(tllData[:]).Serialize(header.NDPOptionsSerializer{ + header.NDPTargetLinkLayerAddressOption(linkAddr1), + }) + + types := []struct { + typ header.ICMPv6Type + size int + extraData []byte + }{ + { + typ: header.ICMPv6DstUnreachable, + size: header.ICMPv6DstUnreachableMinimumSize, + }, + { + typ: header.ICMPv6PacketTooBig, + size: header.ICMPv6PacketTooBigMinimumSize, + }, + { + typ: header.ICMPv6TimeExceeded, + size: header.ICMPv6MinimumSize, + }, + { + typ: header.ICMPv6ParamProblem, + size: header.ICMPv6MinimumSize, + }, + { + typ: header.ICMPv6EchoRequest, + size: header.ICMPv6EchoMinimumSize, + }, + { + typ: header.ICMPv6EchoReply, + size: header.ICMPv6EchoMinimumSize, + }, + { + typ: header.ICMPv6RouterSolicit, + size: header.ICMPv6MinimumSize, + }, + { + typ: header.ICMPv6RouterAdvert, + size: header.ICMPv6HeaderSize + header.NDPRAMinimumSize, + }, + { + typ: header.ICMPv6NeighborSolicit, + size: header.ICMPv6NeighborSolicitMinimumSize, + }, + { + typ: header.ICMPv6NeighborAdvert, + size: header.ICMPv6NeighborAdvertMinimumSize, + extraData: tllData[:], + }, + { + typ: header.ICMPv6RedirectMsg, + size: header.ICMPv6MinimumSize, + }, + } + + handleIPv6Payload := func(icmp header.ICMPv6) { + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: header.IPv6MinimumSize, + Data: buffer.View(icmp).ToVectorisedView(), + }) + ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize)) + ip.Encode(&header.IPv6Fields{ + PayloadLength: uint16(len(icmp)), + NextHeader: uint8(header.ICMPv6ProtocolNumber), + HopLimit: header.NDPHopLimit, + SrcAddr: r.LocalAddress, + DstAddr: r.RemoteAddress, + }) + ep.HandlePacket(&r, pkt) + } + + for _, typ := range types { + icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData))) + copy(icmp[typ.size:], typ.extraData) + icmp.SetType(typ.typ) + icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], r.LocalAddress, r.RemoteAddress, buffer.View(typ.extraData).ToVectorisedView())) + handleIPv6Payload(icmp) + } + + // Construct an empty ICMP packet so that + // Stats().ICMP.ICMPv6ReceivedPacketStats.Invalid is incremented. + handleIPv6Payload(header.ICMPv6(buffer.NewView(header.IPv6MinimumSize))) + + icmpv6Stats := s.Stats().ICMP.V6PacketsReceived + visitStats(reflect.ValueOf(&icmpv6Stats).Elem(), func(name string, s *tcpip.StatCounter) { + if got, want := s.Value(), uint64(1); got != want { + t.Errorf("got %s = %d, want = %d", name, got, want) + } + }) + if t.Failed() { + t.Logf("stats:\n%+v", s.Stats()) + } + }) + } +} + +func TestICMPCountsWithNeighborCache(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()}, + UseNeighborCache: true, }) { - if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil { - t.Fatalf("CreateNIC(_) = %s", err) + if err := s.CreateNIC(nicID, &stubLinkEndpoint{}); err != nil { + t.Fatalf("CreateNIC(_, _) = %s", err) } - if err := s.AddAddress(1, ProtocolNumber, lladdr0); err != nil { + if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil { t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err) } } @@ -105,7 +278,7 @@ func TestICMPCounts(t *testing.T) { s.SetRouteTable( []tcpip.Route{{ Destination: subnet, - NIC: 1, + NIC: nicID, }}, ) } @@ -114,12 +287,12 @@ func TestICMPCounts(t *testing.T) { if netProto == nil { t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber) } - ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubDispatcher{}, nil, s) + ep := netProto.NewEndpoint(0, nil, &stubNUDHandler{}, &stubDispatcher{}, nil, s) defer ep.Close() - r, err := s.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */) + r, err := s.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */) if err != nil { - t.Fatalf("FindRoute(_) = _, %s, want = _, nil", err) + t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err) } defer r.Release() @@ -265,19 +438,19 @@ func newTestContext(t *testing.T) *testContext { if testing.Verbose() { wrappedEP0 = sniffer.New(wrappedEP0) } - if err := c.s0.CreateNIC(1, wrappedEP0); err != nil { + if err := c.s0.CreateNIC(nicID, wrappedEP0); err != nil { t.Fatalf("CreateNIC s0: %v", err) } - if err := c.s0.AddAddress(1, ProtocolNumber, lladdr0); err != nil { + if err := c.s0.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil { t.Fatalf("AddAddress lladdr0: %v", err) } c.linkEP1 = channel.New(defaultChannelSize, defaultMTU, linkAddr1) wrappedEP1 := stack.LinkEndpoint(endpointWithResolutionCapability{LinkEndpoint: c.linkEP1}) - if err := c.s1.CreateNIC(1, wrappedEP1); err != nil { + if err := c.s1.CreateNIC(nicID, wrappedEP1); err != nil { t.Fatalf("CreateNIC failed: %v", err) } - if err := c.s1.AddAddress(1, ProtocolNumber, lladdr1); err != nil { + if err := c.s1.AddAddress(nicID, ProtocolNumber, lladdr1); err != nil { t.Fatalf("AddAddress lladdr1: %v", err) } @@ -288,7 +461,7 @@ func newTestContext(t *testing.T) *testContext { c.s0.SetRouteTable( []tcpip.Route{{ Destination: subnet0, - NIC: 1, + NIC: nicID, }}, ) subnet1, err := tcpip.NewSubnet(lladdr0, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr0)))) @@ -298,7 +471,7 @@ func newTestContext(t *testing.T) *testContext { c.s1.SetRouteTable( []tcpip.Route{{ Destination: subnet1, - NIC: 1, + NIC: nicID, }}, ) @@ -359,9 +532,9 @@ func TestLinkResolution(t *testing.T) { c := newTestContext(t) defer c.cleanup() - r, err := c.s0.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */) + r, err := c.s0.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */) if err != nil { - t.Fatalf("FindRoute(_) = _, %s, want = _, nil", err) + t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err) } defer r.Release() @@ -376,14 +549,14 @@ func TestLinkResolution(t *testing.T) { var wq waiter.Queue ep, err := c.s0.NewEndpoint(header.ICMPv6ProtocolNumber, ProtocolNumber, &wq) if err != nil { - t.Fatalf("NewEndpoint(_) = _, %s, want = _, nil", err) + t.Fatalf("NewEndpoint(_) = (_, %s), want = (_, nil)", err) } for { - _, resCh, err := ep.Write(payload, tcpip.WriteOptions{To: &tcpip.FullAddress{NIC: 1, Addr: lladdr1}}) + _, resCh, err := ep.Write(payload, tcpip.WriteOptions{To: &tcpip.FullAddress{NIC: nicID, Addr: lladdr1}}) if resCh != nil { if err != tcpip.ErrNoLinkAddress { - t.Fatalf("ep.Write(_) = _, <non-nil>, %s, want = _, <non-nil>, tcpip.ErrNoLinkAddress", err) + t.Fatalf("ep.Write(_) = (_, <non-nil>, %s), want = (_, <non-nil>, tcpip.ErrNoLinkAddress)", err) } for _, args := range []routeArgs{ {src: c.linkEP0, dst: c.linkEP1, typ: header.ICMPv6NeighborSolicit, remoteLinkAddr: header.EthernetAddressFromMulticastIPv6Address(header.SolicitedNodeAddr(lladdr1))}, @@ -399,7 +572,7 @@ func TestLinkResolution(t *testing.T) { continue } if err != nil { - t.Fatalf("ep.Write(_) = _, _, %s", err) + t.Fatalf("ep.Write(_) = (_, _, %s)", err) } break } @@ -424,6 +597,7 @@ func TestICMPChecksumValidationSimple(t *testing.T) { size int extraData []byte statCounter func(tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter + routerOnly bool }{ { name: "DstUnreachable", @@ -480,6 +654,8 @@ func TestICMPChecksumValidationSimple(t *testing.T) { statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { return stats.RouterSolicit }, + // Hosts MUST silently discard any received Router Solicitation messages. + routerOnly: true, }, { name: "RouterAdvert", @@ -516,84 +692,133 @@ func TestICMPChecksumValidationSimple(t *testing.T) { }, } - for _, typ := range types { - t.Run(typ.name, func(t *testing.T) { - e := channel.New(10, 1280, linkAddr0) - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, - }) - if err := s.CreateNIC(1, e); err != nil { - t.Fatalf("CreateNIC(_) = %s", err) - } - - if err := s.AddAddress(1, ProtocolNumber, lladdr0); err != nil { - t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err) - } - { - subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1)))) - if err != nil { - t.Fatal(err) - } - s.SetRouteTable( - []tcpip.Route{{ - Destination: subnet, - NIC: 1, - }}, - ) - } + tests := []struct { + name string + useNeighborCache bool + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + }, + { + name: "neighborCache", + useNeighborCache: true, + }, + } - handleIPv6Payload := func(checksum bool) { - icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData))) - copy(icmp[typ.size:], typ.extraData) - icmp.SetType(typ.typ) - if checksum { - icmp.SetChecksum(header.ICMPv6Checksum(icmp, lladdr1, lladdr0, buffer.View{}.ToVectorisedView())) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + for _, typ := range types { + for _, isRouter := range []bool{false, true} { + name := typ.name + if isRouter { + name += " (Router)" + } + t.Run(name, func(t *testing.T) { + e := channel.New(0, 1280, linkAddr0) + + // Indicate that resolution for link layer addresses is required to + // send packets over this link. This is needed so the NIC knows to + // allocate a neighbor table. + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired + + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, + UseNeighborCache: test.useNeighborCache, + }) + if isRouter { + // Enabling forwarding makes the stack act as a router. + s.SetForwarding(true) + } + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(_, _) = %s", err) + } + + if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil { + t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err) + } + { + subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1)))) + if err != nil { + t.Fatal(err) + } + s.SetRouteTable( + []tcpip.Route{{ + Destination: subnet, + NIC: nicID, + }}, + ) + } + + handleIPv6Payload := func(checksum bool) { + icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData))) + copy(icmp[typ.size:], typ.extraData) + icmp.SetType(typ.typ) + if checksum { + icmp.SetChecksum(header.ICMPv6Checksum(icmp, lladdr1, lladdr0, buffer.View{}.ToVectorisedView())) + } + ip := header.IPv6(buffer.NewView(header.IPv6MinimumSize)) + ip.Encode(&header.IPv6Fields{ + PayloadLength: uint16(len(icmp)), + NextHeader: uint8(header.ICMPv6ProtocolNumber), + HopLimit: header.NDPHopLimit, + SrcAddr: lladdr1, + DstAddr: lladdr0, + }) + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: buffer.NewVectorisedView(len(ip)+len(icmp), []buffer.View{buffer.View(ip), buffer.View(icmp)}), + }) + e.InjectInbound(ProtocolNumber, pkt) + } + + stats := s.Stats().ICMP.V6PacketsReceived + invalid := stats.Invalid + routerOnly := stats.RouterOnlyPacketsDroppedByHost + typStat := typ.statCounter(stats) + + // Initial stat counts should be 0. + if got := invalid.Value(); got != 0 { + t.Fatalf("got invalid = %d, want = 0", got) + } + if got := routerOnly.Value(); got != 0 { + t.Fatalf("got RouterOnlyPacketsReceivedByHost = %d, want = 0", got) + } + if got := typStat.Value(); got != 0 { + t.Fatalf("got %s = %d, want = 0", typ.name, got) + } + + // Without setting checksum, the incoming packet should + // be invalid. + handleIPv6Payload(false) + if got := invalid.Value(); got != 1 { + t.Fatalf("got invalid = %d, want = 1", got) + } + // Router only count should not have increased. + if got := routerOnly.Value(); got != 0 { + t.Fatalf("got RouterOnlyPacketsReceivedByHost = %d, want = 0", got) + } + // Rx count of type typ.typ should not have increased. + if got := typStat.Value(); got != 0 { + t.Fatalf("got %s = %d, want = 0", typ.name, got) + } + + // When checksum is set, it should be received. + handleIPv6Payload(true) + if got := typStat.Value(); got != 1 { + t.Fatalf("got %s = %d, want = 1", typ.name, got) + } + // Invalid count should not have increased again. + if got := invalid.Value(); got != 1 { + t.Fatalf("got invalid = %d, want = 1", got) + } + if !isRouter && typ.routerOnly && test.useNeighborCache { + // Router only count should have increased. + if got := routerOnly.Value(); got != 1 { + t.Fatalf("got RouterOnlyPacketsReceivedByHost = %d, want = 1", got) + } + } + }) } - ip := header.IPv6(buffer.NewView(header.IPv6MinimumSize)) - ip.Encode(&header.IPv6Fields{ - PayloadLength: uint16(len(icmp)), - NextHeader: uint8(header.ICMPv6ProtocolNumber), - HopLimit: header.NDPHopLimit, - SrcAddr: lladdr1, - DstAddr: lladdr0, - }) - pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ - Data: buffer.NewVectorisedView(len(ip)+len(icmp), []buffer.View{buffer.View(ip), buffer.View(icmp)}), - }) - e.InjectInbound(ProtocolNumber, pkt) - } - - stats := s.Stats().ICMP.V6PacketsReceived - invalid := stats.Invalid - typStat := typ.statCounter(stats) - - // Initial stat counts should be 0. - if got := invalid.Value(); got != 0 { - t.Fatalf("got invalid = %d, want = 0", got) - } - if got := typStat.Value(); got != 0 { - t.Fatalf("got %s = %d, want = 0", typ.name, got) - } - - // Without setting checksum, the incoming packet should - // be invalid. - handleIPv6Payload(false) - if got := invalid.Value(); got != 1 { - t.Fatalf("got invalid = %d, want = 1", got) - } - // Rx count of type typ.typ should not have increased. - if got := typStat.Value(); got != 0 { - t.Fatalf("got %s = %d, want = 0", typ.name, got) - } - - // When checksum is set, it should be received. - handleIPv6Payload(true) - if got := typStat.Value(); got != 1 { - t.Fatalf("got %s = %d, want = 1", typ.name, got) - } - // Invalid count should not have increased again. - if got := invalid.Value(); got != 1 { - t.Fatalf("got invalid = %d, want = 1", got) } }) } @@ -696,11 +921,11 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, }) - if err := s.CreateNIC(1, e); err != nil { - t.Fatalf("CreateNIC(_) = %s", err) + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(_, _) = %s", err) } - if err := s.AddAddress(1, ProtocolNumber, lladdr0); err != nil { + if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil { t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err) } { @@ -711,7 +936,7 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) { s.SetRouteTable( []tcpip.Route{{ Destination: subnet, - NIC: 1, + NIC: nicID, }}, ) } @@ -750,7 +975,7 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) { t.Fatalf("got invalid = %d, want = 0", got) } if got := typStat.Value(); got != 0 { - t.Fatalf("got %s = %d, want = 0", typ.name, got) + t.Fatalf("got = %d, want = 0", got) } // Without setting checksum, the incoming packet should @@ -761,13 +986,13 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) { } // Rx count of type typ.typ should not have increased. if got := typStat.Value(); got != 0 { - t.Fatalf("got %s = %d, want = 0", typ.name, got) + t.Fatalf("got = %d, want = 0", got) } // When checksum is set, it should be received. handleIPv6Payload(typ.typ, typ.size, typ.payloadSize, typ.payload, true) if got := typStat.Value(); got != 1 { - t.Fatalf("got %s = %d, want = 1", typ.name, got) + t.Fatalf("got = %d, want = 0", got) } // Invalid count should not have increased again. if got := invalid.Value(); got != 1 { @@ -874,12 +1099,12 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, }) - if err := s.CreateNIC(1, e); err != nil { - t.Fatalf("CreateNIC(_) = %s", err) + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - if err := s.AddAddress(1, ProtocolNumber, lladdr0); err != nil { - t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err) + if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil { + t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, lladdr0, err) } { subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1)))) @@ -889,7 +1114,7 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) { s.SetRouteTable( []tcpip.Route{{ Destination: subnet, - NIC: 1, + NIC: nicID, }}, ) } @@ -929,7 +1154,7 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) { t.Fatalf("got invalid = %d, want = 0", got) } if got := typStat.Value(); got != 0 { - t.Fatalf("got %s = %d, want = 0", typ.name, got) + t.Fatalf("got = %d, want = 0", got) } // Without setting checksum, the incoming packet should @@ -940,13 +1165,13 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) { } // Rx count of type typ.typ should not have increased. if got := typStat.Value(); got != 0 { - t.Fatalf("got %s = %d, want = 0", typ.name, got) + t.Fatalf("got = %d, want = 0", got) } // When checksum is set, it should be received. handleIPv6Payload(typ.typ, typ.size, typ.payloadSize, typ.payload, true) if got := typStat.Value(); got != 1 { - t.Fatalf("got %s = %d, want = 1", typ.name, got) + t.Fatalf("got = %d, want = 0", got) } // Invalid count should not have increased again. if got := invalid.Value(); got != 1 { diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index 267d2cce8..36fbbebf0 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -48,6 +48,7 @@ type endpoint struct { nicID tcpip.NICID linkEP stack.LinkEndpoint linkAddrCache stack.LinkAddressCache + nud stack.NUDHandler dispatcher stack.TransportDispatcher protocol *protocol stack *stack.Stack @@ -455,11 +456,12 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) { } // NewEndpoint creates a new ipv6 endpoint. -func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint { +func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint { return &endpoint{ nicID: nicID, linkEP: linkEP, linkAddrCache: linkAddrCache, + nud: nud, dispatcher: dispatcher, protocol: p, stack: st, diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go index af71a7d6b..480c495fa 100644 --- a/pkg/tcpip/network/ipv6/ndp_test.go +++ b/pkg/tcpip/network/ipv6/ndp_test.go @@ -18,6 +18,7 @@ import ( "strings" "testing" + "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/checker" @@ -30,12 +31,13 @@ import ( // setupStackAndEndpoint creates a stack with a single NIC with a link-local // address llladdr and an IPv6 endpoint to a remote with link-local address // rlladdr -func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address) (*stack.Stack, stack.NetworkEndpoint) { +func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address, useNeighborCache bool) (*stack.Stack, stack.NetworkEndpoint) { t.Helper() s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()}, + UseNeighborCache: useNeighborCache, }) if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil { @@ -63,8 +65,7 @@ func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address) (*stack t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber) } - ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubDispatcher{}, nil, s) - + ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{}, nil, s) return s, ep } @@ -171,6 +172,123 @@ func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) { } } +// TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache tests +// that receiving a valid NDP NS message with the Source Link Layer Address +// option results in a new entry in the link address cache for the sender of +// the message. +func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testing.T) { + const nicID = 1 + + tests := []struct { + name string + optsBuf []byte + expectedLinkAddr tcpip.LinkAddress + }{ + { + name: "Valid", + optsBuf: []byte{1, 1, 2, 3, 4, 5, 6, 7}, + expectedLinkAddr: "\x02\x03\x04\x05\x06\x07", + }, + { + name: "Too Small", + optsBuf: []byte{1, 1, 2, 3, 4, 5, 6}, + }, + { + name: "Invalid Length", + optsBuf: []byte{1, 2, 2, 3, 4, 5, 6, 7}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, + UseNeighborCache: true, + }) + e := channel.New(0, 1280, linkAddr0) + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + } + if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil { + t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, lladdr0, err) + } + + ndpNSSize := header.ICMPv6NeighborSolicitMinimumSize + len(test.optsBuf) + hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNSSize) + pkt := header.ICMPv6(hdr.Prepend(ndpNSSize)) + pkt.SetType(header.ICMPv6NeighborSolicit) + ns := header.NDPNeighborSolicit(pkt.NDPPayload()) + ns.SetTargetAddress(lladdr0) + opts := ns.Options() + copy(opts, test.optsBuf) + pkt.SetChecksum(header.ICMPv6Checksum(pkt, lladdr1, lladdr0, buffer.VectorisedView{})) + payloadLength := hdr.UsedLength() + ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) + ip.Encode(&header.IPv6Fields{ + PayloadLength: uint16(payloadLength), + NextHeader: uint8(header.ICMPv6ProtocolNumber), + HopLimit: 255, + SrcAddr: lladdr1, + DstAddr: lladdr0, + }) + + invalid := s.Stats().ICMP.V6PacketsReceived.Invalid + + // Invalid count should initially be 0. + if got := invalid.Value(); got != 0 { + t.Fatalf("got invalid = %d, want = 0", got) + } + + e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) + + neighbors, err := s.Neighbors(nicID) + if err != nil { + t.Fatalf("s.Neighbors(%d): %s", nicID, err) + } + + neighborByAddr := make(map[tcpip.Address]stack.NeighborEntry) + for _, n := range neighbors { + if existing, ok := neighborByAddr[n.Addr]; ok { + if diff := cmp.Diff(existing, n); diff != "" { + t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, diff) + } + t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %s", nicID, existing) + } + neighborByAddr[n.Addr] = n + } + + if neigh, ok := neighborByAddr[lladdr1]; len(test.expectedLinkAddr) != 0 { + // Invalid count should not have increased. + if got := invalid.Value(); got != 0 { + t.Errorf("got invalid = %d, want = 0", got) + } + + if !ok { + t.Fatalf("expected a neighbor entry for %q", lladdr1) + } + if neigh.LinkAddr != test.expectedLinkAddr { + t.Errorf("got link address = %s, want = %s", neigh.LinkAddr, test.expectedLinkAddr) + } + if neigh.State != stack.Stale { + t.Errorf("got NUD state = %s, want = %s", neigh.State, stack.Stale) + } + } else { + // Invalid count should have increased. + if got := invalid.Value(); got != 1 { + t.Errorf("got invalid = %d, want = 1", got) + } + + if ok { + t.Fatalf("unexpectedly got neighbor entry: %s", neigh) + } + } + }) + } +} + func TestNeighorSolicitationResponse(t *testing.T) { const nicID = 1 nicAddr := lladdr0 @@ -180,6 +298,20 @@ func TestNeighorSolicitationResponse(t *testing.T) { remoteLinkAddr0 := linkAddr1 remoteLinkAddr1 := linkAddr2 + stacks := []struct { + name string + useNeighborCache bool + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + }, + { + name: "neighborCache", + useNeighborCache: true, + }, + } + tests := []struct { name string nsOpts header.NDPOptionsSerializer @@ -338,86 +470,92 @@ func TestNeighorSolicitationResponse(t *testing.T) { }, } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, - }) - e := channel.New(1, 1280, nicLinkAddr) - if err := s.CreateNIC(nicID, e); err != nil { - t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) - } - if err := s.AddAddress(nicID, ProtocolNumber, nicAddr); err != nil { - t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, nicAddr, err) - } + for _, stackTyp := range stacks { + t.Run(stackTyp.name, func(t *testing.T) { + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, + UseNeighborCache: stackTyp.useNeighborCache, + }) + e := channel.New(1, 1280, nicLinkAddr) + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + } + if err := s.AddAddress(nicID, ProtocolNumber, nicAddr); err != nil { + t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, nicAddr, err) + } - ndpNSSize := header.ICMPv6NeighborSolicitMinimumSize + test.nsOpts.Length() - hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNSSize) - pkt := header.ICMPv6(hdr.Prepend(ndpNSSize)) - pkt.SetType(header.ICMPv6NeighborSolicit) - ns := header.NDPNeighborSolicit(pkt.NDPPayload()) - ns.SetTargetAddress(nicAddr) - opts := ns.Options() - opts.Serialize(test.nsOpts) - pkt.SetChecksum(header.ICMPv6Checksum(pkt, test.nsSrc, test.nsDst, buffer.VectorisedView{})) - payloadLength := hdr.UsedLength() - ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) - ip.Encode(&header.IPv6Fields{ - PayloadLength: uint16(payloadLength), - NextHeader: uint8(header.ICMPv6ProtocolNumber), - HopLimit: 255, - SrcAddr: test.nsSrc, - DstAddr: test.nsDst, - }) + ndpNSSize := header.ICMPv6NeighborSolicitMinimumSize + test.nsOpts.Length() + hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNSSize) + pkt := header.ICMPv6(hdr.Prepend(ndpNSSize)) + pkt.SetType(header.ICMPv6NeighborSolicit) + ns := header.NDPNeighborSolicit(pkt.NDPPayload()) + ns.SetTargetAddress(nicAddr) + opts := ns.Options() + opts.Serialize(test.nsOpts) + pkt.SetChecksum(header.ICMPv6Checksum(pkt, test.nsSrc, test.nsDst, buffer.VectorisedView{})) + payloadLength := hdr.UsedLength() + ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) + ip.Encode(&header.IPv6Fields{ + PayloadLength: uint16(payloadLength), + NextHeader: uint8(header.ICMPv6ProtocolNumber), + HopLimit: 255, + SrcAddr: test.nsSrc, + DstAddr: test.nsDst, + }) + + invalid := s.Stats().ICMP.V6PacketsReceived.Invalid - invalid := s.Stats().ICMP.V6PacketsReceived.Invalid + // Invalid count should initially be 0. + if got := invalid.Value(); got != 0 { + t.Fatalf("got invalid = %d, want = 0", got) + } - // Invalid count should initially be 0. - if got := invalid.Value(); got != 0 { - t.Fatalf("got invalid = %d, want = 0", got) - } + e.InjectLinkAddr(ProtocolNumber, test.nsSrcLinkAddr, stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: hdr.View().ToVectorisedView(), + })) - e.InjectLinkAddr(ProtocolNumber, test.nsSrcLinkAddr, stack.NewPacketBuffer(stack.PacketBufferOptions{ - Data: hdr.View().ToVectorisedView(), - })) + if test.nsInvalid { + if got := invalid.Value(); got != 1 { + t.Fatalf("got invalid = %d, want = 1", got) + } - if test.nsInvalid { - if got := invalid.Value(); got != 1 { - t.Fatalf("got invalid = %d, want = 1", got) - } + if p, got := e.Read(); got { + t.Fatalf("unexpected response to an invalid NS = %+v", p.Pkt) + } - if p, got := e.Read(); got { - t.Fatalf("unexpected response to an invalid NS = %+v", p.Pkt) - } + // If we expected the NS to be invalid, we have nothing else to check. + return + } - // If we expected the NS to be invalid, we have nothing else to check. - return - } + if got := invalid.Value(); got != 0 { + t.Fatalf("got invalid = %d, want = 0", got) + } - if got := invalid.Value(); got != 0 { - t.Fatalf("got invalid = %d, want = 0", got) - } + p, got := e.Read() + if !got { + t.Fatal("expected an NDP NA response") + } - p, got := e.Read() - if !got { - t.Fatal("expected an NDP NA response") - } + if p.Route.RemoteLinkAddress != test.naDstLinkAddr { + t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, test.naDstLinkAddr) + } - if p.Route.RemoteLinkAddress != test.naDstLinkAddr { - t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, test.naDstLinkAddr) + checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()), + checker.SrcAddr(test.naSrc), + checker.DstAddr(test.naDst), + checker.TTL(header.NDPHopLimit), + checker.NDPNA( + checker.NDPNASolicitedFlag(test.naSolicited), + checker.NDPNATargetAddress(nicAddr), + checker.NDPNAOptions([]header.NDPOption{ + header.NDPTargetLinkLayerAddressOption(nicLinkAddr[:]), + }), + )) + }) } - - checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()), - checker.SrcAddr(test.naSrc), - checker.DstAddr(test.naDst), - checker.TTL(header.NDPHopLimit), - checker.NDPNA( - checker.NDPNASolicitedFlag(test.naSolicited), - checker.NDPNATargetAddress(nicAddr), - checker.NDPNAOptions([]header.NDPOption{ - header.NDPTargetLinkLayerAddressOption(nicLinkAddr[:]), - }), - )) }) } } @@ -532,197 +670,380 @@ func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) { } } -func TestNDPValidation(t *testing.T) { - setup := func(t *testing.T) (*stack.Stack, stack.NetworkEndpoint, stack.Route) { - t.Helper() - - // Create a stack with the assigned link-local address lladdr0 - // and an endpoint to lladdr1. - s, ep := setupStackAndEndpoint(t, lladdr0, lladdr1) - - r, err := s.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */) - if err != nil { - t.Fatalf("FindRoute(_) = _, %s, want = _, nil", err) - } - - return s, ep, r - } - - handleIPv6Payload := func(payload buffer.View, hopLimit uint8, atomicFragment bool, ep stack.NetworkEndpoint, r *stack.Route) { - nextHdr := uint8(header.ICMPv6ProtocolNumber) - var extensions buffer.View - if atomicFragment { - extensions = buffer.NewView(header.IPv6FragmentExtHdrLength) - extensions[0] = nextHdr - nextHdr = uint8(header.IPv6FragmentExtHdrIdentifier) - } - - pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ - ReserveHeaderBytes: header.IPv6MinimumSize + len(extensions), - Data: payload.ToVectorisedView(), - }) - ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + len(extensions))) - ip.Encode(&header.IPv6Fields{ - PayloadLength: uint16(len(payload) + len(extensions)), - NextHeader: nextHdr, - HopLimit: hopLimit, - SrcAddr: r.LocalAddress, - DstAddr: r.RemoteAddress, - }) - if n := copy(ip[header.IPv6MinimumSize:], extensions); n != len(extensions) { - t.Fatalf("expected to write %d bytes of extensions, but wrote %d", len(extensions), n) - } - ep.HandlePacket(r, pkt) - } - - var tllData [header.NDPLinkLayerAddressSize]byte - header.NDPOptions(tllData[:]).Serialize(header.NDPOptionsSerializer{ - header.NDPTargetLinkLayerAddressOption(linkAddr1), - }) +// TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache tests +// that receiving a valid NDP NA message with the Target Link Layer Address +// option does not result in a new entry in the neighbor cache for the target +// of the message. +func TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache(t *testing.T) { + const nicID = 1 - types := []struct { - name string - typ header.ICMPv6Type - size int - extraData []byte - statCounter func(tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter + tests := []struct { + name string + optsBuf []byte + isValid bool }{ { - name: "RouterSolicit", - typ: header.ICMPv6RouterSolicit, - size: header.ICMPv6MinimumSize, - statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { - return stats.RouterSolicit - }, - }, - { - name: "RouterAdvert", - typ: header.ICMPv6RouterAdvert, - size: header.ICMPv6HeaderSize + header.NDPRAMinimumSize, - statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { - return stats.RouterAdvert - }, + name: "Valid", + optsBuf: []byte{2, 1, 2, 3, 4, 5, 6, 7}, + isValid: true, }, { - name: "NeighborSolicit", - typ: header.ICMPv6NeighborSolicit, - size: header.ICMPv6NeighborSolicitMinimumSize, - statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { - return stats.NeighborSolicit - }, + name: "Too Small", + optsBuf: []byte{2, 1, 2, 3, 4, 5, 6}, }, { - name: "NeighborAdvert", - typ: header.ICMPv6NeighborAdvert, - size: header.ICMPv6NeighborAdvertMinimumSize, - extraData: tllData[:], - statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { - return stats.NeighborAdvert - }, + name: "Invalid Length", + optsBuf: []byte{2, 2, 2, 3, 4, 5, 6, 7}, }, { - name: "RedirectMsg", - typ: header.ICMPv6RedirectMsg, - size: header.ICMPv6MinimumSize, - statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { - return stats.RedirectMsg + name: "Multiple", + optsBuf: []byte{ + 2, 1, 2, 3, 4, 5, 6, 7, + 2, 1, 2, 3, 4, 5, 6, 8, }, }, } - subTests := []struct { - name string - atomicFragment bool - hopLimit uint8 - code header.ICMPv6Code - valid bool + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, + UseNeighborCache: true, + }) + e := channel.New(0, 1280, linkAddr0) + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + } + if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil { + t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, lladdr0, err) + } + + ndpNASize := header.ICMPv6NeighborAdvertMinimumSize + len(test.optsBuf) + hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNASize) + pkt := header.ICMPv6(hdr.Prepend(ndpNASize)) + pkt.SetType(header.ICMPv6NeighborAdvert) + ns := header.NDPNeighborAdvert(pkt.NDPPayload()) + ns.SetTargetAddress(lladdr1) + opts := ns.Options() + copy(opts, test.optsBuf) + pkt.SetChecksum(header.ICMPv6Checksum(pkt, lladdr1, lladdr0, buffer.VectorisedView{})) + payloadLength := hdr.UsedLength() + ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) + ip.Encode(&header.IPv6Fields{ + PayloadLength: uint16(payloadLength), + NextHeader: uint8(header.ICMPv6ProtocolNumber), + HopLimit: 255, + SrcAddr: lladdr1, + DstAddr: lladdr0, + }) + + invalid := s.Stats().ICMP.V6PacketsReceived.Invalid + + // Invalid count should initially be 0. + if got := invalid.Value(); got != 0 { + t.Fatalf("got invalid = %d, want = 0", got) + } + + e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) + + neighbors, err := s.Neighbors(nicID) + if err != nil { + t.Fatalf("s.Neighbors(%d): %s", nicID, err) + } + + neighborByAddr := make(map[tcpip.Address]stack.NeighborEntry) + for _, n := range neighbors { + if existing, ok := neighborByAddr[n.Addr]; ok { + if diff := cmp.Diff(existing, n); diff != "" { + t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, diff) + } + t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %s", nicID, existing) + } + neighborByAddr[n.Addr] = n + } + + if neigh, ok := neighborByAddr[lladdr1]; ok { + t.Fatalf("unexpectedly got neighbor entry: %s", neigh) + } + + if test.isValid { + // Invalid count should not have increased. + if got := invalid.Value(); got != 0 { + t.Errorf("got invalid = %d, want = 0", got) + } + } else { + // Invalid count should have increased. + if got := invalid.Value(); got != 1 { + t.Errorf("got invalid = %d, want = 1", got) + } + } + }) + } +} + +func TestNDPValidation(t *testing.T) { + stacks := []struct { + name string + useNeighborCache bool }{ { - name: "Valid", - atomicFragment: false, - hopLimit: header.NDPHopLimit, - code: 0, - valid: true, - }, - { - name: "Fragmented", - atomicFragment: true, - hopLimit: header.NDPHopLimit, - code: 0, - valid: false, - }, - { - name: "Invalid hop limit", - atomicFragment: false, - hopLimit: header.NDPHopLimit - 1, - code: 0, - valid: false, + name: "linkAddrCache", + useNeighborCache: false, }, { - name: "Invalid ICMPv6 code", - atomicFragment: false, - hopLimit: header.NDPHopLimit, - code: 1, - valid: false, + name: "neighborCache", + useNeighborCache: true, }, } - for _, typ := range types { - t.Run(typ.name, func(t *testing.T) { - for _, test := range subTests { - t.Run(test.name, func(t *testing.T) { - s, ep, r := setup(t) - defer r.Release() + for _, stackTyp := range stacks { + t.Run(stackTyp.name, func(t *testing.T) { + setup := func(t *testing.T) (*stack.Stack, stack.NetworkEndpoint, stack.Route) { + t.Helper() - stats := s.Stats().ICMP.V6PacketsReceived - invalid := stats.Invalid - typStat := typ.statCounter(stats) + // Create a stack with the assigned link-local address lladdr0 + // and an endpoint to lladdr1. + s, ep := setupStackAndEndpoint(t, lladdr0, lladdr1, stackTyp.useNeighborCache) - icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData))) - copy(icmp[typ.size:], typ.extraData) - icmp.SetType(typ.typ) - icmp.SetCode(test.code) - icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], r.LocalAddress, r.RemoteAddress, buffer.View(typ.extraData).ToVectorisedView())) + r, err := s.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */) + if err != nil { + t.Fatalf("FindRoute(_) = _, %s, want = _, nil", err) + } - // Rx count of the NDP message should initially be 0. - if got := typStat.Value(); got != 0 { - t.Errorf("got %s = %d, want = 0", typ.name, got) - } + return s, ep, r + } - // Invalid count should initially be 0. - if got := invalid.Value(); got != 0 { - t.Errorf("got invalid = %d, want = 0", got) - } + handleIPv6Payload := func(payload buffer.View, hopLimit uint8, atomicFragment bool, ep stack.NetworkEndpoint, r *stack.Route) { + nextHdr := uint8(header.ICMPv6ProtocolNumber) + var extensions buffer.View + if atomicFragment { + extensions = buffer.NewView(header.IPv6FragmentExtHdrLength) + extensions[0] = nextHdr + nextHdr = uint8(header.IPv6FragmentExtHdrIdentifier) + } - if t.Failed() { - t.FailNow() - } + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: header.IPv6MinimumSize + len(extensions), + Data: payload.ToVectorisedView(), + }) + ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + len(extensions))) + ip.Encode(&header.IPv6Fields{ + PayloadLength: uint16(len(payload) + len(extensions)), + NextHeader: nextHdr, + HopLimit: hopLimit, + SrcAddr: r.LocalAddress, + DstAddr: r.RemoteAddress, + }) + if n := copy(ip[header.IPv6MinimumSize:], extensions); n != len(extensions) { + t.Fatalf("expected to write %d bytes of extensions, but wrote %d", len(extensions), n) + } + ep.HandlePacket(r, pkt) + } - handleIPv6Payload(buffer.View(icmp), test.hopLimit, test.atomicFragment, ep, &r) + var tllData [header.NDPLinkLayerAddressSize]byte + header.NDPOptions(tllData[:]).Serialize(header.NDPOptionsSerializer{ + header.NDPTargetLinkLayerAddressOption(linkAddr1), + }) - // Rx count of the NDP packet should have increased. - if got := typStat.Value(); got != 1 { - t.Errorf("got %s = %d, want = 1", typ.name, got) - } + var sllData [header.NDPLinkLayerAddressSize]byte + header.NDPOptions(sllData[:]).Serialize(header.NDPOptionsSerializer{ + header.NDPSourceLinkLayerAddressOption(linkAddr1), + }) - want := uint64(0) - if !test.valid { - // Invalid count should have increased. - want = 1 - } - if got := invalid.Value(); got != want { - t.Errorf("got invalid = %d, want = %d", got, want) + types := []struct { + name string + typ header.ICMPv6Type + size int + extraData []byte + statCounter func(tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter + routerOnly bool + }{ + { + name: "RouterSolicit", + typ: header.ICMPv6RouterSolicit, + size: header.ICMPv6MinimumSize, + statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { + return stats.RouterSolicit + }, + routerOnly: true, + }, + { + name: "RouterAdvert", + typ: header.ICMPv6RouterAdvert, + size: header.ICMPv6HeaderSize + header.NDPRAMinimumSize, + statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { + return stats.RouterAdvert + }, + }, + { + name: "NeighborSolicit", + typ: header.ICMPv6NeighborSolicit, + size: header.ICMPv6NeighborSolicitMinimumSize, + extraData: sllData[:], + statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { + return stats.NeighborSolicit + }, + }, + { + name: "NeighborAdvert", + typ: header.ICMPv6NeighborAdvert, + size: header.ICMPv6NeighborAdvertMinimumSize, + extraData: tllData[:], + statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { + return stats.NeighborAdvert + }, + }, + { + name: "RedirectMsg", + typ: header.ICMPv6RedirectMsg, + size: header.ICMPv6MinimumSize, + statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter { + return stats.RedirectMsg + }, + }, + } + + subTests := []struct { + name string + atomicFragment bool + hopLimit uint8 + code header.ICMPv6Code + valid bool + }{ + { + name: "Valid", + atomicFragment: false, + hopLimit: header.NDPHopLimit, + code: 0, + valid: true, + }, + { + name: "Fragmented", + atomicFragment: true, + hopLimit: header.NDPHopLimit, + code: 0, + valid: false, + }, + { + name: "Invalid hop limit", + atomicFragment: false, + hopLimit: header.NDPHopLimit - 1, + code: 0, + valid: false, + }, + { + name: "Invalid ICMPv6 code", + atomicFragment: false, + hopLimit: header.NDPHopLimit, + code: 1, + valid: false, + }, + } + + for _, typ := range types { + for _, isRouter := range []bool{false, true} { + name := typ.name + if isRouter { + name += " (Router)" } - }) + + t.Run(name, func(t *testing.T) { + for _, test := range subTests { + t.Run(test.name, func(t *testing.T) { + s, ep, r := setup(t) + defer r.Release() + + if isRouter { + // Enabling forwarding makes the stack act as a router. + s.SetForwarding(true) + } + + stats := s.Stats().ICMP.V6PacketsReceived + invalid := stats.Invalid + routerOnly := stats.RouterOnlyPacketsDroppedByHost + typStat := typ.statCounter(stats) + + icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData))) + copy(icmp[typ.size:], typ.extraData) + icmp.SetType(typ.typ) + icmp.SetCode(test.code) + icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], r.LocalAddress, r.RemoteAddress, buffer.View(typ.extraData).ToVectorisedView())) + + // Rx count of the NDP message should initially be 0. + if got := typStat.Value(); got != 0 { + t.Errorf("got %s = %d, want = 0", typ.name, got) + } + + // Invalid count should initially be 0. + if got := invalid.Value(); got != 0 { + t.Errorf("got invalid = %d, want = 0", got) + } + + // RouterOnlyPacketsReceivedByHost count should initially be 0. + if got := routerOnly.Value(); got != 0 { + t.Errorf("got RouterOnlyPacketsReceivedByHost = %d, want = 0", got) + } + + if t.Failed() { + t.FailNow() + } + + handleIPv6Payload(buffer.View(icmp), test.hopLimit, test.atomicFragment, ep, &r) + + // Rx count of the NDP packet should have increased. + if got := typStat.Value(); got != 1 { + t.Errorf("got %s = %d, want = 1", typ.name, got) + } + + want := uint64(0) + if !test.valid { + // Invalid count should have increased. + want = 1 + } + if got := invalid.Value(); got != want { + t.Errorf("got invalid = %d, want = %d", got, want) + } + + want = 0 + if test.valid && !isRouter && typ.routerOnly { + // RouterOnlyPacketsReceivedByHost count should have increased. + want = 1 + } + if got := routerOnly.Value(); got != want { + t.Errorf("got RouterOnlyPacketsReceivedByHost = %d, want = %d", got, want) + } + + }) + } + }) + } } }) } + } // TestRouterAdvertValidation tests that when the NIC is configured to handle // NDP Router Advertisement packets, it validates the Router Advertisement // properly before handling them. func TestRouterAdvertValidation(t *testing.T) { + stacks := []struct { + name string + useNeighborCache bool + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + }, + { + name: "neighborCache", + useNeighborCache: true, + }, + } + tests := []struct { name string src tcpip.Address @@ -844,61 +1165,67 @@ func TestRouterAdvertValidation(t *testing.T) { }, } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - e := channel.New(10, 1280, linkAddr1) - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, - }) - - if err := s.CreateNIC(1, e); err != nil { - t.Fatalf("CreateNIC(_) = %s", err) - } + for _, stackTyp := range stacks { + t.Run(stackTyp.name, func(t *testing.T) { + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + e := channel.New(10, 1280, linkAddr1) + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{NewProtocol()}, + UseNeighborCache: stackTyp.useNeighborCache, + }) + + if err := s.CreateNIC(1, e); err != nil { + t.Fatalf("CreateNIC(_) = %s", err) + } - icmpSize := header.ICMPv6HeaderSize + len(test.ndpPayload) - hdr := buffer.NewPrependable(header.IPv6MinimumSize + icmpSize) - pkt := header.ICMPv6(hdr.Prepend(icmpSize)) - pkt.SetType(header.ICMPv6RouterAdvert) - pkt.SetCode(test.code) - copy(pkt.NDPPayload(), test.ndpPayload) - payloadLength := hdr.UsedLength() - pkt.SetChecksum(header.ICMPv6Checksum(pkt, test.src, header.IPv6AllNodesMulticastAddress, buffer.VectorisedView{})) - ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) - ip.Encode(&header.IPv6Fields{ - PayloadLength: uint16(payloadLength), - NextHeader: uint8(icmp.ProtocolNumber6), - HopLimit: test.hopLimit, - SrcAddr: test.src, - DstAddr: header.IPv6AllNodesMulticastAddress, - }) + icmpSize := header.ICMPv6HeaderSize + len(test.ndpPayload) + hdr := buffer.NewPrependable(header.IPv6MinimumSize + icmpSize) + pkt := header.ICMPv6(hdr.Prepend(icmpSize)) + pkt.SetType(header.ICMPv6RouterAdvert) + pkt.SetCode(test.code) + copy(pkt.NDPPayload(), test.ndpPayload) + payloadLength := hdr.UsedLength() + pkt.SetChecksum(header.ICMPv6Checksum(pkt, test.src, header.IPv6AllNodesMulticastAddress, buffer.VectorisedView{})) + ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) + ip.Encode(&header.IPv6Fields{ + PayloadLength: uint16(payloadLength), + NextHeader: uint8(icmp.ProtocolNumber6), + HopLimit: test.hopLimit, + SrcAddr: test.src, + DstAddr: header.IPv6AllNodesMulticastAddress, + }) - stats := s.Stats().ICMP.V6PacketsReceived - invalid := stats.Invalid - rxRA := stats.RouterAdvert + stats := s.Stats().ICMP.V6PacketsReceived + invalid := stats.Invalid + rxRA := stats.RouterAdvert - if got := invalid.Value(); got != 0 { - t.Fatalf("got invalid = %d, want = 0", got) - } - if got := rxRA.Value(); got != 0 { - t.Fatalf("got rxRA = %d, want = 0", got) - } + if got := invalid.Value(); got != 0 { + t.Fatalf("got invalid = %d, want = 0", got) + } + if got := rxRA.Value(); got != 0 { + t.Fatalf("got rxRA = %d, want = 0", got) + } - e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{ - Data: hdr.View().ToVectorisedView(), - })) + e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: hdr.View().ToVectorisedView(), + })) - if got := rxRA.Value(); got != 1 { - t.Fatalf("got rxRA = %d, want = 1", got) - } + if got := rxRA.Value(); got != 1 { + t.Fatalf("got rxRA = %d, want = 1", got) + } - if test.expectedSuccess { - if got := invalid.Value(); got != 0 { - t.Fatalf("got invalid = %d, want = 0", got) - } - } else { - if got := invalid.Value(); got != 1 { - t.Fatalf("got invalid = %d, want = 1", got) - } + if test.expectedSuccess { + if got := invalid.Value(); got != 0 { + t.Fatalf("got invalid = %d, want = 0", got) + } + } else { + if got := invalid.Value(); got != 1 { + t.Fatalf("got invalid = %d, want = 1", got) + } + } + }) } }) } diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go index 5a684eb9d..91165ebc7 100644 --- a/pkg/tcpip/stack/forwarder_test.go +++ b/pkg/tcpip/stack/forwarder_test.go @@ -51,6 +51,8 @@ type fwdTestNetworkEndpoint struct { ep LinkEndpoint } +var _ NetworkEndpoint = (*fwdTestNetworkEndpoint)(nil) + func (f *fwdTestNetworkEndpoint) MTU() uint32 { return f.ep.MTU() - uint32(f.MaxHeaderLength()) } @@ -110,11 +112,13 @@ func (*fwdTestNetworkEndpoint) Close() {} // resolution. type fwdTestNetworkProtocol struct { addrCache *linkAddrCache + neigh *neighborCache addrResolveDelay time.Duration - onLinkAddressResolved func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) + onLinkAddressResolved func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) onResolveStaticAddress func(tcpip.Address) (tcpip.LinkAddress, bool) } +var _ NetworkProtocol = (*fwdTestNetworkProtocol)(nil) var _ LinkAddressResolver = (*fwdTestNetworkProtocol)(nil) func (f *fwdTestNetworkProtocol) Number() tcpip.NetworkProtocolNumber { @@ -141,7 +145,7 @@ func (*fwdTestNetworkProtocol) Parse(pkt *PacketBuffer) (tcpip.TransportProtocol return tcpip.TransportProtocolNumber(netHeader[protocolNumberOffset]), true, true } -func (f *fwdTestNetworkProtocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, ep LinkEndpoint, _ *Stack) NetworkEndpoint { +func (f *fwdTestNetworkProtocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _ NUDHandler, dispatcher TransportDispatcher, ep LinkEndpoint, _ *Stack) NetworkEndpoint { return &fwdTestNetworkEndpoint{ nicID: nicID, proto: f, @@ -163,9 +167,9 @@ func (f *fwdTestNetworkProtocol) Close() {} func (f *fwdTestNetworkProtocol) Wait() {} func (f *fwdTestNetworkProtocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP LinkEndpoint) *tcpip.Error { - if f.addrCache != nil && f.onLinkAddressResolved != nil { + if f.onLinkAddressResolved != nil { time.AfterFunc(f.addrResolveDelay, func() { - f.onLinkAddressResolved(f.addrCache, addr, remoteLinkAddr) + f.onLinkAddressResolved(f.addrCache, f.neigh, addr, remoteLinkAddr) }) } return nil @@ -300,13 +304,16 @@ func (e *fwdTestLinkEndpoint) AddHeader(local, remote tcpip.LinkAddress, protoco panic("not implemented") } -func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *fwdTestLinkEndpoint) { +func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol, useNeighborCache bool) (ep1, ep2 *fwdTestLinkEndpoint) { // Create a stack with the network protocol and two NICs. s := New(Options{ NetworkProtocols: []NetworkProtocol{proto}, + UseNeighborCache: useNeighborCache, }) - proto.addrCache = s.linkAddrCache + if !useNeighborCache { + proto.addrCache = s.linkAddrCache + } // Enable forwarding. s.SetForwarding(true) @@ -337,6 +344,15 @@ func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *f t.Fatal("AddAddress #2 failed:", err) } + if useNeighborCache { + // Control the neighbor cache for NIC 2. + nic, ok := s.nics[2] + if !ok { + t.Fatal("failed to get the neighbor cache for NIC 2") + } + proto.neigh = nic.neigh + } + // Route all packets to NIC 2. { subnet, err := tcpip.NewSubnet("\x00", "\x00") @@ -350,79 +366,129 @@ func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *f } func TestForwardingWithStaticResolver(t *testing.T) { - // Create a network protocol with a static resolver. - proto := &fwdTestNetworkProtocol{ - onResolveStaticAddress: - // The network address 3 is resolved to the link address "c". - func(addr tcpip.Address) (tcpip.LinkAddress, bool) { - if addr == "\x03" { - return "c", true - } - return "", false + tests := []struct { + name string + useNeighborCache bool + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + }, + { + name: "neighborCache", + useNeighborCache: true, }, } - ep1, ep2 := fwdTestNetFactory(t, proto) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Create a network protocol with a static resolver. + proto := &fwdTestNetworkProtocol{ + onResolveStaticAddress: + // The network address 3 is resolved to the link address "c". + func(addr tcpip.Address) (tcpip.LinkAddress, bool) { + if addr == "\x03" { + return "c", true + } + return "", false + }, + } - // Inject an inbound packet to address 3 on NIC 1, and see if it is - // forwarded to NIC 2. - buf := buffer.NewView(30) - buf[dstAddrOffset] = 3 - ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ - Data: buf.ToVectorisedView(), - })) + ep1, ep2 := fwdTestNetFactory(t, proto, test.useNeighborCache) - var p fwdTestPacketInfo + // Inject an inbound packet to address 3 on NIC 1, and see if it is + // forwarded to NIC 2. + buf := buffer.NewView(30) + buf[dstAddrOffset] = 3 + ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ + Data: buf.ToVectorisedView(), + })) - select { - case p = <-ep2.C: - default: - t.Fatal("packet not forwarded") - } + var p fwdTestPacketInfo - // Test that the static address resolution happened correctly. - if p.RemoteLinkAddress != "c" { - t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) - } - if p.LocalLinkAddress != "b" { - t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) + select { + case p = <-ep2.C: + default: + t.Fatal("packet not forwarded") + } + + // Test that the static address resolution happened correctly. + if p.RemoteLinkAddress != "c" { + t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) + } + if p.LocalLinkAddress != "b" { + t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) + } + }) } } func TestForwardingWithFakeResolver(t *testing.T) { - // Create a network protocol with a fake resolver. - proto := &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Any address will be resolved to the link address "c". - cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") + tests := []struct { + name string + useNeighborCache bool + proto *fwdTestNetworkProtocol + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { + // Any address will be resolved to the link address "c". + cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") + }, + }, + }, + { + name: "neighborCache", + useNeighborCache: true, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + t.Helper() + if len(remoteLinkAddr) != 0 { + t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + } + // Any address will be resolved to the link address "c". + neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + Solicited: true, + Override: false, + IsRouter: false, + }) + }, + }, }, } - ep1, ep2 := fwdTestNetFactory(t, proto) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) - // Inject an inbound packet to address 3 on NIC 1, and see if it is - // forwarded to NIC 2. - buf := buffer.NewView(30) - buf[dstAddrOffset] = 3 - ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ - Data: buf.ToVectorisedView(), - })) + // Inject an inbound packet to address 3 on NIC 1, and see if it is + // forwarded to NIC 2. + buf := buffer.NewView(30) + buf[dstAddrOffset] = 3 + ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ + Data: buf.ToVectorisedView(), + })) - var p fwdTestPacketInfo + var p fwdTestPacketInfo - select { - case p = <-ep2.C: - case <-time.After(time.Second): - t.Fatal("packet not forwarded") - } + select { + case p = <-ep2.C: + case <-time.After(time.Second): + t.Fatal("packet not forwarded") + } - // Test that the address resolution happened correctly. - if p.RemoteLinkAddress != "c" { - t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) - } - if p.LocalLinkAddress != "b" { - t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) + // Test that the address resolution happened correctly. + if p.RemoteLinkAddress != "c" { + t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) + } + if p.LocalLinkAddress != "b" { + t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) + } + }) } } @@ -430,7 +496,9 @@ func TestForwardingWithNoResolver(t *testing.T) { // Create a network protocol without a resolver. proto := &fwdTestNetworkProtocol{} - ep1, ep2 := fwdTestNetFactory(t, proto) + // Whether or not we use the neighbor cache here does not matter since + // neither linkAddrCache nor neighborCache will be used. + ep1, ep2 := fwdTestNetFactory(t, proto, false /* useNeighborCache */) // inject an inbound packet to address 3 on NIC 1, and see if it is // forwarded to NIC 2. @@ -448,203 +516,334 @@ func TestForwardingWithNoResolver(t *testing.T) { } func TestForwardingWithFakeResolverPartialTimeout(t *testing.T) { - // Create a network protocol with a fake resolver. - proto := &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Only packets to address 3 will be resolved to the - // link address "c". - if addr == "\x03" { - cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") - } + tests := []struct { + name string + useNeighborCache bool + proto *fwdTestNetworkProtocol + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { + // Only packets to address 3 will be resolved to the + // link address "c". + if addr == "\x03" { + cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") + } + }, + }, + }, + { + name: "neighborCache", + useNeighborCache: true, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + t.Helper() + if len(remoteLinkAddr) != 0 { + t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + } + // Only packets to address 3 will be resolved to the + // link address "c". + if addr == "\x03" { + neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + Solicited: true, + Override: false, + IsRouter: false, + }) + } + }, + }, }, } - ep1, ep2 := fwdTestNetFactory(t, proto) - - // Inject an inbound packet to address 4 on NIC 1. This packet should - // not be forwarded. - buf := buffer.NewView(30) - buf[dstAddrOffset] = 4 - ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ - Data: buf.ToVectorisedView(), - })) - - // Inject an inbound packet to address 3 on NIC 1, and see if it is - // forwarded to NIC 2. - buf = buffer.NewView(30) - buf[dstAddrOffset] = 3 - ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ - Data: buf.ToVectorisedView(), - })) - - var p fwdTestPacketInfo - - select { - case p = <-ep2.C: - case <-time.After(time.Second): - t.Fatal("packet not forwarded") - } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) + + // Inject an inbound packet to address 4 on NIC 1. This packet should + // not be forwarded. + buf := buffer.NewView(30) + buf[dstAddrOffset] = 4 + ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ + Data: buf.ToVectorisedView(), + })) + + // Inject an inbound packet to address 3 on NIC 1, and see if it is + // forwarded to NIC 2. + buf = buffer.NewView(30) + buf[dstAddrOffset] = 3 + ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ + Data: buf.ToVectorisedView(), + })) + + var p fwdTestPacketInfo + + select { + case p = <-ep2.C: + case <-time.After(time.Second): + t.Fatal("packet not forwarded") + } - if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] != 3 { - t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want = 3", nh[dstAddrOffset]) - } + if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] != 3 { + t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want = 3", nh[dstAddrOffset]) + } - // Test that the address resolution happened correctly. - if p.RemoteLinkAddress != "c" { - t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) - } - if p.LocalLinkAddress != "b" { - t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) + // Test that the address resolution happened correctly. + if p.RemoteLinkAddress != "c" { + t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) + } + if p.LocalLinkAddress != "b" { + t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) + } + }) } } func TestForwardingWithFakeResolverTwoPackets(t *testing.T) { - // Create a network protocol with a fake resolver. - proto := &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Any packets will be resolved to the link address "c". - cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") + tests := []struct { + name string + useNeighborCache bool + proto *fwdTestNetworkProtocol + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { + // Any packets will be resolved to the link address "c". + cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") + }, + }, + }, + { + name: "neighborCache", + useNeighborCache: true, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + t.Helper() + if len(remoteLinkAddr) != 0 { + t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + } + // Any packets will be resolved to the link address "c". + neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + Solicited: true, + Override: false, + IsRouter: false, + }) + }, + }, }, } - ep1, ep2 := fwdTestNetFactory(t, proto) - - // Inject two inbound packets to address 3 on NIC 1. - for i := 0; i < 2; i++ { - buf := buffer.NewView(30) - buf[dstAddrOffset] = 3 - ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ - Data: buf.ToVectorisedView(), - })) - } - - for i := 0; i < 2; i++ { - var p fwdTestPacketInfo - - select { - case p = <-ep2.C: - case <-time.After(time.Second): - t.Fatal("packet not forwarded") - } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) - if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] != 3 { - t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want = 3", nh[dstAddrOffset]) - } + // Inject two inbound packets to address 3 on NIC 1. + for i := 0; i < 2; i++ { + buf := buffer.NewView(30) + buf[dstAddrOffset] = 3 + ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ + Data: buf.ToVectorisedView(), + })) + } - // Test that the address resolution happened correctly. - if p.RemoteLinkAddress != "c" { - t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) - } - if p.LocalLinkAddress != "b" { - t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) - } + for i := 0; i < 2; i++ { + var p fwdTestPacketInfo + + select { + case p = <-ep2.C: + case <-time.After(time.Second): + t.Fatal("packet not forwarded") + } + + if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] != 3 { + t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want = 3", nh[dstAddrOffset]) + } + + // Test that the address resolution happened correctly. + if p.RemoteLinkAddress != "c" { + t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) + } + if p.LocalLinkAddress != "b" { + t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) + } + } + }) } } func TestForwardingWithFakeResolverManyPackets(t *testing.T) { - // Create a network protocol with a fake resolver. - proto := &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Any packets will be resolved to the link address "c". - cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") + tests := []struct { + name string + useNeighborCache bool + proto *fwdTestNetworkProtocol + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { + // Any packets will be resolved to the link address "c". + cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") + }, + }, + }, + { + name: "neighborCache", + useNeighborCache: true, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + t.Helper() + if len(remoteLinkAddr) != 0 { + t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + } + // Any packets will be resolved to the link address "c". + neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + Solicited: true, + Override: false, + IsRouter: false, + }) + }, + }, }, } - ep1, ep2 := fwdTestNetFactory(t, proto) - - for i := 0; i < maxPendingPacketsPerResolution+5; i++ { - // Inject inbound 'maxPendingPacketsPerResolution + 5' packets on NIC 1. - buf := buffer.NewView(30) - buf[dstAddrOffset] = 3 - // Set the packet sequence number. - binary.BigEndian.PutUint16(buf[fwdTestNetHeaderLen:], uint16(i)) - ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ - Data: buf.ToVectorisedView(), - })) - } - - for i := 0; i < maxPendingPacketsPerResolution; i++ { - var p fwdTestPacketInfo + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) - select { - case p = <-ep2.C: - case <-time.After(time.Second): - t.Fatal("packet not forwarded") - } - - b := PayloadSince(p.Pkt.NetworkHeader()) - if b[dstAddrOffset] != 3 { - t.Fatalf("got b[dstAddrOffset] = %d, want = 3", b[dstAddrOffset]) - } - if len(b) < fwdTestNetHeaderLen+2 { - t.Fatalf("packet is too short to hold a sequence number: len(b) = %d", b) - } - seqNumBuf := b[fwdTestNetHeaderLen:] - - // The first 5 packets should not be forwarded so the sequence number should - // start with 5. - want := uint16(i + 5) - if n := binary.BigEndian.Uint16(seqNumBuf); n != want { - t.Fatalf("got the packet #%d, want = #%d", n, want) - } + for i := 0; i < maxPendingPacketsPerResolution+5; i++ { + // Inject inbound 'maxPendingPacketsPerResolution + 5' packets on NIC 1. + buf := buffer.NewView(30) + buf[dstAddrOffset] = 3 + // Set the packet sequence number. + binary.BigEndian.PutUint16(buf[fwdTestNetHeaderLen:], uint16(i)) + ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ + Data: buf.ToVectorisedView(), + })) + } - // Test that the address resolution happened correctly. - if p.RemoteLinkAddress != "c" { - t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) - } - if p.LocalLinkAddress != "b" { - t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) - } + for i := 0; i < maxPendingPacketsPerResolution; i++ { + var p fwdTestPacketInfo + + select { + case p = <-ep2.C: + case <-time.After(time.Second): + t.Fatal("packet not forwarded") + } + + b := PayloadSince(p.Pkt.NetworkHeader()) + if b[dstAddrOffset] != 3 { + t.Fatalf("got b[dstAddrOffset] = %d, want = 3", b[dstAddrOffset]) + } + if len(b) < fwdTestNetHeaderLen+2 { + t.Fatalf("packet is too short to hold a sequence number: len(b) = %d", b) + } + seqNumBuf := b[fwdTestNetHeaderLen:] + + // The first 5 packets should not be forwarded so the sequence number should + // start with 5. + want := uint16(i + 5) + if n := binary.BigEndian.Uint16(seqNumBuf); n != want { + t.Fatalf("got the packet #%d, want = #%d", n, want) + } + + // Test that the address resolution happened correctly. + if p.RemoteLinkAddress != "c" { + t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) + } + if p.LocalLinkAddress != "b" { + t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) + } + } + }) } } func TestForwardingWithFakeResolverManyResolutions(t *testing.T) { - // Create a network protocol with a fake resolver. - proto := &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Any packets will be resolved to the link address "c". - cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") + tests := []struct { + name string + useNeighborCache bool + proto *fwdTestNetworkProtocol + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { + // Any packets will be resolved to the link address "c". + cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c") + }, + }, + }, + { + name: "neighborCache", + useNeighborCache: true, + proto: &fwdTestNetworkProtocol{ + addrResolveDelay: 500 * time.Millisecond, + onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + t.Helper() + if len(remoteLinkAddr) != 0 { + t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + } + // Any packets will be resolved to the link address "c". + neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + Solicited: true, + Override: false, + IsRouter: false, + }) + }, + }, }, } - ep1, ep2 := fwdTestNetFactory(t, proto) - - for i := 0; i < maxPendingResolutions+5; i++ { - // Inject inbound 'maxPendingResolutions + 5' packets on NIC 1. - // Each packet has a different destination address (3 to - // maxPendingResolutions + 7). - buf := buffer.NewView(30) - buf[dstAddrOffset] = byte(3 + i) - ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ - Data: buf.ToVectorisedView(), - })) - } - - for i := 0; i < maxPendingResolutions; i++ { - var p fwdTestPacketInfo - - select { - case p = <-ep2.C: - case <-time.After(time.Second): - t.Fatal("packet not forwarded") - } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) - // The first 5 packets (address 3 to 7) should not be forwarded - // because their address resolutions are interrupted. - if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] < 8 { - t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want p.Pkt.NetworkHeader[dstAddrOffset] >= 8", nh[dstAddrOffset]) - } + for i := 0; i < maxPendingResolutions+5; i++ { + // Inject inbound 'maxPendingResolutions + 5' packets on NIC 1. + // Each packet has a different destination address (3 to + // maxPendingResolutions + 7). + buf := buffer.NewView(30) + buf[dstAddrOffset] = byte(3 + i) + ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{ + Data: buf.ToVectorisedView(), + })) + } - // Test that the address resolution happened correctly. - if p.RemoteLinkAddress != "c" { - t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) - } - if p.LocalLinkAddress != "b" { - t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) - } + for i := 0; i < maxPendingResolutions; i++ { + var p fwdTestPacketInfo + + select { + case p = <-ep2.C: + case <-time.After(time.Second): + t.Fatal("packet not forwarded") + } + + // The first 5 packets (address 3 to 7) should not be forwarded + // because their address resolutions are interrupted. + if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] < 8 { + t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want p.Pkt.NetworkHeader[dstAddrOffset] >= 8", nh[dstAddrOffset]) + } + + // Test that the address resolution happened correctly. + if p.RemoteLinkAddress != "c" { + t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress) + } + if p.LocalLinkAddress != "b" { + t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress) + } + } + }) } } diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go index b15b8d1cb..14fb4239b 100644 --- a/pkg/tcpip/stack/linkaddrcache_test.go +++ b/pkg/tcpip/stack/linkaddrcache_test.go @@ -275,3 +275,71 @@ func TestStaticResolution(t *testing.T) { t.Errorf("c.get(%q)=%q, want %q", string(addr), string(got), string(want)) } } + +// TestCacheWaker verifies that RemoveWaker removes a waker previously added +// through get(). +func TestCacheWaker(t *testing.T) { + c := newLinkAddrCache(1<<63-1, 1*time.Second, 3) + + // First, sanity check that wakers are working. + { + linkRes := &testLinkAddressResolver{cache: c} + s := sleep.Sleeper{} + defer s.Done() + + const wakerID = 1 + w := sleep.Waker{} + s.AddWaker(&w, wakerID) + + e := testAddrs[0] + + if _, _, err := c.get(e.addr, linkRes, "", nil, &w); err != tcpip.ErrWouldBlock { + t.Fatalf("got c.get(%q, _, _, _, _) = %s, want = %s", e.addr.Addr, err, tcpip.ErrWouldBlock) + } + id, ok := s.Fetch(true /* block */) + if !ok { + t.Fatal("got s.Fetch(true) = (_, false), want = (_, true)") + } + if id != wakerID { + t.Fatalf("got s.Fetch(true) = (%d, %t), want = (%d, true)", id, ok, wakerID) + } + + if got, _, err := c.get(e.addr, linkRes, "", nil, nil); err != nil { + t.Fatalf("c.get(%q, _, _, _, _): %s", e.addr.Addr, err) + } else if got != e.linkAddr { + t.Fatalf("got c.get(%q) = %q, want = %q", e.addr.Addr, got, e.linkAddr) + } + } + + // Check that RemoveWaker works. + { + linkRes := &testLinkAddressResolver{cache: c} + s := sleep.Sleeper{} + defer s.Done() + + const wakerID = 2 // different than the ID used in the sanity check + w := sleep.Waker{} + s.AddWaker(&w, wakerID) + + e := testAddrs[1] + linkRes.onLinkAddressRequest = func() { + // Remove the waker before the linkAddrCache has the opportunity to send + // a notification. + c.removeWaker(e.addr, &w) + } + + if _, _, err := c.get(e.addr, linkRes, "", nil, &w); err != tcpip.ErrWouldBlock { + t.Fatalf("got c.get(%q, _, _, _, _) = %s, want = %s", e.addr.Addr, err, tcpip.ErrWouldBlock) + } + + if got, err := getBlocking(c, e.addr, linkRes); err != nil { + t.Fatalf("c.get(%q, _, _, _, _): %s", e.addr.Addr, err) + } else if got != e.linkAddr { + t.Fatalf("c.get(%q) = %q, want = %q", e.addr.Addr, got, e.linkAddr) + } + + if id, ok := s.Fetch(false /* block */); ok { + t.Fatalf("unexpected notification from waker with id %d", id) + } + } +} diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index 21bf53010..67dc5364f 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -2787,7 +2787,7 @@ func TestMixedSLAACAddrConflictRegen(t *testing.T) { // stack.Stack will have a default route through the router (llAddr3) installed // and a static link-address (linkAddr3) added to the link address cache for the // router. -func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID) (*ndpDispatcher, *channel.Endpoint, *stack.Stack) { +func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID, useNeighborCache bool) (*ndpDispatcher, *channel.Endpoint, *stack.Stack) { t.Helper() ndpDisp := &ndpDispatcher{ autoGenAddrC: make(chan ndpAutoGenAddrEvent, 1), @@ -2800,7 +2800,8 @@ func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID) (*nd HandleRAs: true, AutoGenGlobalAddresses: true, }, - NDPDisp: ndpDisp, + NDPDisp: ndpDisp, + UseNeighborCache: useNeighborCache, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -2810,7 +2811,11 @@ func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID) (*nd Gateway: llAddr3, NIC: nicID, }}) - s.AddLinkAddress(nicID, llAddr3, linkAddr3) + if useNeighborCache { + s.AddStaticNeighbor(nicID, llAddr3, linkAddr3) + } else { + s.AddLinkAddress(nicID, llAddr3, linkAddr3) + } return ndpDisp, e, s } @@ -2884,110 +2889,128 @@ func addrForNewConnectionWithAddr(t *testing.T, s *stack.Stack, addr tcpip.FullA // TestAutoGenAddrDeprecateFromPI tests deprecating a SLAAC address when // receiving a PI with 0 preferred lifetime. func TestAutoGenAddrDeprecateFromPI(t *testing.T) { - const nicID = 1 + stacks := []struct { + name string + useNeighborCache bool + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + }, + { + name: "neighborCache", + useNeighborCache: true, + }, + } - prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1) - prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1) + for _, stackTyp := range stacks { + t.Run(stackTyp.name, func(t *testing.T) { + const nicID = 1 - ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID) + prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1) + prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1) - expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) { - t.Helper() + ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID, stackTyp.useNeighborCache) - select { - case e := <-ndpDisp.autoGenAddrC: - if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" { - t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) + expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) { + t.Helper() + + select { + case e := <-ndpDisp.autoGenAddrC: + if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" { + t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) + } + default: + t.Fatal("expected addr auto gen event") + } } - default: - t.Fatal("expected addr auto gen event") - } - } - expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) { - t.Helper() + expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) { + t.Helper() - if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil { - t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err) - } else if got != addr { - t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr) - } + if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil { + t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err) + } else if got != addr { + t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr) + } - if got := addrForNewConnection(t, s); got != addr.Address { - t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address) - } - } + if got := addrForNewConnection(t, s); got != addr.Address { + t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address) + } + } - // Receive PI for prefix1. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 100)) - expectAutoGenAddrEvent(addr1, newAddr) - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { - t.Fatalf("should have %s in the list of addresses", addr1) - } - expectPrimaryAddr(addr1) + // Receive PI for prefix1. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 100)) + expectAutoGenAddrEvent(addr1, newAddr) + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { + t.Fatalf("should have %s in the list of addresses", addr1) + } + expectPrimaryAddr(addr1) - // Deprecate addr for prefix1 immedaitely. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 0)) - expectAutoGenAddrEvent(addr1, deprecatedAddr) - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { - t.Fatalf("should have %s in the list of addresses", addr1) - } - // addr should still be the primary endpoint as there are no other addresses. - expectPrimaryAddr(addr1) + // Deprecate addr for prefix1 immedaitely. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 0)) + expectAutoGenAddrEvent(addr1, deprecatedAddr) + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { + t.Fatalf("should have %s in the list of addresses", addr1) + } + // addr should still be the primary endpoint as there are no other addresses. + expectPrimaryAddr(addr1) - // Refresh lifetimes of addr generated from prefix1. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 100)) - select { - case <-ndpDisp.autoGenAddrC: - t.Fatal("unexpectedly got an auto-generated event") - default: - } - expectPrimaryAddr(addr1) + // Refresh lifetimes of addr generated from prefix1. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 100)) + select { + case <-ndpDisp.autoGenAddrC: + t.Fatal("unexpectedly got an auto-generated event") + default: + } + expectPrimaryAddr(addr1) - // Receive PI for prefix2. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100)) - expectAutoGenAddrEvent(addr2, newAddr) - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { - t.Fatalf("should have %s in the list of addresses", addr2) - } - expectPrimaryAddr(addr2) + // Receive PI for prefix2. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100)) + expectAutoGenAddrEvent(addr2, newAddr) + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { + t.Fatalf("should have %s in the list of addresses", addr2) + } + expectPrimaryAddr(addr2) - // Deprecate addr for prefix2 immedaitely. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 0)) - expectAutoGenAddrEvent(addr2, deprecatedAddr) - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { - t.Fatalf("should have %s in the list of addresses", addr2) - } - // addr1 should be the primary endpoint now since addr2 is deprecated but - // addr1 is not. - expectPrimaryAddr(addr1) - // addr2 is deprecated but if explicitly requested, it should be used. - fullAddr2 := tcpip.FullAddress{Addr: addr2.Address, NIC: nicID} - if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) - } + // Deprecate addr for prefix2 immedaitely. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 0)) + expectAutoGenAddrEvent(addr2, deprecatedAddr) + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { + t.Fatalf("should have %s in the list of addresses", addr2) + } + // addr1 should be the primary endpoint now since addr2 is deprecated but + // addr1 is not. + expectPrimaryAddr(addr1) + // addr2 is deprecated but if explicitly requested, it should be used. + fullAddr2 := tcpip.FullAddress{Addr: addr2.Address, NIC: nicID} + if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) + } - // Another PI w/ 0 preferred lifetime should not result in a deprecation - // event. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 0)) - select { - case <-ndpDisp.autoGenAddrC: - t.Fatal("unexpectedly got an auto-generated event") - default: - } - expectPrimaryAddr(addr1) - if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) - } + // Another PI w/ 0 preferred lifetime should not result in a deprecation + // event. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 0)) + select { + case <-ndpDisp.autoGenAddrC: + t.Fatal("unexpectedly got an auto-generated event") + default: + } + expectPrimaryAddr(addr1) + if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) + } - // Refresh lifetimes of addr generated from prefix2. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100)) - select { - case <-ndpDisp.autoGenAddrC: - t.Fatal("unexpectedly got an auto-generated event") - default: + // Refresh lifetimes of addr generated from prefix2. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100)) + select { + case <-ndpDisp.autoGenAddrC: + t.Fatal("unexpectedly got an auto-generated event") + default: + } + expectPrimaryAddr(addr2) + }) } - expectPrimaryAddr(addr2) } // TestAutoGenAddrJobDeprecation tests that an address is properly deprecated @@ -2996,217 +3019,236 @@ func TestAutoGenAddrJobDeprecation(t *testing.T) { const nicID = 1 const newMinVL = 2 newMinVLDuration := newMinVL * time.Second - saved := stack.MinPrefixInformationValidLifetimeForUpdate - defer func() { - stack.MinPrefixInformationValidLifetimeForUpdate = saved - }() - stack.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration - prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1) - prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1) + stacks := []struct { + name string + useNeighborCache bool + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + }, + { + name: "neighborCache", + useNeighborCache: true, + }, + } - ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID) + for _, stackTyp := range stacks { + t.Run(stackTyp.name, func(t *testing.T) { + saved := stack.MinPrefixInformationValidLifetimeForUpdate + defer func() { + stack.MinPrefixInformationValidLifetimeForUpdate = saved + }() + stack.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration - expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) { - t.Helper() + prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1) + prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1) - select { - case e := <-ndpDisp.autoGenAddrC: - if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" { - t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) + ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID, stackTyp.useNeighborCache) + + expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) { + t.Helper() + + select { + case e := <-ndpDisp.autoGenAddrC: + if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" { + t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) + } + default: + t.Fatal("expected addr auto gen event") + } } - default: - t.Fatal("expected addr auto gen event") - } - } - expectAutoGenAddrEventAfter := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType, timeout time.Duration) { - t.Helper() + expectAutoGenAddrEventAfter := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType, timeout time.Duration) { + t.Helper() - select { - case e := <-ndpDisp.autoGenAddrC: - if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" { - t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) + select { + case e := <-ndpDisp.autoGenAddrC: + if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" { + t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) + } + case <-time.After(timeout): + t.Fatal("timed out waiting for addr auto gen event") + } } - case <-time.After(timeout): - t.Fatal("timed out waiting for addr auto gen event") - } - } - expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) { - t.Helper() + expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) { + t.Helper() - if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil { - t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err) - } else if got != addr { - t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr) - } + if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil { + t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err) + } else if got != addr { + t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr) + } - if got := addrForNewConnection(t, s); got != addr.Address { - t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address) - } - } + if got := addrForNewConnection(t, s); got != addr.Address { + t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address) + } + } - // Receive PI for prefix2. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100)) - expectAutoGenAddrEvent(addr2, newAddr) - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { - t.Fatalf("should have %s in the list of addresses", addr2) - } - expectPrimaryAddr(addr2) + // Receive PI for prefix2. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100)) + expectAutoGenAddrEvent(addr2, newAddr) + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { + t.Fatalf("should have %s in the list of addresses", addr2) + } + expectPrimaryAddr(addr2) - // Receive a PI for prefix1. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 90)) - expectAutoGenAddrEvent(addr1, newAddr) - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { - t.Fatalf("should have %s in the list of addresses", addr1) - } - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { - t.Fatalf("should have %s in the list of addresses", addr2) - } - expectPrimaryAddr(addr1) + // Receive a PI for prefix1. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 90)) + expectAutoGenAddrEvent(addr1, newAddr) + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { + t.Fatalf("should have %s in the list of addresses", addr1) + } + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { + t.Fatalf("should have %s in the list of addresses", addr2) + } + expectPrimaryAddr(addr1) - // Refresh lifetime for addr of prefix1. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, newMinVL-1)) - select { - case <-ndpDisp.autoGenAddrC: - t.Fatal("unexpectedly got an auto-generated event") - default: - } - expectPrimaryAddr(addr1) + // Refresh lifetime for addr of prefix1. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, newMinVL-1)) + select { + case <-ndpDisp.autoGenAddrC: + t.Fatal("unexpectedly got an auto-generated event") + default: + } + expectPrimaryAddr(addr1) - // Wait for addr of prefix1 to be deprecated. - expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout) - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { - t.Fatalf("should not have %s in the list of addresses", addr1) - } - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { - t.Fatalf("should have %s in the list of addresses", addr2) - } - // addr2 should be the primary endpoint now since addr1 is deprecated but - // addr2 is not. - expectPrimaryAddr(addr2) - // addr1 is deprecated but if explicitly requested, it should be used. - fullAddr1 := tcpip.FullAddress{Addr: addr1.Address, NIC: nicID} - if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) - } + // Wait for addr of prefix1 to be deprecated. + expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout) + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { + t.Fatalf("should not have %s in the list of addresses", addr1) + } + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { + t.Fatalf("should have %s in the list of addresses", addr2) + } + // addr2 should be the primary endpoint now since addr1 is deprecated but + // addr2 is not. + expectPrimaryAddr(addr2) + // addr1 is deprecated but if explicitly requested, it should be used. + fullAddr1 := tcpip.FullAddress{Addr: addr1.Address, NIC: nicID} + if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) + } - // Refresh valid lifetime for addr of prefix1, w/ 0 preferred lifetime to make - // sure we do not get a deprecation event again. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, 0)) - select { - case <-ndpDisp.autoGenAddrC: - t.Fatal("unexpectedly got an auto-generated event") - default: - } - expectPrimaryAddr(addr2) - if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) - } + // Refresh valid lifetime for addr of prefix1, w/ 0 preferred lifetime to make + // sure we do not get a deprecation event again. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, 0)) + select { + case <-ndpDisp.autoGenAddrC: + t.Fatal("unexpectedly got an auto-generated event") + default: + } + expectPrimaryAddr(addr2) + if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) + } - // Refresh lifetimes for addr of prefix1. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, newMinVL-1)) - select { - case <-ndpDisp.autoGenAddrC: - t.Fatal("unexpectedly got an auto-generated event") - default: - } - // addr1 is the primary endpoint again since it is non-deprecated now. - expectPrimaryAddr(addr1) + // Refresh lifetimes for addr of prefix1. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, newMinVL-1)) + select { + case <-ndpDisp.autoGenAddrC: + t.Fatal("unexpectedly got an auto-generated event") + default: + } + // addr1 is the primary endpoint again since it is non-deprecated now. + expectPrimaryAddr(addr1) - // Wait for addr of prefix1 to be deprecated. - expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout) - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { - t.Fatalf("should not have %s in the list of addresses", addr1) - } - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { - t.Fatalf("should have %s in the list of addresses", addr2) - } - // addr2 should be the primary endpoint now since it is not deprecated. - expectPrimaryAddr(addr2) - if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) - } + // Wait for addr of prefix1 to be deprecated. + expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout) + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { + t.Fatalf("should not have %s in the list of addresses", addr1) + } + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { + t.Fatalf("should have %s in the list of addresses", addr2) + } + // addr2 should be the primary endpoint now since it is not deprecated. + expectPrimaryAddr(addr2) + if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) + } - // Wait for addr of prefix1 to be invalidated. - expectAutoGenAddrEventAfter(addr1, invalidatedAddr, time.Second+defaultAsyncPositiveEventTimeout) - if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { - t.Fatalf("should not have %s in the list of addresses", addr1) - } - if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { - t.Fatalf("should have %s in the list of addresses", addr2) - } - expectPrimaryAddr(addr2) + // Wait for addr of prefix1 to be invalidated. + expectAutoGenAddrEventAfter(addr1, invalidatedAddr, time.Second+defaultAsyncPositiveEventTimeout) + if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { + t.Fatalf("should not have %s in the list of addresses", addr1) + } + if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { + t.Fatalf("should have %s in the list of addresses", addr2) + } + expectPrimaryAddr(addr2) - // Refresh both lifetimes for addr of prefix2 to the same value. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, newMinVL, newMinVL)) - select { - case <-ndpDisp.autoGenAddrC: - t.Fatal("unexpectedly got an auto-generated event") - default: - } + // Refresh both lifetimes for addr of prefix2 to the same value. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, newMinVL, newMinVL)) + select { + case <-ndpDisp.autoGenAddrC: + t.Fatal("unexpectedly got an auto-generated event") + default: + } - // Wait for a deprecation then invalidation events, or just an invalidation - // event. We need to cover both cases but cannot deterministically hit both - // cases because the deprecation and invalidation handlers could be handled in - // either deprecation then invalidation, or invalidation then deprecation - // (which should be cancelled by the invalidation handler). - select { - case e := <-ndpDisp.autoGenAddrC: - if diff := checkAutoGenAddrEvent(e, addr2, deprecatedAddr); diff == "" { - // If we get a deprecation event first, we should get an invalidation - // event almost immediately after. + // Wait for a deprecation then invalidation events, or just an invalidation + // event. We need to cover both cases but cannot deterministically hit both + // cases because the deprecation and invalidation handlers could be handled in + // either deprecation then invalidation, or invalidation then deprecation + // (which should be cancelled by the invalidation handler). select { case e := <-ndpDisp.autoGenAddrC: - if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff != "" { - t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) + if diff := checkAutoGenAddrEvent(e, addr2, deprecatedAddr); diff == "" { + // If we get a deprecation event first, we should get an invalidation + // event almost immediately after. + select { + case e := <-ndpDisp.autoGenAddrC: + if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff != "" { + t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) + } + case <-time.After(defaultAsyncPositiveEventTimeout): + t.Fatal("timed out waiting for addr auto gen event") + } + } else if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff == "" { + // If we get an invalidation event first, we should not get a deprecation + // event after. + select { + case <-ndpDisp.autoGenAddrC: + t.Fatal("unexpectedly got an auto-generated event") + case <-time.After(defaultAsyncNegativeEventTimeout): + } + } else { + t.Fatalf("got unexpected auto-generated event") } - case <-time.After(defaultAsyncPositiveEventTimeout): + case <-time.After(newMinVLDuration + defaultAsyncPositiveEventTimeout): t.Fatal("timed out waiting for addr auto gen event") } - } else if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff == "" { - // If we get an invalidation event first, we should not get a deprecation - // event after. - select { - case <-ndpDisp.autoGenAddrC: - t.Fatal("unexpectedly got an auto-generated event") - case <-time.After(defaultAsyncNegativeEventTimeout): + if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { + t.Fatalf("should not have %s in the list of addresses", addr1) + } + if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { + t.Fatalf("should not have %s in the list of addresses", addr2) + } + // Should not have any primary endpoints. + if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil { + t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err) + } else if want := (tcpip.AddressWithPrefix{}); got != want { + t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, want) + } + wq := waiter.Queue{} + we, ch := waiter.NewChannelEntry(nil) + wq.EventRegister(&we, waiter.EventIn) + defer wq.EventUnregister(&we) + defer close(ch) + ep, err := s.NewEndpoint(header.UDPProtocolNumber, header.IPv6ProtocolNumber, &wq) + if err != nil { + t.Fatalf("s.NewEndpoint(%d, %d, _): %s", header.UDPProtocolNumber, header.IPv6ProtocolNumber, err) + } + defer ep.Close() + if err := ep.SetSockOptBool(tcpip.V6OnlyOption, true); err != nil { + t.Fatalf("SetSockOpt(tcpip.V6OnlyOption, true): %s", err) } - } else { - t.Fatalf("got unexpected auto-generated event") - } - case <-time.After(newMinVLDuration + defaultAsyncPositiveEventTimeout): - t.Fatal("timed out waiting for addr auto gen event") - } - if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) { - t.Fatalf("should not have %s in the list of addresses", addr1) - } - if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) { - t.Fatalf("should not have %s in the list of addresses", addr2) - } - // Should not have any primary endpoints. - if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil { - t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err) - } else if want := (tcpip.AddressWithPrefix{}); got != want { - t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, want) - } - wq := waiter.Queue{} - we, ch := waiter.NewChannelEntry(nil) - wq.EventRegister(&we, waiter.EventIn) - defer wq.EventUnregister(&we) - defer close(ch) - ep, err := s.NewEndpoint(header.UDPProtocolNumber, header.IPv6ProtocolNumber, &wq) - if err != nil { - t.Fatalf("s.NewEndpoint(%d, %d, _): %s", header.UDPProtocolNumber, header.IPv6ProtocolNumber, err) - } - defer ep.Close() - if err := ep.SetSockOptBool(tcpip.V6OnlyOption, true); err != nil { - t.Fatalf("SetSockOpt(tcpip.V6OnlyOption, true): %s", err) - } - if err := ep.Connect(dstAddr); err != tcpip.ErrNoRoute { - t.Errorf("got ep.Connect(%+v) = %v, want = %s", dstAddr, err, tcpip.ErrNoRoute) + if err := ep.Connect(dstAddr); err != tcpip.ErrNoRoute { + t.Errorf("got ep.Connect(%+v) = %s, want = %s", dstAddr, err, tcpip.ErrNoRoute) + } + }) } } @@ -3524,110 +3566,128 @@ func TestAutoGenAddrRemoval(t *testing.T) { func TestAutoGenAddrAfterRemoval(t *testing.T) { const nicID = 1 - prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1) - prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1) - ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID) - - expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) { - t.Helper() - - select { - case e := <-ndpDisp.autoGenAddrC: - if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" { - t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) - } - default: - t.Fatal("expected addr auto gen event") - } + stacks := []struct { + name string + useNeighborCache bool + }{ + { + name: "linkAddrCache", + useNeighborCache: false, + }, + { + name: "neighborCache", + useNeighborCache: true, + }, } - expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) { - t.Helper() + for _, stackTyp := range stacks { + t.Run(stackTyp.name, func(t *testing.T) { + prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1) + prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1) + ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID, stackTyp.useNeighborCache) - if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil { - t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err) - } else if got != addr { - t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr) - } - - if got := addrForNewConnection(t, s); got != addr.Address { - t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address) - } - } + expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) { + t.Helper() - // Receive a PI to auto-generate addr1 with a large valid and preferred - // lifetime. - const largeLifetimeSeconds = 999 - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix1, true, true, largeLifetimeSeconds, largeLifetimeSeconds)) - expectAutoGenAddrEvent(addr1, newAddr) - expectPrimaryAddr(addr1) + select { + case e := <-ndpDisp.autoGenAddrC: + if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" { + t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) + } + default: + t.Fatal("expected addr auto gen event") + } + } - // Add addr2 as a static address. - protoAddr2 := tcpip.ProtocolAddress{ - Protocol: header.IPv6ProtocolNumber, - AddressWithPrefix: addr2, - } - if err := s.AddProtocolAddressWithOptions(nicID, protoAddr2, stack.FirstPrimaryEndpoint); err != nil { - t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err) - } - // addr2 should be more preferred now since it is at the front of the primary - // list. - expectPrimaryAddr(addr2) + expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) { + t.Helper() - // Get a route using addr2 to increment its reference count then remove it - // to leave it in the permanentExpired state. - r, err := s.FindRoute(nicID, addr2.Address, addr3, header.IPv6ProtocolNumber, false) - if err != nil { - t.Fatalf("FindRoute(%d, %s, %s, %d, false): %s", nicID, addr2.Address, addr3, header.IPv6ProtocolNumber, err) - } - defer r.Release() - if err := s.RemoveAddress(nicID, addr2.Address); err != nil { - t.Fatalf("s.RemoveAddress(%d, %s): %s", nicID, addr2.Address, err) - } - // addr1 should be preferred again since addr2 is in the expired state. - expectPrimaryAddr(addr1) + if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil { + t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err) + } else if got != addr { + t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr) + } - // Receive a PI to auto-generate addr2 as valid and preferred. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, largeLifetimeSeconds)) - expectAutoGenAddrEvent(addr2, newAddr) - // addr2 should be more preferred now that it is closer to the front of the - // primary list and not deprecated. - expectPrimaryAddr(addr2) + if got := addrForNewConnection(t, s); got != addr.Address { + t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address) + } + } - // Removing the address should result in an invalidation event immediately. - // It should still be in the permanentExpired state because r is still held. - // - // We remove addr2 here to make sure addr2 was marked as a SLAAC address - // (it was previously marked as a static address). - if err := s.RemoveAddress(1, addr2.Address); err != nil { - t.Fatalf("RemoveAddress(_, %s) = %s", addr2.Address, err) - } - expectAutoGenAddrEvent(addr2, invalidatedAddr) - // addr1 should be more preferred since addr2 is in the expired state. - expectPrimaryAddr(addr1) + // Receive a PI to auto-generate addr1 with a large valid and preferred + // lifetime. + const largeLifetimeSeconds = 999 + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix1, true, true, largeLifetimeSeconds, largeLifetimeSeconds)) + expectAutoGenAddrEvent(addr1, newAddr) + expectPrimaryAddr(addr1) - // Receive a PI to auto-generate addr2 as valid and deprecated. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, 0)) - expectAutoGenAddrEvent(addr2, newAddr) - // addr1 should still be more preferred since addr2 is deprecated, even though - // it is closer to the front of the primary list. - expectPrimaryAddr(addr1) + // Add addr2 as a static address. + protoAddr2 := tcpip.ProtocolAddress{ + Protocol: header.IPv6ProtocolNumber, + AddressWithPrefix: addr2, + } + if err := s.AddProtocolAddressWithOptions(nicID, protoAddr2, stack.FirstPrimaryEndpoint); err != nil { + t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err) + } + // addr2 should be more preferred now since it is at the front of the primary + // list. + expectPrimaryAddr(addr2) - // Receive a PI to refresh addr2's preferred lifetime. - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, largeLifetimeSeconds)) - select { - case <-ndpDisp.autoGenAddrC: - t.Fatal("unexpectedly got an auto gen addr event") - default: - } - // addr2 should be more preferred now that it is not deprecated. - expectPrimaryAddr(addr2) + // Get a route using addr2 to increment its reference count then remove it + // to leave it in the permanentExpired state. + r, err := s.FindRoute(nicID, addr2.Address, addr3, header.IPv6ProtocolNumber, false) + if err != nil { + t.Fatalf("FindRoute(%d, %s, %s, %d, false): %s", nicID, addr2.Address, addr3, header.IPv6ProtocolNumber, err) + } + defer r.Release() + if err := s.RemoveAddress(nicID, addr2.Address); err != nil { + t.Fatalf("s.RemoveAddress(%d, %s): %s", nicID, addr2.Address, err) + } + // addr1 should be preferred again since addr2 is in the expired state. + expectPrimaryAddr(addr1) + + // Receive a PI to auto-generate addr2 as valid and preferred. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, largeLifetimeSeconds)) + expectAutoGenAddrEvent(addr2, newAddr) + // addr2 should be more preferred now that it is closer to the front of the + // primary list and not deprecated. + expectPrimaryAddr(addr2) + + // Removing the address should result in an invalidation event immediately. + // It should still be in the permanentExpired state because r is still held. + // + // We remove addr2 here to make sure addr2 was marked as a SLAAC address + // (it was previously marked as a static address). + if err := s.RemoveAddress(1, addr2.Address); err != nil { + t.Fatalf("RemoveAddress(_, %s) = %s", addr2.Address, err) + } + expectAutoGenAddrEvent(addr2, invalidatedAddr) + // addr1 should be more preferred since addr2 is in the expired state. + expectPrimaryAddr(addr1) + + // Receive a PI to auto-generate addr2 as valid and deprecated. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, 0)) + expectAutoGenAddrEvent(addr2, newAddr) + // addr1 should still be more preferred since addr2 is deprecated, even though + // it is closer to the front of the primary list. + expectPrimaryAddr(addr1) + + // Receive a PI to refresh addr2's preferred lifetime. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, largeLifetimeSeconds)) + select { + case <-ndpDisp.autoGenAddrC: + t.Fatal("unexpectedly got an auto gen addr event") + default: + } + // addr2 should be more preferred now that it is not deprecated. + expectPrimaryAddr(addr2) - if err := s.RemoveAddress(1, addr2.Address); err != nil { - t.Fatalf("RemoveAddress(_, %s) = %s", addr2.Address, err) + if err := s.RemoveAddress(1, addr2.Address); err != nil { + t.Fatalf("RemoveAddress(_, %s) = %s", addr2.Address, err) + } + expectAutoGenAddrEvent(addr2, invalidatedAddr) + expectPrimaryAddr(addr1) + }) } - expectAutoGenAddrEvent(addr2, invalidatedAddr) - expectPrimaryAddr(addr1) } // TestAutoGenAddrStaticConflict tests that if SLAAC generates an address that diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 728292782..0c811efdb 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -21,6 +21,7 @@ import ( "sort" "sync/atomic" + "gvisor.dev/gvisor/pkg/sleep" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -135,18 +136,8 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC } nic.mu.ndp.initializeTempAddrState() - // Register supported packet endpoint protocols. - for _, netProto := range header.Ethertypes { - nic.mu.packetEPs[netProto] = []PacketEndpoint{} - } - for _, netProto := range stack.networkProtocols { - netNum := netProto.Number() - nic.mu.packetEPs[netNum] = nil - nic.networkEndpoints[netNum] = netProto.NewEndpoint(id, stack, nic, ep, stack) - } - // Check for Neighbor Unreachability Detection support. - if ep.Capabilities()&CapabilityResolutionRequired != 0 && len(stack.linkAddrResolvers) != 0 { + if ep.Capabilities()&CapabilityResolutionRequired != 0 && len(stack.linkAddrResolvers) != 0 && stack.useNeighborCache { rng := rand.New(rand.NewSource(stack.clock.NowNanoseconds())) nic.neigh = &neighborCache{ nic: nic, @@ -155,6 +146,16 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC } } + // Register supported packet endpoint protocols. + for _, netProto := range header.Ethertypes { + nic.mu.packetEPs[netProto] = []PacketEndpoint{} + } + for _, netProto := range stack.networkProtocols { + netNum := netProto.Number() + nic.mu.packetEPs[netNum] = nil + nic.networkEndpoints[netNum] = netProto.NewEndpoint(id, stack, nic.neigh, nic, ep, stack) + } + nic.linkEP.Attach(nic) return nic @@ -431,7 +432,7 @@ func (n *NIC) setSpoofing(enable bool) { // If an IPv6 primary endpoint is requested, Source Address Selection (as // defined by RFC 6724 section 5) will be performed. func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber, remoteAddr tcpip.Address) *referencedNetworkEndpoint { - if protocol == header.IPv6ProtocolNumber && remoteAddr != "" { + if protocol == header.IPv6ProtocolNumber && len(remoteAddr) != 0 { return n.primaryIPv6Endpoint(remoteAddr) } @@ -666,8 +667,19 @@ func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address t } // A usable reference was not found, create a temporary one if requested by - // the caller or if the address is found in the NIC's subnets. + // the caller or if the address is found in the NIC's subnets and the NIC is + // a loopback interface. createTempEP := spoofingOrPromiscuous + if !createTempEP && n.isLoopback() { + for _, r := range n.mu.endpoints { + addr := r.addrWithPrefix() + subnet := addr.Subnet() + if subnet.Contains(address) { + createTempEP = true + break + } + } + } n.mu.RUnlock() if !createTempEP { @@ -807,11 +819,24 @@ func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb Primar } } - ep, ok := n.networkEndpoints[protocolAddress.Protocol] + netProto, ok := n.stack.networkProtocols[protocolAddress.Protocol] if !ok { return nil, tcpip.ErrUnknownProtocol } + var nud NUDHandler + if n.neigh != nil { + // An interface value that holds a nil concrete value is itself non-nil. + // For this reason, n.neigh cannot be passed directly to NewEndpoint so + // NetworkEndpoints don't confuse it for non-nil. + // + // See https://golang.org/doc/faq#nil_error for more information. + nud = n.neigh + } + + // Create the new network endpoint. + ep := netProto.NewEndpoint(n.id, n.stack, nud, n, n.linkEP, n.stack) + isIPv6Unicast := protocolAddress.Protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(protocolAddress.AddressWithPrefix.Address) // If the address is an IPv6 address and it is a permanent address, @@ -833,10 +858,11 @@ func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb Primar deprecated: deprecated, } - // Set up cache if link address resolution exists for this protocol. + // Set up resolver if link address resolution exists for this protocol. if n.linkEP.Capabilities()&CapabilityResolutionRequired != 0 { - if _, ok := n.stack.linkAddrResolvers[protocolAddress.Protocol]; ok { + if linkRes, ok := n.stack.linkAddrResolvers[protocolAddress.Protocol]; ok { ref.linkCache = n.stack + ref.linkRes = linkRes } } @@ -1071,6 +1097,51 @@ func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error { return n.removePermanentAddressLocked(addr) } +func (n *NIC) neighbors() ([]NeighborEntry, *tcpip.Error) { + if n.neigh == nil { + return nil, tcpip.ErrNotSupported + } + + return n.neigh.entries(), nil +} + +func (n *NIC) removeWaker(addr tcpip.Address, w *sleep.Waker) { + if n.neigh == nil { + return + } + + n.neigh.removeWaker(addr, w) +} + +func (n *NIC) addStaticNeighbor(addr tcpip.Address, linkAddress tcpip.LinkAddress) *tcpip.Error { + if n.neigh == nil { + return tcpip.ErrNotSupported + } + + n.neigh.addStaticEntry(addr, linkAddress) + return nil +} + +func (n *NIC) removeNeighbor(addr tcpip.Address) *tcpip.Error { + if n.neigh == nil { + return tcpip.ErrNotSupported + } + + if !n.neigh.removeEntry(addr) { + return tcpip.ErrBadAddress + } + return nil +} + +func (n *NIC) clearNeighbors() *tcpip.Error { + if n.neigh == nil { + return tcpip.ErrNotSupported + } + + n.neigh.clear() + return nil +} + // joinGroup adds a new endpoint for the given multicast address, if none // exists yet. Otherwise it just increments its count. func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error { @@ -1651,6 +1722,10 @@ type referencedNetworkEndpoint struct { // protocol. Set to nil otherwise. linkCache LinkAddressCache + // linkRes is set if link address resolution is enabled for this protocol. + // Set to nil otherwise. + linkRes LinkAddressResolver + // refs is counting references held for this endpoint. When refs hits zero it // triggers the automatic removal of the endpoint from the NIC. refs int32 diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go index d312a79eb..1e065b5c1 100644 --- a/pkg/tcpip/stack/nic_test.go +++ b/pkg/tcpip/stack/nic_test.go @@ -192,7 +192,7 @@ func (*testIPv6Protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) } // NewEndpoint implements NetworkProtocol.NewEndpoint. -func (p *testIPv6Protocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _ TransportDispatcher, linkEP LinkEndpoint, _ *Stack) NetworkEndpoint { +func (p *testIPv6Protocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _ NUDHandler, _ TransportDispatcher, linkEP LinkEndpoint, _ *Stack) NetworkEndpoint { return &testIPv6Endpoint{ nicID: nicID, linkEP: linkEP, diff --git a/pkg/tcpip/stack/nud_test.go b/pkg/tcpip/stack/nud_test.go index 2494ee610..2b97e5972 100644 --- a/pkg/tcpip/stack/nud_test.go +++ b/pkg/tcpip/stack/nud_test.go @@ -61,6 +61,7 @@ func TestSetNUDConfigurationFailsForBadNICID(t *testing.T) { // stack will only allocate neighbor caches if a protocol providing link // address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, + UseNeighborCache: true, }) // No NIC with ID 1 yet. @@ -84,7 +85,8 @@ func TestNUDConfigurationFailsForNotSupported(t *testing.T) { e.LinkEPCapabilities |= stack.CapabilityResolutionRequired s := stack.New(stack.Options{ - NUDConfigs: stack.DefaultNUDConfigurations(), + NUDConfigs: stack.DefaultNUDConfigurations(), + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -108,7 +110,8 @@ func TestSetNUDConfigurationFailsForNotSupported(t *testing.T) { e.LinkEPCapabilities |= stack.CapabilityResolutionRequired s := stack.New(stack.Options{ - NUDConfigs: stack.DefaultNUDConfigurations(), + NUDConfigs: stack.DefaultNUDConfigurations(), + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -136,6 +139,7 @@ func TestDefaultNUDConfigurations(t *testing.T) { // address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, NUDConfigs: stack.DefaultNUDConfigurations(), + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -190,6 +194,7 @@ func TestNUDConfigurationsBaseReachableTime(t *testing.T) { // providing link address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, NUDConfigs: c, + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -246,6 +251,7 @@ func TestNUDConfigurationsMinRandomFactor(t *testing.T) { // providing link address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, NUDConfigs: c, + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -325,6 +331,7 @@ func TestNUDConfigurationsMaxRandomFactor(t *testing.T) { // providing link address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, NUDConfigs: c, + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -386,6 +393,7 @@ func TestNUDConfigurationsRetransmitTimer(t *testing.T) { // providing link address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, NUDConfigs: c, + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -437,6 +445,7 @@ func TestNUDConfigurationsDelayFirstProbeTime(t *testing.T) { // providing link address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, NUDConfigs: c, + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -488,6 +497,7 @@ func TestNUDConfigurationsMaxMulticastProbes(t *testing.T) { // providing link address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, NUDConfigs: c, + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -539,6 +549,7 @@ func TestNUDConfigurationsMaxUnicastProbes(t *testing.T) { // providing link address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, NUDConfigs: c, + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -590,6 +601,7 @@ func TestNUDConfigurationsUnreachableTime(t *testing.T) { // providing link address resolution is specified (e.g. ARP or IPv6). NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, NUDConfigs: c, + UseNeighborCache: true, }) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index aca2f77f8..21ac38583 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -298,7 +298,7 @@ type NetworkProtocol interface { ParseAddresses(v buffer.View) (src, dst tcpip.Address) // NewEndpoint creates a new endpoint of this protocol. - NewEndpoint(nicID tcpip.NICID, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) NetworkEndpoint + NewEndpoint(nicID tcpip.NICID, linkAddrCache LinkAddressCache, nud NUDHandler, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) NetworkEndpoint // SetOption allows enabling/disabling protocol specific features. // SetOption returns an error if the option is not supported or the @@ -488,7 +488,7 @@ type LinkAddressResolver interface { ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) // LinkAddressProtocol returns the network protocol of the - // addresses this this resolver can resolve. + // addresses this resolver can resolve. LinkAddressProtocol() tcpip.NetworkProtocolNumber } diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go index e267bebb0..c2eabde9e 100644 --- a/pkg/tcpip/stack/route.go +++ b/pkg/tcpip/stack/route.go @@ -141,6 +141,16 @@ func (r *Route) Resolve(waker *sleep.Waker) (<-chan struct{}, *tcpip.Error) { } nextAddr = r.RemoteAddress } + + if r.ref.nic.neigh != nil { + entry, ch, err := r.ref.nic.neigh.entry(nextAddr, r.LocalAddress, r.ref.linkRes, waker) + if err != nil { + return ch, err + } + r.RemoteLinkAddress = entry.LinkAddr + return nil, nil + } + linkAddr, ch, err := r.ref.linkCache.GetLinkAddress(r.ref.nic.ID(), nextAddr, r.LocalAddress, r.NetProto, waker) if err != nil { return ch, err @@ -155,6 +165,12 @@ func (r *Route) RemoveWaker(waker *sleep.Waker) { if nextAddr == "" { nextAddr = r.RemoteAddress } + + if r.ref.nic.neigh != nil { + r.ref.nic.neigh.removeWaker(nextAddr, waker) + return + } + r.ref.linkCache.RemoveWaker(r.ref.nic.ID(), nextAddr, waker) } @@ -163,6 +179,9 @@ func (r *Route) RemoveWaker(waker *sleep.Waker) { // // The NIC r uses must not be locked. func (r *Route) IsResolutionRequired() bool { + if r.ref.nic.neigh != nil { + return r.ref.isValidForOutgoing() && r.ref.linkRes != nil && r.RemoteLinkAddress == "" + } return r.ref.isValidForOutgoing() && r.ref.linkCache != nil && r.RemoteLinkAddress == "" } diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index a3f87c8af..7f5ed9e83 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -248,7 +248,7 @@ type RcvBufAutoTuneParams struct { // was started. MeasureTime time.Time - // CopiedBytes is the number of bytes copied to userspace since + // CopiedBytes is the number of bytes copied to user space since // this measure began. CopiedBytes int @@ -461,6 +461,10 @@ type Stack struct { // nudConfigs is the default NUD configurations used by interfaces. nudConfigs NUDConfigurations + // useNeighborCache indicates whether ARP and NDP packets should be handled + // by the NIC's neighborCache instead of linkAddrCache. + useNeighborCache bool + // autoGenIPv6LinkLocal determines whether or not the stack will attempt // to auto-generate an IPv6 link-local address for newly enabled non-loopback // NICs. See the AutoGenIPv6LinkLocal field of Options for more details. @@ -541,6 +545,13 @@ type Options struct { // NUDConfigs is the default NUD configurations used by interfaces. NUDConfigs NUDConfigurations + // UseNeighborCache indicates whether ARP and NDP packets should be handled + // by the Neighbor Unreachability Detection (NUD) state machine. This flag + // also enables the APIs for inspecting and modifying the neighbor table via + // NUDDispatcher and the following Stack methods: Neighbors, RemoveNeighbor, + // and ClearNeighbors. + UseNeighborCache bool + // AutoGenIPv6LinkLocal determines whether or not the stack will attempt to // auto-generate an IPv6 link-local address for newly enabled non-loopback // NICs. @@ -715,6 +726,7 @@ func New(opts Options) *Stack { seed: generateRandUint32(), ndpConfigs: opts.NDPConfigs, nudConfigs: opts.NUDConfigs, + useNeighborCache: opts.UseNeighborCache, autoGenIPv6LinkLocal: opts.AutoGenIPv6LinkLocal, uniqueIDGenerator: opts.UniqueID, ndpDisp: opts.NDPDisp, @@ -1209,8 +1221,8 @@ func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tc s.mu.RLock() defer s.mu.RUnlock() - nic := s.nics[id] - if nic == nil { + nic, ok := s.nics[id] + if !ok { return tcpip.ErrUnknownNICID } @@ -1335,8 +1347,8 @@ func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProto // If a NIC is specified, we try to find the address there only. if nicID != 0 { - nic := s.nics[nicID] - if nic == nil { + nic, ok := s.nics[nicID] + if !ok { return 0 } @@ -1367,8 +1379,8 @@ func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) *tcpip.Error s.mu.RLock() defer s.mu.RUnlock() - nic := s.nics[nicID] - if nic == nil { + nic, ok := s.nics[nicID] + if !ok { return tcpip.ErrUnknownNICID } @@ -1383,8 +1395,8 @@ func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) *tcpip.Error { s.mu.RLock() defer s.mu.RUnlock() - nic := s.nics[nicID] - if nic == nil { + nic, ok := s.nics[nicID] + if !ok { return tcpip.ErrUnknownNICID } @@ -1416,8 +1428,33 @@ func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, return s.linkAddrCache.get(fullAddr, linkRes, localAddr, nic.linkEP, waker) } -// RemoveWaker implements LinkAddressCache.RemoveWaker. +// Neighbors returns all IP to MAC address associations. +func (s *Stack) Neighbors(nicID tcpip.NICID) ([]NeighborEntry, *tcpip.Error) { + s.mu.RLock() + nic, ok := s.nics[nicID] + s.mu.RUnlock() + + if !ok { + return nil, tcpip.ErrUnknownNICID + } + + return nic.neighbors() +} + +// RemoveWaker removes a waker that has been added when link resolution for +// addr was requested. func (s *Stack) RemoveWaker(nicID tcpip.NICID, addr tcpip.Address, waker *sleep.Waker) { + if s.useNeighborCache { + s.mu.RLock() + nic, ok := s.nics[nicID] + s.mu.RUnlock() + + if ok { + nic.removeWaker(addr, waker) + } + return + } + s.mu.RLock() defer s.mu.RUnlock() @@ -1427,6 +1464,47 @@ func (s *Stack) RemoveWaker(nicID tcpip.NICID, addr tcpip.Address, waker *sleep. } } +// AddStaticNeighbor statically associates an IP address to a MAC address. +func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress) *tcpip.Error { + s.mu.RLock() + nic, ok := s.nics[nicID] + s.mu.RUnlock() + + if !ok { + return tcpip.ErrUnknownNICID + } + + return nic.addStaticNeighbor(addr, linkAddr) +} + +// RemoveNeighbor removes an IP to MAC address association previously created +// either automically or by AddStaticNeighbor. Returns ErrBadAddress if there +// is no association with the provided address. +func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, addr tcpip.Address) *tcpip.Error { + s.mu.RLock() + nic, ok := s.nics[nicID] + s.mu.RUnlock() + + if !ok { + return tcpip.ErrUnknownNICID + } + + return nic.removeNeighbor(addr) +} + +// ClearNeighbors removes all IP to MAC address associations. +func (s *Stack) ClearNeighbors(nicID tcpip.NICID) *tcpip.Error { + s.mu.RLock() + nic, ok := s.nics[nicID] + s.mu.RUnlock() + + if !ok { + return tcpip.ErrUnknownNICID + } + + return nic.clearNeighbors() +} + // RegisterTransportEndpoint registers the given endpoint with the stack // transport dispatcher. Received packets that match the provided id will be // delivered to the given endpoint; specifying a nic is optional, but @@ -1961,7 +2039,7 @@ func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, addres return nil, tcpip.ErrBadAddress } -// FindNICNameFromID returns the name of the nic for the given NICID. +// FindNICNameFromID returns the name of the NIC for the given NICID. func (s *Stack) FindNICNameFromID(id tcpip.NICID) string { s.mu.RLock() defer s.mu.RUnlock() diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 106645c50..1deeccb89 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -197,7 +197,7 @@ func (*fakeNetworkProtocol) ParseAddresses(v buffer.View) (src, dst tcpip.Addres return tcpip.Address(v[srcAddrOffset : srcAddrOffset+1]), tcpip.Address(v[dstAddrOffset : dstAddrOffset+1]) } -func (f *fakeNetworkProtocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, ep stack.LinkEndpoint, _ *stack.Stack) stack.NetworkEndpoint { +func (f *fakeNetworkProtocol) NewEndpoint(nicID tcpip.NICID, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher, ep stack.LinkEndpoint, _ *stack.Stack) stack.NetworkEndpoint { return &fakeNetworkEndpoint{ nicID: nicID, proto: f, diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 07c85ce59..609b8af33 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -978,6 +978,15 @@ const ( TCPTimeWaitReuseLoopbackOnly ) +// LingerOption is used by SetSockOpt/GetSockOpt to set/get the +// duration for which a socket lingers before returning from Close. +// +// +stateify savable +type LingerOption struct { + Enabled bool + Timeout time.Duration +} + // IPPacketInfo is the message structure for IP_PKTINFO. // // +stateify savable @@ -1020,7 +1029,10 @@ func (r Route) String() string { // TransportProtocolNumber is the number of a transport protocol. type TransportProtocolNumber uint32 -// NetworkProtocolNumber is the number of a network protocol. +// NetworkProtocolNumber is the EtherType of a network protocol in an Ethernet +// frame. +// +// See: https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml type NetworkProtocolNumber uint32 // A StatCounter keeps track of a statistic. @@ -1183,6 +1195,10 @@ type ICMPv6ReceivedPacketStats struct { // Invalid is the total number of ICMPv6 packets received that the // transport layer could not parse. Invalid *StatCounter + + // RouterOnlyPacketsDroppedByHost is the total number of ICMPv6 packets + // dropped due to being router-specific packets. + RouterOnlyPacketsDroppedByHost *StatCounter } // ICMPStats collects ICMP-specific stats (both v4 and v6). diff --git a/pkg/tcpip/tests/integration/BUILD b/pkg/tcpip/tests/integration/BUILD index 6d52af98a..06c7a3cd3 100644 --- a/pkg/tcpip/tests/integration/BUILD +++ b/pkg/tcpip/tests/integration/BUILD @@ -5,12 +5,16 @@ package(licenses = ["notice"]) go_test( name = "integration_test", size = "small", - srcs = ["multicast_broadcast_test.go"], + srcs = [ + "loopback_test.go", + "multicast_broadcast_test.go", + ], deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", "//pkg/tcpip/link/channel", + "//pkg/tcpip/link/loopback", "//pkg/tcpip/network/ipv4", "//pkg/tcpip/network/ipv6", "//pkg/tcpip/stack", diff --git a/pkg/tcpip/tests/integration/loopback_test.go b/pkg/tcpip/tests/integration/loopback_test.go new file mode 100644 index 000000000..3a2f75837 --- /dev/null +++ b/pkg/tcpip/tests/integration/loopback_test.go @@ -0,0 +1,229 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package integration_test + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" +) + +// TestLoopbackAcceptAllInSubnet tests that a loopback interface considers +// itself bound to all addresses in the subnet of an assigned address. +func TestLoopbackAcceptAllInSubnet(t *testing.T) { + const ( + nicID = 1 + localPort = 80 + ) + + data := []byte{1, 2, 3, 4} + + ipv4ProtocolAddress := tcpip.ProtocolAddress{ + Protocol: header.IPv4ProtocolNumber, + AddressWithPrefix: ipv4Addr, + } + ipv4Bytes := []byte(ipv4Addr.Address) + ipv4Bytes[len(ipv4Bytes)-1]++ + otherIPv4Address := tcpip.Address(ipv4Bytes) + + ipv6ProtocolAddress := tcpip.ProtocolAddress{ + Protocol: header.IPv6ProtocolNumber, + AddressWithPrefix: ipv6Addr, + } + ipv6Bytes := []byte(ipv6Addr.Address) + ipv6Bytes[len(ipv6Bytes)-1]++ + otherIPv6Address := tcpip.Address(ipv6Bytes) + + tests := []struct { + name string + addAddress tcpip.ProtocolAddress + bindAddr tcpip.Address + dstAddr tcpip.Address + expectRx bool + }{ + { + name: "IPv4 bind to wildcard and send to assigned address", + addAddress: ipv4ProtocolAddress, + dstAddr: ipv4Addr.Address, + expectRx: true, + }, + { + name: "IPv4 bind to wildcard and send to other subnet-local address", + addAddress: ipv4ProtocolAddress, + dstAddr: otherIPv4Address, + expectRx: true, + }, + { + name: "IPv4 bind to wildcard send to other address", + addAddress: ipv4ProtocolAddress, + dstAddr: remoteIPv4Addr, + expectRx: false, + }, + { + name: "IPv4 bind to other subnet-local address and send to assigned address", + addAddress: ipv4ProtocolAddress, + bindAddr: otherIPv4Address, + dstAddr: ipv4Addr.Address, + expectRx: false, + }, + { + name: "IPv4 bind and send to other subnet-local address", + addAddress: ipv4ProtocolAddress, + bindAddr: otherIPv4Address, + dstAddr: otherIPv4Address, + expectRx: true, + }, + { + name: "IPv4 bind to assigned address and send to other subnet-local address", + addAddress: ipv4ProtocolAddress, + bindAddr: ipv4Addr.Address, + dstAddr: otherIPv4Address, + expectRx: false, + }, + + { + name: "IPv6 bind and send to assigned address", + addAddress: ipv6ProtocolAddress, + bindAddr: ipv6Addr.Address, + dstAddr: ipv6Addr.Address, + expectRx: true, + }, + { + name: "IPv6 bind to wildcard and send to assigned address", + addAddress: ipv6ProtocolAddress, + dstAddr: ipv6Addr.Address, + expectRx: true, + }, + { + name: "IPv6 bind to wildcard and send to other subnet-local address", + addAddress: ipv6ProtocolAddress, + dstAddr: otherIPv6Address, + expectRx: true, + }, + { + name: "IPv6 bind to wildcard send to other address", + addAddress: ipv6ProtocolAddress, + dstAddr: remoteIPv6Addr, + expectRx: false, + }, + { + name: "IPv6 bind to other subnet-local address and send to assigned address", + addAddress: ipv6ProtocolAddress, + bindAddr: otherIPv6Address, + dstAddr: ipv6Addr.Address, + expectRx: false, + }, + { + name: "IPv6 bind and send to other subnet-local address", + addAddress: ipv6ProtocolAddress, + bindAddr: otherIPv6Address, + dstAddr: otherIPv6Address, + expectRx: true, + }, + { + name: "IPv6 bind to assigned address and send to other subnet-local address", + addAddress: ipv6ProtocolAddress, + bindAddr: ipv6Addr.Address, + dstAddr: otherIPv6Address, + expectRx: false, + }, + { + name: "IPv6 bind and send to assigned address", + addAddress: ipv6ProtocolAddress, + bindAddr: ipv6Addr.Address, + dstAddr: ipv6Addr.Address, + expectRx: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()}, + TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()}, + }) + if err := s.CreateNIC(nicID, loopback.New()); err != nil { + t.Fatalf("CreateNIC(%d, _): %s", nicID, err) + } + if err := s.AddProtocolAddress(nicID, test.addAddress); err != nil { + t.Fatalf("AddProtocolAddress(%d, %+v): %s", nicID, test.addAddress, err) + } + s.SetRouteTable([]tcpip.Route{ + tcpip.Route{ + Destination: header.IPv4EmptySubnet, + NIC: nicID, + }, + tcpip.Route{ + Destination: header.IPv6EmptySubnet, + NIC: nicID, + }, + }) + + wq := waiter.Queue{} + rep, err := s.NewEndpoint(udp.ProtocolNumber, test.addAddress.Protocol, &wq) + if err != nil { + t.Fatalf("NewEndpoint(%d, %d, _): %s", udp.ProtocolNumber, test.addAddress.Protocol, err) + } + defer rep.Close() + + bindAddr := tcpip.FullAddress{Addr: test.bindAddr, Port: localPort} + if err := rep.Bind(bindAddr); err != nil { + t.Fatalf("rep.Bind(%+v): %s", bindAddr, err) + } + + sep, err := s.NewEndpoint(udp.ProtocolNumber, test.addAddress.Protocol, &wq) + if err != nil { + t.Fatalf("NewEndpoint(%d, %d, _): %s", udp.ProtocolNumber, test.addAddress.Protocol, err) + } + defer sep.Close() + + wopts := tcpip.WriteOptions{ + To: &tcpip.FullAddress{ + Addr: test.dstAddr, + Port: localPort, + }, + } + n, _, err := sep.Write(tcpip.SlicePayload(data), wopts) + if err != nil { + t.Fatalf("sep.Write(_, _): %s", err) + } + if want := int64(len(data)); n != want { + t.Fatalf("got sep.Write(_, _) = (%d, _, nil), want = (%d, _, nil)", n, want) + } + + if gotPayload, _, err := rep.Read(nil); test.expectRx { + if err != nil { + t.Fatalf("reep.Read(nil): %s", err) + } + if diff := cmp.Diff(buffer.View(data), gotPayload); diff != "" { + t.Errorf("got UDP payload mismatch (-want +got):\n%s", diff) + } + } else { + if err != tcpip.ErrWouldBlock { + t.Fatalf("got rep.Read(nil) = (%x, _, %s), want = (_, _, %s)", gotPayload, err, tcpip.ErrWouldBlock) + } + } + }) + } +} diff --git a/pkg/tcpip/tests/integration/multicast_broadcast_test.go b/pkg/tcpip/tests/integration/multicast_broadcast_test.go index 9f0dd4d6d..52c27e045 100644 --- a/pkg/tcpip/tests/integration/multicast_broadcast_test.go +++ b/pkg/tcpip/tests/integration/multicast_broadcast_test.go @@ -430,7 +430,7 @@ func TestIncomingMulticastAndBroadcast(t *testing.T) { } } else { if err != tcpip.ErrWouldBlock { - t.Fatalf("got Read(nil) = (%x, _, %v), want = (_, _, %s)", gotPayload, err, tcpip.ErrWouldBlock) + t.Fatalf("got Read(nil) = (%x, _, %s), want = (_, _, %s)", gotPayload, err, tcpip.ErrWouldBlock) } } }) diff --git a/pkg/tcpip/time_unsafe.go b/pkg/tcpip/time_unsafe.go index f32d58091..606363567 100644 --- a/pkg/tcpip/time_unsafe.go +++ b/pkg/tcpip/time_unsafe.go @@ -13,7 +13,7 @@ // limitations under the License. // +build go1.9 -// +build !go1.16 +// +build !go1.17 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index bde071f2a..234fb95ce 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -11,8 +11,7 @@ go_template_instance( template = "//pkg/ilist:generic_list", types = { "Element": "*segment", - "ElementMapper": "segmentMapper", - "Linker": "*segmentEntry", + "Linker": "*segment", }, ) @@ -28,19 +27,6 @@ go_template_instance( }, ) -go_template_instance( - name = "tcp_rack_segment_list", - out = "tcp_rack_segment_list.go", - package = "tcp", - prefix = "rackSegment", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*segment", - "ElementMapper": "rackSegmentMapper", - "Linker": "*rackSegmentEntry", - }, -) - go_library( name = "tcp", srcs = [ @@ -69,7 +55,6 @@ go_library( "snd.go", "snd_state.go", "tcp_endpoint_list.go", - "tcp_rack_segment_list.go", "tcp_segment_list.go", "timer.go", ], diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 87980c0a1..290172ac9 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -924,18 +924,7 @@ func (e *endpoint) handleWrite() *tcpip.Error { first := e.sndQueue.Front() if first != nil { - lastSeg := e.snd.writeList.Back() e.snd.writeList.PushBackList(&e.sndQueue) - if lastSeg == nil { - lastSeg = e.snd.writeList.Front() - } else { - lastSeg = lastSeg.segEntry.Next() - } - // Add new segments to rcList, as rcList and writeList should - // be consistent. - for seg := lastSeg; seg != nil; seg = seg.segEntry.Next() { - e.snd.rcList.PushBack(seg) - } e.sndBufInQueue = 0 } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 9df22ac84..ff9b8804d 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -654,6 +654,9 @@ type endpoint struct { // owner is used to get uid and gid of the packet. owner tcpip.PacketOwner + + // linger is used for SO_LINGER socket option. + linger tcpip.LingerOption } // UniqueID implements stack.TransportEndpoint.UniqueID. @@ -1007,6 +1010,26 @@ func (e *endpoint) Close() { return } + if e.linger.Enabled && e.linger.Timeout == 0 { + s := e.EndpointState() + isResetState := s == StateEstablished || s == StateCloseWait || s == StateFinWait1 || s == StateFinWait2 || s == StateSynRecv + if isResetState { + // Close the endpoint without doing full shutdown and + // send a RST. + e.resetConnectionLocked(tcpip.ErrConnectionAborted) + e.closeNoShutdownLocked() + + // Wake up worker to close the endpoint. + switch s { + case StateSynRecv: + e.notifyProtocolGoroutine(notifyClose) + default: + e.notifyProtocolGoroutine(notifyTickleWorker) + } + return + } + } + // Issue a shutdown so that the peer knows we won't send any more data // if we're connected, or stop accepting if we're listening. e.shutdownLocked(tcpip.ShutdownWrite | tcpip.ShutdownRead) @@ -1428,7 +1451,7 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro vec = append([][]byte(nil), vec...) var num int64 - for s := e.rcvList.Front(); s != nil; s = s.segEntry.Next() { + for s := e.rcvList.Front(); s != nil; s = s.Next() { views := s.data.Views() for i := s.viewToDeliver; i < len(views); i++ { @@ -1775,15 +1798,24 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { case tcpip.TCPLingerTimeoutOption: e.LockUser() - if v < 0 { + + switch { + case v < 0: // Same as effectively disabling TCPLinger timeout. - v = 0 - } - // Cap it to MaxTCPLingerTimeout. - stkTCPLingerTimeout := tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout) - if v > stkTCPLingerTimeout { - v = stkTCPLingerTimeout + v = -1 + case v == 0: + // Same as the stack default. + var stackLingerTimeout tcpip.TCPLingerTimeoutOption + if err := e.stack.TransportProtocolOption(ProtocolNumber, &stackLingerTimeout); err != nil { + panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %+v) = %v", ProtocolNumber, &stackLingerTimeout, err)) + } + v = stackLingerTimeout + case v > tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout): + // Cap it to Stack's default TCP_LINGER2 timeout. + v = tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout) + default: } + e.tcpLingerTimeout = time.Duration(v) e.UnlockUser() @@ -1798,6 +1830,11 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { case tcpip.SocketDetachFilterOption: return nil + case tcpip.LingerOption: + e.LockUser() + e.linger = v + e.UnlockUser() + default: return nil } @@ -2023,6 +2060,11 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { Port: port, } + case *tcpip.LingerOption: + e.LockUser() + *o = e.linger + e.UnlockUser() + default: return tcpip.ErrUnknownProtocolOption } @@ -2249,7 +2291,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc if !handshake { e.segmentQueue.mu.Lock() for _, l := range []segmentList{e.segmentQueue.list, e.sndQueue, e.snd.writeList} { - for s := l.Front(); s != nil; s = s.segEntry.Next() { + for s := l.Front(); s != nil; s = s.Next() { s.id = e.ID s.route = r.Clone() e.sndWaker.Assert() diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go index a20755f78..94307d31a 100644 --- a/pkg/tcpip/transport/tcp/segment.go +++ b/pkg/tcpip/transport/tcp/segment.go @@ -30,13 +30,12 @@ import ( // // +stateify savable type segment struct { - segEntry segmentEntry - rackSegEntry rackSegmentEntry - refCnt int32 - id stack.TransportEndpointID `state:"manual"` - route stack.Route `state:"manual"` - data buffer.VectorisedView `state:".(buffer.VectorisedView)"` - hdr header.TCP + segmentEntry + refCnt int32 + id stack.TransportEndpointID `state:"manual"` + route stack.Route `state:"manual"` + data buffer.VectorisedView `state:".(buffer.VectorisedView)"` + hdr header.TCP // views is used as buffer for data when its length is large // enough to store a VectorisedView. views [8]buffer.View `state:"nosave"` @@ -62,16 +61,6 @@ type segment struct { xmitCount uint32 } -// segmentMapper is the ElementMapper for the writeList. -type segmentMapper struct{} - -func (segmentMapper) linkerFor(seg *segment) *segmentEntry { return &seg.segEntry } - -// rackSegmentMapper is the ElementMapper for the rcList. -type rackSegmentMapper struct{} - -func (rackSegmentMapper) linkerFor(seg *segment) *rackSegmentEntry { return &seg.rackSegEntry } - func newSegment(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) *segment { s := &segment{ refCnt: 1, diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 31151f23d..c55589c45 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -154,7 +154,6 @@ type sender struct { closed bool writeNext *segment writeList segmentList - rcList rackSegmentList resendTimer timer `state:"nosave"` resendWaker sleep.Waker `state:"nosave"` @@ -368,7 +367,7 @@ func (s *sender) updateMaxPayloadSize(mtu, count int) { // Rewind writeNext to the first segment exceeding the MTU. Do nothing // if it is already before such a packet. - for seg := s.writeList.Front(); seg != nil; seg = seg.segEntry.Next() { + for seg := s.writeList.Front(); seg != nil; seg = seg.Next() { if seg == s.writeNext { // We got to writeNext before we could find a segment // exceeding the MTU. @@ -623,7 +622,6 @@ func (s *sender) splitSeg(seg *segment, size int) { nSeg.data.TrimFront(size) nSeg.sequenceNumber.UpdateForward(seqnum.Size(size)) s.writeList.InsertAfter(seg, nSeg) - s.rcList.InsertAfter(seg, nSeg) // The segment being split does not carry PUSH flag because it is // followed by the newly split segment. @@ -655,7 +653,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt var s3 *segment var s4 *segment // Step 1. - for seg := nextSegHint; seg != nil; seg = seg.segEntry.Next() { + for seg := nextSegHint; seg != nil; seg = seg.Next() { // Stop iteration if we hit a segment that has never been // transmitted (i.e. either it has no assigned sequence number // or if it does have one, it's >= the next sequence number @@ -685,7 +683,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt // NextSeg(): // (1.c) IsLost(S2) returns true. if s.ep.scoreboard.IsLost(segSeq) { - return seg, seg.segEntry.Next(), false + return seg, seg.Next(), false } // NextSeg(): @@ -699,7 +697,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt // SHOULD be returned. if s3 == nil { s3 = seg - hint = seg.segEntry.Next() + hint = seg.Next() } } // NextSeg(): @@ -733,7 +731,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt // range of one segment of up to SMSS octets of // previously unsent data starting with sequence number // HighData+1 MUST be returned." - for seg := s.writeNext; seg != nil; seg = seg.segEntry.Next() { + for seg := s.writeNext; seg != nil; seg = seg.Next() { if s.isAssignedSequenceNumber(seg) && seg.sequenceNumber.LessThan(s.sndNxt) { continue } @@ -775,16 +773,15 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se // triggering bugs in poorly written DNS // implementations. var nextTooBig bool - for seg.segEntry.Next() != nil && seg.segEntry.Next().data.Size() != 0 { - if seg.data.Size()+seg.segEntry.Next().data.Size() > available { + for seg.Next() != nil && seg.Next().data.Size() != 0 { + if seg.data.Size()+seg.Next().data.Size() > available { nextTooBig = true break } - seg.data.Append(seg.segEntry.Next().data) + seg.data.Append(seg.Next().data) // Consume the segment that we just merged in. - s.writeList.Remove(seg.segEntry.Next()) - s.rcList.Remove(seg.rackSegEntry.Next()) + s.writeList.Remove(seg.Next()) } if !nextTooBig && seg.data.Size() < available { // Segment is not full. @@ -951,7 +948,7 @@ func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) } dataSent = true s.outstanding++ - s.writeNext = nextSeg.segEntry.Next() + s.writeNext = nextSeg.Next() continue } @@ -964,7 +961,6 @@ func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) // transmitted in (C.1)." s.outstanding++ dataSent = true - s.sendSegment(nextSeg) segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen()) @@ -1039,7 +1035,7 @@ func (s *sender) sendData() { if s.fr.active && s.ep.sackPermitted { dataSent = s.handleSACKRecovery(s.maxPayloadSize, end) } else { - for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.segEntry.Next() { + for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.Next() { cwndLimit := (s.sndCwnd - s.outstanding) * s.maxPayloadSize if cwndLimit < limit { limit = cwndLimit @@ -1047,7 +1043,7 @@ func (s *sender) sendData() { if s.isAssignedSequenceNumber(seg) && s.ep.sackPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) { // Move writeNext along so that we don't try and scan data that // has already been SACKED. - s.writeNext = seg.segEntry.Next() + s.writeNext = seg.Next() continue } if sent := s.maybeSendSegment(seg, limit, end); !sent { @@ -1055,7 +1051,7 @@ func (s *sender) sendData() { } dataSent = true s.outstanding += s.pCount(seg) - s.writeNext = seg.segEntry.Next() + s.writeNext = seg.Next() } } @@ -1186,7 +1182,7 @@ func (s *sender) SetPipe() { } pipe := 0 smss := seqnum.Size(s.ep.scoreboard.SMSS()) - for s1 := s.writeList.Front(); s1 != nil && s1.data.Size() != 0 && s.isAssignedSequenceNumber(s1); s1 = s1.segEntry.Next() { + for s1 := s.writeList.Front(); s1 != nil && s1.data.Size() != 0 && s.isAssignedSequenceNumber(s1); s1 = s1.Next() { // With GSO each segment can be much larger than SMSS. So check the segment // in SMSS sized ranges. segEnd := s1.sequenceNumber.Add(seqnum.Size(s1.data.Size())) @@ -1388,7 +1384,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { } if s.writeNext == seg { - s.writeNext = seg.segEntry.Next() + s.writeNext = seg.Next() } // Update the RACK fields if SACK is enabled. @@ -1397,7 +1393,6 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { } s.writeList.Remove(seg) - s.rcList.Remove(seg) // if SACK is enabled then Only reduce outstanding if // the segment was not previously SACKED as these have @@ -1465,12 +1460,6 @@ func (s *sender) sendSegment(seg *segment) *tcpip.Error { if s.sndCwnd < s.sndSsthresh { s.ep.stack.Stats().TCP.SlowStartRetransmits.Increment() } - - // Move the segment which has to be retransmitted to the end of the list, as - // RACK requires the segments in the order of their transmission times. - // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-09#section-6.2 - // Step 5 - s.rcList.PushBack(seg) } seg.xmitTime = time.Now() seg.xmitCount++ diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 55ae09a2f..9650bb06c 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -6206,12 +6206,13 @@ func TestTCPLingerTimeout(t *testing.T) { tcpLingerTimeout time.Duration want time.Duration }{ - {"NegativeLingerTimeout", -123123, 0}, - {"ZeroLingerTimeout", 0, 0}, + {"NegativeLingerTimeout", -123123, -1}, + // Zero is treated same as the stack's default TCP_LINGER2 timeout. + {"ZeroLingerTimeout", 0, tcp.DefaultTCPLingerTimeout}, {"InRangeLingerTimeout", 10 * time.Second, 10 * time.Second}, // Values > stack's TCPLingerTimeout are capped to the stack's // value. Defaults to tcp.DefaultTCPLingerTimeout(60 seconds) - {"AboveMaxLingerTimeout", 125 * time.Second, 120 * time.Second}, + {"AboveMaxLingerTimeout", tcp.MaxTCPLingerTimeout + 5*time.Second, tcp.MaxTCPLingerTimeout}, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index c33434b75..0a9d3c6cf 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -1366,6 +1366,22 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { return result } +// verifyChecksum verifies the checksum unless RX checksum offload is enabled. +// On IPv4, UDP checksum is optional, and a zero value means the transmitter +// omitted the checksum generation (RFC768). +// On IPv6, UDP checksum is not optional (RFC2460 Section 8.1). +func verifyChecksum(r *stack.Route, hdr header.UDP, pkt *stack.PacketBuffer) bool { + if r.Capabilities()&stack.CapabilityRXChecksumOffload == 0 && + (hdr.Checksum() != 0 || r.NetProto == header.IPv6ProtocolNumber) { + xsum := r.PseudoHeaderChecksum(ProtocolNumber, hdr.Length()) + for _, v := range pkt.Data.Views() { + xsum = header.Checksum(v, xsum) + } + return hdr.CalculateChecksum(xsum) == 0xffff + } + return true +} + // HandlePacket is called by the stack when new packets arrive to this transport // endpoint. func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) { @@ -1387,22 +1403,11 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk return } - // Verify checksum unless RX checksum offload is enabled. - // On IPv4, UDP checksum is optional, and a zero value means - // the transmitter omitted the checksum generation (RFC768). - // On IPv6, UDP checksum is not optional (RFC2460 Section 8.1). - if r.Capabilities()&stack.CapabilityRXChecksumOffload == 0 && - (hdr.Checksum() != 0 || r.NetProto == header.IPv6ProtocolNumber) { - xsum := r.PseudoHeaderChecksum(ProtocolNumber, hdr.Length()) - for _, v := range pkt.Data.Views() { - xsum = header.Checksum(v, xsum) - } - if hdr.CalculateChecksum(xsum) != 0xffff { - // Checksum Error. - e.stack.Stats().UDP.ChecksumErrors.Increment() - e.stats.ReceiveErrors.ChecksumErrors.Increment() - return - } + if !verifyChecksum(r, hdr, pkt) { + // Checksum Error. + e.stack.Stats().UDP.ChecksumErrors.Increment() + e.stats.ReceiveErrors.ChecksumErrors.Increment() + return } e.stack.Stats().UDP.PacketsReceived.Increment() diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go index 63d4bed7c..f65751dd4 100644 --- a/pkg/tcpip/transport/udp/protocol.go +++ b/pkg/tcpip/transport/udp/protocol.go @@ -88,7 +88,12 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans r.Stack().Stats().UDP.MalformedPacketsReceived.Increment() return true } - // TODO(b/129426613): only send an ICMP message if UDP checksum is valid. + + if !verifyChecksum(r, hdr, pkt) { + // Checksum Error. + r.Stack().Stats().UDP.ChecksumErrors.Increment() + return true + } // Only send ICMP error if the address is not a multicast/broadcast // v4/v6 address or the source is not the unspecified address. diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index f87d99d5a..bd1c8ac31 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -403,18 +403,35 @@ func (c *testContext) getPacketAndVerify(flow testFlow, checkers ...checker.Netw } // injectPacket creates a packet of the given flow and with the given payload, -// and injects it into the link endpoint. -func (c *testContext) injectPacket(flow testFlow, payload []byte) { +// and injects it into the link endpoint. If badChecksum is true, the packet has +// a bad checksum in the UDP header. +func (c *testContext) injectPacket(flow testFlow, payload []byte, badChecksum bool) { c.t.Helper() h := flow.header4Tuple(incoming) if flow.isV4() { buf := c.buildV4Packet(payload, &h) + if badChecksum { + // Invalidate the UDP header checksum field, taking care to avoid + // overflow to zero, which would disable checksum validation. + for u := header.UDP(buf[header.IPv4MinimumSize:]); ; { + u.SetChecksum(u.Checksum() + 1) + if u.Checksum() != 0 { + break + } + } + } c.linkEP.InjectInbound(ipv4.ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{ Data: buf.ToVectorisedView(), })) } else { buf := c.buildV6Packet(payload, &h) + if badChecksum { + // Invalidate the UDP header checksum field (Unlike IPv4, zero is + // a valid checksum value for IPv6 so no need to avoid it). + u := header.UDP(buf[header.IPv6MinimumSize:]) + u.SetChecksum(u.Checksum() + 1) + } c.linkEP.InjectInbound(ipv6.ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{ Data: buf.ToVectorisedView(), })) @@ -569,7 +586,7 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe c.t.Helper() payload := newPayload() - c.injectPacket(flow, payload) + c.injectPacket(flow, payload, false) // Try to receive the data. we, ch := waiter.NewChannelEntry(nil) @@ -925,7 +942,7 @@ func TestReadFromMulticastStats(t *testing.T) { } payload := newPayload() - c.injectPacket(flow, payload) + c.injectPacket(flow, payload, false) var want uint64 = 0 if flow.isReverseMulticast() { @@ -1469,7 +1486,7 @@ func TestTTL(t *testing.T) { } else { p = ipv6.NewProtocol() } - ep := p.NewEndpoint(0, nil, nil, nil, stack.New(stack.Options{ + ep := p.NewEndpoint(0, nil, nil, nil, nil, stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()}, TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()}, })) @@ -1502,7 +1519,7 @@ func TestSetTTL(t *testing.T) { } else { p = ipv6.NewProtocol() } - ep := p.NewEndpoint(0, nil, nil, nil, stack.New(stack.Options{ + ep := p.NewEndpoint(0, nil, nil, nil, nil, stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()}, TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()}, })) @@ -1727,21 +1744,33 @@ func TestV4UnknownDestination(t *testing.T) { // so that the final generated IPv4 packet is larger than // header.IPv4MinimumProcessableDatagramSize. largePayload bool + // badChecksum if true, will set an invalid checksum in the + // header. + badChecksum bool }{ - {unicastV4, true, false}, - {unicastV4, true, true}, - {multicastV4, false, false}, - {multicastV4, false, true}, - {broadcast, false, false}, - {broadcast, false, true}, - } + {unicastV4, true, false, false}, + {unicastV4, true, true, false}, + {unicastV4, false, false, true}, + {unicastV4, false, true, true}, + {multicastV4, false, false, false}, + {multicastV4, false, true, false}, + {broadcast, false, false, false}, + {broadcast, false, true, false}, + } + checksumErrors := uint64(0) for _, tc := range testCases { - t.Run(fmt.Sprintf("flow:%s icmpRequired:%t largePayload:%t", tc.flow, tc.icmpRequired, tc.largePayload), func(t *testing.T) { + t.Run(fmt.Sprintf("flow:%s icmpRequired:%t largePayload:%t badChecksum:%t", tc.flow, tc.icmpRequired, tc.largePayload, tc.badChecksum), func(t *testing.T) { payload := newPayload() if tc.largePayload { payload = newMinPayload(576) } - c.injectPacket(tc.flow, payload) + c.injectPacket(tc.flow, payload, tc.badChecksum) + if tc.badChecksum { + checksumErrors++ + if got, want := c.s.Stats().UDP.ChecksumErrors.Value(), checksumErrors; got != want { + t.Fatalf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want) + } + } if !tc.icmpRequired { ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -1806,19 +1835,31 @@ func TestV6UnknownDestination(t *testing.T) { // largePayload if true will result in a payload large enough to // create an IPv6 packet > header.IPv6MinimumMTU bytes. largePayload bool + // badChecksum if true, will set an invalid checksum in the + // header. + badChecksum bool }{ - {unicastV6, true, false}, - {unicastV6, true, true}, - {multicastV6, false, false}, - {multicastV6, false, true}, - } + {unicastV6, true, false, false}, + {unicastV6, true, true, false}, + {unicastV6, false, false, true}, + {unicastV6, false, true, true}, + {multicastV6, false, false, false}, + {multicastV6, false, true, false}, + } + checksumErrors := uint64(0) for _, tc := range testCases { - t.Run(fmt.Sprintf("flow:%s icmpRequired:%t largePayload:%t", tc.flow, tc.icmpRequired, tc.largePayload), func(t *testing.T) { + t.Run(fmt.Sprintf("flow:%s icmpRequired:%t largePayload:%t badChecksum:%t", tc.flow, tc.icmpRequired, tc.largePayload, tc.badChecksum), func(t *testing.T) { payload := newPayload() if tc.largePayload { payload = newMinPayload(1280) } - c.injectPacket(tc.flow, payload) + c.injectPacket(tc.flow, payload, tc.badChecksum) + if tc.badChecksum { + checksumErrors++ + if got, want := c.s.Stats().UDP.ChecksumErrors.Value(), checksumErrors; got != want { + t.Fatalf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want) + } + } if !tc.icmpRequired { ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -1953,74 +1994,29 @@ func TestShortHeader(t *testing.T) { } } -// TestIncrementChecksumErrorsV4 verifies if a checksum error is detected, +// TestBadChecksumErrors verifies if a checksum error is detected, // global and endpoint stats are incremented. -func TestIncrementChecksumErrorsV4(t *testing.T) { - c := newDualTestContext(t, defaultMTU) - defer c.cleanup() - - c.createEndpoint(ipv4.ProtocolNumber) - // Bind to wildcard. - if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { - c.t.Fatalf("Bind failed: %s", err) - } - - payload := newPayload() - h := unicastV4.header4Tuple(incoming) - buf := c.buildV4Packet(payload, &h) +func TestBadChecksumErrors(t *testing.T) { + for _, flow := range []testFlow{unicastV4, unicastV6} { + c := newDualTestContext(t, defaultMTU) + defer c.cleanup() - // Invalidate the UDP header checksum field, taking care to avoid - // overflow to zero, which would disable checksum validation. - for u := header.UDP(buf[header.IPv4MinimumSize:]); ; { - u.SetChecksum(u.Checksum() + 1) - if u.Checksum() != 0 { - break + c.createEndpoint(flow.sockProto()) + // Bind to wildcard. + if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { + c.t.Fatalf("Bind failed: %s", err) } - } - - c.linkEP.InjectInbound(ipv4.ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{ - Data: buf.ToVectorisedView(), - })) - - const want = 1 - if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want { - t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want) - } - if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want { - t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want) - } -} - -// TestIncrementChecksumErrorsV6 verifies if a checksum error is detected, -// global and endpoint stats are incremented. -func TestIncrementChecksumErrorsV6(t *testing.T) { - c := newDualTestContext(t, defaultMTU) - defer c.cleanup() - - c.createEndpoint(ipv6.ProtocolNumber) - // Bind to wildcard. - if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { - c.t.Fatalf("Bind failed: %s", err) - } - - payload := newPayload() - h := unicastV6.header4Tuple(incoming) - buf := c.buildV6Packet(payload, &h) - - // Invalidate the UDP header checksum field. - u := header.UDP(buf[header.IPv6MinimumSize:]) - u.SetChecksum(u.Checksum() + 1) - c.linkEP.InjectInbound(ipv6.ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{ - Data: buf.ToVectorisedView(), - })) + payload := newPayload() + c.injectPacket(flow, payload, true /* badChecksum */) - const want = 1 - if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want { - t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want) - } - if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want { - t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want) + const want = 1 + if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want { + t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want) + } + if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want { + t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want) + } } } diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index 952871f95..7027df1a5 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -60,7 +60,6 @@ var ( // enabled for each run. pprofBlock = flag.Bool("pprof-block", false, "enables block profiling with runsc debug") pprofCPU = flag.Bool("pprof-cpu", false, "enables CPU profiling with runsc debug") - pprofGo = flag.Bool("pprof-go", false, "enables goroutine profiling with runsc debug") pprofHeap = flag.Bool("pprof-heap", false, "enables heap profiling with runsc debug") pprofMutex = flag.Bool("pprof-mutex", false, "enables mutex profiling with runsc debug") ) diff --git a/pkg/test/dockerutil/profile.go b/pkg/test/dockerutil/profile.go index f0396ef24..55f9496cd 100644 --- a/pkg/test/dockerutil/profile.go +++ b/pkg/test/dockerutil/profile.go @@ -63,7 +63,7 @@ type Pprof struct { // MakePprofFromFlags makes a Pprof profile from flags. func MakePprofFromFlags(c *Container) *Pprof { - if !(*pprofBlock || *pprofCPU || *pprofGo || *pprofHeap || *pprofMutex) { + if !(*pprofBlock || *pprofCPU || *pprofHeap || *pprofMutex) { return nil } return &Pprof{ diff --git a/runsc/BUILD b/runsc/BUILD index 267fb2af8..33d8554af 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_binary", "pkg_deb", "pkg_tar") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) @@ -61,62 +61,6 @@ go_binary( ], ) -pkg_tar( - name = "debian-bin", - srcs = [ - ":runsc", - "//shim/v1:gvisor-containerd-shim", - "//shim/v2:containerd-shim-runsc-v1", - ], - mode = "0755", - package_dir = "/usr/bin", -) - -pkg_tar( - name = "debian-data", - extension = "tar.gz", - deps = [ - ":debian-bin", - "//shim:config", - ], -) - -genrule( - name = "deb-version", - # Note that runsc must appear in the srcs parameter and not the tools - # parameter, otherwise it will not be stamped. This is reasonable, as tools - # may be encoded differently in the build graph (cached more aggressively - # because they are assumes to be hermetic). - srcs = [":runsc"], - outs = ["version.txt"], - # Note that the little dance here is necessary because files in the $(SRCS) - # attribute are not executable by default, and we can't touch in place. - cmd = "cp $(location :runsc) $(@D)/runsc && \ - chmod a+x $(@D)/runsc && \ - $(@D)/runsc -version | grep version | sed 's/^[^0-9]*//' > $@ && \ - rm -f $(@D)/runsc", - stamp = 1, -) - -pkg_deb( - name = "runsc-debian", - architecture = "amd64", - data = ":debian-data", - # Note that the description_file will be flatten (all newlines removed), - # and therefore it is kept to a simple one-line description. The expected - # format for debian packages is "short summary\nLonger explanation of - # tool." and this is impossible with the flattening. - description_file = "debian/description", - homepage = "https://gvisor.dev/", - maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>", - package = "runsc", - postinst = "debian/postinst.sh", - version_file = ":version.txt", - visibility = [ - "//visibility:public", - ], -) - sh_test( name = "version_test", size = "small", diff --git a/runsc/config/config.go b/runsc/config/config.go index ca85cef51..8cf0378d5 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -300,10 +300,10 @@ type Config struct { // E.g. 0.2 CPU quota will result in 1, and 1.9 in 2. CPUNumFromQuota bool - // Enables VFS2 (not plumbled through yet). + // Enables VFS2 (not plumbed through yet). VFS2 bool - // Enables FUSE usage (not plumbled through yet). + // Enables FUSE usage (not plumbed through yet). FUSE bool // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in @@ -353,6 +353,8 @@ func (c *Config) ToFlags() []string { "--tx-checksum-offload=" + strconv.FormatBool(c.TXChecksumOffload), "--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead), "--qdisc=" + c.QDisc.String(), + "--vfs2=" + strconv.FormatBool(c.VFS2), + "--fuse=" + strconv.FormatBool(c.FUSE), } if c.CPUNumFromQuota { f = append(f, "--cpu-num-from-quota") diff --git a/shim/BUILD b/shim/BUILD index e581618b2..8d29c459b 100644 --- a/shim/BUILD +++ b/shim/BUILD @@ -10,6 +10,6 @@ pkg_tar( mode = "0644", package_dir = "/etc/containerd", visibility = [ - "//runsc:__pkg__", + "//visibility:public", ], ) diff --git a/test/benchmarks/database/redis_test.go b/test/benchmarks/database/redis_test.go index 394fce820..6671a4969 100644 --- a/test/benchmarks/database/redis_test.go +++ b/test/benchmarks/database/redis_test.go @@ -84,12 +84,12 @@ func BenchmarkRedis(b *testing.B) { ip, err := serverMachine.IPAddress() if err != nil { - b.Fatal("failed to get IP from server: %v", err) + b.Fatalf("failed to get IP from server: %v", err) } serverPort, err := server.FindPort(ctx, port) if err != nil { - b.Fatal("failed to get IP from server: %v", err) + b.Fatalf("failed to get IP from server: %v", err) } if err = harness.WaitUntilServing(ctx, clientMachine, ip, serverPort); err != nil { diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go index f4236ba37..fdbbfe280 100644 --- a/test/benchmarks/fs/bazel_test.go +++ b/test/benchmarks/fs/bazel_test.go @@ -73,7 +73,7 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) { if bm.tmpfs { if out, err := container.Exec(ctx, dockerutil.ExecOpts{}, "cp", "-r", workdir, "/tmp/."); err != nil { - b.Fatal("failed to copy directory: %v %s", err, out) + b.Fatalf("failed to copy directory: %v (%s)", err, out) } workdir = "/tmp" + workdir } diff --git a/test/benchmarks/network/node_test.go b/test/benchmarks/network/node_test.go index 52eb794c4..0f4a205b6 100644 --- a/test/benchmarks/network/node_test.go +++ b/test/benchmarks/network/node_test.go @@ -48,14 +48,14 @@ func runNode(b *testing.B, hey *tools.Hey) { // The machine to hold Redis and the Node Server. serverMachine, err := h.GetMachine() if err != nil { - b.Fatal("failed to get machine with: %v", err) + b.Fatalf("failed to get machine with: %v", err) } defer serverMachine.CleanUp() // The machine to run 'hey'. clientMachine, err := h.GetMachine() if err != nil { - b.Fatal("failed to get machine with: %v", err) + b.Fatalf("failed to get machine with: %v", err) } defer clientMachine.CleanUp() diff --git a/test/benchmarks/network/ruby_test.go b/test/benchmarks/network/ruby_test.go index 5e0b2b724..67f63f76a 100644 --- a/test/benchmarks/network/ruby_test.go +++ b/test/benchmarks/network/ruby_test.go @@ -47,14 +47,14 @@ func runRuby(b *testing.B, hey *tools.Hey) { // The machine to hold Redis and the Ruby Server. serverMachine, err := h.GetMachine() if err != nil { - b.Fatal("failed to get machine with: %v", err) + b.Fatalf("failed to get machine with: %v", err) } defer serverMachine.CleanUp() // The machine to run 'hey'. clientMachine, err := h.GetMachine() if err != nil { - b.Fatal("failed to get machine with: %v", err) + b.Fatalf("failed to get machine with: %v", err) } defer clientMachine.CleanUp() ctx := context.Background() diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index 29d4cc6fe..0f8e279f8 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -28,6 +28,7 @@ #include "include/grpcpp/security/server_credentials.h" #include "include/grpcpp/server_builder.h" +#include "include/grpcpp/server_context.h" #include "test/packetimpact/proto/posix_server.grpc.pb.h" #include "test/packetimpact/proto/posix_server.pb.h" @@ -108,7 +109,7 @@ } class PosixImpl final : public posix_server::Posix::Service { - ::grpc::Status Accept(grpc_impl::ServerContext *context, + ::grpc::Status Accept(grpc::ServerContext *context, const ::posix_server::AcceptRequest *request, ::posix_server::AcceptResponse *response) override { sockaddr_storage addr; @@ -119,7 +120,7 @@ class PosixImpl final : public posix_server::Posix::Service { return sockaddr_to_proto(addr, addrlen, response->mutable_addr()); } - ::grpc::Status Bind(grpc_impl::ServerContext *context, + ::grpc::Status Bind(grpc::ServerContext *context, const ::posix_server::BindRequest *request, ::posix_server::BindResponse *response) override { if (!request->has_addr()) { @@ -140,7 +141,7 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } - ::grpc::Status Close(grpc_impl::ServerContext *context, + ::grpc::Status Close(grpc::ServerContext *context, const ::posix_server::CloseRequest *request, ::posix_server::CloseResponse *response) override { response->set_ret(close(request->fd())); @@ -148,7 +149,7 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } - ::grpc::Status Connect(grpc_impl::ServerContext *context, + ::grpc::Status Connect(grpc::ServerContext *context, const ::posix_server::ConnectRequest *request, ::posix_server::ConnectResponse *response) override { if (!request->has_addr()) { @@ -168,7 +169,7 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } - ::grpc::Status Fcntl(grpc_impl::ServerContext *context, + ::grpc::Status Fcntl(grpc::ServerContext *context, const ::posix_server::FcntlRequest *request, ::posix_server::FcntlResponse *response) override { response->set_ret(::fcntl(request->fd(), request->cmd(), request->arg())); @@ -177,7 +178,7 @@ class PosixImpl final : public posix_server::Posix::Service { } ::grpc::Status GetSockName( - grpc_impl::ServerContext *context, + grpc::ServerContext *context, const ::posix_server::GetSockNameRequest *request, ::posix_server::GetSockNameResponse *response) override { sockaddr_storage addr; @@ -189,7 +190,7 @@ class PosixImpl final : public posix_server::Posix::Service { } ::grpc::Status GetSockOpt( - grpc_impl::ServerContext *context, + grpc::ServerContext *context, const ::posix_server::GetSockOptRequest *request, ::posix_server::GetSockOptResponse *response) override { switch (request->type()) { @@ -230,7 +231,7 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } - ::grpc::Status Listen(grpc_impl::ServerContext *context, + ::grpc::Status Listen(grpc::ServerContext *context, const ::posix_server::ListenRequest *request, ::posix_server::ListenResponse *response) override { response->set_ret(listen(request->sockfd(), request->backlog())); @@ -269,7 +270,7 @@ class PosixImpl final : public posix_server::Posix::Service { } ::grpc::Status SetSockOpt( - grpc_impl::ServerContext *context, + grpc::ServerContext *context, const ::posix_server::SetSockOptRequest *request, ::posix_server::SetSockOptResponse *response) override { switch (request->optval().val_case()) { @@ -302,7 +303,7 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } - ::grpc::Status Socket(grpc_impl::ServerContext *context, + ::grpc::Status Socket(grpc::ServerContext *context, const ::posix_server::SocketRequest *request, ::posix_server::SocketResponse *response) override { response->set_fd( @@ -311,6 +312,13 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } + ::grpc::Status Shutdown(grpc_impl::ServerContext *context, + const ::posix_server::ShutdownRequest *request, + ::posix_server::ShutdownResponse *response) override { + response->set_errno_(shutdown(request->fd(), request->how())); + return ::grpc::Status::OK; + } + ::grpc::Status Recv(::grpc::ServerContext *context, const ::posix_server::RecvRequest *request, ::posix_server::RecvResponse *response) override { diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto index ccd20b10d..f32ed54ef 100644 --- a/test/packetimpact/proto/posix_server.proto +++ b/test/packetimpact/proto/posix_server.proto @@ -188,6 +188,15 @@ message SocketResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. } +message ShutdownRequest { + int32 fd = 1; + int32 how = 2; +} + +message ShutdownResponse { + int32 errno_ = 1; // "errno" may fail to compile in c++. +} + message RecvRequest { int32 sockfd = 1; int32 len = 2; @@ -225,6 +234,8 @@ service Posix { rpc SetSockOpt(SetSockOptRequest) returns (SetSockOptResponse); // Call socket() on the DUT. rpc Socket(SocketRequest) returns (SocketResponse); + // Call shutdown() on the DUT. + rpc Shutdown(ShutdownRequest) returns (ShutdownResponse); // Call recv() on the DUT. rpc Recv(RecvRequest) returns (RecvResponse); } diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl index 93a36c6c2..d72c63fe6 100644 --- a/test/packetimpact/runner/defs.bzl +++ b/test/packetimpact/runner/defs.bzl @@ -125,6 +125,7 @@ def packetimpact_go_test(name, size = "small", pure = True, expect_native_failur name = testbench_binary, size = size, pure = pure, + nogo = False, # FIXME(gvisor.dev/issue/3374): Not working with all build systems. tags = [ "local", "manual", diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index 73c532e75..6165ab293 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -16,11 +16,13 @@ package testbench import ( "context" + "encoding/binary" "flag" "net" "strconv" "syscall" "testing" + "time" pb "gvisor.dev/gvisor/test/packetimpact/proto/posix_server_go_proto" @@ -700,3 +702,43 @@ func (dut *DUT) RecvWithErrno(ctx context.Context, t *testing.T, sockfd, len, fl } return resp.GetRet(), resp.GetBuf(), syscall.Errno(resp.GetErrno_()) } + +// SetSockLingerOption sets SO_LINGER socket option on the DUT. +func (dut *DUT) SetSockLingerOption(t *testing.T, sockfd int32, timeout time.Duration, enable bool) { + var linger unix.Linger + if enable { + linger.Onoff = 1 + } + linger.Linger = int32(timeout / time.Second) + + buf := make([]byte, 8) + binary.LittleEndian.PutUint32(buf, uint32(linger.Onoff)) + binary.LittleEndian.PutUint32(buf[4:], uint32(linger.Linger)) + dut.SetSockOpt(t, sockfd, unix.SOL_SOCKET, unix.SO_LINGER, buf) +} + +// Shutdown calls shutdown on the DUT and causes a fatal test failure if it doesn't +// succeed. If more control over the timeout or error handling is needed, use +// ShutdownWithErrno. +func (dut *DUT) Shutdown(t *testing.T, fd, how int32) error { + t.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) + defer cancel() + return dut.ShutdownWithErrno(ctx, t, fd, how) +} + +// ShutdownWithErrno calls shutdown on the DUT. +func (dut *DUT) ShutdownWithErrno(ctx context.Context, t *testing.T, fd, how int32) error { + t.Helper() + + req := pb.ShutdownRequest{ + Fd: fd, + How: how, + } + resp, err := dut.posixServer.Shutdown(ctx, &req) + if err != nil { + t.Fatalf("failed to call Shutdown: %s", err) + } + return syscall.Errno(resp.GetErrno_()) +} diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 74658fea0..7a7152fa5 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -308,3 +308,13 @@ packetimpact_go_test( "@org_golang_x_sys//unix:go_default_library", ], ) + +packetimpact_go_test( + name = "tcp_linger", + srcs = ["tcp_linger_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/test/packetimpact/tests/tcp_linger_test.go b/test/packetimpact/tests/tcp_linger_test.go new file mode 100644 index 000000000..913e49e06 --- /dev/null +++ b/test/packetimpact/tests/tcp_linger_test.go @@ -0,0 +1,253 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_linger_test + +import ( + "context" + "flag" + "syscall" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +func createSocket(t *testing.T, dut testbench.DUT) (int32, int32, testbench.TCPIPv4) { + listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + conn.Connect(t) + acceptFD, _ := dut.Accept(t, listenFD) + return acceptFD, listenFD, conn +} + +func closeAll(t *testing.T, dut testbench.DUT, listenFD int32, conn testbench.TCPIPv4) { + conn.Close(t) + dut.Close(t, listenFD) + dut.TearDown() +} + +// lingerDuration is the timeout value used with SO_LINGER socket option. +const lingerDuration = 3 * time.Second + +// TestTCPLingerZeroTimeout tests when SO_LINGER is set with zero timeout. DUT +// should send RST-ACK when socket is closed. +func TestTCPLingerZeroTimeout(t *testing.T) { + // Create a socket, listen, TCP connect, and accept. + dut := testbench.NewDUT(t) + acceptFD, listenFD, conn := createSocket(t, dut) + defer closeAll(t, dut, listenFD, conn) + + dut.SetSockLingerOption(t, acceptFD, 0, true) + dut.Close(t, acceptFD) + + // If the linger timeout is set to zero, the DUT should send a RST. + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, time.Second); err != nil { + t.Errorf("expected RST-ACK packet within a second but got none: %s", err) + } + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) +} + +// TestTCPLingerOff tests when SO_LINGER is not set. DUT should send FIN-ACK +// when socket is closed. +func TestTCPLingerOff(t *testing.T) { + // Create a socket, listen, TCP connect, and accept. + dut := testbench.NewDUT(t) + acceptFD, listenFD, conn := createSocket(t, dut) + defer closeAll(t, dut, listenFD, conn) + + dut.Close(t, acceptFD) + + // If SO_LINGER is not set, DUT should send a FIN-ACK. + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { + t.Errorf("expected FIN-ACK packet within a second but got none: %s", err) + } + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) +} + +// TestTCPLingerNonZeroTimeout tests when SO_LINGER is set with non-zero timeout. +// DUT should close the socket after timeout. +func TestTCPLingerNonZeroTimeout(t *testing.T) { + for _, tt := range []struct { + description string + lingerOn bool + }{ + {"WithNonZeroLinger", true}, + {"WithoutLinger", false}, + } { + t.Run(tt.description, func(t *testing.T) { + // Create a socket, listen, TCP connect, and accept. + dut := testbench.NewDUT(t) + acceptFD, listenFD, conn := createSocket(t, dut) + defer closeAll(t, dut, listenFD, conn) + + dut.SetSockLingerOption(t, acceptFD, lingerDuration, tt.lingerOn) + + // Increase timeout as Close will take longer time to + // return when SO_LINGER is set with non-zero timeout. + timeout := lingerDuration + 1*time.Second + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + start := time.Now() + dut.CloseWithErrno(ctx, t, acceptFD) + end := time.Now() + diff := end.Sub(start) + + if tt.lingerOn && diff < lingerDuration { + t.Errorf("expected close to return after %v seconds, but returned sooner", lingerDuration) + } else if !tt.lingerOn && diff > 1*time.Second { + t.Errorf("expected close to return within a second, but returned later") + } + + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { + t.Errorf("expected FIN-ACK packet within a second but got none: %s", err) + } + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + }) + } +} + +// TestTCPLingerSendNonZeroTimeout tests when SO_LINGER is set with non-zero +// timeout and send a packet. DUT should close the socket after timeout. +func TestTCPLingerSendNonZeroTimeout(t *testing.T) { + for _, tt := range []struct { + description string + lingerOn bool + }{ + {"WithSendNonZeroLinger", true}, + {"WithoutLinger", false}, + } { + t.Run(tt.description, func(t *testing.T) { + // Create a socket, listen, TCP connect, and accept. + dut := testbench.NewDUT(t) + acceptFD, listenFD, conn := createSocket(t, dut) + defer closeAll(t, dut, listenFD, conn) + + dut.SetSockLingerOption(t, acceptFD, lingerDuration, tt.lingerOn) + + // Send data. + sampleData := []byte("Sample Data") + dut.Send(t, acceptFD, sampleData, 0) + + // Increase timeout as Close will take longer time to + // return when SO_LINGER is set with non-zero timeout. + timeout := lingerDuration + 1*time.Second + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + start := time.Now() + dut.CloseWithErrno(ctx, t, acceptFD) + end := time.Now() + diff := end.Sub(start) + + if tt.lingerOn && diff < lingerDuration { + t.Errorf("expected close to return after %v seconds, but returned sooner", lingerDuration) + } else if !tt.lingerOn && diff > 1*time.Second { + t.Errorf("expected close to return within a second, but returned later") + } + + samplePayload := &testbench.Payload{Bytes: sampleData} + if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } + + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { + t.Errorf("expected FIN-ACK packet within a second but got none: %s", err) + } + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + }) + } +} + +// TestTCPLingerShutdownZeroTimeout tests SO_LINGER with shutdown() and zero +// timeout. DUT should send RST-ACK when socket is closed. +func TestTCPLingerShutdownZeroTimeout(t *testing.T) { + // Create a socket, listen, TCP connect, and accept. + dut := testbench.NewDUT(t) + acceptFD, listenFD, conn := createSocket(t, dut) + defer closeAll(t, dut, listenFD, conn) + + dut.SetSockLingerOption(t, acceptFD, 0, true) + dut.Shutdown(t, acceptFD, syscall.SHUT_RDWR) + dut.Close(t, acceptFD) + + // Shutdown will send FIN-ACK with read/write option. + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { + t.Errorf("expected FIN-ACK packet within a second but got none: %s", err) + } + + // If the linger timeout is set to zero, the DUT should send a RST. + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, time.Second); err != nil { + t.Errorf("expected RST-ACK packet within a second but got none: %s", err) + } + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) +} + +// TestTCPLingerShutdownSendNonZeroTimeout tests SO_LINGER with shutdown() and +// non-zero timeout. DUT should close the socket after timeout. +func TestTCPLingerShutdownSendNonZeroTimeout(t *testing.T) { + for _, tt := range []struct { + description string + lingerOn bool + }{ + {"shutdownRDWR", true}, + {"shutdownRDWR", false}, + } { + t.Run(tt.description, func(t *testing.T) { + // Create a socket, listen, TCP connect, and accept. + dut := testbench.NewDUT(t) + acceptFD, listenFD, conn := createSocket(t, dut) + defer closeAll(t, dut, listenFD, conn) + + dut.SetSockLingerOption(t, acceptFD, lingerDuration, tt.lingerOn) + + // Send data. + sampleData := []byte("Sample Data") + dut.Send(t, acceptFD, sampleData, 0) + + dut.Shutdown(t, acceptFD, syscall.SHUT_RDWR) + + // Increase timeout as Close will take longer time to + // return when SO_LINGER is set with non-zero timeout. + timeout := lingerDuration + 1*time.Second + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + start := time.Now() + dut.CloseWithErrno(ctx, t, acceptFD) + end := time.Now() + diff := end.Sub(start) + + if tt.lingerOn && diff < lingerDuration { + t.Errorf("expected close to return after %v seconds, but returned sooner", lingerDuration) + } else if !tt.lingerOn && diff > 1*time.Second { + t.Errorf("expected close to return within a second, but returned later") + } + + samplePayload := &testbench.Payload{Bytes: sampleData} + if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } + + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { + t.Errorf("expected FIN-ACK packet within a second but got none: %s", err) + } + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + }) + } +} diff --git a/test/root/crictl_test.go b/test/root/crictl_test.go index df91fa0fe..11ac5cb52 100644 --- a/test/root/crictl_test.go +++ b/test/root/crictl_test.go @@ -418,7 +418,7 @@ func setup(t *testing.T, version string) (*criutil.Crictl, func(), error) { // care about the docker runtime name. config = v2Template default: - t.Fatalf("unknown version: %d", version) + t.Fatalf("unknown version: %s", version) } t.Logf("Using config: %s", config) diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv index 1d8e65fd0..5866ee56d 100644 --- a/test/runtimes/exclude_nodejs12.4.0.csv +++ b/test/runtimes/exclude_nodejs12.4.0.csv @@ -49,6 +49,7 @@ pseudo-tty/test-tty-wrap.js,b/162801321, pummel/test-heapdump-http2.js,,Flaky pummel/test-net-pingpong.js,, pummel/test-vm-memleak.js,b/162799436, +pummel/test-watch-file.js,,Flaky - Timeout sequential/test-child-process-pass-fd.js,b/63926391,Flaky sequential/test-https-connect-localport.js,,Flaky - EADDRINUSE sequential/test-net-bytes-per-incoming-chunk-overhead.js,,flaky - timeout diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv index 2ce979dc8..815a137c5 100644 --- a/test/runtimes/exclude_php7.3.6.csv +++ b/test/runtimes/exclude_php7.3.6.csv @@ -13,6 +13,13 @@ ext/session/tests/session_set_save_handler_class_018.phpt,, ext/session/tests/session_set_save_handler_iface_003.phpt,, ext/session/tests/session_set_save_handler_sid_001.phpt,, ext/session/tests/session_set_save_handler_variation4.phpt,, +ext/standard/tests/file/disk.phpt,https://bugs.php.net/bug.php?id=80018, +ext/standard/tests/file/disk_free_space_basic.phpt,https://bugs.php.net/bug.php?id=80018, +ext/standard/tests/file/disk_free_space_error.phpt,https://bugs.php.net/bug.php?id=80018, +ext/standard/tests/file/disk_free_space_variation.phpt,https://bugs.php.net/bug.php?id=80018, +ext/standard/tests/file/disk_total_space_basic.phpt,https://bugs.php.net/bug.php?id=80018, +ext/standard/tests/file/disk_total_space_error.phpt,https://bugs.php.net/bug.php?id=80018, +ext/standard/tests/file/disk_total_space_variation.phpt,https://bugs.php.net/bug.php?id=80018, ext/standard/tests/file/fopen_variation19.phpt,b/162894964, ext/standard/tests/file/lstat_stat_variation14.phpt,,Flaky ext/standard/tests/file/php_fd_wrapper_01.phpt,, diff --git a/test/runtimes/proctor/BUILD b/test/runtimes/proctor/BUILD index f76e2ddc0..d1935cbe8 100644 --- a/test/runtimes/proctor/BUILD +++ b/test/runtimes/proctor/BUILD @@ -21,6 +21,7 @@ go_test( size = "small", srcs = ["proctor_test.go"], library = ":proctor", + nogo = False, # FIXME(gvisor.dev/issue/3374): Not working with all build systems. pure = True, deps = [ "//pkg/test/testutil", diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index ad53e92e5..65e8299c3 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -263,6 +263,10 @@ syscall_test( ) syscall_test( + test = "//test/syscalls/linux:kcov_test", +) + +syscall_test( test = "//test/syscalls/linux:kill_test", ) @@ -675,6 +679,14 @@ syscall_test( ) syscall_test( + test = "//test/syscalls/linux:socket_ipv4_udp_unbound_loopback_netlink_test", +) + +syscall_test( + test = "//test/syscalls/linux:socket_ipv6_udp_unbound_loopback_netlink_test", +) + +syscall_test( test = "//test/syscalls/linux:socket_ip_unbound_test", ) diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index ecd2d8d2a..5a323d331 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1068,6 +1068,20 @@ cc_binary( ) cc_binary( + name = "kcov_test", + testonly = 1, + srcs = ["kcov.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( name = "kill_test", testonly = 1, srcs = ["kill.cc"], @@ -2403,6 +2417,57 @@ cc_library( ) cc_library( + name = "socket_ip_udp_unbound_netlink_test_utils", + testonly = 1, + srcs = [ + "socket_ip_udp_unbound_netlink_util.cc", + ], + hdrs = [ + "socket_ip_udp_unbound_netlink_util.h", + ], + deps = [ + ":socket_test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_ipv4_udp_unbound_netlink_test_cases", + testonly = 1, + srcs = [ + "socket_ipv4_udp_unbound_netlink.cc", + ], + hdrs = [ + "socket_ipv4_udp_unbound_netlink.h", + ], + deps = [ + ":socket_ip_udp_unbound_netlink_test_utils", + ":socket_netlink_route_util", + "//test/util:capability_util", + gtest, + ], + alwayslink = 1, +) + +cc_library( + name = "socket_ipv6_udp_unbound_netlink_test_cases", + testonly = 1, + srcs = [ + "socket_ipv6_udp_unbound_netlink.cc", + ], + hdrs = [ + "socket_ipv6_udp_unbound_netlink.h", + ], + deps = [ + ":socket_ip_udp_unbound_netlink_test_utils", + ":socket_netlink_route_util", + "//test/util:capability_util", + gtest, + ], + alwayslink = 1, +) + +cc_library( name = "socket_ipv4_udp_unbound_external_networking_test_cases", testonly = 1, srcs = [ @@ -2756,6 +2821,38 @@ cc_binary( ) cc_binary( + name = "socket_ipv4_udp_unbound_loopback_netlink_test", + testonly = 1, + srcs = [ + "socket_ipv4_udp_unbound_loopback_netlink.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_ipv4_udp_unbound_netlink_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ipv6_udp_unbound_loopback_netlink_test", + testonly = 1, + srcs = [ + "socket_ipv6_udp_unbound_loopback_netlink.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_ipv6_udp_unbound_netlink_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( name = "socket_ip_unbound_test", testonly = 1, srcs = [ diff --git a/test/syscalls/linux/iptables.cc b/test/syscalls/linux/iptables.cc index 9b338d970..f1af8f097 100644 --- a/test/syscalls/linux/iptables.cc +++ b/test/syscalls/linux/iptables.cc @@ -67,12 +67,43 @@ TEST(IPTablesBasic, FailSockoptNonRaw) { struct ipt_getinfo info = {}; snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); socklen_t info_size = sizeof(info); - EXPECT_THAT(getsockopt(sock, IPPROTO_IP, IPT_SO_GET_INFO, &info, &info_size), + EXPECT_THAT(getsockopt(sock, SOL_IP, IPT_SO_GET_INFO, &info, &info_size), SyscallFailsWithErrno(ENOPROTOOPT)); ASSERT_THAT(close(sock), SyscallSucceeds()); } +TEST(IPTablesBasic, GetInfoErrorPrecedence) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET, SOCK_DGRAM, 0), SyscallSucceeds()); + + // When using the wrong type of socket and a too-short optlen, we should get + // EINVAL. + struct ipt_getinfo info = {}; + snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + socklen_t info_size = sizeof(info) - 1; + ASSERT_THAT(getsockopt(sock, SOL_IP, IPT_SO_GET_INFO, &info, &info_size), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(IPTablesBasic, GetEntriesErrorPrecedence) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET, SOCK_DGRAM, 0), SyscallSucceeds()); + + // When using the wrong type of socket and a too-short optlen, we should get + // EINVAL. + struct ipt_get_entries entries = {}; + socklen_t entries_size = sizeof(struct ipt_get_entries) - 1; + snprintf(entries.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + ASSERT_THAT( + getsockopt(sock, SOL_IP, IPT_SO_GET_ENTRIES, &entries, &entries_size), + SyscallFailsWithErrno(EINVAL)); +} + // Fixture for iptables tests. class IPTablesTest : public ::testing::Test { protected: @@ -112,7 +143,7 @@ TEST_F(IPTablesTest, InitialState) { struct ipt_getinfo info = {}; snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); socklen_t info_size = sizeof(info); - ASSERT_THAT(getsockopt(s_, IPPROTO_IP, IPT_SO_GET_INFO, &info, &info_size), + ASSERT_THAT(getsockopt(s_, SOL_IP, IPT_SO_GET_INFO, &info, &info_size), SyscallSucceeds()); // The nat table supports PREROUTING, and OUTPUT. @@ -148,7 +179,7 @@ TEST_F(IPTablesTest, InitialState) { snprintf(entries->name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); entries->size = info.size; ASSERT_THAT( - getsockopt(s_, IPPROTO_IP, IPT_SO_GET_ENTRIES, entries, &entries_size), + getsockopt(s_, SOL_IP, IPT_SO_GET_ENTRIES, entries, &entries_size), SyscallSucceeds()); // Verify the name and size. diff --git a/test/syscalls/linux/kcov.cc b/test/syscalls/linux/kcov.cc new file mode 100644 index 000000000..f3c30444e --- /dev/null +++ b/test/syscalls/linux/kcov.cc @@ -0,0 +1,70 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/errno.h> +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// For this test to work properly, it must be run with coverage enabled. On +// native Linux, this involves compiling the kernel with kcov enabled. For +// gVisor, we need to enable the Go coverage tool, e.g. +// bazel test --collect_coverage_data --instrumentation_filter=//pkg/... <test>. +TEST(KcovTest, Kcov) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_DAC_OVERRIDE)))); + + constexpr int kSize = 4096; + constexpr int KCOV_INIT_TRACE = 0x80086301; + constexpr int KCOV_ENABLE = 0x6364; + + int fd; + ASSERT_THAT(fd = open("/sys/kernel/debug/kcov", O_RDWR), + AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ENOENT))); + + // Kcov not enabled. + SKIP_IF(errno == ENOENT); + + ASSERT_THAT(ioctl(fd, KCOV_INIT_TRACE, kSize), SyscallSucceeds()); + uint64_t* area = (uint64_t*)mmap(nullptr, kSize * sizeof(uint64_t), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + ASSERT_TRUE(area != MAP_FAILED); + ASSERT_THAT(ioctl(fd, KCOV_ENABLE, 0), SyscallSucceeds()); + + for (int i = 0; i < 10; i++) { + // Make some syscalls to generate coverage data. + ASSERT_THAT(ioctl(fd, KCOV_ENABLE, 0), SyscallFailsWithErrno(EINVAL)); + } + + uint64_t num_pcs = *(uint64_t*)(area); + EXPECT_GT(num_pcs, 0); + for (uint64_t i = 1; i <= num_pcs; i++) { + // Verify that PCs are in the standard kernel range. + EXPECT_GT(area[i], 0xffffffff7fffffffL); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc index f8b7f7938..4a450742b 100644 --- a/test/syscalls/linux/memfd.cc +++ b/test/syscalls/linux/memfd.cc @@ -14,12 +14,10 @@ #include <errno.h> #include <fcntl.h> -#include <linux/magic.h> #include <linux/memfd.h> #include <linux/unistd.h> #include <string.h> #include <sys/mman.h> -#include <sys/statfs.h> #include <sys/syscall.h> #include <vector> @@ -53,6 +51,7 @@ namespace { #define F_SEAL_GROW 0x0004 #define F_SEAL_WRITE 0x0008 +using ::gvisor::testing::IsTmpfs; using ::testing::StartsWith; const std::string kMemfdName = "some-memfd"; @@ -444,20 +443,6 @@ TEST(MemfdTest, SealsAreInodeLevelProperties) { EXPECT_THAT(ftruncate(memfd3.get(), kPageSize), SyscallFailsWithErrno(EPERM)); } -PosixErrorOr<bool> IsTmpfs(const std::string& path) { - struct statfs stat; - if (statfs(path.c_str(), &stat)) { - if (errno == ENOENT) { - // Nothing at path, don't raise this as an error. Instead, just report no - // tmpfs at path. - return false; - } - return PosixError(errno, - absl::StrFormat("statfs(\"%s\", %#p)", path, &stat)); - } - return stat.f_type == TMPFS_MAGIC; -} - // Tmpfs files also support seals, but are created with F_SEAL_SEAL. TEST(MemfdTest, TmpfsFilesHaveSealSeal) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs("/tmp"))); diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index bd30fb86b..7c1d6a414 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -97,11 +97,13 @@ TEST(BadSocketPairArgs, ValidateErrForBadCallsToSocketPair) { ASSERT_THAT(socketpair(AF_INET6, 0, 0, fd), SyscallFailsWithErrno(ESOCKTNOSUPPORT)); - // Invalid AF will return ENOAFSUPPORT. + // Invalid AF will return ENOAFSUPPORT or EPERM. ASSERT_THAT(socketpair(AF_MAX, 0, 0, fd), - SyscallFailsWithErrno(EAFNOSUPPORT)); + ::testing::AnyOf(SyscallFailsWithErrno(EAFNOSUPPORT), + SyscallFailsWithErrno(EPERM))); ASSERT_THAT(socketpair(8675309, 0, 0, fd), - SyscallFailsWithErrno(EAFNOSUPPORT)); + ::testing::AnyOf(SyscallFailsWithErrno(EAFNOSUPPORT), + SyscallFailsWithErrno(EPERM))); } enum class Operation { @@ -116,7 +118,8 @@ std::string OperationToString(Operation operation) { return "Bind"; case Operation::Connect: return "Connect"; - case Operation::SendTo: + // Operation::SendTo is the default. + default: return "SendTo"; } } diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index 53c076787..f4b69c46c 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -819,18 +819,37 @@ TEST_P(TCPSocketPairTest, TCPLingerTimeoutDefault) { EXPECT_EQ(get, kDefaultTCPLingerTimeout); } -TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutZeroOrLess) { +TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutLessThanZero) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - constexpr int kZero = 0; - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &kZero, - sizeof(kZero)), - SyscallSucceedsWithValue(0)); - constexpr int kNegative = -1234; EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &kNegative, sizeof(kNegative)), SyscallSucceedsWithValue(0)); + int get = INT_MAX; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, -1); +} + +TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &kZero, + sizeof(kZero)), + SyscallSucceedsWithValue(0)); + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_THAT(get, + AnyOf(Eq(kMaxTCPLingerTimeout), Eq(kOldMaxTCPLingerTimeout))); } TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutAboveMax) { @@ -1061,5 +1080,124 @@ TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) { } } +// Test setsockopt and getsockopt for a socket with SO_LINGER option. +TEST_P(TCPSocketPairTest, SetAndGetLingerOption) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Check getsockopt before SO_LINGER option is set. + struct linger got_linger = {-1, -1}; + socklen_t got_len = sizeof(got_linger); + + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, + &got_linger, &got_len), + SyscallSucceeds()); + ASSERT_THAT(got_len, sizeof(got_linger)); + struct linger want_linger = {}; + EXPECT_EQ(0, memcmp(&want_linger, &got_linger, got_len)); + + // Set and get SO_LINGER with negative values. + struct linger sl; + sl.l_onoff = 1; + sl.l_linger = -3; + ASSERT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)), + SyscallSucceeds()); + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, + &got_linger, &got_len), + SyscallSucceeds()); + ASSERT_EQ(got_len, sizeof(got_linger)); + EXPECT_EQ(sl.l_onoff, got_linger.l_onoff); + // Linux returns a different value as it uses HZ to convert the seconds to + // jiffies which overflows for negative values. We want to be compatible with + // linux for getsockopt return value. + if (IsRunningOnGvisor()) { + EXPECT_EQ(sl.l_linger, got_linger.l_linger); + } + + // Set and get SO_LINGER option with positive values. + sl.l_onoff = 1; + sl.l_linger = 5; + ASSERT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)), + SyscallSucceeds()); + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, + &got_linger, &got_len), + SyscallSucceeds()); + ASSERT_EQ(got_len, sizeof(got_linger)); + EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len)); +} + +// Test socket to disable SO_LINGER option. +TEST_P(TCPSocketPairTest, SetOffLingerOption) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Set the SO_LINGER option. + struct linger sl; + sl.l_onoff = 1; + sl.l_linger = 5; + ASSERT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)), + SyscallSucceeds()); + + // Check getsockopt after SO_LINGER option is set. + struct linger got_linger = {-1, -1}; + socklen_t got_len = sizeof(got_linger); + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, + &got_linger, &got_len), + SyscallSucceeds()); + ASSERT_EQ(got_len, sizeof(got_linger)); + EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len)); + + sl.l_onoff = 0; + sl.l_linger = 5; + ASSERT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)), + SyscallSucceeds()); + + // Check getsockopt after SO_LINGER option is set to zero. + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, + &got_linger, &got_len), + SyscallSucceeds()); + ASSERT_EQ(got_len, sizeof(got_linger)); + EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len)); +} + +// Test close on dup'd socket with SO_LINGER option set. +TEST_P(TCPSocketPairTest, CloseWithLingerOption) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Set the SO_LINGER option. + struct linger sl; + sl.l_onoff = 1; + sl.l_linger = 5; + ASSERT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)), + SyscallSucceeds()); + + // Check getsockopt after SO_LINGER option is set. + struct linger got_linger = {-1, -1}; + socklen_t got_len = sizeof(got_linger); + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, + &got_linger, &got_len), + SyscallSucceeds()); + ASSERT_EQ(got_len, sizeof(got_linger)); + EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len)); + + FileDescriptor dupFd = FileDescriptor(dup(sockets->first_fd())); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + char buf[10] = {}; + // Write on dupFd should succeed as socket will not be closed until + // all references are removed. + ASSERT_THAT(RetryEINTR(write)(dupFd.get(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(EBADF)); + + // Close the socket. + dupFd.reset(); + // Write on dupFd should fail as all references for socket are removed. + ASSERT_THAT(RetryEINTR(write)(dupFd.get(), buf, sizeof(buf)), + SyscallFailsWithErrno(EBADF)); +} } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index edb86aded..6e4ecd680 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -435,8 +435,10 @@ TEST_P(UDPSocketPairTest, TOSRecvMismatch) { // Test that an IPv4 socket does not support the IPv6 TClass option. TEST_P(UDPSocketPairTest, TClassRecvMismatch) { - // This should only test AF_INET sockets for the mismatch behavior. - SKIP_IF(GetParam().domain != AF_INET); + // This should only test AF_INET6 sockets for the mismatch behavior. + SKIP_IF(GetParam().domain != AF_INET6); + // IPV6_RECVTCLASS is only valid for SOCK_DGRAM and SOCK_RAW. + SKIP_IF(GetParam().type != SOCK_DGRAM | GetParam().type != SOCK_RAW); auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); @@ -448,5 +450,35 @@ TEST_P(UDPSocketPairTest, TClassRecvMismatch) { SyscallFailsWithErrno(EOPNOTSUPP)); } +// Test the SO_LINGER option can be set/get on udp socket. +TEST_P(UDPSocketPairTest, SoLingerFail) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + int level = SOL_SOCKET; + int type = SO_LINGER; + + struct linger sl; + sl.l_onoff = 1; + sl.l_linger = 5; + ASSERT_THAT(setsockopt(sockets->first_fd(), level, type, &sl, sizeof(sl)), + SyscallSucceedsWithValue(0)); + + struct linger got_linger = {}; + socklen_t length = sizeof(sl); + ASSERT_THAT( + getsockopt(sockets->first_fd(), level, type, &got_linger, &length), + SyscallSucceedsWithValue(0)); + + ASSERT_EQ(length, sizeof(got_linger)); + // Linux returns the values which are set in the SetSockOpt for SO_LINGER. + // In gVisor, we do not store the linger values for UDP as SO_LINGER for UDP + // is a no-op. + if (IsRunningOnGvisor()) { + struct linger want_linger = {}; + EXPECT_EQ(0, memcmp(&want_linger, &got_linger, length)); + } else { + EXPECT_EQ(0, memcmp(&sl, &got_linger, length)); + } +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc new file mode 100644 index 000000000..13ffafde7 --- /dev/null +++ b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc @@ -0,0 +1,58 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h" + +namespace gvisor { +namespace testing { + +const size_t kSendBufSize = 200; + +void IPUDPUnboundSocketNetlinkTest::TestSendRecv(TestAddress sender_addr, + TestAddress receiver_addr) { + auto snd_sock = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto rcv_sock = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + EXPECT_THAT( + bind(snd_sock->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + + EXPECT_THAT( + bind(rcv_sock->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(rcv_sock->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + char send_buf[kSendBufSize]; + RandomizeBuffer(send_buf, kSendBufSize); + EXPECT_THAT( + RetryEINTR(sendto)(snd_sock->get(), send_buf, kSendBufSize, 0, + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceedsWithValue(kSendBufSize)); + + // Check that we received the packet. + char recv_buf[kSendBufSize] = {}; + ASSERT_THAT(RetryEINTR(recv)(rcv_sock->get(), recv_buf, kSendBufSize, 0), + SyscallSucceedsWithValue(kSendBufSize)); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, kSendBufSize)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h new file mode 100644 index 000000000..157fb0939 --- /dev/null +++ b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h @@ -0,0 +1,34 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_NETLINK_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_NETLINK_UTIL_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to IP UDP sockets. +class IPUDPUnboundSocketNetlinkTest : public SimpleSocketTest { + public: + // TestSendRecv tests sending and receiving a UDP packet from |sender_addr| to + // |receiver_addr|. + void TestSendRecv(TestAddress sender_addr, TestAddress receiver_addr); +}; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_NETLINK_UTIL_H_ diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc new file mode 100644 index 000000000..8052bf404 --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc @@ -0,0 +1,32 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +INSTANTIATE_TEST_SUITE_P( + IPv4UDPSockets, IPv4UDPUnboundSocketNetlinkTest, + ::testing::ValuesIn(ApplyVec<SocketKind>(IPv4UDPUnboundSocket, + AllBitwiseCombinations(List<int>{ + 0, SOCK_NONBLOCK})))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc new file mode 100644 index 000000000..696fbb189 --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc @@ -0,0 +1,60 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h" + +#include <arpa/inet.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_netlink_route_util.h" +#include "test/util/capability_util.h" + +namespace gvisor { +namespace testing { + +// Checks that the loopback interface considers itself bound to all IPs in an +// associated subnet. +TEST_P(IPv4UDPUnboundSocketNetlinkTest, JoinSubnet) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + // Add an IP address to the loopback interface. + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); + struct in_addr addr; + EXPECT_EQ(1, inet_pton(AF_INET, "192.0.2.1", &addr)); + EXPECT_NO_ERRNO(LinkAddLocalAddr(loopback_link.index, AF_INET, + /*prefixlen=*/24, &addr, sizeof(addr))); + + // Send from an unassigned address but an address that is in the subnet + // associated with the loopback interface. + TestAddress sender_addr("V4NotAssignd1"); + sender_addr.addr.ss_family = AF_INET; + sender_addr.addr_len = sizeof(sockaddr_in); + EXPECT_EQ(1, inet_pton(AF_INET, "192.0.2.2", + &(reinterpret_cast<sockaddr_in*>(&sender_addr.addr) + ->sin_addr.s_addr))); + + // Send the packet to an unassigned address but an address that is in the + // subnet associated with the loopback interface. + TestAddress receiver_addr("V4NotAssigned2"); + receiver_addr.addr.ss_family = AF_INET; + receiver_addr.addr_len = sizeof(sockaddr_in); + EXPECT_EQ(1, inet_pton(AF_INET, "192.0.2.254", + &(reinterpret_cast<sockaddr_in*>(&receiver_addr.addr) + ->sin_addr.s_addr))); + + TestSendRecv(sender_addr, receiver_addr); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h new file mode 100644 index 000000000..fcfb3318e --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h @@ -0,0 +1,29 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_ + +#include "test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to IPv4 UDP sockets. +using IPv4UDPUnboundSocketNetlinkTest = IPUDPUnboundSocketNetlinkTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_ diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc new file mode 100644 index 000000000..17021ff82 --- /dev/null +++ b/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc @@ -0,0 +1,32 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +INSTANTIATE_TEST_SUITE_P( + IPv6UDPSockets, IPv6UDPUnboundSocketNetlinkTest, + ::testing::ValuesIn(ApplyVec<SocketKind>(IPv6UDPUnboundSocket, + AllBitwiseCombinations(List<int>{ + 0, SOCK_NONBLOCK})))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc new file mode 100644 index 000000000..6275b5aed --- /dev/null +++ b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc @@ -0,0 +1,60 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h" + +#include <arpa/inet.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_netlink_route_util.h" +#include "test/util/capability_util.h" + +namespace gvisor { +namespace testing { + +// Checks that the loopback interface considers itself bound to all IPs in an +// associated subnet. +TEST_P(IPv6UDPUnboundSocketNetlinkTest, JoinSubnet) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + // Add an IP address to the loopback interface. + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); + struct in6_addr addr; + EXPECT_EQ(1, inet_pton(AF_INET6, "2001:db8::1", &addr)); + EXPECT_NO_ERRNO(LinkAddLocalAddr(loopback_link.index, AF_INET6, + /*prefixlen=*/64, &addr, sizeof(addr))); + + // Send from an unassigned address but an address that is in the subnet + // associated with the loopback interface. + TestAddress sender_addr("V6NotAssignd1"); + sender_addr.addr.ss_family = AF_INET6; + sender_addr.addr_len = sizeof(sockaddr_in6); + EXPECT_EQ(1, inet_pton(AF_INET6, "2001:db8::2", + reinterpret_cast<sockaddr_in6*>(&sender_addr.addr) + ->sin6_addr.s6_addr)); + + // Send the packet to an unassigned address but an address that is in the + // subnet associated with the loopback interface. + TestAddress receiver_addr("V6NotAssigned2"); + receiver_addr.addr.ss_family = AF_INET6; + receiver_addr.addr_len = sizeof(sockaddr_in6); + EXPECT_EQ(1, inet_pton(AF_INET6, "2001:db8::ffff:ffff:ffff:ffff", + reinterpret_cast<sockaddr_in6*>(&receiver_addr.addr) + ->sin6_addr.s6_addr)); + + TestSendRecv(sender_addr, receiver_addr); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h new file mode 100644 index 000000000..6a2b0a5be --- /dev/null +++ b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h @@ -0,0 +1,29 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_ + +#include "test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to IPv6 UDP sockets. +using IPv6UDPUnboundSocketNetlinkTest = IPUDPUnboundSocketNetlinkTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_ diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index cbcf08451..5510a87a0 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -28,6 +28,7 @@ #include "test/syscalls/linux/file_base.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -37,6 +38,8 @@ namespace testing { namespace { +using ::gvisor::testing::IsTmpfs; + class XattrTest : public FileTest {}; TEST_F(XattrTest, XattrNonexistentFile) { @@ -604,6 +607,77 @@ TEST_F(XattrTest, XattrWithFD) { EXPECT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds()); } +TEST_F(XattrTest, TrustedNamespaceWithCapSysAdmin) { + // Trusted namespace not supported in VFS1. + SKIP_IF(IsRunningWithVFS1()); + + // TODO(b/66162845): Only gVisor tmpfs currently supports trusted namespace. + SKIP_IF(IsRunningOnGvisor() && + !ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs(test_file_name_))); + + // Setting/Getting in the trusted namespace requires CAP_SYS_ADMIN. + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + const char* path = test_file_name_.c_str(); + const char name[] = "trusted.test"; + + // Set. + char val = 'a'; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + // Get. + char got = '\0'; + EXPECT_THAT(getxattr(path, name, &got, size), SyscallSucceedsWithValue(size)); + EXPECT_EQ(val, got); + + // List. + char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + // Remove. + EXPECT_THAT(removexattr(path, name), SyscallSucceeds()); + + // Get should now return ENODATA. + EXPECT_THAT(getxattr(path, name, &got, size), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, TrustedNamespaceWithoutCapSysAdmin) { + // Trusted namespace not supported in VFS1. + SKIP_IF(IsRunningWithVFS1()); + + // TODO(b/66162845): Only gVisor tmpfs currently supports trusted namespace. + SKIP_IF(IsRunningOnGvisor() && + !ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs(test_file_name_))); + + // Drop CAP_SYS_ADMIN if we have it. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) { + EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false)); + } + + const char* path = test_file_name_.c_str(); + const char name[] = "trusted.test"; + + // Set fails. + char val = 'a'; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), + SyscallFailsWithErrno(EPERM)); + + // Get fails. + char got = '\0'; + EXPECT_THAT(getxattr(path, name, &got, size), SyscallFailsWithErrno(ENODATA)); + + // List still works, but returns no items. + char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), SyscallSucceedsWithValue(0)); + + // Remove fails. + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM)); +} + } // namespace } // namespace testing diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc index 5418948fe..dffa16183 100644 --- a/test/util/fs_util.cc +++ b/test/util/fs_util.cc @@ -15,7 +15,11 @@ #include "test/util/fs_util.h" #include <dirent.h> +#ifndef __fuchsia__ +#include <linux/magic.h> +#endif // __fuchsia__ #include <sys/stat.h> +#include <sys/statfs.h> #include <sys/types.h> #include <unistd.h> @@ -629,5 +633,21 @@ PosixErrorOr<std::string> ProcessExePath(int pid) { return ReadLink(absl::StrCat("/proc/", pid, "/exe")); } +#ifndef __fuchsia__ +PosixErrorOr<bool> IsTmpfs(const std::string& path) { + struct statfs stat; + if (statfs(path.c_str(), &stat)) { + if (errno == ENOENT) { + // Nothing at path, don't raise this as an error. Instead, just report no + // tmpfs at path. + return false; + } + return PosixError(errno, + absl::StrFormat("statfs(\"%s\", %#p)", path, &stat)); + } + return stat.f_type == TMPFS_MAGIC; +} +#endif // __fuchsia__ + } // namespace testing } // namespace gvisor diff --git a/test/util/fs_util.h b/test/util/fs_util.h index 8cdac23a1..044190657 100644 --- a/test/util/fs_util.h +++ b/test/util/fs_util.h @@ -17,6 +17,7 @@ #include <dirent.h> #include <sys/stat.h> +#include <sys/statfs.h> #include <sys/types.h> #include <unistd.h> @@ -178,6 +179,11 @@ std::string CleanPath(absl::string_view path); // Returns the full path to the executable of the given pid or a PosixError. PosixErrorOr<std::string> ProcessExePath(int pid); +#ifndef __fuchsia__ +// IsTmpfs returns true if the file at path is backed by tmpfs. +PosixErrorOr<bool> IsTmpfs(const std::string& path); +#endif // __fucshia__ + namespace internal { // Not part of the public API. std::string JoinPathImpl(std::initializer_list<absl::string_view> paths); diff --git a/tools/bazeldefs/defs.bzl b/tools/bazeldefs/defs.bzl index db7f379b8..dad5fc3b2 100644 --- a/tools/bazeldefs/defs.bzl +++ b/tools/bazeldefs/defs.bzl @@ -87,13 +87,14 @@ def cc_binary(name, static = False, **kwargs): **kwargs ) -def go_binary(name, static = False, pure = False, **kwargs): +def go_binary(name, static = False, pure = False, x_defs = None, **kwargs): """Build a go binary. Args: name: name of the target. static: build a static binary. pure: build without cgo. + x_defs: additional definitions. **kwargs: rest of the arguments are passed to _go_binary. """ if static: @@ -102,6 +103,7 @@ def go_binary(name, static = False, pure = False, **kwargs): kwargs["pure"] = "on" _go_binary( name = name, + x_defs = x_defs, **kwargs ) @@ -145,18 +147,28 @@ def go_rule(rule, implementation, **kwargs): Returns: The result of invoking the rule. """ - attrs = kwargs.pop("attrs", []) + attrs = kwargs.pop("attrs", dict()) attrs["_go_context_data"] = attr.label(default = "@io_bazel_rules_go//:go_context_data") attrs["_stdlib"] = attr.label(default = "@io_bazel_rules_go//:stdlib") toolchains = kwargs.get("toolchains", []) + ["@io_bazel_rules_go//go:toolchain"] return rule(implementation, attrs = attrs, toolchains = toolchains, **kwargs) -def go_context(ctx): +def go_test_library(target): + if hasattr(target.attr, "embed") and len(target.attr.embed) > 0: + return target.attr.embed[0] + return None + +def go_context(ctx, std = False): + # We don't change anything for the standard library analysis. All Go files + # are available in all instances. Note that this includes the standard + # library sources, which are analyzed by nogo. go_ctx = _go_context(ctx) return struct( go = go_ctx.go, env = go_ctx.env, - runfiles = depset([go_ctx.go] + go_ctx.sdk.tools + go_ctx.stdlib.libs), + nogo_args = [], + stdlib_srcs = go_ctx.sdk.srcs, + runfiles = depset([go_ctx.go] + go_ctx.sdk.srcs + go_ctx.sdk.tools + go_ctx.stdlib.libs), goos = go_ctx.sdk.goos, goarch = go_ctx.sdk.goarch, tags = go_ctx.tags, diff --git a/tools/checkescape/BUILD b/tools/checkescape/BUILD index b8c3ddf44..6273aa779 100644 --- a/tools/checkescape/BUILD +++ b/tools/checkescape/BUILD @@ -8,7 +8,7 @@ go_library( nogo = False, visibility = ["//tools/nogo:__subpackages__"], deps = [ - "//tools/nogo/data", + "//tools/nogo/dump", "@org_golang_x_tools//go/analysis:go_tool_library", "@org_golang_x_tools//go/analysis/passes/buildssa:go_tool_library", "@org_golang_x_tools//go/ssa:go_tool_library", diff --git a/tools/checkescape/checkescape.go b/tools/checkescape/checkescape.go index f8def4823..aab3c36a1 100644 --- a/tools/checkescape/checkescape.go +++ b/tools/checkescape/checkescape.go @@ -66,7 +66,6 @@ import ( "go/token" "go/types" "io" - "os" "path/filepath" "strconv" "strings" @@ -74,7 +73,7 @@ import ( "golang.org/x/tools/go/analysis" "golang.org/x/tools/go/analysis/passes/buildssa" "golang.org/x/tools/go/ssa" - "gvisor.dev/gvisor/tools/nogo/data" + "gvisor.dev/gvisor/tools/nogo/dump" ) const ( @@ -256,15 +255,14 @@ func (ec *EscapeCount) Record(reason EscapeReason) bool { // used only to remove false positives for escape analysis. The call will be // elided if escape analysis is able to put the object on the heap exclusively. func loadObjdump() (map[LinePosition]string, error) { - f, err := os.Open(data.Objdump) + cmd, out, err := dump.Command() if err != nil { return nil, err } - defer f.Close() // Build the map. m := make(map[LinePosition]string) - r := bufio.NewReader(f) + r := bufio.NewReader(out) var ( lastField string lastPos LinePosition @@ -329,6 +327,11 @@ func loadObjdump() (map[LinePosition]string, error) { } } + // Wait for the dump to finish. + if err := cmd.Wait(); err != nil { + return nil, err + } + return m, nil } @@ -413,12 +416,6 @@ func run(pass *analysis.Pass) (interface{}, error) { return escapes(unknownPackage, "no package", inst, ec) } - // Atomic functions are instrinics. We can - // assume that they don't escape. - if x.Pkg.Pkg.Name() == "atomic" { - return nil - } - // Is this a local function? If yes, call the // function to load the local function. The // local escapes are the escapes found in the diff --git a/tools/checkescape/test1/test1.go b/tools/checkescape/test1/test1.go index 68d3f72cc..a1d36459f 100644 --- a/tools/checkescape/test1/test1.go +++ b/tools/checkescape/test1/test1.go @@ -17,7 +17,6 @@ package test1 import ( "fmt" - "reflect" ) // Interface is a generic interface. @@ -163,20 +162,6 @@ func dynamicRec(f func()) { Dynamic(f) } -// +mustescape:local,unknown -//go:noinline -//go:nosplit -func Unknown() { - _ = reflect.TypeOf((*Type)(nil)) // Does not actually escape. -} - -// +mustescape:unknown -//go:noinline -//go:nosplit -func unknownRec() { - Unknown() -} - //go:noinline //go:nosplit func internalFunc() { diff --git a/tools/checkescape/test2/test2.go b/tools/checkescape/test2/test2.go index 7fce3e3be..2d5865f47 100644 --- a/tools/checkescape/test2/test2.go +++ b/tools/checkescape/test2/test2.go @@ -81,12 +81,6 @@ func dynamicCrossPkg(f func()) { test1.Dynamic(f) } -// +mustescape:unknown -//go:noinline -func unknownCrossPkg() { - test1.Unknown() -} - // +mustescape:stack //go:noinline func splitCrosssPkt() { diff --git a/tools/defs.bzl b/tools/defs.bzl index e71a26cf4..290d564f2 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -27,7 +27,6 @@ gbenchmark = _gbenchmark gazelle = _gazelle go_embed_data = _go_embed_data go_path = _go_path -go_test = _go_test gtest = _gtest grpcpp = _grpcpp loopback = _loopback @@ -45,17 +44,35 @@ vdso_linker_option = _vdso_linker_option default_platform = _default_platform platforms = _platforms -def go_binary(name, **kwargs): +def go_binary(name, nogo = True, pure = False, static = False, x_defs = None, **kwargs): """Wraps the standard go_binary. Args: name: the rule name. + nogo: enable nogo analysis. + pure: build a pure Go (no CGo) binary. + static: build a static binary. + x_defs: additional linker definitions. **kwargs: standard go_binary arguments. """ _go_binary( name = name, + pure = pure, + static = static, + x_defs = x_defs, **kwargs ) + if nogo: + # Note that the nogo rule applies only for go_library and go_test + # targets, therefore we construct a library from the binary sources. + _go_library( + name = name + "_nogo_library", + **kwargs + ) + nogo_test( + name = name + "_nogo", + deps = [":" + name + "_nogo_library"], + ) def calculate_sets(srcs): """Calculates special Go sets for templates. @@ -119,6 +136,7 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F stateify: whether statify is enabled (default: true). marshal: whether marshal is enabled (default: false). marshal_debug: whether the gomarshal tools emits debugging output (default: false). + nogo: enable nogo analysis. **kwargs: standard go_library arguments. """ all_srcs = srcs @@ -202,6 +220,24 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F **kwargs ) +def go_test(name, nogo = True, **kwargs): + """Wraps the standard go_test. + + Args: + name: the rule name. + nogo: enable nogo analysis. + **kwargs: standard go_test arguments. + """ + _go_test( + name = name, + **kwargs + ) + if nogo: + nogo_test( + name = name + "_nogo", + deps = [":" + name], + ) + def proto_library(name, srcs, deps = None, has_services = 0, **kwargs): """Wraps the standard proto_library. diff --git a/tools/go_marshal/gomarshal/generator_interfaces.go b/tools/go_marshal/gomarshal/generator_interfaces.go index e3c3dac63..cf76b5241 100644 --- a/tools/go_marshal/gomarshal/generator_interfaces.go +++ b/tools/go_marshal/gomarshal/generator_interfaces.go @@ -224,7 +224,7 @@ func (g *interfaceGenerator) emitNoEscapeSliceDataPointer(srcPtr, dstVar string) func (g *interfaceGenerator) emitKeepAlive(ptrVar string) { g.emit("// Since we bypassed the compiler's escape analysis, indicate that %s\n", ptrVar) g.emit("// must live until the use above.\n") - g.emit("runtime.KeepAlive(%s)\n", ptrVar) + g.emit("runtime.KeepAlive(%s) // escapes: replaced by intrinsic.\n", ptrVar) } func (g *interfaceGenerator) expandBinaryExpr(b *strings.Builder, e *ast.BinaryExpr) { diff --git a/tools/go_marshal/gomarshal/generator_interfaces_struct.go b/tools/go_marshal/gomarshal/generator_interfaces_struct.go index 4b9cea08a..44fbb425c 100644 --- a/tools/go_marshal/gomarshal/generator_interfaces_struct.go +++ b/tools/go_marshal/gomarshal/generator_interfaces_struct.go @@ -400,13 +400,13 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) { g.emit("// WriteTo implements io.WriterTo.WriteTo.\n") g.recordUsedImport("io") - g.emit("func (%s *%s) WriteTo(w io.Writer) (int64, error) {\n", g.r, g.typeName()) + g.emit("func (%s *%s) WriteTo(writer io.Writer) (int64, error) {\n", g.r, g.typeName()) g.inIndent(func() { fallback := func() { g.emit("// Type %s doesn't have a packed layout in memory, fall back to MarshalBytes.\n", g.typeName()) g.emit("buf := make([]byte, %s.SizeBytes())\n", g.r) g.emit("%s.MarshalBytes(buf)\n", g.r) - g.emit("length, err := w.Write(buf)\n") + g.emit("length, err := writer.Write(buf)\n") g.emit("return int64(length), err\n") } if thisPacked { @@ -421,7 +421,7 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) { // Fast serialization. g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r)) - g.emit("length, err := w.Write(buf)\n") + g.emit("length, err := writer.Write(buf)\n") g.emitKeepAlive(g.r) g.emit("return int64(length), err\n") } else { diff --git a/tools/issue_reviver/BUILD b/tools/issue_reviver/BUILD index 4ef1a3124..35b0111ca 100644 --- a/tools/issue_reviver/BUILD +++ b/tools/issue_reviver/BUILD @@ -5,6 +5,7 @@ package(licenses = ["notice"]) go_binary( name = "issue_reviver", srcs = ["main.go"], + nogo = False, deps = [ "//tools/issue_reviver/github", "//tools/issue_reviver/reviver", diff --git a/tools/issue_reviver/github/BUILD b/tools/issue_reviver/github/BUILD index 0eabc2835..555abd296 100644 --- a/tools/issue_reviver/github/BUILD +++ b/tools/issue_reviver/github/BUILD @@ -21,4 +21,5 @@ go_test( size = "small", srcs = ["github_test.go"], library = ":github", + nogo = False, ) diff --git a/tools/make_apt.sh b/tools/make_apt.sh index 3fb1066e5..b47977ed5 100755 --- a/tools/make_apt.sh +++ b/tools/make_apt.sh @@ -64,8 +64,8 @@ trap cleanup EXIT # is not found. This isn't actually a failure for us, because we don't require # the public (this may be stored separately). The second import will succeed # because, in reality, the first import succeeded and it's a no-op. -gpg --no-default-keyring --keyring "${keyring}" --import "${private_key}" || \ - gpg --no-default-keyring --keyring "${keyring}" --import "${private_key}" +gpg --no-default-keyring --keyring "${keyring}" --secret-keyring "${keyring}" --import "${private_key}" || \ + gpg --no-default-keyring --keyring "${keyring}" --secret-keyring "${keyring}" --import "${private_key}" # Copy the packages into the root. for pkg in "$@"; do diff --git a/tools/nogo/BUILD b/tools/nogo/BUILD index e1bfb9a2c..fb35c5ffd 100644 --- a/tools/nogo/BUILD +++ b/tools/nogo/BUILD @@ -1,7 +1,18 @@ load("//tools:defs.bzl", "bzl_library", "go_library") +load("//tools/nogo:defs.bzl", "nogo_dump_tool", "nogo_stdlib") package(licenses = ["notice"]) +nogo_dump_tool( + name = "dump_tool", + visibility = ["//visibility:public"], +) + +nogo_stdlib( + name = "stdlib", + visibility = ["//visibility:public"], +) + go_library( name = "nogo", srcs = [ @@ -16,7 +27,7 @@ go_library( deps = [ "//tools/checkescape", "//tools/checkunsafe", - "//tools/nogo/data", + "//tools/nogo/dump", "@org_golang_x_tools//go/analysis:go_tool_library", "@org_golang_x_tools//go/analysis/internal/facts:go_tool_library", "@org_golang_x_tools//go/analysis/passes/asmdecl:go_tool_library", diff --git a/tools/nogo/build.go b/tools/nogo/build.go index 433d13738..37947b5c3 100644 --- a/tools/nogo/build.go +++ b/tools/nogo/build.go @@ -31,10 +31,10 @@ var ( ) // findStdPkg needs to find the bundled standard library packages. -func (i *importer) findStdPkg(path string) (io.ReadCloser, error) { +func findStdPkg(GOOS, GOARCH, path string) (io.ReadCloser, error) { if path == "C" { // Cgo builds cannot be analyzed. Skip. return nil, ErrSkip } - return os.Open(fmt.Sprintf("external/go_sdk/pkg/%s_%s/%s.a", i.GOOS, i.GOARCH, path)) + return os.Open(fmt.Sprintf("external/go_sdk/pkg/%s_%s/%s.a", GOOS, GOARCH, path)) } diff --git a/tools/nogo/check/BUILD b/tools/nogo/check/BUILD index e2d76cd5c..21ba2c306 100644 --- a/tools/nogo/check/BUILD +++ b/tools/nogo/check/BUILD @@ -7,6 +7,7 @@ package(licenses = ["notice"]) go_binary( name = "check", srcs = ["main.go"], + nogo = False, visibility = ["//visibility:public"], deps = ["//tools/nogo"], ) diff --git a/tools/nogo/config.go b/tools/nogo/config.go index 6958fca69..451cd4a4c 100644 --- a/tools/nogo/config.go +++ b/tools/nogo/config.go @@ -84,6 +84,14 @@ var analyzerConfig = map[*analysis.Analyzer]matcher{ externalExcluded( ".*protobuf/.*.go", // Bad conversions. ".*flate/huffman_bit_writer.go", // Bad conversion. + + // Runtime internal violations. + ".*reflect/value.go", + ".*encoding/xml/xml.go", + ".*runtime/pprof/internal/profile/proto.go", + ".*fmt/scan.go", + ".*go/types/conversions.go", + ".*golang.org/x/net/dns/dnsmessage/message.go", ), ), shadow.Analyzer: disableMatches(), // Disabled for now. diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl index d399079c5..963084d53 100644 --- a/tools/nogo/defs.bzl +++ b/tools/nogo/defs.bzl @@ -1,6 +1,103 @@ """Nogo rules.""" -load("//tools/bazeldefs:defs.bzl", "go_context", "go_importpath", "go_rule") +load("//tools/bazeldefs:defs.bzl", "go_context", "go_importpath", "go_rule", "go_test_library") + +def _nogo_dump_tool_impl(ctx): + # Extract the Go context. + go_ctx = go_context(ctx) + + # Construct the magic dump command. + # + # Note that in some cases, the input is being fed into the tool via stdin. + # Unfortunately, the Go objdump tool expects to see a seekable file [1], so + # we need the tool to handle this case by creating a temporary file. + # + # [1] https://github.com/golang/go/issues/41051 + env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_ctx.env.items()]) + dumper = ctx.actions.declare_file(ctx.label.name) + ctx.actions.write(dumper, "\n".join([ + "#!/bin/bash", + "set -euo pipefail", + "if [[ $# -eq 0 ]]; then", + " T=$(mktemp -u -t libXXXXXX.a)", + " cat /dev/stdin > ${T}", + "else", + " T=$1;", + "fi", + "%s %s tool objdump ${T}" % ( + env_prefix, + go_ctx.go.path, + ), + "if [[ $# -eq 0 ]]; then", + " rm -rf ${T}", + "fi", + "", + ]), is_executable = True) + + # Include the full runfiles. + return [DefaultInfo( + runfiles = ctx.runfiles(files = go_ctx.runfiles.to_list()), + executable = dumper, + )] + +nogo_dump_tool = go_rule( + rule, + implementation = _nogo_dump_tool_impl, +) + +# NogoStdlibInfo is the set of standard library facts. +NogoStdlibInfo = provider( + "information for nogo analysis (standard library facts)", + fields = { + "facts": "serialized standard library facts", + }, +) + +def _nogo_stdlib_impl(ctx): + # Extract the Go context. + go_ctx = go_context(ctx) + + # Build the standard library facts. + facts = ctx.actions.declare_file(ctx.label.name + ".facts") + config = struct( + Srcs = [f.path for f in go_ctx.stdlib_srcs], + GOOS = go_ctx.goos, + GOARCH = go_ctx.goarch, + Tags = go_ctx.tags, + FactOutput = facts.path, + ) + config_file = ctx.actions.declare_file(ctx.label.name + ".cfg") + ctx.actions.write(config_file, config.to_json()) + ctx.actions.run( + inputs = [config_file] + go_ctx.stdlib_srcs, + outputs = [facts], + tools = depset(go_ctx.runfiles.to_list() + ctx.files._dump_tool), + executable = ctx.files._nogo[0], + mnemonic = "GoStandardLibraryAnalysis", + progress_message = "Analyzing Go Standard Library", + arguments = go_ctx.nogo_args + [ + "-dump_tool=%s" % ctx.files._dump_tool[0].path, + "-stdlib=%s" % config_file.path, + ], + ) + + # Return the stdlib facts as output. + return [NogoStdlibInfo( + facts = facts, + )] + +nogo_stdlib = go_rule( + rule, + implementation = _nogo_stdlib_impl, + attrs = { + "_nogo": attr.label( + default = "//tools/nogo/check:check", + ), + "_dump_tool": attr.label( + default = "//tools/nogo:dump_tool", + ), + }, +) # NogoInfo is the serialized set of package facts for a nogo analysis. # @@ -8,10 +105,13 @@ load("//tools/bazeldefs:defs.bzl", "go_context", "go_importpath", "go_rule") # with the source files as input. Note however, that the individual nogo rules # are simply stubs that enter into the shadow dependency tree (the "aspect"). NogoInfo = provider( + "information for nogo analysis", fields = { "facts": "serialized package facts", "importpath": "package import path", "binaries": "package binary files", + "srcs": "original source files (for go_test support)", + "deps": "original deps (for go_test support)", }, ) @@ -21,17 +121,29 @@ def _nogo_aspect_impl(target, ctx): # All work is done in the shadow properties for go rules. For a proto # library, we simply skip the analysis portion but still need to return a # valid NogoInfo to reference the generated binary. - if ctx.rule.kind == "go_library": + if ctx.rule.kind in ("go_library", "go_binary", "go_test", "go_tool_library"): srcs = ctx.rule.files.srcs - elif ctx.rule.kind == "go_proto_library" or ctx.rule.kind == "go_wrap_cc": + deps = ctx.rule.attr.deps + elif ctx.rule.kind in ("go_proto_library", "go_wrap_cc"): srcs = [] + deps = ctx.rule.attr.deps else: return [NogoInfo()] + # Extract the Go context. go_ctx = go_context(ctx) - # Construct the Go environment from the go_ctx.env dictionary. - env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_ctx.env.items()]) + # If we're using the "library" attribute, then we need to aggregate the + # original library sources and dependencies into this target to perform + # proper type analysis. + if ctx.rule.kind == "go_test": + library = go_test_library(ctx.rule) + if library != None: + info = library[NogoInfo] + if hasattr(info, "srcs"): + srcs = srcs + info.srcs + if hasattr(info, "deps"): + deps = deps + info.deps # Start with all target files and srcs as input. inputs = target.files.to_list() + srcs @@ -41,50 +153,30 @@ def _nogo_aspect_impl(target, ctx): # to cleanly allow us redirect stdout to the actual output file. Perhaps # I'm missing something here, but the intermediate script does work. binaries = target.files.to_list() - disasm_file = ctx.actions.declare_file(target.label.name + ".out") - dumper = ctx.actions.declare_file("%s-dumper" % ctx.label.name) - ctx.actions.write(dumper, "\n".join([ - "#!/bin/bash", - "%s %s tool objdump %s > %s\n" % ( - env_prefix, - go_ctx.go.path, - [f.path for f in binaries if f.path.endswith(".a")][0], - disasm_file.path, - ), - ]), is_executable = True) - ctx.actions.run( - inputs = binaries, - outputs = [disasm_file], - tools = go_ctx.runfiles, - mnemonic = "GoObjdump", - progress_message = "Objdump %s" % target.label, - executable = dumper, - ) - inputs.append(disasm_file) + objfiles = [f for f in binaries if f.path.endswith(".a")] + if len(objfiles) > 0: + # Prefer the .a files for go_library targets. + target_objfile = objfiles[0] + else: + # Use the raw binary for go_binary and go_test targets. + target_objfile = binaries[0] + inputs.append(target_objfile) # Extract the importpath for this package. - importpath = go_importpath(target) - - # The nogo tool requires a configfile serialized in JSON format to do its - # work. This must line up with the nogo.Config fields. - facts = ctx.actions.declare_file(target.label.name + ".facts") - config = struct( - ImportPath = importpath, - GoFiles = [src.path for src in srcs if src.path.endswith(".go")], - NonGoFiles = [src.path for src in srcs if not src.path.endswith(".go")], - # Google's internal build system needs a bit more help to find std. - StdZip = go_ctx.std_zip.short_path if hasattr(go_ctx, "std_zip") else "", - GOOS = go_ctx.goos, - GOARCH = go_ctx.goarch, - Tags = go_ctx.tags, - FactMap = {}, # Constructed below. - ImportMap = {}, # Constructed below. - FactOutput = facts.path, - Objdump = disasm_file.path, - ) + if ctx.rule.kind == "go_test": + # If this is a test, then it will not be imported by anything else. + # We can safely set the importapth to just "test". Note that this + # is necessary if the library also imports the core library (in + # addition to including the sources directly), which happens in + # some complex cases (seccomp_victim). + importpath = "test" + else: + importpath = go_importpath(target) # Collect all info from shadow dependencies. - for dep in ctx.rule.attr.deps: + fact_map = dict() + import_map = dict() + for dep in deps: # There will be no file attribute set for all transitive dependencies # that are not go_library or go_binary rules, such as a proto rules. # This is handled by the ctx.rule.kind check above. @@ -98,27 +190,46 @@ def _nogo_aspect_impl(target, ctx): x_files = [f.path for f in info.binaries if f.path.endswith(".x")] if not len(x_files): x_files = [f.path for f in info.binaries if f.path.endswith(".a")] - config.ImportMap[info.importpath] = x_files[0] - config.FactMap[info.importpath] = info.facts.path + import_map[info.importpath] = x_files[0] + fact_map[info.importpath] = info.facts.path # Ensure the above are available as inputs. inputs.append(info.facts) inputs += info.binaries - # Write the configuration and run the tool. + # Add the standard library facts. + stdlib_facts = ctx.attr._nogo_stdlib[NogoStdlibInfo].facts + inputs.append(stdlib_facts) + + # The nogo tool operates on a configuration serialized in JSON format. + facts = ctx.actions.declare_file(target.label.name + ".facts") + config = struct( + ImportPath = importpath, + GoFiles = [src.path for src in srcs if src.path.endswith(".go")], + NonGoFiles = [src.path for src in srcs if not src.path.endswith(".go")], + GOOS = go_ctx.goos, + GOARCH = go_ctx.goarch, + Tags = go_ctx.tags, + FactMap = fact_map, + ImportMap = import_map, + StdlibFacts = stdlib_facts.path, + FactOutput = facts.path, + ) config_file = ctx.actions.declare_file(target.label.name + ".cfg") ctx.actions.write(config_file, config.to_json()) inputs.append(config_file) - - # Run the nogo tool itself. ctx.actions.run( inputs = inputs, outputs = [facts], - tools = go_ctx.runfiles, + tools = depset(go_ctx.runfiles.to_list() + ctx.files._dump_tool), executable = ctx.files._nogo[0], mnemonic = "GoStaticAnalysis", progress_message = "Analyzing %s" % target.label, - arguments = ["-config=%s" % config_file.path], + arguments = go_ctx.nogo_args + [ + "-binary=%s" % target_objfile.path, + "-dump_tool=%s" % ctx.files._dump_tool[0].path, + "-package=%s" % config_file.path, + ], ) # Return the package facts as output. @@ -126,16 +237,27 @@ def _nogo_aspect_impl(target, ctx): facts = facts, importpath = importpath, binaries = binaries, + srcs = srcs, + deps = deps, )] nogo_aspect = go_rule( aspect, implementation = _nogo_aspect_impl, - attr_aspects = ["deps"], + attr_aspects = [ + "deps", + "library", + "embed", + ], attrs = { "_nogo": attr.label( default = "//tools/nogo/check:check", - allow_single_file = True, + ), + "_nogo_stdlib": attr.label( + default = "//tools/nogo:stdlib", + ), + "_dump_tool": attr.label( + default = "//tools/nogo:dump_tool", ), }, ) @@ -171,6 +293,10 @@ _nogo_test = rule( test = True, ) -def nogo_test(**kwargs): +def nogo_test(name, **kwargs): tags = kwargs.pop("tags", []) + ["nogo"] - _nogo_test(tags = tags, **kwargs) + _nogo_test( + name = name, + tags = tags, + **kwargs + ) diff --git a/tools/nogo/data/BUILD b/tools/nogo/dump/BUILD index b7564cc44..dfa29d651 100644 --- a/tools/nogo/data/BUILD +++ b/tools/nogo/dump/BUILD @@ -3,8 +3,8 @@ load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( - name = "data", - srcs = ["data.go"], + name = "dump", + srcs = ["dump.go"], nogo = False, visibility = ["//tools:__subpackages__"], ) diff --git a/tools/nogo/dump/dump.go b/tools/nogo/dump/dump.go new file mode 100644 index 000000000..f06567e0f --- /dev/null +++ b/tools/nogo/dump/dump.go @@ -0,0 +1,78 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package dump contains data dump tools. +// +// The interface used by the package corresponds to the tool generated by the +// nogo_dump_tool rule. +// +// This package is separate in order to avoid a dependency cycle. +package dump + +import ( + "flag" + "fmt" + "io" + "os" + "os/exec" +) + +var ( + // Binary is the binary under analysis. + // + // See Reader, below. + binary = flag.String("binary", "", "binary under analysis") + + // Reader is the input stream. + // + // This may be set instead of Binary. + Reader io.Reader + + // Tool is the tool used to dump a binary. + tool = flag.String("dump_tool", "", "tool used to dump a binary") +) + +// Command returns a command that will emit the dumped object on stdout. +// +// You must call Wait on the resulting command. +func Command() (*exec.Cmd, io.Reader, error) { + var ( + args []string + stdin io.Reader + ) + if *binary != "" { + args = append(args, *binary) + *binary = "" // Clear. + } else if Reader != nil { + stdin = Reader + Reader = nil // Clear. + } else { + // We have no input stream or binary. + return nil, nil, fmt.Errorf("no binary or reader provided!") + } + + // Construct our command. + cmd := exec.Command(*tool, args...) + cmd.Stdin = stdin + cmd.Stderr = os.Stderr + out, err := cmd.StdoutPipe() + if err != nil { + return nil, nil, err + } + if err := cmd.Start(); err != nil { + return nil, nil, err + } + + return cmd, out, err +} diff --git a/tools/nogo/nogo.go b/tools/nogo/nogo.go index ea1e97076..e44f32d4c 100644 --- a/tools/nogo/nogo.go +++ b/tools/nogo/nogo.go @@ -32,51 +32,97 @@ import ( "io/ioutil" "log" "os" + "path" "path/filepath" "reflect" + "strings" "golang.org/x/tools/go/analysis" "golang.org/x/tools/go/analysis/internal/facts" "golang.org/x/tools/go/gcexportdata" - "gvisor.dev/gvisor/tools/nogo/data" + "gvisor.dev/gvisor/tools/nogo/dump" ) -// pkgConfig is serialized as the configuration. +// stdlibConfig is serialized as the configuration. // -// This contains everything required for the analysis. -type pkgConfig struct { - ImportPath string - GoFiles []string - NonGoFiles []string - Tags []string +// This contains everything required for stdlib analysis. +type stdlibConfig struct { + Srcs []string GOOS string GOARCH string - ImportMap map[string]string - FactMap map[string]string + Tags []string FactOutput string - Objdump string - StdZip string } -// loadFacts finds and loads facts per FactMap. -func (c *pkgConfig) loadFacts(path string) ([]byte, error) { - realPath, ok := c.FactMap[path] - if !ok { - return nil, nil // No facts available. +// packageConfig is serialized as the configuration. +// +// This contains everything required for single package analysis. +type packageConfig struct { + ImportPath string + GoFiles []string + NonGoFiles []string + Tags []string + GOOS string + GOARCH string + ImportMap map[string]string + FactMap map[string]string + FactOutput string + StdlibFacts string +} + +// loader is a fact-loader function. +type loader func(string) ([]byte, error) + +// saver is a fact-saver function. +type saver func([]byte) error + +// factLoader returns a function that loads facts. +// +// This resolves all standard library facts and imported package facts up +// front. The returned loader function will never return an error, only +// empty facts. +// +// This is done because all stdlib data is stored together, and we don't want +// to load this data many times over. +func (c *packageConfig) factLoader() (loader, error) { + allFacts := make(map[string][]byte) + if c.StdlibFacts != "" { + data, err := ioutil.ReadFile(c.StdlibFacts) + if err != nil { + return nil, fmt.Errorf("error loading stdlib facts from %q: %w", c.StdlibFacts, err) + } + var stdlibFacts map[string][]byte + if err := json.Unmarshal(data, &stdlibFacts); err != nil { + return nil, fmt.Errorf("error loading stdlib facts: %w", err) + } + for pkg, data := range stdlibFacts { + allFacts[pkg] = data + } + } + for pkg, file := range c.FactMap { + data, err := ioutil.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("error loading %q: %w", file, err) + } + allFacts[pkg] = data } + return func(path string) ([]byte, error) { + return allFacts[path], nil + }, nil +} - // Read the files file. - data, err := ioutil.ReadFile(realPath) - if err != nil { - return nil, err +// factSaver may be used directly as a saver. +func (c *packageConfig) factSaver(factData []byte) error { + if c.FactOutput == "" { + return nil // Nothing to save. } - return data, nil + return ioutil.WriteFile(c.FactOutput, factData, 0644) } // shouldInclude indicates whether the file should be included. // // NOTE: This does only basic parsing of tags. -func (c *pkgConfig) shouldInclude(path string) (bool, error) { +func (c *packageConfig) shouldInclude(path string) (bool, error) { ctx := build.Default ctx.GOOS = c.GOOS ctx.GOARCH = c.GOARCH @@ -90,10 +136,11 @@ func (c *pkgConfig) shouldInclude(path string) (bool, error) { // files, and the facts. Note that this importer implementation will always // pass when a given package is not available. type importer struct { - pkgConfig - fset *token.FileSet - cache map[string]*types.Package - lastErr error + *packageConfig + fset *token.FileSet + cache map[string]*types.Package + lastErr error + callback func(string) error } // Import implements types.Importer.Import. @@ -104,6 +151,17 @@ func (i *importer) Import(path string) (*types.Package, error) { // analyzers are specifically looking for this. return types.Unsafe, nil } + + // Call the internal callback. This is used to resolve loading order + // for the standard library. See checkStdlib. + if i.callback != nil { + if err := i.callback(path); err != nil { + i.lastErr = err + return nil, err + } + } + + // Actually load the data. realPath, ok := i.ImportMap[path] var ( rc io.ReadCloser @@ -112,7 +170,7 @@ func (i *importer) Import(path string) (*types.Package, error) { if !ok { // Not found in the import path. Attempt to find the package // via the standard library. - rc, err = i.findStdPkg(path) + rc, err = findStdPkg(i.GOOS, i.GOARCH, path) } else { // Open the file. rc, err = os.Open(realPath) @@ -135,6 +193,139 @@ func (i *importer) Import(path string) (*types.Package, error) { // ErrSkip indicates the package should be skipped. var ErrSkip = errors.New("skipped") +// checkStdlib checks the standard library. +// +// This constructs a synthetic package configuration for each library in the +// standard library sources, and call checkPackage repeatedly. +// +// Note that not all parts of the source are expected to build. We skip obvious +// test files, and cmd files, which should not be dependencies. +func checkStdlib(config *stdlibConfig) ([]string, error) { + if len(config.Srcs) == 0 { + return nil, nil + } + + // Ensure all paths are normalized. + for i := 0; i < len(config.Srcs); i++ { + config.Srcs[i] = path.Clean(config.Srcs[i]) + } + + // Calculate the root directory. + longestPrefix := path.Dir(config.Srcs[0]) + for _, file := range config.Srcs[1:] { + for i := 0; i < len(file) && i < len(longestPrefix); i++ { + if file[i] != longestPrefix[i] { + // Truncate here; will stop the loop. + longestPrefix = longestPrefix[:i] + break + } + } + } + if len(longestPrefix) > 0 && longestPrefix[len(longestPrefix)-1] != '/' { + longestPrefix += "/" + } + + // Aggregate all files by directory. + packages := make(map[string]*packageConfig) + for _, file := range config.Srcs { + d := path.Dir(file) + if len(longestPrefix) >= len(d) { + continue // Not a file. + } + pkg := path.Dir(file)[len(longestPrefix):] + // Skip cmd packages and obvious test files: see above. + if strings.HasPrefix(pkg, "cmd/") || strings.HasSuffix(file, "_test.go") { + continue + } + c, ok := packages[pkg] + if !ok { + c = &packageConfig{ + ImportPath: pkg, + GOOS: config.GOOS, + GOARCH: config.GOARCH, + Tags: config.Tags, + } + packages[pkg] = c + } + // Add the files appropriately. Note that they will be further + // filtered by architecture and build tags below, so this need + // not be done immediately. + if strings.HasSuffix(file, ".go") { + c.GoFiles = append(c.GoFiles, file) + } else { + c.NonGoFiles = append(c.NonGoFiles, file) + } + } + + // Closure to check a single package. + allFindings := make([]string, 0) + stdlibFacts := make(map[string][]byte) + var checkOne func(pkg string) error // Recursive. + checkOne = func(pkg string) error { + // Is this already done? + if _, ok := stdlibFacts[pkg]; ok { + return nil + } + + // Lookup the configuration. + config, ok := packages[pkg] + if !ok { + return nil // Not known. + } + + // Find the binary package, and provide to objdump. + rc, err := findStdPkg(config.GOOS, config.GOARCH, pkg) + if err != nil { + // If there's no binary for this package, it is likely + // not built with the distribution. That's fine, we can + // just skip analysis. + return nil + } + + // Provide the input. + oldReader := dump.Reader + dump.Reader = rc // For analysis. + defer func() { + rc.Close() + dump.Reader = oldReader // Restore. + }() + + // Run the analysis. + findings, err := checkPackage(config, func(factData []byte) error { + stdlibFacts[pkg] = factData + return nil + }, checkOne) + if err != nil { + // If we can't analyze a package from the standard library, + // then we skip it. It will simply not have any findings. + return nil + } + allFindings = append(allFindings, findings...) + return nil + } + + // Check all packages. + // + // Note that this may call checkOne recursively, so it's not guaranteed + // to evaluate in the order provided here. We do ensure however, that + // all packages are evaluated. + for pkg := range packages { + checkOne(pkg) + } + + // Write out all findings. + factData, err := json.Marshal(stdlibFacts) + if err != nil { + return nil, fmt.Errorf("error saving stdlib facts: %w", err) + } + if err := ioutil.WriteFile(config.FactOutput, factData, 0644); err != nil { + return nil, fmt.Errorf("error saving findings to %q: %v", config.FactOutput, err) + } + + // Return all findings. + return allFindings, nil +} + // checkPackage runs all analyzers. // // The implementation was adapted from [1], which was in turn adpated from [2]. @@ -143,11 +334,12 @@ var ErrSkip = errors.New("skipped") // // [1] bazelbuid/rules_go/tools/builders/nogo_main.go // [2] golang.org/x/tools/go/checker/internal/checker -func checkPackage(config pkgConfig) ([]string, error) { +func checkPackage(config *packageConfig, factSaver saver, importCallback func(string) error) ([]string, error) { imp := &importer{ - pkgConfig: config, - fset: token.NewFileSet(), - cache: make(map[string]*types.Package), + packageConfig: config, + fset: token.NewFileSet(), + cache: make(map[string]*types.Package), + callback: importCallback, } // Load all source files. @@ -184,14 +376,15 @@ func checkPackage(config pkgConfig) ([]string, error) { } // Load all package facts. - facts, err := facts.Decode(types, config.loadFacts) + loader, err := config.factLoader() + if err != nil { + return nil, fmt.Errorf("error loading facts: %w", err) + } + facts, err := facts.Decode(types, loader) if err != nil { return nil, fmt.Errorf("error decoding facts: %w", err) } - // Set the binary global for use. - data.Objdump = config.Objdump - // Register fact types and establish dependencies between analyzers. // The visit closure will execute recursively, and populate results // will all required analysis results. @@ -263,11 +456,9 @@ func checkPackage(config pkgConfig) ([]string, error) { } // Write the output file. - if config.FactOutput != "" { - factData := facts.Encode() - if err := ioutil.WriteFile(config.FactOutput, factData, 0644); err != nil { - return nil, fmt.Errorf("error: unable to open facts output %q: %v", config.FactOutput, err) - } + factData := facts.Encode() + if err := factSaver(factData); err != nil { + return nil, fmt.Errorf("error: unable to save facts: %v", err) } // Convert all diagnostics to strings. @@ -284,38 +475,56 @@ func checkPackage(config pkgConfig) ([]string, error) { } var ( - configFile = flag.String("config", "", "configuration file (in JSON format)") + packageFile = flag.String("package", "", "package configuration file (in JSON format)") + stdlibFile = flag.String("stdlib", "", "stdlib configuration file (in JSON format)") ) -// Main is the entrypoint; it should be called directly from main. -// -// N.B. This package registers it's own flags. -func Main() { - // Parse all flags. - flag.Parse() - +func loadConfig(file string, config interface{}) interface{} { // Load the configuration. - f, err := os.Open(*configFile) + f, err := os.Open(file) if err != nil { - log.Fatalf("unable to open configuration %q: %v", *configFile, err) + log.Fatalf("unable to open configuration %q: %v", file, err) } defer f.Close() - config := new(pkgConfig) dec := json.NewDecoder(f) dec.DisallowUnknownFields() if err := dec.Decode(config); err != nil { log.Fatalf("unable to decode configuration: %v", err) } + return config +} - // Process the package. - findings, err := checkPackage(*config) +// Main is the entrypoint; it should be called directly from main. +// +// N.B. This package registers it's own flags. +func Main() { + // Parse all flags. + flag.Parse() + + var ( + findings []string + err error + ) + + // Check the configuration. + if *packageFile != "" && *stdlibFile != "" { + log.Fatalf("unable to perform stdlib and package analysis; provide only one!") + } else if *stdlibFile != "" { + c := loadConfig(*stdlibFile, new(stdlibConfig)).(*stdlibConfig) + findings, err = checkStdlib(c) + } else if *packageFile != "" { + c := loadConfig(*packageFile, new(packageConfig)).(*packageConfig) + findings, err = checkPackage(c, c.factSaver, nil) + } else { + log.Fatalf("please provide at least one of package or stdlib!") + } + + // Handle findings & errors. if err != nil { log.Fatalf("error checking package: %v", err) } - - // No findings? if len(findings) == 0 { - os.Exit(0) + return } // Print findings and exit with non-zero code. |