diff options
Diffstat (limited to 'pkg')
184 files changed, 493 insertions, 325 deletions
diff --git a/pkg/abi/abi_linux.go b/pkg/abi/abi_linux.go index 3059479bd..008bbca08 100644 --- a/pkg/abi/abi_linux.go +++ b/pkg/abi/abi_linux.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package abi diff --git a/pkg/abi/linux/arch_amd64.go b/pkg/abi/linux/arch_amd64.go index 0be31e755..064c0a6da 100644 --- a/pkg/abi/linux/arch_amd64.go +++ b/pkg/abi/linux/arch_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/clone.go b/pkg/abi/linux/clone.go index c2cbfca5e..322a4ef5a 100644 --- a/pkg/abi/linux/clone.go +++ b/pkg/abi/linux/clone.go @@ -16,13 +16,16 @@ package linux // Clone constants per clone(2). const ( + CSIGNAL = 0xff + CLONE_VM = 0x100 CLONE_FS = 0x200 CLONE_FILES = 0x400 CLONE_SIGHAND = 0x800 - CLONE_PARENT = 0x8000 + CLONE_PIDFD = 0x1000 CLONE_PTRACE = 0x2000 CLONE_VFORK = 0x4000 + CLONE_PARENT = 0x8000 CLONE_THREAD = 0x10000 CLONE_NEWNS = 0x20000 CLONE_SYSVSEM = 0x40000 @@ -32,10 +35,30 @@ const ( CLONE_DETACHED = 0x400000 CLONE_UNTRACED = 0x800000 CLONE_CHILD_SETTID = 0x1000000 + CLONE_NEWCGROUP = 0x2000000 CLONE_NEWUTS = 0x4000000 CLONE_NEWIPC = 0x8000000 CLONE_NEWUSER = 0x10000000 CLONE_NEWPID = 0x20000000 CLONE_NEWNET = 0x40000000 CLONE_IO = 0x80000000 + + // Only passable via clone3(2). + CLONE_CLEAR_SIGHAND = 0x100000000 + CLONE_INTO_CGROUP = 0x200000000 ) + +// CloneArgs is struct clone_args, from include/uapi/linux/sched.h. +type CloneArgs struct { + Flags uint64 + Pidfd uint64 + ChildTID uint64 + ParentTID uint64 + ExitSignal uint64 + Stack uint64 + StackSize uint64 + TLS uint64 + SetTID uint64 + SetTIDSize uint64 + Cgroup uint64 +} diff --git a/pkg/abi/linux/epoll_amd64.go b/pkg/abi/linux/epoll_amd64.go index 7e74b1143..7d5b9fdfb 100644 --- a/pkg/abi/linux/epoll_amd64.go +++ b/pkg/abi/linux/epoll_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/epoll_arm64.go b/pkg/abi/linux/epoll_arm64.go index a35939cc9..5e5960d32 100644 --- a/pkg/abi/linux/epoll_arm64.go +++ b/pkg/abi/linux/epoll_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/abi/linux/file_amd64.go b/pkg/abi/linux/file_amd64.go index 6b72364ea..ab404b17e 100644 --- a/pkg/abi/linux/file_amd64.go +++ b/pkg/abi/linux/file_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/file_arm64.go b/pkg/abi/linux/file_arm64.go index 6492c9038..6234955ab 100644 --- a/pkg/abi/linux/file_arm64.go +++ b/pkg/abi/linux/file_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/abi/linux/ptrace_amd64.go b/pkg/abi/linux/ptrace_amd64.go index e722971f1..e970b5b4a 100644 --- a/pkg/abi/linux/ptrace_amd64.go +++ b/pkg/abi/linux/ptrace_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/ptrace_arm64.go b/pkg/abi/linux/ptrace_arm64.go index 3d0906565..91e5af56b 100644 --- a/pkg/abi/linux/ptrace_arm64.go +++ b/pkg/abi/linux/ptrace_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/abi/linux/sem_amd64.go b/pkg/abi/linux/sem_amd64.go index ab980cb4f..cabd2d4b8 100644 --- a/pkg/abi/linux/sem_amd64.go +++ b/pkg/abi/linux/sem_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/sem_arm64.go b/pkg/abi/linux/sem_arm64.go index 521468fb1..a0c467dc4 100644 --- a/pkg/abi/linux/sem_arm64.go +++ b/pkg/abi/linux/sem_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/atomicbitops/aligned_32bit_unsafe.go b/pkg/atomicbitops/aligned_32bit_unsafe.go index df706b453..383f81ff2 100644 --- a/pkg/atomicbitops/aligned_32bit_unsafe.go +++ b/pkg/atomicbitops/aligned_32bit_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm || mips || 386 // +build arm mips 386 package atomicbitops diff --git a/pkg/atomicbitops/aligned_64bit.go b/pkg/atomicbitops/aligned_64bit.go index 1544c7814..2c421d920 100644 --- a/pkg/atomicbitops/aligned_64bit.go +++ b/pkg/atomicbitops/aligned_64bit.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !arm && !mips && !386 // +build !arm,!mips,!386 package atomicbitops diff --git a/pkg/atomicbitops/atomicbitops.go b/pkg/atomicbitops/atomicbitops.go index 1be081719..4c4606a58 100644 --- a/pkg/atomicbitops/atomicbitops.go +++ b/pkg/atomicbitops/atomicbitops.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || arm64 // +build amd64 arm64 // Package atomicbitops provides extensions to the sync/atomic package. diff --git a/pkg/atomicbitops/atomicbitops_noasm.go b/pkg/atomicbitops/atomicbitops_noasm.go index 3b2898256..474c0c815 100644 --- a/pkg/atomicbitops/atomicbitops_noasm.go +++ b/pkg/atomicbitops/atomicbitops_noasm.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !amd64 && !arm64 // +build !amd64,!arm64 package atomicbitops diff --git a/pkg/bits/uint64_arch.go b/pkg/bits/uint64_arch.go index 9f23eff77..fc5634167 100644 --- a/pkg/bits/uint64_arch.go +++ b/pkg/bits/uint64_arch.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || arm64 // +build amd64 arm64 package bits diff --git a/pkg/bits/uint64_arch_amd64_asm.s b/pkg/bits/uint64_arch_amd64_asm.s index 8ff364181..2931b5d56 100644 --- a/pkg/bits/uint64_arch_amd64_asm.s +++ b/pkg/bits/uint64_arch_amd64_asm.s @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 TEXT ·TrailingZeros64(SB),$0-16 diff --git a/pkg/bits/uint64_arch_arm64_asm.s b/pkg/bits/uint64_arch_arm64_asm.s index 814ba562d..eb8d4d280 100644 --- a/pkg/bits/uint64_arch_arm64_asm.s +++ b/pkg/bits/uint64_arch_arm64_asm.s @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 TEXT ·TrailingZeros64(SB),$0-16 diff --git a/pkg/bits/uint64_arch_generic.go b/pkg/bits/uint64_arch_generic.go index 9dd2098d1..83b23a3fc 100644 --- a/pkg/bits/uint64_arch_generic.go +++ b/pkg/bits/uint64_arch_generic.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !amd64 && !arm64 // +build !amd64,!arm64 package bits diff --git a/pkg/coverage/coverage.go b/pkg/coverage/coverage.go index b33a20802..0fabee92b 100644 --- a/pkg/coverage/coverage.go +++ b/pkg/coverage/coverage.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + // Package coverage provides an interface through which Go coverage data can // be collected, converted to kcov format, and exposed to userspace. // diff --git a/pkg/cpuid/cpuid_arm64.go b/pkg/cpuid/cpuid_arm64.go index 98c6ec62f..6e61d562f 100644 --- a/pkg/cpuid/cpuid_arm64.go +++ b/pkg/cpuid/cpuid_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package cpuid diff --git a/pkg/cpuid/cpuid_arm64_test.go b/pkg/cpuid/cpuid_arm64_test.go index a34f67779..16b1c064a 100644 --- a/pkg/cpuid/cpuid_arm64_test.go +++ b/pkg/cpuid/cpuid_arm64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package cpuid diff --git a/pkg/cpuid/cpuid_parse_x86_test.go b/pkg/cpuid/cpuid_parse_x86_test.go index d60fdb550..36dd20552 100644 --- a/pkg/cpuid/cpuid_parse_x86_test.go +++ b/pkg/cpuid/cpuid_parse_x86_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build 386 || amd64 // +build 386 amd64 package cpuid diff --git a/pkg/cpuid/cpuid_x86.go b/pkg/cpuid/cpuid_x86.go index 392711e8f..dc17cade8 100644 --- a/pkg/cpuid/cpuid_x86.go +++ b/pkg/cpuid/cpuid_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build 386 || amd64 // +build 386 amd64 package cpuid diff --git a/pkg/cpuid/cpuid_x86_test.go b/pkg/cpuid/cpuid_x86_test.go index bacf345c8..92a2d9f81 100644 --- a/pkg/cpuid/cpuid_x86_test.go +++ b/pkg/cpuid/cpuid_x86_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build 386 || amd64 // +build 386 amd64 package cpuid diff --git a/pkg/crypto/crypto_stdlib.go b/pkg/crypto/crypto_stdlib.go index 514592b08..69e867386 100644 --- a/pkg/crypto/crypto_stdlib.go +++ b/pkg/crypto/crypto_stdlib.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package crypto import ( diff --git a/pkg/eventchannel/event_any.go b/pkg/eventchannel/event_any.go index a5549f6cd..13f300061 100644 --- a/pkg/eventchannel/event_any.go +++ b/pkg/eventchannel/event_any.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package eventchannel import ( diff --git a/pkg/fdchannel/fdchannel_unsafe.go b/pkg/fdchannel/fdchannel_unsafe.go index 1f24a448d..f9a201eeb 100644 --- a/pkg/fdchannel/fdchannel_unsafe.go +++ b/pkg/fdchannel/fdchannel_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris // +build aix darwin dragonfly freebsd linux netbsd openbsd solaris // Package fdchannel implements passing file descriptors between processes over diff --git a/pkg/fdnotifier/fdnotifier.go b/pkg/fdnotifier/fdnotifier.go index 1290d5d10..152557143 100644 --- a/pkg/fdnotifier/fdnotifier.go +++ b/pkg/fdnotifier/fdnotifier.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package fdnotifier contains an adapter that translates IO events (e.g., a diff --git a/pkg/fdnotifier/poll_unsafe.go b/pkg/fdnotifier/poll_unsafe.go index 493ea8375..db917303f 100644 --- a/pkg/fdnotifier/poll_unsafe.go +++ b/pkg/fdnotifier/poll_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdnotifier diff --git a/pkg/flipcall/ctrl_futex.go b/pkg/flipcall/ctrl_futex.go index 2e8452a02..5d2ee4018 100644 --- a/pkg/flipcall/ctrl_futex.go +++ b/pkg/flipcall/ctrl_futex.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package flipcall import ( diff --git a/pkg/flipcall/futex_linux.go b/pkg/flipcall/futex_linux.go index c212f05f1..4bb85939b 100644 --- a/pkg/flipcall/futex_linux.go +++ b/pkg/flipcall/futex_linux.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package flipcall diff --git a/pkg/gohacks/gohacks_unsafe.go b/pkg/gohacks/gohacks_unsafe.go index 374aac2b4..09fc14787 100644 --- a/pkg/gohacks/gohacks_unsafe.go +++ b/pkg/gohacks/gohacks_unsafe.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.13 -// +build !go1.18 +//go:build go1.13 && !go1.18 +// +build go1.13,!go1.18 // Check type signatures when updating Go version. diff --git a/pkg/goid/goid.go b/pkg/goid/goid.go index 193b2c2d4..85fb2f6d4 100644 --- a/pkg/goid/goid.go +++ b/pkg/goid/goid.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.12 -// +build !go1.18 +//go:build go1.12 && !go1.18 +// +build go1.12,!go1.18 // Check type signatures when updating Go version. diff --git a/pkg/hostarch/hostarch_arm64.go b/pkg/hostarch/hostarch_arm64.go index a31a8aeeb..a65c810a5 100644 --- a/pkg/hostarch/hostarch_arm64.go +++ b/pkg/hostarch/hostarch_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package hostarch diff --git a/pkg/hostarch/hostarch_x86.go b/pkg/hostarch/hostarch_x86.go index af6ef2b7f..00bf668f3 100644 --- a/pkg/hostarch/hostarch_x86.go +++ b/pkg/hostarch/hostarch_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || 386 // +build amd64 386 package hostarch diff --git a/pkg/memutil/memfd_linux_unsafe.go b/pkg/memutil/memfd_linux_unsafe.go index 504382213..2179c92f3 100644 --- a/pkg/memutil/memfd_linux_unsafe.go +++ b/pkg/memutil/memfd_linux_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package memutil diff --git a/pkg/memutil/mmap.go b/pkg/memutil/mmap.go index 7c939293f..7a55d1b28 100644 --- a/pkg/memutil/mmap.go +++ b/pkg/memutil/mmap.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package memutil import ( diff --git a/pkg/procid/procid.go b/pkg/procid/procid.go index 78b92422c..e0d42819d 100644 --- a/pkg/procid/procid.go +++ b/pkg/procid/procid.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + // Package procid provides a way to get the current system thread identifier. package procid diff --git a/pkg/procid/procid_amd64.s b/pkg/procid/procid_amd64.s index c4307c523..b5bbfff90 100644 --- a/pkg/procid/procid_amd64.s +++ b/pkg/procid/procid_amd64.s @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build amd64 -// +build go1.8 -// +build !go1.18 +//go:build amd64 && go1.8 && !go1.18 && go1.1 +// +build amd64,go1.8,!go1.18,go1.1 #include "textflag.h" diff --git a/pkg/procid/procid_arm64.s b/pkg/procid/procid_arm64.s index c1c409f3c..772d96289 100644 --- a/pkg/procid/procid_arm64.s +++ b/pkg/procid/procid_arm64.s @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build arm64 -// +build go1.8 -// +build !go1.18 +//go:build arm64 && go1.8 && !go1.18 && go1.1 +// +build arm64,go1.8,!go1.18,go1.1 #include "textflag.h" diff --git a/pkg/rand/rand.go b/pkg/rand/rand.go index a2714784d..be0e85fdb 100644 --- a/pkg/rand/rand.go +++ b/pkg/rand/rand.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !linux // +build !linux // Package rand implements a cryptographically secure pseudorandom number diff --git a/pkg/ring0/aarch64.go b/pkg/ring0/aarch64.go index 3bda594f9..96c884844 100644 --- a/pkg/ring0/aarch64.go +++ b/pkg/ring0/aarch64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/defs_amd64.go b/pkg/ring0/defs_amd64.go index 76776c65c..24f6e4cde 100644 --- a/pkg/ring0/defs_amd64.go +++ b/pkg/ring0/defs_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 diff --git a/pkg/ring0/defs_arm64.go b/pkg/ring0/defs_arm64.go index 0125690d2..3e212516f 100644 --- a/pkg/ring0/defs_arm64.go +++ b/pkg/ring0/defs_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/entry_amd64.go b/pkg/ring0/entry_amd64.go index 397ccac7b..afd646b0b 100644 --- a/pkg/ring0/entry_amd64.go +++ b/pkg/ring0/entry_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 diff --git a/pkg/ring0/entry_arm64.go b/pkg/ring0/entry_arm64.go index 62a93f3d6..299036478 100644 --- a/pkg/ring0/entry_arm64.go +++ b/pkg/ring0/entry_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go index b5c4a39e3..23ec33f92 100644 --- a/pkg/ring0/kernel_amd64.go +++ b/pkg/ring0/kernel_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 @@ -19,6 +20,7 @@ package ring0 import ( "encoding/binary" "reflect" + "sync" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -30,6 +32,8 @@ func HaltAndWriteFSBase(regs *arch.Registers) // init initializes architecture-specific state. func (k *Kernel) init(maxCPUs int) { + initSentryXCR0() + entrySize := reflect.TypeOf(kernelEntry{}).Size() var ( entries []kernelEntry @@ -257,7 +261,16 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { return } -var sentryXCR0 = xgetbv(0) +var ( + sentryXCR0 uintptr + sentryXCR0Once sync.Once +) + +// initSentryXCR0 saves a value of XCR0 in the host mode. It is used to +// initialize XCR0 of guest vCPU-s. +func initSentryXCR0() { + sentryXCR0Once.Do(func() { sentryXCR0 = xgetbv(0) }) +} // startGo is the CPU entrypoint. // diff --git a/pkg/ring0/kernel_arm64.go b/pkg/ring0/kernel_arm64.go index 21db910a2..79f85ff50 100644 --- a/pkg/ring0/kernel_arm64.go +++ b/pkg/ring0/kernel_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/lib_amd64.go b/pkg/ring0/lib_amd64.go index 46746fd80..05c394ff5 100644 --- a/pkg/ring0/lib_amd64.go +++ b/pkg/ring0/lib_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 diff --git a/pkg/ring0/lib_arm64.go b/pkg/ring0/lib_arm64.go index 5eabd4296..a72a6926d 100644 --- a/pkg/ring0/lib_arm64.go +++ b/pkg/ring0/lib_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/offsets_amd64.go b/pkg/ring0/offsets_amd64.go index dd48118af..75f6218b3 100644 --- a/pkg/ring0/offsets_amd64.go +++ b/pkg/ring0/offsets_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 diff --git a/pkg/ring0/offsets_arm64.go b/pkg/ring0/offsets_arm64.go index 03adaa6b0..60b2c4074 100644 --- a/pkg/ring0/offsets_arm64.go +++ b/pkg/ring0/offsets_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/pagetables/pagetables_aarch64.go b/pkg/ring0/pagetables/pagetables_aarch64.go index 86eb00a4f..aa2a5c984 100644 --- a/pkg/ring0/pagetables/pagetables_aarch64.go +++ b/pkg/ring0/pagetables/pagetables_aarch64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package pagetables diff --git a/pkg/ring0/pagetables/pagetables_amd64_test.go b/pkg/ring0/pagetables/pagetables_amd64_test.go index a13c616ae..c27b3b10a 100644 --- a/pkg/ring0/pagetables/pagetables_amd64_test.go +++ b/pkg/ring0/pagetables/pagetables_amd64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package pagetables diff --git a/pkg/ring0/pagetables/pagetables_arm64_test.go b/pkg/ring0/pagetables/pagetables_arm64_test.go index 2514b9ac5..1c919ec7d 100644 --- a/pkg/ring0/pagetables/pagetables_arm64_test.go +++ b/pkg/ring0/pagetables/pagetables_arm64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package pagetables diff --git a/pkg/ring0/pagetables/pagetables_x86.go b/pkg/ring0/pagetables/pagetables_x86.go index e43698173..dc98d8452 100644 --- a/pkg/ring0/pagetables/pagetables_x86.go +++ b/pkg/ring0/pagetables/pagetables_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build 386 || amd64 // +build 386 amd64 package pagetables diff --git a/pkg/ring0/pagetables/pcids_aarch64.go b/pkg/ring0/pagetables/pcids_aarch64.go index fbfd41d83..ad492d039 100644 --- a/pkg/ring0/pagetables/pcids_aarch64.go +++ b/pkg/ring0/pagetables/pcids_aarch64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package pagetables diff --git a/pkg/ring0/pagetables/pcids_aarch64.s b/pkg/ring0/pagetables/pcids_aarch64.s index e9d62d768..cfcedba71 100644 --- a/pkg/ring0/pagetables/pcids_aarch64.s +++ b/pkg/ring0/pagetables/pcids_aarch64.s @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 #include "funcdata.h" diff --git a/pkg/ring0/pagetables/pcids_x86.go b/pkg/ring0/pagetables/pcids_x86.go index 91fc5e8dd..2a107ea70 100644 --- a/pkg/ring0/pagetables/pcids_x86.go +++ b/pkg/ring0/pagetables/pcids_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build i386 || amd64 // +build i386 amd64 package pagetables diff --git a/pkg/ring0/pagetables/walker_amd64.go b/pkg/ring0/pagetables/walker_amd64.go index eb4fbcc31..ca5e2f85f 100644 --- a/pkg/ring0/pagetables/walker_amd64.go +++ b/pkg/ring0/pagetables/walker_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package pagetables diff --git a/pkg/ring0/pagetables/walker_arm64.go b/pkg/ring0/pagetables/walker_arm64.go index 5ed881c7a..e32dbda2d 100644 --- a/pkg/ring0/pagetables/walker_arm64.go +++ b/pkg/ring0/pagetables/walker_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package pagetables diff --git a/pkg/ring0/x86.go b/pkg/ring0/x86.go index 34fbc1c35..7c96cca6b 100644 --- a/pkg/ring0/x86.go +++ b/pkg/ring0/x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build 386 || amd64 // +build 386 amd64 package ring0 diff --git a/pkg/seccomp/seccomp_amd64.go b/pkg/seccomp/seccomp_amd64.go index 00bf332c1..9cd003bc5 100644 --- a/pkg/seccomp/seccomp_amd64.go +++ b/pkg/seccomp/seccomp_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package seccomp diff --git a/pkg/seccomp/seccomp_arm64.go b/pkg/seccomp/seccomp_arm64.go index b62133f21..adcf73e72 100644 --- a/pkg/seccomp/seccomp_arm64.go +++ b/pkg/seccomp/seccomp_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package seccomp diff --git a/pkg/seccomp/seccomp_test_victim_amd64.go b/pkg/seccomp/seccomp_test_victim_amd64.go index efb8604ec..5c1ecc301 100644 --- a/pkg/seccomp/seccomp_test_victim_amd64.go +++ b/pkg/seccomp/seccomp_test_victim_amd64.go @@ -15,6 +15,7 @@ // Test binary used to test that seccomp filters are properly constructed and // indeed kill the process on violation. +//go:build amd64 // +build amd64 package main diff --git a/pkg/seccomp/seccomp_test_victim_arm64.go b/pkg/seccomp/seccomp_test_victim_arm64.go index 97cb5f5fe..9647e2758 100644 --- a/pkg/seccomp/seccomp_test_victim_arm64.go +++ b/pkg/seccomp/seccomp_test_victim_arm64.go @@ -15,6 +15,7 @@ // Test binary used to test that seccomp filters are properly constructed and // indeed kill the process on violation. +//go:build arm64 // +build arm64 package main diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go index 18942e848..9a827e84f 100644 --- a/pkg/sentry/arch/arch_aarch64.go +++ b/pkg/sentry/arch/arch_aarch64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package arch diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go index d6b4d2357..e7cb24102 100644 --- a/pkg/sentry/arch/arch_amd64.go +++ b/pkg/sentry/arch/arch_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package arch diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go index 348f238fd..0d27a1f22 100644 --- a/pkg/sentry/arch/arch_arm64.go +++ b/pkg/sentry/arch/arch_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package arch diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go index b2b94c304..6da13f26e 100644 --- a/pkg/sentry/arch/arch_state_x86.go +++ b/pkg/sentry/arch/arch_state_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || 386 // +build amd64 386 package arch diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go index 8d8bb8a8b..96e9a6949 100644 --- a/pkg/sentry/arch/arch_x86.go +++ b/pkg/sentry/arch/arch_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || 386 // +build amd64 386 package arch diff --git a/pkg/sentry/arch/arch_x86_impl.go b/pkg/sentry/arch/arch_x86_impl.go index 5d7b99bd9..bb5ff7f7f 100644 --- a/pkg/sentry/arch/arch_x86_impl.go +++ b/pkg/sentry/arch/arch_x86_impl.go @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build (amd64 || 386) && go1.1 // +build amd64 386 +// +build go1.1 package arch diff --git a/pkg/sentry/arch/fpu/fpu_amd64.go b/pkg/sentry/arch/fpu/fpu_amd64.go index 2e61b4955..e422f67a1 100644 --- a/pkg/sentry/arch/fpu/fpu_amd64.go +++ b/pkg/sentry/arch/fpu/fpu_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || i386 // +build amd64 i386 package fpu diff --git a/pkg/sentry/arch/fpu/fpu_arm64.go b/pkg/sentry/arch/fpu/fpu_arm64.go index 46634661f..49e641722 100644 --- a/pkg/sentry/arch/fpu/fpu_arm64.go +++ b/pkg/sentry/arch/fpu/fpu_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package fpu diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index 58e28dbba..dbd4336f9 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package arch diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go index 80df90076..ee22ec512 100644 --- a/pkg/sentry/arch/signal_arm64.go +++ b/pkg/sentry/arch/signal_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package arch diff --git a/pkg/sentry/arch/syscalls_amd64.go b/pkg/sentry/arch/syscalls_amd64.go index 3859f41ee..c021ba072 100644 --- a/pkg/sentry/arch/syscalls_amd64.go +++ b/pkg/sentry/arch/syscalls_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package arch diff --git a/pkg/sentry/arch/syscalls_arm64.go b/pkg/sentry/arch/syscalls_arm64.go index 95dfd1e90..7146c9e44 100644 --- a/pkg/sentry/arch/syscalls_arm64.go +++ b/pkg/sentry/arch/syscalls_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package arch diff --git a/pkg/sentry/control/logging.go b/pkg/sentry/control/logging.go index 8a500a515..7613dfcbc 100644 --- a/pkg/sentry/control/logging.go +++ b/pkg/sentry/control/logging.go @@ -50,20 +50,20 @@ type LoggingArgs struct { // enable strace at all. If this flag is false then a completely // pristine copy of the syscall table will be swapped in. This // approach is used to remain consistent with an empty strace - // whitelist meaning trace all system calls. + // allowlist meaning trace all system calls. EnableStrace bool - // Strace is the whitelist of syscalls to trace to log. If this - // and StraceEventWhitelist are empty trace all system calls. - StraceWhitelist []string + // Strace is the allowlist of syscalls to trace to log. If this + // and StraceEventAllowlist are empty trace all system calls. + StraceAllowlist []string // SetEventStrace is a flag used to indicate that event strace // related arguments were passed in. SetEventStrace bool - // StraceEventWhitelist is the whitelist of syscalls to trace + // StraceEventAllowlist is the allowlist of syscalls to trace // to event log. - StraceEventWhitelist []string + StraceEventAllowlist []string } // Logging provides functions related to logging. @@ -107,13 +107,13 @@ func (l *Logging) Change(args *LoggingArgs, code *int) error { func (l *Logging) configureStrace(args *LoggingArgs) error { if args.EnableStrace { - // Install the whitelist specified. - if len(args.StraceWhitelist) > 0 { - if err := strace.Enable(args.StraceWhitelist, strace.SinkTypeLog); err != nil { + // Install the allowlist specified. + if len(args.StraceAllowlist) > 0 { + if err := strace.Enable(args.StraceAllowlist, strace.SinkTypeLog); err != nil { return err } } else { - // For convenience, if strace is enabled but whitelist + // For convenience, if strace is enabled but allowlist // is empty, enable everything to log. strace.EnableAll(strace.SinkTypeLog) } @@ -125,8 +125,8 @@ func (l *Logging) configureStrace(args *LoggingArgs) error { } func (l *Logging) configureEventStrace(args *LoggingArgs) error { - if len(args.StraceEventWhitelist) > 0 { - if err := strace.Enable(args.StraceEventWhitelist, strace.SinkTypeEvent); err != nil { + if len(args.StraceEventAllowlist) > 0 { + if err := strace.Enable(args.StraceEventAllowlist, strace.SinkTypeEvent); err != nil { return err } } else { diff --git a/pkg/sentry/fs/host/util_amd64_unsafe.go b/pkg/sentry/fs/host/util_amd64_unsafe.go index 21782f1da..e90629f4e 100644 --- a/pkg/sentry/fs/host/util_amd64_unsafe.go +++ b/pkg/sentry/fs/host/util_amd64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package host diff --git a/pkg/sentry/fs/host/util_arm64_unsafe.go b/pkg/sentry/fs/host/util_arm64_unsafe.go index ed8f5242a..9fbb93726 100644 --- a/pkg/sentry/fs/host/util_arm64_unsafe.go +++ b/pkg/sentry/fs/host/util_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package host diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 14a97b468..05b776c2e 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -39,26 +39,14 @@ import ( // Sync implements vfs.FilesystemImpl.Sync. func (fs *filesystem) Sync(ctx context.Context) error { // Snapshot current syncable dentries and special file FDs. - fs.renameMu.RLock() fs.syncMu.Lock() ds := make([]*dentry, 0, len(fs.syncableDentries)) for d := range fs.syncableDentries { - // It's safe to use IncRef here even though fs.syncableDentries doesn't - // hold references since we hold fs.renameMu. Note that we can't use - // TryIncRef since cached dentries at zero references should still be - // synced. - d.IncRef() ds = append(ds, d) } - fs.renameMu.RUnlock() sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs)) for sffd := range fs.specialFileFDs { - // As above, fs.specialFileFDs doesn't hold references. However, unlike - // dentries, an FD that has reached zero references can't be - // resurrected, so we can use TryIncRef. - if sffd.vfsfd.TryIncRef() { - sffds = append(sffds, sffd) - } + sffds = append(sffds, sffd) } fs.syncMu.Unlock() @@ -68,9 +56,7 @@ func (fs *filesystem) Sync(ctx context.Context) error { // Sync syncable dentries. for _, d := range ds { - err := d.syncCachedFile(ctx, true /* forFilesystemSync */) - d.DecRef(ctx) - if err != nil { + if err := d.syncCachedFile(ctx, true /* forFilesystemSync */); err != nil { ctx.Infof("gofer.filesystem.Sync: dentry.syncCachedFile failed: %v", err) if retErr == nil { retErr = err @@ -81,9 +67,7 @@ func (fs *filesystem) Sync(ctx context.Context) error { // Sync special files, which may be writable but do not use dentry shared // handles (so they won't be synced by the above). for _, sffd := range sffds { - err := sffd.sync(ctx, true /* forFilesystemSync */) - sffd.vfsfd.DecRef(ctx) - if err != nil { + if err := sffd.sync(ctx, true /* forFilesystemSync */); err != nil { ctx.Infof("gofer.filesystem.Sync: specialFileFD.sync failed: %v", err) if retErr == nil { retErr = err diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index bcf989765..ec8d58cc9 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -582,10 +582,10 @@ func (fs *filesystem) Release(ctx context.Context) { d.dataMu.Unlock() // Close host FDs if they exist. if d.readFD >= 0 { - unix.Close(int(d.readFD)) + _ = unix.Close(int(d.readFD)) } if d.writeFD >= 0 && d.readFD != d.writeFD { - unix.Close(int(d.writeFD)) + _ = unix.Close(int(d.writeFD)) } d.readFD = -1 d.writeFD = -1 @@ -1637,18 +1637,18 @@ func (d *dentry) destroyLocked(ctx context.Context) { d.dataMu.Unlock() // Clunk open fids and close open host FDs. if !d.readFile.isNil() { - d.readFile.close(ctx) + _ = d.readFile.close(ctx) } if !d.writeFile.isNil() && d.readFile != d.writeFile { - d.writeFile.close(ctx) + _ = d.writeFile.close(ctx) } d.readFile = p9file{} d.writeFile = p9file{} if d.readFD >= 0 { - unix.Close(int(d.readFD)) + _ = unix.Close(int(d.readFD)) } if d.writeFD >= 0 && d.readFD != d.writeFD { - unix.Close(int(d.writeFD)) + _ = unix.Close(int(d.writeFD)) } d.readFD = -1 d.writeFD = -1 diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index 29afb67d9..4b59c1c3c 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -42,6 +42,11 @@ import ( type specialFileFD struct { fileDescription + // releaseMu synchronizes the closing of fd.handle with fd.sync(). It's safe + // to access fd.handle without locking for operations that require a ref to + // be held by the caller, e.g. vfs.FileDescriptionImpl implementations. + releaseMu sync.RWMutex `state:"nosave"` + // handle is used for file I/O. handle is immutable. handle handle `state:"nosave"` @@ -117,7 +122,10 @@ func (fd *specialFileFD) Release(ctx context.Context) { if fd.haveQueue { fdnotifier.RemoveFD(fd.handle.fd) } + fd.releaseMu.Lock() fd.handle.close(ctx) + fd.releaseMu.Unlock() + fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) fs.syncMu.Lock() delete(fs.specialFileFDs, fd) @@ -373,6 +381,13 @@ func (fd *specialFileFD) Sync(ctx context.Context) error { } func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error { + // Locks to ensure it didn't race with fd.Release(). + fd.releaseMu.RLock() + defer fd.releaseMu.RUnlock() + + if !fd.handle.isOpen() { + return nil + } err := func() error { // If we have a host FD, fsyncing it is likely to be faster than an fsync // RPC. diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 79a54eef3..f2250c025 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -397,8 +397,8 @@ func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth } // Inherit the group and setgid bit as in fs/inode.c:inode_init_owner(). - if parentDir != nil && parentDir.inode.mode&linux.S_ISGID == linux.S_ISGID { - kgid = auth.KGID(parentDir.inode.gid) + if parentDir != nil && atomic.LoadUint32(&parentDir.inode.mode)&linux.S_ISGID == linux.S_ISGID { + kgid = auth.KGID(atomic.LoadUint32(&parentDir.inode.gid)) if mode&linux.S_IFDIR == linux.S_IFDIR { mode |= linux.S_ISGID } diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go index 070914a68..930016a3e 100644 --- a/pkg/sentry/fsimpl/verity/filesystem.go +++ b/pkg/sentry/fsimpl/verity/filesystem.go @@ -851,11 +851,18 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf return nil, err } + tmpOpts := *opts + + // Open the lowerFD with O_PATH if a symlink is opened for verity. + if tmpOpts.Flags&linux.O_NOFOLLOW != 0 && d.isSymlink() { + tmpOpts.Flags |= linux.O_PATH + } + // Open the file in the underlying file system. lowerFD, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ Root: d.lowerVD, Start: d.lowerVD, - }, opts) + }, &tmpOpts) // The file should exist, as we succeeded in finding its dentry. If it's // missing, it indicates an unexpected modification to the file system. @@ -893,7 +900,6 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf // be called if a verity FD is successfully created. defer merkleReader.DecRef(ctx) - lowerFlags := lowerFD.StatusFlags() lowerFDOpts := lowerFD.Options() var merkleWriter *vfs.FileDescription var parentMerkleWriter *vfs.FileDescription @@ -946,7 +952,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf isDir: d.isDir(), } - if err := fd.vfsfd.Init(fd, lowerFlags, rp.Mount(), &d.vfsd, &lowerFDOpts); err != nil { + if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), &d.vfsd, &lowerFDOpts); err != nil { return nil, err } lowerFD.IncRef() diff --git a/pkg/sentry/fsimpl/verity/verity_test.go b/pkg/sentry/fsimpl/verity/verity_test.go index 65465b814..af041bd50 100644 --- a/pkg/sentry/fsimpl/verity/verity_test.go +++ b/pkg/sentry/fsimpl/verity/verity_test.go @@ -899,7 +899,7 @@ func TestUnmodifiedSymlinkFileReadSucceeds(t *testing.T) { t.Fatalf("SymlinkAt: %v", err) } - fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_PATH|linux.O_NOFOLLOW, linux.ModeRegular) + fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular) if err != nil { t.Fatalf("openVerityAt symlink: %v", err) @@ -1034,7 +1034,7 @@ func TestDeletedSymlinkFileReadFails(t *testing.T) { t.Fatalf("SymlinkAt: %v", err) } - fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_PATH|linux.O_NOFOLLOW, linux.ModeRegular) + fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular) if err != nil { t.Fatalf("openVerityAt symlink: %v", err) @@ -1136,7 +1136,7 @@ func TestModifiedSymlinkFileReadFails(t *testing.T) { } // Open symlink file to get the fd for ioctl in new step. - fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_PATH|linux.O_NOFOLLOW, linux.ModeRegular) + fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular) if err != nil { t.Fatalf("OpenAt symlink: %v", err) } diff --git a/pkg/sentry/hostfd/hostfd_linux.go b/pkg/sentry/hostfd/hostfd_linux.go index e103e7296..0131da22d 100644 --- a/pkg/sentry/hostfd/hostfd_linux.go +++ b/pkg/sentry/hostfd/hostfd_linux.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package hostfd // MaxReadWriteIov is the maximum permitted size of a struct iovec array in a diff --git a/pkg/sentry/kernel/kernel_opts.go b/pkg/sentry/kernel/kernel_opts.go index 2e66ec587..5ffafb0d1 100644 --- a/pkg/sentry/kernel/kernel_opts.go +++ b/pkg/sentry/kernel/kernel_opts.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package kernel // SpecialOpts contains non-standard options for the kernel. diff --git a/pkg/sentry/kernel/msgqueue/BUILD b/pkg/sentry/kernel/msgqueue/BUILD index e4305fead..5ec11e1f6 100644 --- a/pkg/sentry/kernel/msgqueue/BUILD +++ b/pkg/sentry/kernel/msgqueue/BUILD @@ -18,8 +18,8 @@ go_template_instance( go_library( name = "msgqueue", srcs = [ - "msgqueue.go", "message_list.go", + "msgqueue.go", ], visibility = ["//pkg/sentry:internal"], deps = [ diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index 21358ec92..079294f81 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -768,14 +768,14 @@ const ( // ptraceClone is called at the end of a clone or fork syscall to check if t // should enter PTRACE_EVENT_CLONE, PTRACE_EVENT_FORK, or PTRACE_EVENT_VFORK // stop. child is the new task. -func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions) bool { +func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, args *linux.CloneArgs) bool { if !t.hasTracer() { return false } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() event := false - if !opts.Untraced { + if args.Flags&linux.CLONE_UNTRACED == 0 { switch kind { case ptraceCloneKindClone: if t.ptraceOpts.TraceClone { @@ -810,7 +810,7 @@ func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions // clone(2)'s documentation of CLONE_UNTRACED and CLONE_PTRACE is // confusingly wrong; see kernel/fork.c:_do_fork() => copy_process() => // include/linux/ptrace.h:ptrace_init_task(). - if event || opts.InheritTracer { + if event || args.Flags&linux.CLONE_PTRACE != 0 { tracer := t.Tracer() if tracer != nil { child.ptraceTracer.Store(tracer) diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go index 5ae05b5c3..63422e155 100644 --- a/pkg/sentry/kernel/ptrace_amd64.go +++ b/pkg/sentry/kernel/ptrace_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kernel diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go index 46dd84cbc..27514d67b 100644 --- a/pkg/sentry/kernel/ptrace_arm64.go +++ b/pkg/sentry/kernel/ptrace_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kernel diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index 7e1347aa6..da4b77ca2 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -26,140 +26,39 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -// SharingOptions controls what resources are shared by a new task created by -// Task.Clone, or an existing task affected by Task.Unshare. -type SharingOptions struct { - // If NewAddressSpace is true, the task should have an independent virtual - // address space. - NewAddressSpace bool - - // If NewSignalHandlers is true, the task should use an independent set of - // signal handlers. - NewSignalHandlers bool - - // If NewThreadGroup is true, the task should be the leader of its own - // thread group. TerminationSignal is the signal that the thread group - // will send to its parent when it exits. If NewThreadGroup is false, - // TerminationSignal is ignored. - NewThreadGroup bool - TerminationSignal linux.Signal - - // If NewPIDNamespace is true: - // - // - In the context of Task.Clone, the new task should be the init task - // (TID 1) in a new PID namespace. - // - // - In the context of Task.Unshare, the task should create a new PID - // namespace, and all subsequent clones of the task should be members of - // the new PID namespace. - NewPIDNamespace bool - - // If NewUserNamespace is true, the task should have an independent user - // namespace. - NewUserNamespace bool - - // If NewNetworkNamespace is true, the task should have an independent - // network namespace. - NewNetworkNamespace bool - - // If NewFiles is true, the task should use an independent file descriptor - // table. - NewFiles bool - - // If NewFSContext is true, the task should have an independent FSContext. - NewFSContext bool - - // If NewUTSNamespace is true, the task should have an independent UTS - // namespace. - NewUTSNamespace bool - - // If NewIPCNamespace is true, the task should have an independent IPC - // namespace. - NewIPCNamespace bool -} - -// CloneOptions controls the behavior of Task.Clone. -type CloneOptions struct { - // SharingOptions defines the set of resources that the new task will share - // with its parent. - SharingOptions - - // Stack is the initial stack pointer of the new task. If Stack is 0, the - // new task will start with the same stack pointer as its parent. - Stack hostarch.Addr - - // If SetTLS is true, set the new task's TLS (thread-local storage) - // descriptor to TLS. If SetTLS is false, TLS is ignored. - SetTLS bool - TLS hostarch.Addr - - // If ChildClearTID is true, when the child exits, 0 is written to the - // address ChildTID in the child's memory, and if the write is successful a - // futex wake on the same address is performed. - // - // If ChildSetTID is true, the child's thread ID (in the child's PID - // namespace) is written to address ChildTID in the child's memory. (As in - // Linux, failed writes are silently ignored.) - ChildClearTID bool - ChildSetTID bool - ChildTID hostarch.Addr - - // If ParentSetTID is true, the child's thread ID (in the parent's PID - // namespace) is written to address ParentTID in the parent's memory. (As - // in Linux, failed writes are silently ignored.) - // - // Older versions of the clone(2) man page state that CLONE_PARENT_SETTID - // causes the child's thread ID to be written to ptid in both the parent - // and child's memory, but this is a documentation error fixed by - // 87ab04792ced ("clone.2: Fix description of CLONE_PARENT_SETTID"). - ParentSetTID bool - ParentTID hostarch.Addr - - // If Vfork is true, place the parent in vforkStop until the cloned task - // releases its TaskImage. - Vfork bool - - // If Untraced is true, do not report PTRACE_EVENT_CLONE/FORK/VFORK for - // this clone(), and do not ptrace-attach the caller's tracer to the new - // task. (PTRACE_EVENT_VFORK_DONE will still be reported if appropriate). - Untraced bool - - // If InheritTracer is true, ptrace-attach the caller's tracer to the new - // task, even if no PTRACE_EVENT_CLONE/FORK/VFORK event would be reported - // for it. If both Untraced and InheritTracer are true, no event will be - // reported, but tracer inheritance will still occur. - InheritTracer bool -} - // Clone implements the clone(2) syscall and returns the thread ID of the new // task in t's PID namespace. Clone may return both a non-zero thread ID and a // non-nil error. // // Preconditions: The caller must be running Task.doSyscallInvoke on the task // goroutine. -func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { +func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) { // Since signal actions may refer to application signal handlers by virtual // address, any set of signal handlers must refer to the same address // space. - if !opts.NewSignalHandlers && opts.NewAddressSpace { + if args.Flags&(linux.CLONE_SIGHAND|linux.CLONE_VM) == linux.CLONE_SIGHAND { return 0, nil, linuxerr.EINVAL } // In order for the behavior of thread-group-directed signals to be sane, // all tasks in a thread group must share signal handlers. - if !opts.NewThreadGroup && opts.NewSignalHandlers { + if args.Flags&(linux.CLONE_THREAD|linux.CLONE_SIGHAND) == linux.CLONE_THREAD { return 0, nil, linuxerr.EINVAL } // All tasks in a thread group must be in the same PID namespace. - if !opts.NewThreadGroup && (opts.NewPIDNamespace || t.childPIDNamespace != nil) { + if (args.Flags&linux.CLONE_THREAD != 0) && (args.Flags&linux.CLONE_NEWPID != 0 || t.childPIDNamespace != nil) { return 0, nil, linuxerr.EINVAL } // The two different ways of specifying a new PID namespace are // incompatible. - if opts.NewPIDNamespace && t.childPIDNamespace != nil { + if args.Flags&linux.CLONE_NEWPID != 0 && t.childPIDNamespace != nil { return 0, nil, linuxerr.EINVAL } // Thread groups and FS contexts cannot span user namespaces. - if opts.NewUserNamespace && (!opts.NewThreadGroup || !opts.NewFSContext) { + if args.Flags&linux.CLONE_NEWUSER != 0 && args.Flags&(linux.CLONE_THREAD|linux.CLONE_FS) != 0 { + return 0, nil, linuxerr.EINVAL + } + // args.ExitSignal must be a valid signal. + if args.ExitSignal != 0 && !linux.Signal(args.ExitSignal).IsValid() { return 0, nil, linuxerr.EINVAL } @@ -174,7 +73,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { // user_namespaces(7) creds := t.Credentials() userns := creds.UserNamespace - if opts.NewUserNamespace { + if args.Flags&linux.CLONE_NEWUSER != 0 { var err error // "EPERM (since Linux 3.9): CLONE_NEWUSER was specified in flags and // the caller is in a chroot environment (i.e., the caller's root @@ -189,21 +88,19 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { return 0, nil, err } } - if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) { + if args.Flags&(linux.CLONE_NEWPID|linux.CLONE_NEWNET|linux.CLONE_NEWUTS|linux.CLONE_NEWIPC) != 0 && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) { return 0, nil, linuxerr.EPERM } utsns := t.UTSNamespace() - if opts.NewUTSNamespace { + if args.Flags&linux.CLONE_NEWUTS != 0 { // Note that this must happen after NewUserNamespace so we get // the new userns if there is one. utsns = t.UTSNamespace().Clone(userns) } ipcns := t.IPCNamespace() - if opts.NewIPCNamespace { - // Note that "If CLONE_NEWIPC is set, then create the process in a new IPC - // namespace" + if args.Flags&linux.CLONE_NEWIPC != 0 { ipcns = NewIPCNamespace(userns) } else { ipcns.IncRef() @@ -214,7 +111,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { defer cu.Clean() netns := t.NetworkNamespace() - if opts.NewNetworkNamespace { + if args.Flags&linux.CLONE_NEWNET != 0 { netns = inet.NewNamespace(netns) } @@ -227,7 +124,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { }) } - image, err := t.image.Fork(t, t.k, !opts.NewAddressSpace) + image, err := t.image.Fork(t, t.k, args.Flags&linux.CLONE_VM != 0) if err != nil { return 0, nil, err } @@ -236,17 +133,17 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { }) // clone() returns 0 in the child. image.Arch.SetReturn(0) - if opts.Stack != 0 { - image.Arch.SetStack(uintptr(opts.Stack)) + if args.Stack != 0 { + image.Arch.SetStack(uintptr(args.Stack)) } - if opts.SetTLS { - if !image.Arch.SetTLS(uintptr(opts.TLS)) { + if args.Flags&linux.CLONE_SETTLS != 0 { + if !image.Arch.SetTLS(uintptr(args.TLS)) { return 0, nil, linuxerr.EPERM } } var fsContext *FSContext - if opts.NewFSContext { + if args.Flags&linux.CLONE_FS == 0 { fsContext = t.fsContext.Fork() } else { fsContext = t.fsContext @@ -254,7 +151,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } var fdTable *FDTable - if opts.NewFiles { + if args.Flags&linux.CLONE_FILES == 0 { fdTable = t.fdTable.Fork(t) } else { fdTable = t.fdTable @@ -264,22 +161,22 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { pidns := t.tg.pidns if t.childPIDNamespace != nil { pidns = t.childPIDNamespace - } else if opts.NewPIDNamespace { + } else if args.Flags&linux.CLONE_NEWPID != 0 { pidns = pidns.NewChild(userns) } tg := t.tg rseqAddr := hostarch.Addr(0) rseqSignature := uint32(0) - if opts.NewThreadGroup { + if args.Flags&linux.CLONE_THREAD == 0 { if tg.mounts != nil { tg.mounts.IncRef() } sh := t.tg.signalHandlers - if opts.NewSignalHandlers { + if args.Flags&linux.CLONE_SIGHAND == 0 { sh = sh.Fork() } - tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, opts.TerminationSignal, tg.limits.GetCopy()) + tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, linux.Signal(args.ExitSignal), tg.limits.GetCopy()) tg.oomScoreAdj = atomic.LoadInt32(&t.tg.oomScoreAdj) rseqAddr = t.rseqAddr rseqSignature = t.rseqSignature @@ -304,7 +201,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { RSeqSignature: rseqSignature, ContainerID: t.ContainerID(), } - if opts.NewThreadGroup { + if args.Flags&linux.CLONE_THREAD == 0 { cfg.Parent = t } else { cfg.InheritParent = t @@ -322,7 +219,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { // // However kernel/fork.c:copy_process() adds a limitation to this: // "sigaltstack should be cleared when sharing the same VM". - if opts.NewAddressSpace || opts.Vfork { + if args.Flags&linux.CLONE_VM == 0 || args.Flags&linux.CLONE_VFORK != 0 { nt.SetSignalStack(t.SignalStack()) } @@ -347,35 +244,35 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { copiedFilters := append([]bpf.Program(nil), f.([]bpf.Program)...) nt.syscallFilters.Store(copiedFilters) } - if opts.Vfork { + if args.Flags&linux.CLONE_VFORK != 0 { nt.vforkParent = t } - if opts.ChildClearTID { - nt.SetClearTID(opts.ChildTID) + if args.Flags&linux.CLONE_CHILD_CLEARTID != 0 { + nt.SetClearTID(hostarch.Addr(args.ChildTID)) } - if opts.ChildSetTID { + if args.Flags&linux.CLONE_CHILD_SETTID != 0 { ctid := nt.ThreadID() - ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{AddressSpaceActive: false}), opts.ChildTID) + ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{AddressSpaceActive: false}), hostarch.Addr(args.ChildTID)) } ntid := t.tg.pidns.IDOfTask(nt) - if opts.ParentSetTID { - ntid.CopyOut(t, opts.ParentTID) + if args.Flags&linux.CLONE_PARENT_SETTID != 0 { + ntid.CopyOut(t, hostarch.Addr(args.ParentTID)) } kind := ptraceCloneKindClone - if opts.Vfork { + if args.Flags&linux.CLONE_VFORK != 0 { kind = ptraceCloneKindVfork - } else if opts.TerminationSignal == linux.SIGCHLD { + } else if linux.Signal(args.ExitSignal) == linux.SIGCHLD { kind = ptraceCloneKindFork } - if t.ptraceClone(kind, nt, opts) { - if opts.Vfork { + if t.ptraceClone(kind, nt, args) { + if args.Flags&linux.CLONE_VFORK != 0 { return ntid, &SyscallControl{next: &runSyscallAfterPtraceEventClone{vforkChild: nt, vforkChildTID: ntid}}, nil } return ntid, &SyscallControl{next: &runSyscallAfterPtraceEventClone{}}, nil } - if opts.Vfork { + if args.Flags&linux.CLONE_VFORK != 0 { t.maybeBeginVforkStop(nt) return ntid, &SyscallControl{next: &runSyscallAfterVforkStop{childTID: ntid}}, nil } @@ -446,27 +343,35 @@ func (r *runSyscallAfterVforkStop) execute(t *Task) taskRunState { } // Unshare changes the set of resources t shares with other tasks, as specified -// by opts. +// by flags. // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) Unshare(opts *SharingOptions) error { - // In Linux unshare(2), NewThreadGroup implies NewSignalHandlers and - // NewSignalHandlers implies NewAddressSpace. All three flags are no-ops if - // t is the only task using its MM, which due to clone(2)'s rules imply - // that it is also the only task using its signal handlers / in its thread - // group, and cause EINVAL to be returned otherwise. +func (t *Task) Unshare(flags int32) error { + // "CLONE_THREAD, CLONE_SIGHAND, and CLONE_VM can be specified in flags if + // the caller is single threaded (i.e., it is not sharing its address space + // with another process or thread). In this case, these flags have no + // effect. (Note also that specifying CLONE_THREAD automatically implies + // CLONE_VM, and specifying CLONE_VM automatically implies CLONE_SIGHAND.) + // If the process is multithreaded, then the use of these flags results in + // an error." - unshare(2). This is incorrect (cf. + // kernel/fork.c:ksys_unshare()): + // + // - CLONE_THREAD does not imply CLONE_VM. + // + // - CLONE_SIGHAND implies CLONE_THREAD. + // + // - Only CLONE_VM requires that the caller is not sharing its address + // space with another thread. CLONE_SIGHAND requires that the caller is not + // sharing its signal handlers, and CLONE_THREAD requires that the caller + // is the only thread in its thread group. // // Since we don't count the number of tasks using each address space or set - // of signal handlers, we reject NewSignalHandlers and NewAddressSpace - // altogether, and interpret NewThreadGroup as requiring that t be the only - // member of its thread group. This seems to be logically coherent, in the - // sense that clone(2) allows a task to share signal handlers and address - // spaces with tasks in other thread groups. - if opts.NewAddressSpace || opts.NewSignalHandlers { + // of signal handlers, we reject CLONE_VM and CLONE_SIGHAND altogether. + if flags&(linux.CLONE_VM|linux.CLONE_SIGHAND) != 0 { return linuxerr.EINVAL } creds := t.Credentials() - if opts.NewThreadGroup { + if flags&linux.CLONE_THREAD != 0 { t.tg.signalHandlers.mu.Lock() if t.tg.tasksCount != 1 { t.tg.signalHandlers.mu.Unlock() @@ -476,7 +381,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { // This isn't racy because we're the only living task, and therefore // the only task capable of creating new ones, in our thread group. } - if opts.NewUserNamespace { + if flags&linux.CLONE_NEWUSER != 0 { if t.IsChrooted() { return linuxerr.EPERM } @@ -492,7 +397,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { creds = t.Credentials() } haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN) - if opts.NewPIDNamespace { + if flags&linux.CLONE_NEWPID != 0 { if !haveCapSysAdmin { return linuxerr.EPERM } @@ -500,14 +405,14 @@ func (t *Task) Unshare(opts *SharingOptions) error { } t.mu.Lock() // Can't defer unlock: DecRefs must occur without holding t.mu. - if opts.NewNetworkNamespace { + if flags&linux.CLONE_NEWNET != 0 { if !haveCapSysAdmin { t.mu.Unlock() return linuxerr.EPERM } t.netns = inet.NewNamespace(t.netns) } - if opts.NewUTSNamespace { + if flags&linux.CLONE_NEWUTS != 0 { if !haveCapSysAdmin { t.mu.Unlock() return linuxerr.EPERM @@ -516,7 +421,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { // new user namespace is used if there is one. t.utsns = t.utsns.Clone(creds.UserNamespace) } - if opts.NewIPCNamespace { + if flags&linux.CLONE_NEWIPC != 0 { if !haveCapSysAdmin { t.mu.Unlock() return linuxerr.EPERM @@ -527,12 +432,12 @@ func (t *Task) Unshare(opts *SharingOptions) error { t.ipcns = NewIPCNamespace(creds.UserNamespace) } var oldFDTable *FDTable - if opts.NewFiles { + if flags&linux.CLONE_FILES != 0 { oldFDTable = t.fdTable t.fdTable = oldFDTable.Fork(t) } var oldFSContext *FSContext - if opts.NewFSContext { + if flags&linux.CLONE_FS != 0 { oldFSContext = t.fsContext t.fsContext = oldFSContext.Fork() } diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go index 73ea73742..0567c8d32 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.s b/pkg/sentry/platform/kvm/bluepill_amd64.s index 953024600..c2a1dca11 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64.s +++ b/pkg/sentry/platform/kvm/bluepill_amd64.s @@ -37,7 +37,15 @@ TEXT ·bluepill(SB),NOSPLIT,$0 begin: MOVQ vcpu+0(FP), AX LEAQ VCPU_CPU(AX), BX + + // The gorountine stack will be changed in guest which renders + // the frame pointer outdated and misleads perf tools. + // Disconnect the frame-chain with the zeroed frame pointer + // when it is saved in the frame in bluepillHandler(). + MOVQ BP, CX + MOVQ $0, BP BYTE CLI; + MOVQ CX, BP check_vcpu: MOVQ ENTRY_CPU_SELF(GS), CX CMPQ BX, CX diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go index 198bafdea..4ba1d6f9c 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go index 9e5c52923..acb0cb05f 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go index f105fdbd0..ee7dba828 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go index 06fcf1d2e..f63ab6aba 100644 --- a/pkg/sentry/platform/kvm/bluepill_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.12 -// +build !go1.18 +//go:build go1.12 && !go1.18 +// +build go1.12,!go1.18 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sentry/platform/kvm/kvm_amd64.go b/pkg/sentry/platform/kvm/kvm_amd64.go index b9ed4a706..a5189d9e2 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64.go +++ b/pkg/sentry/platform/kvm/kvm_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go index a002ae00c..c3fbbdc75 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64_test.go +++ b/pkg/sentry/platform/kvm/kvm_amd64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go index 0c43d72f4..7fdb6ac64 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_arm64.go b/pkg/sentry/platform/kvm/kvm_arm64.go index b73340f0e..159808433 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64.go +++ b/pkg/sentry/platform/kvm/kvm_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_arm64_test.go b/pkg/sentry/platform/kvm/kvm_arm64_test.go index 0e3d84d95..b53e354da 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64_test.go +++ b/pkg/sentry/platform/kvm/kvm_arm64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_arm64_unsafe.go b/pkg/sentry/platform/kvm/kvm_arm64_unsafe.go index f07a9f34d..54d579a2b 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/kvm_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index b28a2c4e8..a96634381 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go index 83bcc7406..de798bb2c 100644 --- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go index edaccf9bc..7937a8481 100644 --- a/pkg/sentry/platform/kvm/machine_arm64.go +++ b/pkg/sentry/platform/kvm/machine_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index f6aa519b1..1a4a9ce7d 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go index 49e1c7136..35660e827 100644 --- a/pkg/sentry/platform/kvm/machine_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_unsafe.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.12 -// +build !go1.18 +//go:build go1.12 && !go1.18 +// +build go1.12,!go1.18 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go index 7c19b6a8f..98c52b2f5 100644 --- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go +++ b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package testutil diff --git a/pkg/sentry/platform/kvm/testutil/testutil_arm64.go b/pkg/sentry/platform/kvm/testutil/testutil_arm64.go index c5235ca9d..6d0ba8252 100644 --- a/pkg/sentry/platform/kvm/testutil/testutil_arm64.go +++ b/pkg/sentry/platform/kvm/testutil/testutil_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package testutil diff --git a/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go index 4f7fe993a..07eda0ef3 100644 --- a/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go +++ b/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ptrace diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go index 32a70f4d5..13a55b784 100644 --- a/pkg/sentry/platform/ptrace/subprocess_amd64.go +++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ptrace diff --git a/pkg/sentry/platform/ptrace/subprocess_arm64.go b/pkg/sentry/platform/ptrace/subprocess_arm64.go index e4257e3bf..8181db659 100644 --- a/pkg/sentry/platform/ptrace/subprocess_arm64.go +++ b/pkg/sentry/platform/ptrace/subprocess_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ptrace diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go index 7e8a31802..129ca52e2 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package ptrace diff --git a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go index 0835e1d1c..f1e84059d 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux && (amd64 || arm64) // +build linux // +build amd64 arm64 diff --git a/pkg/sentry/platform/ptrace/subprocess_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_unsafe.go index 38b7b1a5e..ffd4665f4 100644 --- a/pkg/sentry/platform/ptrace/subprocess_unsafe.go +++ b/pkg/sentry/platform/ptrace/subprocess_unsafe.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.12 -// +build !go1.18 +//go:build go1.12 && !go1.18 +// +build go1.12,!go1.18 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sentry/socket/hostinet/sockopt_impl.go b/pkg/sentry/socket/hostinet/sockopt_impl.go index 8a783712e..2397e04e7 100644 --- a/pkg/sentry/socket/hostinet/sockopt_impl.go +++ b/pkg/sentry/socket/hostinet/sockopt_impl.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package hostinet import ( diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index ea736e446..9b844b0c0 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -49,6 +49,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" @@ -273,6 +274,7 @@ var Metrics = tcpip.Stats{ Timeouts: mustCreateMetric("/netstack/tcp/timeouts", "Number of times RTO expired."), ChecksumErrors: mustCreateMetric("/netstack/tcp/checksum_errors", "Number of segments dropped due to bad checksums."), FailedPortReservations: mustCreateMetric("/netstack/tcp/failed_port_reservations", "Number of time TCP failed to reserve a port."), + SegmentsAckedWithDSACK: mustCreateMetric("/netstack/tcp/segments_acked_with_dsack", "Number of segments for which DSACK was received."), }, UDP: tcpip.UDPStats{ PacketsReceived: mustCreateMetric("/netstack/udp/packets_received", "Number of UDP datagrams received via HandlePacket."), @@ -1682,12 +1684,12 @@ func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int return nil } -func clampBufSize(newSz, min, max int64) int64 { +func clampBufSize(newSz, min, max int64, ignoreMax bool) int64 { // packetOverheadFactor is used to multiply the value provided by the user on // a setsockopt(2) for setting the send/receive buffer sizes sockets. const packetOverheadFactor = 2 - if newSz > max { + if !ignoreMax && newSz > max { newSz = max } @@ -1712,7 +1714,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam v := hostarch.ByteOrder.Uint32(optVal) min, max := ep.SocketOptions().SendBufferLimits() - clamped := clampBufSize(int64(v), min, max) + clamped := clampBufSize(int64(v), min, max, false /* ignoreMax */) ep.SocketOptions().SetSendBufferSize(clamped, true /* notify */) return nil @@ -1723,7 +1725,22 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam v := hostarch.ByteOrder.Uint32(optVal) min, max := ep.SocketOptions().ReceiveBufferLimits() - clamped := clampBufSize(int64(v), min, max) + clamped := clampBufSize(int64(v), min, max, false /* ignoreMax */) + ep.SocketOptions().SetReceiveBufferSize(clamped, true /* notify */) + return nil + + case linux.SO_RCVBUFFORCE: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + + if creds := auth.CredentialsFromContext(t); !creds.HasCapability(linux.CAP_NET_ADMIN) { + return syserr.ErrNotPermitted + } + + v := hostarch.ByteOrder.Uint32(optVal) + min, max := ep.SocketOptions().ReceiveBufferLimits() + clamped := clampBufSize(int64(v), min, max, true /* ignoreMax */) ep.SocketOptions().SetReceiveBufferSize(clamped, true /* notify */) return nil diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index f5da3c509..658e90bb9 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -509,7 +509,6 @@ func SetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) { linux.SO_ATTACH_REUSEPORT_EBPF, linux.SO_CNX_ADVICE, linux.SO_DETACH_FILTER, - linux.SO_RCVBUFFORCE, linux.SO_SNDBUFFORCE: t.Kernel().EmitUnimplementedEvent(t) diff --git a/pkg/sentry/state/state_metadata.go b/pkg/sentry/state/state_metadata.go index cefd20b9b..c42297c80 100644 --- a/pkg/sentry/state/state_metadata.go +++ b/pkg/sentry/state/state_metadata.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package state import ( diff --git a/pkg/sentry/strace/linux64_amd64.go b/pkg/sentry/strace/linux64_amd64.go index 6ce1bb592..317c3c31c 100644 --- a/pkg/sentry/strace/linux64_amd64.go +++ b/pkg/sentry/strace/linux64_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package strace diff --git a/pkg/sentry/strace/linux64_arm64.go b/pkg/sentry/strace/linux64_arm64.go index ce5594301..65f27c810 100644 --- a/pkg/sentry/strace/linux64_arm64.go +++ b/pkg/sentry/strace/linux64_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package strace diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go index 3ea9c31dd..757ff2a40 100644 --- a/pkg/sentry/strace/strace.go +++ b/pkg/sentry/strace/strace.go @@ -819,10 +819,10 @@ func convertToSyscallFlag(sinks SinkType) uint32 { return ret } -// Enable enables the syscalls in whitelist in all syscall tables. +// Enable enables the syscalls in allowlist in all syscall tables. // // Preconditions: Initialize has been called. -func Enable(whitelist []string, sinks SinkType) error { +func Enable(allowlist []string, sinks SinkType) error { flags := convertToSyscallFlag(sinks) for _, table := range kernel.SyscallTables() { // Is this known? @@ -832,7 +832,7 @@ func Enable(whitelist []string, sinks SinkType) error { } // Convert to a set of system calls numbers. - wl, err := sys.ConvertToSysnoMap(whitelist) + wl, err := sys.ConvertToSysnoMap(allowlist) if err != nil { return err } diff --git a/pkg/sentry/syscalls/linux/sys_clone_amd64.go b/pkg/sentry/syscalls/linux/sys_clone_amd64.go index dd43cf18d..2b2dbd9f9 100644 --- a/pkg/sentry/syscalls/linux/sys_clone_amd64.go +++ b/pkg/sentry/syscalls/linux/sys_clone_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_clone_arm64.go b/pkg/sentry/syscalls/linux/sys_clone_arm64.go index cf68a8949..877c86e6a 100644 --- a/pkg/sentry/syscalls/linux/sys_clone_arm64.go +++ b/pkg/sentry/syscalls/linux/sys_clone_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_stat_amd64.go b/pkg/sentry/syscalls/linux/sys_stat_amd64.go index 0a04a6113..e38066ea8 100644 --- a/pkg/sentry/syscalls/linux/sys_stat_amd64.go +++ b/pkg/sentry/syscalls/linux/sys_stat_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_stat_arm64.go b/pkg/sentry/syscalls/linux/sys_stat_arm64.go index 5a3b1bfad..b2ea390c5 100644 --- a/pkg/sentry/syscalls/linux/sys_stat_arm64.go +++ b/pkg/sentry/syscalls/linux/sys_stat_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 46145955e..981cdd985 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -31,11 +31,6 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -const ( - // exitSignalMask is the signal mask to be sent at exit. Same as CSIGNAL in linux. - exitSignalMask = 0xff -) - var ( // ExecMaxTotalSize is the maximum length of all argv and envv entries. // @@ -201,33 +196,16 @@ func ExitGroup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // clone is used by Clone, Fork, and VFork. func clone(t *kernel.Task, flags int, stack hostarch.Addr, parentTID hostarch.Addr, childTID hostarch.Addr, tls hostarch.Addr) (uintptr, *kernel.SyscallControl, error) { - opts := kernel.CloneOptions{ - SharingOptions: kernel.SharingOptions{ - NewAddressSpace: flags&linux.CLONE_VM == 0, - NewSignalHandlers: flags&linux.CLONE_SIGHAND == 0, - NewThreadGroup: flags&linux.CLONE_THREAD == 0, - TerminationSignal: linux.Signal(flags & exitSignalMask), - NewPIDNamespace: flags&linux.CLONE_NEWPID == linux.CLONE_NEWPID, - NewUserNamespace: flags&linux.CLONE_NEWUSER == linux.CLONE_NEWUSER, - NewNetworkNamespace: flags&linux.CLONE_NEWNET == linux.CLONE_NEWNET, - NewFiles: flags&linux.CLONE_FILES == 0, - NewFSContext: flags&linux.CLONE_FS == 0, - NewUTSNamespace: flags&linux.CLONE_NEWUTS == linux.CLONE_NEWUTS, - NewIPCNamespace: flags&linux.CLONE_NEWIPC == linux.CLONE_NEWIPC, - }, - Stack: stack, - SetTLS: flags&linux.CLONE_SETTLS == linux.CLONE_SETTLS, - TLS: tls, - ChildClearTID: flags&linux.CLONE_CHILD_CLEARTID == linux.CLONE_CHILD_CLEARTID, - ChildSetTID: flags&linux.CLONE_CHILD_SETTID == linux.CLONE_CHILD_SETTID, - ChildTID: childTID, - ParentSetTID: flags&linux.CLONE_PARENT_SETTID == linux.CLONE_PARENT_SETTID, - ParentTID: parentTID, - Vfork: flags&linux.CLONE_VFORK == linux.CLONE_VFORK, - Untraced: flags&linux.CLONE_UNTRACED == linux.CLONE_UNTRACED, - InheritTracer: flags&linux.CLONE_PTRACE == linux.CLONE_PTRACE, - } - ntid, ctrl, err := t.Clone(&opts) + args := linux.CloneArgs{ + Flags: uint64(uint32(flags) &^ linux.CSIGNAL), + Pidfd: uint64(parentTID), + ChildTID: uint64(childTID), + ParentTID: uint64(parentTID), + ExitSignal: uint64(flags & linux.CSIGNAL), + Stack: uint64(stack), + TLS: uint64(tls), + } + ntid, ctrl, err := t.Clone(&args) return uintptr(ntid), ctrl, err } @@ -460,29 +438,16 @@ func SetTidAddress(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel // Unshare implements linux syscall unshare(2). func Unshare(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { flags := args[0].Int() - opts := kernel.SharingOptions{ - NewAddressSpace: flags&linux.CLONE_VM == linux.CLONE_VM, - NewSignalHandlers: flags&linux.CLONE_SIGHAND == linux.CLONE_SIGHAND, - NewThreadGroup: flags&linux.CLONE_THREAD == linux.CLONE_THREAD, - NewPIDNamespace: flags&linux.CLONE_NEWPID == linux.CLONE_NEWPID, - NewUserNamespace: flags&linux.CLONE_NEWUSER == linux.CLONE_NEWUSER, - NewNetworkNamespace: flags&linux.CLONE_NEWNET == linux.CLONE_NEWNET, - NewFiles: flags&linux.CLONE_FILES == linux.CLONE_FILES, - NewFSContext: flags&linux.CLONE_FS == linux.CLONE_FS, - NewUTSNamespace: flags&linux.CLONE_NEWUTS == linux.CLONE_NEWUTS, - NewIPCNamespace: flags&linux.CLONE_NEWIPC == linux.CLONE_NEWIPC, - } // "CLONE_NEWPID automatically implies CLONE_THREAD as well." - unshare(2) - if opts.NewPIDNamespace { - opts.NewThreadGroup = true + if flags&linux.CLONE_NEWPID != 0 { + flags |= linux.CLONE_THREAD } // "... specifying CLONE_NEWUSER automatically implies CLONE_THREAD. Since // Linux 3.9, CLONE_NEWUSER also automatically implies CLONE_FS." - if opts.NewUserNamespace { - opts.NewThreadGroup = true - opts.NewFSContext = true + if flags&linux.CLONE_NEWUSER != 0 { + flags |= linux.CLONE_THREAD | linux.CLONE_FS } - return 0, nil, t.Unshare(&opts) + return 0, nil, t.Unshare(flags) } // SchedYield implements linux syscall sched_yield(2). diff --git a/pkg/sentry/syscalls/linux/sys_tls_amd64.go b/pkg/sentry/syscalls/linux/sys_tls_amd64.go index c5899a359..8c6cd7511 100644 --- a/pkg/sentry/syscalls/linux/sys_tls_amd64.go +++ b/pkg/sentry/syscalls/linux/sys_tls_amd64.go @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//+build amd64 +//go:build amd64 +// +build amd64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_tls_arm64.go b/pkg/sentry/syscalls/linux/sys_tls_arm64.go index fb08a356e..ff4ac4d6d 100644 --- a/pkg/sentry/syscalls/linux/sys_tls_arm64.go +++ b/pkg/sentry/syscalls/linux/sys_tls_arm64.go @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//+build arm64 +//go:build arm64 +// +build arm64 package linux diff --git a/pkg/sentry/syscalls/linux/vfs2/stat_amd64.go b/pkg/sentry/syscalls/linux/vfs2/stat_amd64.go index 2da538fc6..122921b52 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat_amd64.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package vfs2 diff --git a/pkg/sentry/syscalls/linux/vfs2/stat_arm64.go b/pkg/sentry/syscalls/linux/vfs2/stat_arm64.go index 88b9c7627..d32031481 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat_arm64.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package vfs2 diff --git a/pkg/sentry/time/sampler_amd64.go b/pkg/sentry/time/sampler_amd64.go index 9f1b4b2fb..5fa1832b4 100644 --- a/pkg/sentry/time/sampler_amd64.go +++ b/pkg/sentry/time/sampler_amd64.go @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//+build amd64 +//go:build amd64 +// +build amd64 package time diff --git a/pkg/sentry/time/sampler_arm64.go b/pkg/sentry/time/sampler_arm64.go index 4c8d33ae4..3560e66ae 100644 --- a/pkg/sentry/time/sampler_arm64.go +++ b/pkg/sentry/time/sampler_arm64.go @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//+build arm64 +//go:build arm64 +// +build arm64 package time diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index aeca262e3..eb3c60610 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -427,9 +427,7 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential if opts.Flags&linux.O_DIRECTORY != 0 { rp.mustBeDir = true } - // Ignore O_PATH for verity, as verity performs extra operations on the fd for verification. - // The underlying filesystem that verity wraps opens the fd with O_PATH. - if opts.Flags&linux.O_PATH != 0 && rp.mount.fs.FilesystemType().Name() != "verity" { + if opts.Flags&linux.O_PATH != 0 { vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{}) if err != nil { return nil, err diff --git a/pkg/shim/epoll.go b/pkg/shim/epoll.go index 737d2b781..463e11a84 100644 --- a/pkg/shim/epoll.go +++ b/pkg/shim/epoll.go @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package shim diff --git a/pkg/shim/runtimeoptions/runtimeoptions_cri.go b/pkg/shim/runtimeoptions/runtimeoptions_cri.go index e6102b4cf..23bbd82be 100644 --- a/pkg/shim/runtimeoptions/runtimeoptions_cri.go +++ b/pkg/shim/runtimeoptions/runtimeoptions_cri.go @@ -13,6 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package runtimeoptions import ( diff --git a/pkg/shim/service_linux.go b/pkg/shim/service_linux.go index 829f69282..fb2f8b062 100644 --- a/pkg/shim/service_linux.go +++ b/pkg/shim/service_linux.go @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package shim diff --git a/pkg/state/state_norace.go b/pkg/state/state_norace.go index 4281aed6d..be09d6141 100644 --- a/pkg/state/state_norace.go +++ b/pkg/state/state_norace.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !race // +build !race package state diff --git a/pkg/state/state_race.go b/pkg/state/state_race.go index 8232981ce..c9f4fd5cf 100644 --- a/pkg/state/state_race.go +++ b/pkg/state/state_race.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build race // +build race package state diff --git a/pkg/state/tests/register_test.go b/pkg/state/tests/register_test.go index 75bdbfc6e..2199d6b01 100644 --- a/pkg/state/tests/register_test.go +++ b/pkg/state/tests/register_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build race // +build race package tests diff --git a/pkg/sync/checklocks_off_unsafe.go b/pkg/sync/checklocks_off_unsafe.go index 62c81b149..87c56dd12 100644 --- a/pkg/sync/checklocks_off_unsafe.go +++ b/pkg/sync/checklocks_off_unsafe.go @@ -3,6 +3,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !checklocks // +build !checklocks package sync diff --git a/pkg/sync/checklocks_on_unsafe.go b/pkg/sync/checklocks_on_unsafe.go index 24f933ed1..f2bfde083 100644 --- a/pkg/sync/checklocks_on_unsafe.go +++ b/pkg/sync/checklocks_on_unsafe.go @@ -3,6 +3,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build checklocks // +build checklocks package sync diff --git a/pkg/sync/goyield_go113_unsafe.go b/pkg/sync/goyield_go113_unsafe.go index 8aee0d455..c4b03e9aa 100644 --- a/pkg/sync/goyield_go113_unsafe.go +++ b/pkg/sync/goyield_go113_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build go1.13 -// +build !go1.14 +//go:build go1.13 && !go1.14 +// +build go1.13,!go1.14 package sync diff --git a/pkg/sync/goyield_unsafe.go b/pkg/sync/goyield_unsafe.go index f3cc12163..8639bb64e 100644 --- a/pkg/sync/goyield_unsafe.go +++ b/pkg/sync/goyield_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build go1.14 -// +build !go1.18 +//go:build go1.14 && !go1.18 +// +build go1.14,!go1.18 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sync/mutex_unsafe.go b/pkg/sync/mutex_unsafe.go index b829765d9..e00d9467d 100644 --- a/pkg/sync/mutex_unsafe.go +++ b/pkg/sync/mutex_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build go1.13 -// +build !go1.18 +//go:build go1.13 && !go1.18 +// +build go1.13,!go1.18 // When updating the build constraint (above), check that syncMutex matches the // standard library sync.Mutex definition. diff --git a/pkg/sync/norace_unsafe.go b/pkg/sync/norace_unsafe.go index 70b5f3a5e..8eca99134 100644 --- a/pkg/sync/norace_unsafe.go +++ b/pkg/sync/norace_unsafe.go @@ -3,6 +3,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !race // +build !race package sync diff --git a/pkg/sync/race_amd64.s b/pkg/sync/race_amd64.s index 57bc0ec79..199602387 100644 --- a/pkg/sync/race_amd64.s +++ b/pkg/sync/race_amd64.s @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build race -// +build amd64 +//go:build race && amd64 +// +build race,amd64 #include "textflag.h" diff --git a/pkg/sync/race_arm64.s b/pkg/sync/race_arm64.s index 88f091fda..c4192e870 100644 --- a/pkg/sync/race_arm64.s +++ b/pkg/sync/race_arm64.s @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build race -// +build arm64 +//go:build race && arm64 +// +build race,arm64 #include "textflag.h" diff --git a/pkg/sync/race_unsafe.go b/pkg/sync/race_unsafe.go index 59985c270..381163cac 100644 --- a/pkg/sync/race_unsafe.go +++ b/pkg/sync/race_unsafe.go @@ -3,6 +3,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build race // +build race package sync diff --git a/pkg/sync/runtime_unsafe.go b/pkg/sync/runtime_unsafe.go index 39c766331..1d9cf304e 100644 --- a/pkg/sync/runtime_unsafe.go +++ b/pkg/sync/runtime_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build go1.13 -// +build !go1.18 +//go:build go1.13 && !go1.18 +// +build go1.13,!go1.18 // Check go:linkname function signatures, type definitions, and constants when // updating Go version. diff --git a/pkg/syserr/host_linux.go b/pkg/syserr/host_linux.go index c8c10f48b..fb92738af 100644 --- a/pkg/syserr/host_linux.go +++ b/pkg/syserr/host_linux.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package syserr diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 1b56d2b72..e8e716db0 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package fdbased provides the implemention of data-link layer endpoints diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go index 8aad338b6..eccd21579 100644 --- a/pkg/tcpip/link/fdbased/endpoint_test.go +++ b/pkg/tcpip/link/fdbased/endpoint_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdbased diff --git a/pkg/tcpip/link/fdbased/endpoint_unsafe.go b/pkg/tcpip/link/fdbased/endpoint_unsafe.go index df14eaad1..904393faa 100644 --- a/pkg/tcpip/link/fdbased/endpoint_unsafe.go +++ b/pkg/tcpip/link/fdbased/endpoint_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdbased diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go index 5d698a5e9..bfae34ab9 100644 --- a/pkg/tcpip/link/fdbased/mmap.go +++ b/pkg/tcpip/link/fdbased/mmap.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build (linux && amd64) || (linux && arm64) // +build linux,amd64 linux,arm64 package fdbased diff --git a/pkg/tcpip/link/fdbased/mmap_stub.go b/pkg/tcpip/link/fdbased/mmap_stub.go index 67be52d67..9d8679502 100644 --- a/pkg/tcpip/link/fdbased/mmap_stub.go +++ b/pkg/tcpip/link/fdbased/mmap_stub.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !linux || (!amd64 && !arm64) // +build !linux !amd64,!arm64 package fdbased diff --git a/pkg/tcpip/link/fdbased/mmap_unsafe.go b/pkg/tcpip/link/fdbased/mmap_unsafe.go index 1293f68a2..58d5dfeef 100644 --- a/pkg/tcpip/link/fdbased/mmap_unsafe.go +++ b/pkg/tcpip/link/fdbased/mmap_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build (linux && amd64) || (linux && arm64) // +build linux,amd64 linux,arm64 package fdbased diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go index 4b7ef3aac..ab2855a63 100644 --- a/pkg/tcpip/link/fdbased/packet_dispatchers.go +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdbased diff --git a/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go index 2206fe0e6..c1438da21 100644 --- a/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go +++ b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux && !amd64 && !arm64 // +build linux,!amd64,!arm64 package rawfile diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go index 5002245a1..da900c24b 100644 --- a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go +++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build ((linux && amd64) || (linux && arm64)) && go1.12 && !go1.18 // +build linux,amd64 linux,arm64 // +build go1.12 // +build !go1.18 diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go index 9743e70ea..7e21a78d4 100644 --- a/pkg/tcpip/link/rawfile/errors.go +++ b/pkg/tcpip/link/rawfile/errors.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package rawfile diff --git a/pkg/tcpip/link/rawfile/errors_test.go b/pkg/tcpip/link/rawfile/errors_test.go index 8f4bd60da..1b88c309b 100644 --- a/pkg/tcpip/link/rawfile/errors_test.go +++ b/pkg/tcpip/link/rawfile/errors_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package rawfile diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go index 43fe57830..53448a641 100644 --- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go +++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package rawfile contains utilities for using the netstack with raw host diff --git a/pkg/tcpip/link/sharedmem/rx.go b/pkg/tcpip/link/sharedmem/rx.go index 8e6f3e5e3..e882a128c 100644 --- a/pkg/tcpip/link/sharedmem/rx.go +++ b/pkg/tcpip/link/sharedmem/rx.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package sharedmem diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index df9a0b90a..30cf659b8 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package sharedmem provides the implemention of data-link layer endpoints diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go index 0f72d4e95..d6d953085 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem_test.go +++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package sharedmem diff --git a/pkg/tcpip/link/tun/tun_unsafe.go b/pkg/tcpip/link/tun/tun_unsafe.go index 0591fbd63..db4338e79 100644 --- a/pkg/tcpip/link/tun/tun_unsafe.go +++ b/pkg/tcpip/link/tun/tun_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package tun contains methods to open TAP and TUN devices. diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go index b9a24ff56..009cab643 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/main.go +++ b/pkg/tcpip/sample/tun_tcp_connect/main.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // This sample creates a stack with TCP and IPv4 protocols on top of a TUN diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go index ef1bfc186..c10b19aa0 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/main.go +++ b/pkg/tcpip/sample/tun_tcp_echo/main.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // This sample creates a stack with TCP and IPv4 protocols on top of a TUN diff --git a/pkg/tcpip/socketops.go b/pkg/tcpip/socketops.go index 042326a3a..5642c86f8 100644 --- a/pkg/tcpip/socketops.go +++ b/pkg/tcpip/socketops.go @@ -54,7 +54,7 @@ type SocketOptionsHandler interface { // buffer size. It also returns the newly set value. OnSetSendBufferSize(v int64) (newSz int64) - // OnSetReceiveBufferSize is invoked to set the SO_RCVBUFSIZE. + // OnSetReceiveBufferSize is invoked by SO_RCVBUF and SO_RCVBUFFORCE. OnSetReceiveBufferSize(v, oldSz int64) (newSz int64) } diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 8f2658f64..55683b4fb 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -1845,6 +1845,10 @@ type TCPStats struct { // FailedPortReservations is the number of times TCP failed to reserve // a port. FailedPortReservations *StatCounter + + // SegmentsAckedWithDSACK is the number of segments acknowledged with + // DSACK. + SegmentsAckedWithDSACK *StatCounter } // UDPStats collects UDP-specific stats. diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index ab5da987a..b3d8951ff 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -455,8 +455,21 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error { } // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. -func (*endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { - return tcpip.FullAddress{}, &tcpip.ErrNotSupported{} +func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { + e.mu.RLock() + defer e.mu.RUnlock() + + addr := e.BindAddr + if e.connected { + addr = e.route.LocalAddress() + } + + return tcpip.FullAddress{ + NIC: e.RegisterNICID, + Addr: addr, + // Linux returns the protocol in the port field. + Port: uint16(e.TransProto), + }, nil } // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 9945fdd6b..044123185 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -478,7 +478,7 @@ type endpoint struct { // shutdownFlags represent the current shutdown state of the endpoint. shutdownFlags tcpip.ShutdownFlags - // tcpRecovery is the loss deteoction algorithm used by TCP. + // tcpRecovery is the loss recovery algorithm used by TCP. tcpRecovery tcpip.TCPRecovery // sack holds TCP SACK related information for this endpoint. @@ -754,7 +754,7 @@ func (e *endpoint) ResumeWork() { // // Precondition: e.mu must be held to call this method. func (e *endpoint) setEndpointState(state EndpointState) { - oldstate := EndpointState(atomic.LoadUint32(&e.state)) + oldstate := EndpointState(atomic.SwapUint32(&e.state, uint32(state))) switch state { case StateEstablished: e.stack.Stats().TCP.CurrentEstablished.Increment() @@ -771,7 +771,6 @@ func (e *endpoint) setEndpointState(state EndpointState) { e.stack.Stats().TCP.CurrentEstablished.Decrement() } } - atomic.StoreUint32(&e.state, uint32(state)) } // EndpointState returns the current state of the endpoint. @@ -870,8 +869,6 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue e.maxSynRetries = uint8(synRetries) } - s.TransportProtocolOption(ProtocolNumber, &e.tcpRecovery) - if p := s.GetTCPProbe(); p != nil { e.probe = p } @@ -2923,6 +2920,7 @@ func (e *endpoint) maybeEnableSACKPermitted(synOpts *header.TCPSynOptions) { } if bool(v) && synOpts.SACKPermitted { e.SACKPermitted = true + e.stack.TransportProtocolOption(ProtocolNumber, &e.tcpRecovery) } } diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 2fc282e73..18b834243 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -478,8 +478,7 @@ func NewProtocol(s *stack.Stack) stack.TransportProtocol { minRTO: MinRTO, maxRTO: MaxRTO, maxRetries: MaxRetries, - // TODO(gvisor.dev/issue/5243): Set recovery to tcpip.TCPRACKLossDetection. - recovery: 0, + recovery: tcpip.TCPRACKLossDetection, } p.dispatcher.init(s.Rand(), runtime.GOMAXPROCS(0)) return &p diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 72d58dcff..92a66f17e 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -1154,6 +1154,13 @@ func (s *sender) walkSACK(rcvdSeg *segment) { idx := 0 n := len(rcvdSeg.parsedOptions.SACKBlocks) if checkDSACK(rcvdSeg) { + dsackBlock := rcvdSeg.parsedOptions.SACKBlocks[0] + numDSACK := uint64(dsackBlock.End-dsackBlock.Start) / uint64(s.MaxPayloadSize) + // numDSACK can be zero when DSACK is sent for subsegments. + if numDSACK < 1 { + numDSACK = 1 + } + s.ep.stack.Stats().TCP.SegmentsAckedWithDSACK.IncrementBy(numDSACK) s.rc.setDSACKSeen(true) idx = 1 n-- diff --git a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go index ced3a9c58..84fb1c416 100644 --- a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go +++ b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go @@ -16,6 +16,7 @@ // iterations taking long enough that the retransmit timer can kick in causing // the congestion window measurements to fail due to extra packets etc. // +//go:build !race // +build !race package tcp_test diff --git a/pkg/tcpip/transport/tcp/tcp_rack_test.go b/pkg/tcpip/transport/tcp/tcp_rack_test.go index d6cf786a1..89e9fb886 100644 --- a/pkg/tcpip/transport/tcp/tcp_rack_test.go +++ b/pkg/tcpip/transport/tcp/tcp_rack_test.go @@ -36,9 +36,9 @@ const ( latency = 5 * time.Millisecond ) -func setStackRACKPermitted(t *testing.T, c *context.Context) { +func setStackTCPRecovery(t *testing.T, c *context.Context, recovery int) { t.Helper() - opt := tcpip.TCPRACKLossDetection + opt := tcpip.TCPRecovery(recovery) if err := c.Stack().SetTransportProtocolOption(header.TCPProtocolNumber, &opt); err != nil { t.Fatalf("c.s.SetTransportProtocolOption(%d, &%v(%v)): %s", header.TCPProtocolNumber, opt, opt, err) } @@ -70,7 +70,6 @@ func TestRACKUpdate(t *testing.T) { close(probeDone) }) setStackSACKPermitted(t, c, true) - setStackRACKPermitted(t, c) createConnectedWithSACKAndTS(c) data := make([]byte, maxPayload) @@ -129,7 +128,6 @@ func TestRACKDetectReorder(t *testing.T) { close(probeDone) }) setStackSACKPermitted(t, c, true) - setStackRACKPermitted(t, c) createConnectedWithSACKAndTS(c) data := make([]byte, ackNumToVerify*maxPayload) for i := range data { @@ -162,8 +160,8 @@ func TestRACKDetectReorder(t *testing.T) { func sendAndReceiveWithSACK(t *testing.T, c *context.Context, numPackets int, enableRACK bool) []byte { setStackSACKPermitted(t, c, true) - if enableRACK { - setStackRACKPermitted(t, c) + if !enableRACK { + setStackTCPRecovery(t, c, 0) } createConnectedWithSACKAndTS(c) @@ -542,6 +540,28 @@ func TestRACKDetectDSACK(t *testing.T) { case invalidDSACKDetected: t.Fatalf("RACK DSACK detected when there is no duplicate SACK") } + + metricPollFn := func() error { + tcpStats := c.Stack().Stats().TCP + stats := []struct { + stat *tcpip.StatCounter + name string + want uint64 + }{ + // Check DSACK was received for one segment. + {tcpStats.SegmentsAckedWithDSACK, "stats.TCP.SegmentsAckedWithDSACK", 1}, + } + for _, s := range stats { + if got, want := s.stat.Value(), s.want; got != want { + return fmt.Errorf("got %s.Value() = %d, want = %d", s.name, got, want) + } + } + return nil + } + + if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil { + t.Error(err) + } } // TestRACKDetectDSACKWithOutOfOrder tests that RACK detects DSACK with out of @@ -682,6 +702,28 @@ func TestRACKDetectDSACKSingleDup(t *testing.T) { case invalidDSACKDetected: t.Fatalf("RACK DSACK detected when there is no duplicate SACK") } + + metricPollFn := func() error { + tcpStats := c.Stack().Stats().TCP + stats := []struct { + stat *tcpip.StatCounter + name string + want uint64 + }{ + // Check DSACK was received for a subsegment. + {tcpStats.SegmentsAckedWithDSACK, "stats.TCP.SegmentsAckedWithDSACK", 1}, + } + for _, s := range stats { + if got, want := s.stat.Value(), s.want; got != want { + return fmt.Errorf("got %s.Value() = %d, want = %d", s.name, got, want) + } + } + return nil + } + + if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil { + t.Error(err) + } } // TestRACKDetectDSACKDupWithCumulativeACK tests DSACK for two non-contiguous @@ -998,7 +1040,6 @@ func TestRACKWithWindowFull(t *testing.T) { defer c.Cleanup() setStackSACKPermitted(t, c, true) - setStackRACKPermitted(t, c) createConnectedWithSACKAndTS(c) seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1) diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go index 20c9761f2..83e0653b9 100644 --- a/pkg/tcpip/transport/tcp/tcp_sack_test.go +++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go @@ -61,6 +61,7 @@ func TestSackPermittedConnect(t *testing.T) { defer c.Cleanup() setStackSACKPermitted(t, c, sackEnabled) + setStackTCPRecovery(t, c, 0) rep := createConnectedWithSACKPermittedOption(c) data := []byte{1, 2, 3} @@ -105,6 +106,7 @@ func TestSackDisabledConnect(t *testing.T) { defer c.Cleanup() setStackSACKPermitted(t, c, sackEnabled) + setStackTCPRecovery(t, c, 0) rep := c.CreateConnectedWithOptions(header.TCPSynOptions{}) @@ -166,6 +168,7 @@ func TestSackPermittedAccept(t *testing.T) { } } setStackSACKPermitted(t, c, sackEnabled) + setStackTCPRecovery(t, c, 0) rep := c.AcceptWithOptions(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS, SACKPermitted: tc.sackPermitted}) // Now verify no SACK blocks are @@ -239,6 +242,7 @@ func TestSackDisabledAccept(t *testing.T) { } setStackSACKPermitted(t, c, sackEnabled) + setStackTCPRecovery(t, c, 0) rep := c.AcceptWithOptions(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS}) @@ -386,6 +390,7 @@ func TestSACKRecovery(t *testing.T) { log.Printf("state: %+v\n", s) }) setStackSACKPermitted(t, c, true) + setStackTCPRecovery(t, c, 0) createConnectedWithSACKAndTS(c) const iterations = 3 diff --git a/pkg/test/testutil/testutil_runfiles.go b/pkg/test/testutil/testutil_runfiles.go index ece9ea9a1..1dbd48a47 100644 --- a/pkg/test/testutil/testutil_runfiles.go +++ b/pkg/test/testutil/testutil_runfiles.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package testutil import ( |