summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/arch/BUILD4
-rw-r--r--pkg/sentry/arch/arch.go14
-rw-r--r--pkg/sentry/arch/signal.go276
-rw-r--r--pkg/sentry/arch/signal_act.go83
-rw-r--r--pkg/sentry/arch/signal_amd64.go26
-rw-r--r--pkg/sentry/arch/signal_arm64.go26
-rw-r--r--pkg/sentry/arch/signal_info.go66
-rw-r--r--pkg/sentry/arch/signal_stack.go68
-rw-r--r--pkg/sentry/arch/stack.go2
-rw-r--r--pkg/sentry/control/proc.go9
-rw-r--r--pkg/sentry/fs/attr.go14
-rw-r--r--pkg/sentry/fs/dev/dev.go8
-rw-r--r--pkg/sentry/fs/fsutil/host_mappable.go21
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached.go27
-rw-r--r--pkg/sentry/fs/gofer/BUILD1
-rw-r--r--pkg/sentry/fs/gofer/cache_policy.go2
-rw-r--r--pkg/sentry/fs/gofer/file.go19
-rw-r--r--pkg/sentry/fs/gofer/inode.go18
-rw-r--r--pkg/sentry/fs/gofer/path.go20
-rw-r--r--pkg/sentry/fs/proc/sys.go22
-rw-r--r--pkg/sentry/fs/proc/sys_net.go16
-rw-r--r--pkg/sentry/fs/proc/sys_net_state.go6
-rw-r--r--pkg/sentry/fs/tmpfs/fs.go2
-rw-r--r--pkg/sentry/fs/tmpfs/inode_file.go15
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go31
-rw-r--r--pkg/sentry/fs/user/user_test.go5
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/base.go27
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/cgroupfs.go16
-rw-r--r--pkg/sentry/fsimpl/gofer/BUILD1
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go24
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go23
-rw-r--r--pkg/sentry/fsimpl/gofer/regular_file.go3
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go3
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go13
-rw-r--r--pkg/sentry/fsimpl/overlay/filesystem.go16
-rw-r--r--pkg/sentry/fsimpl/overlay/regular_file.go13
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys.go19
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys_test.go2
-rw-r--r--pkg/sentry/fsimpl/testutil/BUILD1
-rw-r--r--pkg/sentry/fsimpl/testutil/kernel.go7
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go23
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go57
-rw-r--r--pkg/sentry/fsimpl/verity/verity.go13
-rw-r--r--pkg/sentry/fsmetric/fsmetric.go1
-rw-r--r--pkg/sentry/inet/inet.go3
-rw-r--r--pkg/sentry/inet/test_stack.go5
-rw-r--r--pkg/sentry/kernel/BUILD2
-rw-r--r--pkg/sentry/kernel/auth/BUILD2
-rw-r--r--pkg/sentry/kernel/auth/credentials.go2
-rw-r--r--pkg/sentry/kernel/cgroup.go71
-rw-r--r--pkg/sentry/kernel/fasync/BUILD1
-rw-r--r--pkg/sentry/kernel/fasync/fasync.go5
-rw-r--r--pkg/sentry/kernel/fd_table.go8
-rw-r--r--pkg/sentry/kernel/futex/BUILD2
-rw-r--r--pkg/sentry/kernel/kernel.go69
-rw-r--r--pkg/sentry/kernel/pending_signals.go9
-rw-r--r--pkg/sentry/kernel/pending_signals_state.go6
-rw-r--r--pkg/sentry/kernel/pipe/pipe_util.go8
-rw-r--r--pkg/sentry/kernel/posixtimer.go7
-rw-r--r--pkg/sentry/kernel/ptrace.go23
-rw-r--r--pkg/sentry/kernel/seccomp.go6
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore.go14
-rw-r--r--pkg/sentry/kernel/sessions.go3
-rw-r--r--pkg/sentry/kernel/signal.go15
-rw-r--r--pkg/sentry/kernel/signal_handlers.go17
-rw-r--r--pkg/sentry/kernel/task.go15
-rw-r--r--pkg/sentry/kernel/task_cgroup.go8
-rw-r--r--pkg/sentry/kernel/task_context.go5
-rw-r--r--pkg/sentry/kernel/task_exec.go3
-rw-r--r--pkg/sentry/kernel/task_exit.go43
-rw-r--r--pkg/sentry/kernel/task_sched.go10
-rw-r--r--pkg/sentry/kernel/task_signals.go124
-rw-r--r--pkg/sentry/kernel/task_start.go3
-rw-r--r--pkg/sentry/kernel/thread_group.go13
-rw-r--r--pkg/sentry/kernel/time/time.go19
-rw-r--r--pkg/sentry/loader/interpreter.go2
-rw-r--r--pkg/sentry/pgalloc/pgalloc.go18
-rw-r--r--pkg/sentry/platform/kvm/BUILD3
-rw-r--r--pkg/sentry/platform/kvm/address_space.go9
-rw-r--r--pkg/sentry/platform/kvm/address_space_amd64.go24
-rw-r--r--pkg/sentry/platform/kvm/address_space_arm64.go25
-rw-r--r--pkg/sentry/platform/kvm/context.go7
-rw-r--r--pkg/sentry/platform/kvm/kvm_amd64_test.go5
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go33
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go32
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go12
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go8
-rw-r--r--pkg/sentry/platform/platform.go6
-rw-r--r--pkg/sentry/platform/ptrace/ptrace.go4
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_unsafe.go2
-rw-r--r--pkg/sentry/platform/ptrace/subprocess.go3
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_amd64.go2
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_arm64.go2
-rw-r--r--pkg/sentry/sighandling/sighandling_unsafe.go13
-rw-r--r--pkg/sentry/socket/hostinet/BUILD2
-rw-r--r--pkg/sentry/socket/hostinet/stack.go24
-rw-r--r--pkg/sentry/socket/netfilter/BUILD1
-rw-r--r--pkg/sentry/socket/netfilter/extensions.go16
-rw-r--r--pkg/sentry/socket/netfilter/ipv4.go9
-rw-r--r--pkg/sentry/socket/netfilter/ipv6.go9
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go22
-rw-r--r--pkg/sentry/socket/netfilter/owner_matcher.go24
-rw-r--r--pkg/sentry/socket/netfilter/targets.go4
-rw-r--r--pkg/sentry/socket/netfilter/tcp_matcher.go5
-rw-r--r--pkg/sentry/socket/netfilter/udp_matcher.go5
-rw-r--r--pkg/sentry/socket/netstack/netstack.go184
-rw-r--r--pkg/sentry/socket/netstack/stack.go10
-rw-r--r--pkg/sentry/socket/netstack/tun.go2
-rw-r--r--pkg/sentry/strace/BUILD2
-rw-r--r--pkg/sentry/strace/clone.go46
-rw-r--r--pkg/sentry/strace/linux64_amd64.go2
-rw-r--r--pkg/sentry/strace/linux64_arm64.go2
-rw-r--r--pkg/sentry/strace/mmap.go92
-rw-r--r--pkg/sentry/strace/open.go42
-rw-r--r--pkg/sentry/strace/signal.go4
-rw-r--r--pkg/sentry/strace/strace.go4
-rw-r--r--pkg/sentry/strace/syscalls.go6
-rw-r--r--pkg/sentry/syscalls/linux/sys_file.go34
-rw-r--r--pkg/sentry/syscalls/linux/sys_signal.go41
-rw-r--r--pkg/sentry/syscalls/linux/sys_thread.go16
-rw-r--r--pkg/sentry/syscalls/linux/sys_time.go64
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/setstat.go4
-rw-r--r--pkg/sentry/time/BUILD2
-rw-r--r--pkg/sentry/time/sampler_amd64.go2
-rw-r--r--pkg/sentry/time/sampler_arm64.go2
-rw-r--r--pkg/sentry/vfs/file_description.go8
-rw-r--r--pkg/sentry/vfs/file_description_impl_util.go66
-rw-r--r--pkg/sentry/vfs/memxattr/BUILD1
-rw-r--r--pkg/sentry/vfs/memxattr/xattr.go33
-rw-r--r--pkg/sentry/vfs/mount.go2
-rw-r--r--pkg/sentry/vfs/opath.go38
-rw-r--r--pkg/sentry/watchdog/watchdog.go10
132 files changed, 1251 insertions, 1383 deletions
diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD
index c9c52530d..61dacd2fb 100644
--- a/pkg/sentry/arch/BUILD
+++ b/pkg/sentry/arch/BUILD
@@ -14,12 +14,8 @@ go_library(
"arch_x86.go",
"arch_x86_impl.go",
"auxv.go",
- "signal.go",
- "signal_act.go",
"signal_amd64.go",
"signal_arm64.go",
- "signal_info.go",
- "signal_stack.go",
"stack.go",
"stack_unsafe.go",
"syscalls_amd64.go",
diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go
index 290863ee6..c9393b091 100644
--- a/pkg/sentry/arch/arch.go
+++ b/pkg/sentry/arch/arch.go
@@ -134,21 +134,13 @@ type Context interface {
// RegisterMap returns a map of all registers.
RegisterMap() (map[string]uintptr, error)
- // NewSignalAct returns a new object that is equivalent to struct sigaction
- // in the guest architecture.
- NewSignalAct() NativeSignalAct
-
- // NewSignalStack returns a new object that is equivalent to stack_t in the
- // guest architecture.
- NewSignalStack() NativeSignalStack
-
// SignalSetup modifies the context in preparation for handling the
// given signal.
//
// st is the stack where the signal handler frame should be
// constructed.
//
- // act is the SignalAct that specifies how this signal is being
+ // act is the SigAction that specifies how this signal is being
// handled.
//
// info is the SignalInfo of the signal being delivered.
@@ -157,7 +149,7 @@ type Context interface {
// stack is not going to be used).
//
// sigset is the signal mask before entering the signal handler.
- SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt *SignalStack, sigset linux.SignalSet) error
+ SignalSetup(st *Stack, act *linux.SigAction, info *linux.SignalInfo, alt *linux.SignalStack, sigset linux.SignalSet) error
// SignalRestore restores context after returning from a signal
// handler.
@@ -167,7 +159,7 @@ type Context interface {
// rt is true if SignalRestore is being entered from rt_sigreturn and
// false if SignalRestore is being entered from sigreturn.
// SignalRestore returns the thread's new signal mask.
- SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalStack, error)
+ SignalRestore(st *Stack, rt bool) (linux.SignalSet, linux.SignalStack, error)
// CPUIDEmulate emulates a CPUID instruction according to current register state.
CPUIDEmulate(l log.Logger)
diff --git a/pkg/sentry/arch/signal.go b/pkg/sentry/arch/signal.go
deleted file mode 100644
index 67d7edf68..000000000
--- a/pkg/sentry/arch/signal.go
+++ /dev/null
@@ -1,276 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package arch
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/hostarch"
-)
-
-// SignalAct represents the action that should be taken when a signal is
-// delivered, and is equivalent to struct sigaction.
-//
-// +marshal
-// +stateify savable
-type SignalAct struct {
- Handler uint64
- Flags uint64
- Restorer uint64 // Only used on amd64.
- Mask linux.SignalSet
-}
-
-// SerializeFrom implements NativeSignalAct.SerializeFrom.
-func (s *SignalAct) SerializeFrom(other *SignalAct) {
- *s = *other
-}
-
-// DeserializeTo implements NativeSignalAct.DeserializeTo.
-func (s *SignalAct) DeserializeTo(other *SignalAct) {
- *other = *s
-}
-
-// SignalStack represents information about a user stack, and is equivalent to
-// stack_t.
-//
-// +marshal
-// +stateify savable
-type SignalStack struct {
- Addr uint64
- Flags uint32
- _ uint32
- Size uint64
-}
-
-// SerializeFrom implements NativeSignalStack.SerializeFrom.
-func (s *SignalStack) SerializeFrom(other *SignalStack) {
- *s = *other
-}
-
-// DeserializeTo implements NativeSignalStack.DeserializeTo.
-func (s *SignalStack) DeserializeTo(other *SignalStack) {
- *other = *s
-}
-
-// SignalInfo represents information about a signal being delivered, and is
-// equivalent to struct siginfo in linux kernel(linux/include/uapi/asm-generic/siginfo.h).
-//
-// +marshal
-// +stateify savable
-type SignalInfo struct {
- Signo int32 // Signal number
- Errno int32 // Errno value
- Code int32 // Signal code
- _ uint32
-
- // struct siginfo::_sifields is a union. In SignalInfo, fields in the union
- // are accessed through methods.
- //
- // For reference, here is the definition of _sifields: (_sigfault._trapno,
- // which does not exist on x86, omitted for clarity)
- //
- // union {
- // int _pad[SI_PAD_SIZE];
- //
- // /* kill() */
- // struct {
- // __kernel_pid_t _pid; /* sender's pid */
- // __ARCH_SI_UID_T _uid; /* sender's uid */
- // } _kill;
- //
- // /* POSIX.1b timers */
- // struct {
- // __kernel_timer_t _tid; /* timer id */
- // int _overrun; /* overrun count */
- // char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)];
- // sigval_t _sigval; /* same as below */
- // int _sys_private; /* not to be passed to user */
- // } _timer;
- //
- // /* POSIX.1b signals */
- // struct {
- // __kernel_pid_t _pid; /* sender's pid */
- // __ARCH_SI_UID_T _uid; /* sender's uid */
- // sigval_t _sigval;
- // } _rt;
- //
- // /* SIGCHLD */
- // struct {
- // __kernel_pid_t _pid; /* which child */
- // __ARCH_SI_UID_T _uid; /* sender's uid */
- // int _status; /* exit code */
- // __ARCH_SI_CLOCK_T _utime;
- // __ARCH_SI_CLOCK_T _stime;
- // } _sigchld;
- //
- // /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
- // struct {
- // void *_addr; /* faulting insn/memory ref. */
- // short _addr_lsb; /* LSB of the reported address */
- // } _sigfault;
- //
- // /* SIGPOLL */
- // struct {
- // __ARCH_SI_BAND_T _band; /* POLL_IN, POLL_OUT, POLL_MSG */
- // int _fd;
- // } _sigpoll;
- //
- // /* SIGSYS */
- // struct {
- // void *_call_addr; /* calling user insn */
- // int _syscall; /* triggering system call number */
- // unsigned int _arch; /* AUDIT_ARCH_* of syscall */
- // } _sigsys;
- // } _sifields;
- //
- // _sifields is padded so that the size of siginfo is SI_MAX_SIZE = 128
- // bytes.
- Fields [128 - 16]byte
-}
-
-// FixSignalCodeForUser fixes up si_code.
-//
-// The si_code we get from Linux may contain the kernel-specific code in the
-// top 16 bits if it's positive (e.g., from ptrace). Linux's
-// copy_siginfo_to_user does
-// err |= __put_user((short)from->si_code, &to->si_code);
-// to mask out those bits and we need to do the same.
-func (s *SignalInfo) FixSignalCodeForUser() {
- if s.Code > 0 {
- s.Code &= 0x0000ffff
- }
-}
-
-// PID returns the si_pid field.
-func (s *SignalInfo) PID() int32 {
- return int32(hostarch.ByteOrder.Uint32(s.Fields[0:4]))
-}
-
-// SetPID mutates the si_pid field.
-func (s *SignalInfo) SetPID(val int32) {
- hostarch.ByteOrder.PutUint32(s.Fields[0:4], uint32(val))
-}
-
-// UID returns the si_uid field.
-func (s *SignalInfo) UID() int32 {
- return int32(hostarch.ByteOrder.Uint32(s.Fields[4:8]))
-}
-
-// SetUID mutates the si_uid field.
-func (s *SignalInfo) SetUID(val int32) {
- hostarch.ByteOrder.PutUint32(s.Fields[4:8], uint32(val))
-}
-
-// Sigval returns the sigval field, which is aliased to both si_int and si_ptr.
-func (s *SignalInfo) Sigval() uint64 {
- return hostarch.ByteOrder.Uint64(s.Fields[8:16])
-}
-
-// SetSigval mutates the sigval field.
-func (s *SignalInfo) SetSigval(val uint64) {
- hostarch.ByteOrder.PutUint64(s.Fields[8:16], val)
-}
-
-// TimerID returns the si_timerid field.
-func (s *SignalInfo) TimerID() linux.TimerID {
- return linux.TimerID(hostarch.ByteOrder.Uint32(s.Fields[0:4]))
-}
-
-// SetTimerID sets the si_timerid field.
-func (s *SignalInfo) SetTimerID(val linux.TimerID) {
- hostarch.ByteOrder.PutUint32(s.Fields[0:4], uint32(val))
-}
-
-// Overrun returns the si_overrun field.
-func (s *SignalInfo) Overrun() int32 {
- return int32(hostarch.ByteOrder.Uint32(s.Fields[4:8]))
-}
-
-// SetOverrun sets the si_overrun field.
-func (s *SignalInfo) SetOverrun(val int32) {
- hostarch.ByteOrder.PutUint32(s.Fields[4:8], uint32(val))
-}
-
-// Addr returns the si_addr field.
-func (s *SignalInfo) Addr() uint64 {
- return hostarch.ByteOrder.Uint64(s.Fields[0:8])
-}
-
-// SetAddr sets the si_addr field.
-func (s *SignalInfo) SetAddr(val uint64) {
- hostarch.ByteOrder.PutUint64(s.Fields[0:8], val)
-}
-
-// Status returns the si_status field.
-func (s *SignalInfo) Status() int32 {
- return int32(hostarch.ByteOrder.Uint32(s.Fields[8:12]))
-}
-
-// SetStatus mutates the si_status field.
-func (s *SignalInfo) SetStatus(val int32) {
- hostarch.ByteOrder.PutUint32(s.Fields[8:12], uint32(val))
-}
-
-// CallAddr returns the si_call_addr field.
-func (s *SignalInfo) CallAddr() uint64 {
- return hostarch.ByteOrder.Uint64(s.Fields[0:8])
-}
-
-// SetCallAddr mutates the si_call_addr field.
-func (s *SignalInfo) SetCallAddr(val uint64) {
- hostarch.ByteOrder.PutUint64(s.Fields[0:8], val)
-}
-
-// Syscall returns the si_syscall field.
-func (s *SignalInfo) Syscall() int32 {
- return int32(hostarch.ByteOrder.Uint32(s.Fields[8:12]))
-}
-
-// SetSyscall mutates the si_syscall field.
-func (s *SignalInfo) SetSyscall(val int32) {
- hostarch.ByteOrder.PutUint32(s.Fields[8:12], uint32(val))
-}
-
-// Arch returns the si_arch field.
-func (s *SignalInfo) Arch() uint32 {
- return hostarch.ByteOrder.Uint32(s.Fields[12:16])
-}
-
-// SetArch mutates the si_arch field.
-func (s *SignalInfo) SetArch(val uint32) {
- hostarch.ByteOrder.PutUint32(s.Fields[12:16], val)
-}
-
-// Band returns the si_band field.
-func (s *SignalInfo) Band() int64 {
- return int64(hostarch.ByteOrder.Uint64(s.Fields[0:8]))
-}
-
-// SetBand mutates the si_band field.
-func (s *SignalInfo) SetBand(val int64) {
- // Note: this assumes the platform uses `long` as `__ARCH_SI_BAND_T`.
- // On some platforms, which gVisor doesn't support, `__ARCH_SI_BAND_T` is
- // `int`. See siginfo.h.
- hostarch.ByteOrder.PutUint64(s.Fields[0:8], uint64(val))
-}
-
-// FD returns the si_fd field.
-func (s *SignalInfo) FD() uint32 {
- return hostarch.ByteOrder.Uint32(s.Fields[8:12])
-}
-
-// SetFD mutates the si_fd field.
-func (s *SignalInfo) SetFD(val uint32) {
- hostarch.ByteOrder.PutUint32(s.Fields[8:12], val)
-}
diff --git a/pkg/sentry/arch/signal_act.go b/pkg/sentry/arch/signal_act.go
deleted file mode 100644
index d3e2324a8..000000000
--- a/pkg/sentry/arch/signal_act.go
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package arch
-
-import "gvisor.dev/gvisor/pkg/marshal"
-
-// Special values for SignalAct.Handler.
-const (
- // SignalActDefault is SIG_DFL and specifies that the default behavior for
- // a signal should be taken.
- SignalActDefault = 0
-
- // SignalActIgnore is SIG_IGN and specifies that a signal should be
- // ignored.
- SignalActIgnore = 1
-)
-
-// Available signal flags.
-const (
- SignalFlagNoCldStop = 0x00000001
- SignalFlagNoCldWait = 0x00000002
- SignalFlagSigInfo = 0x00000004
- SignalFlagRestorer = 0x04000000
- SignalFlagOnStack = 0x08000000
- SignalFlagRestart = 0x10000000
- SignalFlagInterrupt = 0x20000000
- SignalFlagNoDefer = 0x40000000
- SignalFlagResetHandler = 0x80000000
-)
-
-// IsSigInfo returns true iff this handle expects siginfo.
-func (s SignalAct) IsSigInfo() bool {
- return s.Flags&SignalFlagSigInfo != 0
-}
-
-// IsNoDefer returns true iff this SignalAct has the NoDefer flag set.
-func (s SignalAct) IsNoDefer() bool {
- return s.Flags&SignalFlagNoDefer != 0
-}
-
-// IsRestart returns true iff this SignalAct has the Restart flag set.
-func (s SignalAct) IsRestart() bool {
- return s.Flags&SignalFlagRestart != 0
-}
-
-// IsResetHandler returns true iff this SignalAct has the ResetHandler flag set.
-func (s SignalAct) IsResetHandler() bool {
- return s.Flags&SignalFlagResetHandler != 0
-}
-
-// IsOnStack returns true iff this SignalAct has the OnStack flag set.
-func (s SignalAct) IsOnStack() bool {
- return s.Flags&SignalFlagOnStack != 0
-}
-
-// HasRestorer returns true iff this SignalAct has the Restorer flag set.
-func (s SignalAct) HasRestorer() bool {
- return s.Flags&SignalFlagRestorer != 0
-}
-
-// NativeSignalAct is a type that is equivalent to struct sigaction in the
-// guest architecture.
-type NativeSignalAct interface {
- marshal.Marshallable
-
- // SerializeFrom copies the data in the host SignalAct s into this object.
- SerializeFrom(s *SignalAct)
-
- // DeserializeTo copies the data in this object into the host SignalAct s.
- DeserializeTo(s *SignalAct)
-}
diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go
index 082ed92b1..58e28dbba 100644
--- a/pkg/sentry/arch/signal_amd64.go
+++ b/pkg/sentry/arch/signal_amd64.go
@@ -76,21 +76,11 @@ const (
type UContext64 struct {
Flags uint64
Link uint64
- Stack SignalStack
+ Stack linux.SignalStack
MContext SignalContext64
Sigset linux.SignalSet
}
-// NewSignalAct implements Context.NewSignalAct.
-func (c *context64) NewSignalAct() NativeSignalAct {
- return &SignalAct{}
-}
-
-// NewSignalStack implements Context.NewSignalStack.
-func (c *context64) NewSignalStack() NativeSignalStack {
- return &SignalStack{}
-}
-
// From Linux 'arch/x86/include/uapi/asm/sigcontext.h' the following is the
// size of the magic cookie at the end of the xsave frame.
//
@@ -110,7 +100,7 @@ func (c *context64) fpuFrameSize() (size int, useXsave bool) {
// SignalSetup implements Context.SignalSetup. (Compare to Linux's
// arch/x86/kernel/signal.c:__setup_rt_frame().)
-func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt *SignalStack, sigset linux.SignalSet) error {
+func (c *context64) SignalSetup(st *Stack, act *linux.SigAction, info *linux.SignalInfo, alt *linux.SignalStack, sigset linux.SignalSet) error {
sp := st.Bottom
// "The 128-byte area beyond the location pointed to by %rsp is considered
@@ -187,7 +177,7 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
// Prior to proceeding, figure out if the frame will exhaust the range
// for the signal stack. This is not allowed, and should immediately
// force signal delivery (reverting to the default handler).
- if act.IsOnStack() && alt.IsEnabled() && !alt.Contains(frameBottom) {
+ if act.Flags&linux.SA_ONSTACK != 0 && alt.IsEnabled() && !alt.Contains(frameBottom) {
return unix.EFAULT
}
@@ -203,7 +193,7 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
return err
}
ucAddr := st.Bottom
- if act.HasRestorer() {
+ if act.Flags&linux.SA_RESTORER != 0 {
// Push the restorer return address.
// Note that this doesn't need to be popped.
if _, err := primitive.CopyUint64Out(st, StackBottomMagic, act.Restorer); err != nil {
@@ -237,15 +227,15 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
// SignalRestore implements Context.SignalRestore. (Compare to Linux's
// arch/x86/kernel/signal.c:sys_rt_sigreturn().)
-func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalStack, error) {
+func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, linux.SignalStack, error) {
// Copy out the stack frame.
var uc UContext64
if _, err := uc.CopyIn(st, StackBottomMagic); err != nil {
- return 0, SignalStack{}, err
+ return 0, linux.SignalStack{}, err
}
- var info SignalInfo
+ var info linux.SignalInfo
if _, err := info.CopyIn(st, StackBottomMagic); err != nil {
- return 0, SignalStack{}, err
+ return 0, linux.SignalStack{}, err
}
// Restore registers.
diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go
index da71fb873..80df90076 100644
--- a/pkg/sentry/arch/signal_arm64.go
+++ b/pkg/sentry/arch/signal_arm64.go
@@ -61,7 +61,7 @@ type FpsimdContext struct {
type UContext64 struct {
Flags uint64
Link uint64
- Stack SignalStack
+ Stack linux.SignalStack
Sigset linux.SignalSet
// glibc uses a 1024-bit sigset_t
_pad [120]byte // (1024 - 64) / 8 = 120
@@ -71,18 +71,8 @@ type UContext64 struct {
MContext SignalContext64
}
-// NewSignalAct implements Context.NewSignalAct.
-func (c *context64) NewSignalAct() NativeSignalAct {
- return &SignalAct{}
-}
-
-// NewSignalStack implements Context.NewSignalStack.
-func (c *context64) NewSignalStack() NativeSignalStack {
- return &SignalStack{}
-}
-
// SignalSetup implements Context.SignalSetup.
-func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt *SignalStack, sigset linux.SignalSet) error {
+func (c *context64) SignalSetup(st *Stack, act *linux.SigAction, info *linux.SignalInfo, alt *linux.SignalStack, sigset linux.SignalSet) error {
sp := st.Bottom
// Construct the UContext64 now since we need its size.
@@ -114,7 +104,7 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
// Prior to proceeding, figure out if the frame will exhaust the range
// for the signal stack. This is not allowed, and should immediately
// force signal delivery (reverting to the default handler).
- if act.IsOnStack() && alt.IsEnabled() && !alt.Contains(frameBottom) {
+ if act.Flags&linux.SA_ONSTACK != 0 && alt.IsEnabled() && !alt.Contains(frameBottom) {
return unix.EFAULT
}
@@ -137,7 +127,7 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
c.Regs.Regs[0] = uint64(info.Signo)
c.Regs.Regs[1] = uint64(infoAddr)
c.Regs.Regs[2] = uint64(ucAddr)
- c.Regs.Regs[30] = uint64(act.Restorer)
+ c.Regs.Regs[30] = act.Restorer
// Save the thread's floating point state.
c.sigFPState = append(c.sigFPState, c.fpState)
@@ -147,15 +137,15 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
}
// SignalRestore implements Context.SignalRestore.
-func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalStack, error) {
+func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, linux.SignalStack, error) {
// Copy out the stack frame.
var uc UContext64
if _, err := uc.CopyIn(st, StackBottomMagic); err != nil {
- return 0, SignalStack{}, err
+ return 0, linux.SignalStack{}, err
}
- var info SignalInfo
+ var info linux.SignalInfo
if _, err := info.CopyIn(st, StackBottomMagic); err != nil {
- return 0, SignalStack{}, err
+ return 0, linux.SignalStack{}, err
}
// Restore registers.
diff --git a/pkg/sentry/arch/signal_info.go b/pkg/sentry/arch/signal_info.go
deleted file mode 100644
index f93ee8b46..000000000
--- a/pkg/sentry/arch/signal_info.go
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package arch
-
-// Possible values for SignalInfo.Code. These values originate from the Linux
-// kernel's include/uapi/asm-generic/siginfo.h.
-const (
- // SignalInfoUser (properly SI_USER) indicates that a signal was sent from
- // a kill() or raise() syscall.
- SignalInfoUser = 0
-
- // SignalInfoKernel (properly SI_KERNEL) indicates that the signal was sent
- // by the kernel.
- SignalInfoKernel = 0x80
-
- // SignalInfoTimer (properly SI_TIMER) indicates that the signal was sent
- // by an expired timer.
- SignalInfoTimer = -2
-
- // SignalInfoTkill (properly SI_TKILL) indicates that the signal was sent
- // from a tkill() or tgkill() syscall.
- SignalInfoTkill = -6
-
- // CLD_* codes are only meaningful for SIGCHLD.
-
- // CLD_EXITED indicates that a task exited.
- CLD_EXITED = 1
-
- // CLD_KILLED indicates that a task was killed by a signal.
- CLD_KILLED = 2
-
- // CLD_DUMPED indicates that a task was killed by a signal and then dumped
- // core.
- CLD_DUMPED = 3
-
- // CLD_TRAPPED indicates that a task was stopped by ptrace.
- CLD_TRAPPED = 4
-
- // CLD_STOPPED indicates that a thread group completed a group stop.
- CLD_STOPPED = 5
-
- // CLD_CONTINUED indicates that a group-stopped thread group was continued.
- CLD_CONTINUED = 6
-
- // SYS_* codes are only meaningful for SIGSYS.
-
- // SYS_SECCOMP indicates that a signal originates from seccomp.
- SYS_SECCOMP = 1
-
- // TRAP_* codes are only meaningful for SIGTRAP.
-
- // TRAP_BRKPT indicates a breakpoint trap.
- TRAP_BRKPT = 1
-)
diff --git a/pkg/sentry/arch/signal_stack.go b/pkg/sentry/arch/signal_stack.go
deleted file mode 100644
index c732c7503..000000000
--- a/pkg/sentry/arch/signal_stack.go
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build 386 amd64 arm64
-
-package arch
-
-import (
- "gvisor.dev/gvisor/pkg/hostarch"
- "gvisor.dev/gvisor/pkg/marshal"
-)
-
-const (
- // SignalStackFlagOnStack is possible set on return from getaltstack,
- // in order to indicate that the thread is currently on the alt stack.
- SignalStackFlagOnStack = 1
-
- // SignalStackFlagDisable is a flag to indicate the stack is disabled.
- SignalStackFlagDisable = 2
-)
-
-// IsEnabled returns true iff this signal stack is marked as enabled.
-func (s SignalStack) IsEnabled() bool {
- return s.Flags&SignalStackFlagDisable == 0
-}
-
-// Top returns the stack's top address.
-func (s SignalStack) Top() hostarch.Addr {
- return hostarch.Addr(s.Addr + s.Size)
-}
-
-// SetOnStack marks this signal stack as in use.
-//
-// Note that there is no corresponding ClearOnStack, and that this should only
-// be called on copies that are serialized to userspace.
-func (s *SignalStack) SetOnStack() {
- s.Flags |= SignalStackFlagOnStack
-}
-
-// Contains checks if the stack pointer is within this stack.
-func (s *SignalStack) Contains(sp hostarch.Addr) bool {
- return hostarch.Addr(s.Addr) < sp && sp <= hostarch.Addr(s.Addr+s.Size)
-}
-
-// NativeSignalStack is a type that is equivalent to stack_t in the guest
-// architecture.
-type NativeSignalStack interface {
- marshal.Marshallable
-
- // SerializeFrom copies the data in the host SignalStack s into this
- // object.
- SerializeFrom(s *SignalStack)
-
- // DeserializeTo copies the data in this object into the host SignalStack
- // s.
- DeserializeTo(s *SignalStack)
-}
diff --git a/pkg/sentry/arch/stack.go b/pkg/sentry/arch/stack.go
index 65a794c7c..85e3515af 100644
--- a/pkg/sentry/arch/stack.go
+++ b/pkg/sentry/arch/stack.go
@@ -45,7 +45,7 @@ type Stack struct {
}
// scratchBufLen is the default length of Stack.scratchBuf. The
-// largest structs the stack regularly serializes are arch.SignalInfo
+// largest structs the stack regularly serializes are linux.SignalInfo
// and arch.UContext64. We'll set the default size as the larger of
// the two, arch.UContext64.
var scratchBufLen = (*UContext64)(nil).SizeBytes()
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index 367849e75..221e98a01 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -99,6 +99,9 @@ type ExecArgs struct {
// PIDNamespace is the pid namespace for the process being executed.
PIDNamespace *kernel.PIDNamespace
+
+ // Limits is the limit set for the process being executed.
+ Limits *limits.LimitSet
}
// String prints the arguments as a string.
@@ -151,6 +154,10 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
if pidns == nil {
pidns = proc.Kernel.RootPIDNamespace()
}
+ limitSet := args.Limits
+ if limitSet == nil {
+ limitSet = limits.NewLimitSet()
+ }
initArgs := kernel.CreateProcessArgs{
Filename: args.Filename,
Argv: args.Argv,
@@ -161,7 +168,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
Credentials: creds,
FDTable: fdTable,
Umask: 0022,
- Limits: limits.NewLimitSet(),
+ Limits: limitSet,
MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
UTSNamespace: proc.Kernel.RootUTSNamespace(),
IPCNamespace: proc.Kernel.RootIPCNamespace(),
diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go
index b90f7c1be..4c99944e7 100644
--- a/pkg/sentry/fs/attr.go
+++ b/pkg/sentry/fs/attr.go
@@ -478,6 +478,20 @@ func (f FilePermissions) AnyRead() bool {
return f.User.Read || f.Group.Read || f.Other.Read
}
+// HasSetUIDOrGID returns true if either the setuid or setgid bit is set.
+func (f FilePermissions) HasSetUIDOrGID() bool {
+ return f.SetUID || f.SetGID
+}
+
+// DropSetUIDAndMaybeGID turns off setuid, and turns off setgid if f allows
+// group execution.
+func (f *FilePermissions) DropSetUIDAndMaybeGID() {
+ f.SetUID = false
+ if f.Group.Execute {
+ f.SetGID = false
+ }
+}
+
// FileOwner represents ownership of a file.
//
// +stateify savable
diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go
index e84ba7a5d..c62effd52 100644
--- a/pkg/sentry/fs/dev/dev.go
+++ b/pkg/sentry/fs/dev/dev.go
@@ -16,6 +16,7 @@
package dev
import (
+ "fmt"
"math"
"gvisor.dev/gvisor/pkg/context"
@@ -90,6 +91,11 @@ func newSymlink(ctx context.Context, target string, msrc *fs.MountSource) *fs.In
// New returns the root node of a device filesystem.
func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
+ shm, err := tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc, nil /* parent */)
+ if err != nil {
+ panic(fmt.Sprintf("tmpfs.NewDir failed: %v", err))
+ }
+
contents := map[string]*fs.Inode{
"fd": newSymlink(ctx, "/proc/self/fd", msrc),
"stdin": newSymlink(ctx, "/proc/self/fd/0", msrc),
@@ -108,7 +114,7 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
"random": newMemDevice(ctx, newRandomDevice(ctx, fs.RootOwner, 0444), msrc, randomDevMinor),
"urandom": newMemDevice(ctx, newRandomDevice(ctx, fs.RootOwner, 0444), msrc, urandomDevMinor),
- "shm": tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc),
+ "shm": shm,
// A devpts is typically mounted at /dev/pts to provide
// pseudoterminal support. Place an empty directory there for
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
index e1e38b498..8ac3738e9 100644
--- a/pkg/sentry/fs/fsutil/host_mappable.go
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -155,12 +155,20 @@ func (h *HostMappable) DecRef(fr memmap.FileRange) {
// T2: Appends to file causing it to grow
// T2: Writes to mapped pages and COW happens
// T1: Continues and wronly invalidates the page mapped in step above.
-func (h *HostMappable) Truncate(ctx context.Context, newSize int64) error {
+func (h *HostMappable) Truncate(ctx context.Context, newSize int64, uattr fs.UnstableAttr) error {
h.truncateMu.Lock()
defer h.truncateMu.Unlock()
mask := fs.AttrMask{Size: true}
attr := fs.UnstableAttr{Size: newSize}
+
+ // Truncating a file clears privilege bits.
+ if uattr.Perms.HasSetUIDOrGID() {
+ mask.Perms = true
+ attr.Perms = uattr.Perms
+ attr.Perms.DropSetUIDAndMaybeGID()
+ }
+
if err := h.backingFile.SetMaskedAttributes(ctx, mask, attr, false); err != nil {
return err
}
@@ -193,10 +201,17 @@ func (h *HostMappable) Allocate(ctx context.Context, offset int64, length int64)
}
// Write writes to the file backing this mappable.
-func (h *HostMappable) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+func (h *HostMappable) Write(ctx context.Context, src usermem.IOSequence, offset int64, uattr fs.UnstableAttr) (int64, error) {
h.truncateMu.RLock()
+ defer h.truncateMu.RUnlock()
n, err := src.CopyInTo(ctx, &writer{ctx: ctx, hostMappable: h, off: offset})
- h.truncateMu.RUnlock()
+ if n > 0 && uattr.Perms.HasSetUIDOrGID() {
+ mask := fs.AttrMask{Perms: true}
+ uattr.Perms.DropSetUIDAndMaybeGID()
+ if err := h.backingFile.SetMaskedAttributes(ctx, mask, uattr, false); err != nil {
+ return n, err
+ }
+ }
return n, err
}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index 7856b354b..855029b84 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -310,6 +310,12 @@ func (c *CachingInodeOperations) Truncate(ctx context.Context, inode *fs.Inode,
now := ktime.NowFromContext(ctx)
masked := fs.AttrMask{Size: true}
attr := fs.UnstableAttr{Size: size}
+ if c.attr.Perms.HasSetUIDOrGID() {
+ masked.Perms = true
+ attr.Perms = c.attr.Perms
+ attr.Perms.DropSetUIDAndMaybeGID()
+ c.attr.Perms = attr.Perms
+ }
if err := c.backingFile.SetMaskedAttributes(ctx, masked, attr, false); err != nil {
c.dataMu.Unlock()
return err
@@ -685,13 +691,14 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
return done, nil
}
-// maybeGrowFile grows the file's size if data has been written past the old
-// size.
+// maybeUpdateAttrs updates the file's attributes after a write. It updates
+// size if data has been written past the old size, and setuid/setgid if any
+// bytes were written.
//
// Preconditions:
// * rw.c.attrMu must be locked.
// * rw.c.dataMu must be locked.
-func (rw *inodeReadWriter) maybeGrowFile() {
+func (rw *inodeReadWriter) maybeUpdateAttrs(nwritten uint64) {
// If the write ends beyond the file's previous size, it causes the
// file to grow.
if rw.offset > rw.c.attr.Size {
@@ -705,6 +712,12 @@ func (rw *inodeReadWriter) maybeGrowFile() {
rw.c.attr.Usage = rw.offset
rw.c.dirtyAttr.Usage = true
}
+
+ // If bytes were written, ensure setuid and setgid are cleared.
+ if nwritten > 0 && rw.c.attr.Perms.HasSetUIDOrGID() {
+ rw.c.dirtyAttr.Perms = true
+ rw.c.attr.Perms.DropSetUIDAndMaybeGID()
+ }
}
// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
@@ -732,7 +745,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
segMR := seg.Range().Intersect(mr)
ims, err := mf.MapInternal(seg.FileRangeOf(segMR), hostarch.Write)
if err != nil {
- rw.maybeGrowFile()
+ rw.maybeUpdateAttrs(done)
rw.c.dataMu.Unlock()
return done, err
}
@@ -744,7 +757,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
srcs = srcs.DropFirst64(n)
rw.c.dirty.MarkDirty(segMR)
if err != nil {
- rw.maybeGrowFile()
+ rw.maybeUpdateAttrs(done)
rw.c.dataMu.Unlock()
return done, err
}
@@ -765,7 +778,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
srcs = srcs.DropFirst64(n)
// Partial writes are fine. But we must stop writing.
if n != src.NumBytes() || err != nil {
- rw.maybeGrowFile()
+ rw.maybeUpdateAttrs(done)
rw.c.dataMu.Unlock()
return done, err
}
@@ -774,7 +787,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
seg, gap = gap.NextSegment(), FileRangeGapIterator{}
}
}
- rw.maybeGrowFile()
+ rw.maybeUpdateAttrs(done)
rw.c.dataMu.Unlock()
return done, nil
}
diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD
index c4a069832..94cb05246 100644
--- a/pkg/sentry/fs/gofer/BUILD
+++ b/pkg/sentry/fs/gofer/BUILD
@@ -29,6 +29,7 @@ go_library(
"//pkg/fd",
"//pkg/hostarch",
"//pkg/log",
+ "//pkg/metric",
"//pkg/p9",
"//pkg/refs",
"//pkg/safemem",
diff --git a/pkg/sentry/fs/gofer/cache_policy.go b/pkg/sentry/fs/gofer/cache_policy.go
index 07a564e92..f8b7a60fc 100644
--- a/pkg/sentry/fs/gofer/cache_policy.go
+++ b/pkg/sentry/fs/gofer/cache_policy.go
@@ -139,7 +139,7 @@ func (cp cachePolicy) revalidate(ctx context.Context, name string, parent, child
// Walk from parent to child again.
//
- // TODO(b/112031682): If we have a directory FD in the parent
+ // NOTE(b/112031682): If we have a directory FD in the parent
// inodeOperations, then we can use fstatat(2) to get the inode
// attributes instead of making this RPC.
qids, f, mask, attr, err := parentIops.fileState.file.walkGetAttr(ctx, []string{name})
diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go
index 8f5a87120..73d80d9b5 100644
--- a/pkg/sentry/fs/gofer/file.go
+++ b/pkg/sentry/fs/gofer/file.go
@@ -21,6 +21,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/sentry/device"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -91,7 +92,7 @@ func NewFile(ctx context.Context, dirent *fs.Dirent, name string, flags fs.FileF
}
if flags.Write {
if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Execute: true}); err == nil {
- fsmetric.GoferOpensWX.Increment()
+ metric.SuspiciousOperationsMetric.Increment("opened_write_execute_file")
log.Warningf("Opened a writable executable: %q", name)
}
}
@@ -236,10 +237,20 @@ func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.I
// and availability of a host-mappable FD.
if f.inodeOperations.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) {
n, err = f.inodeOperations.cachingInodeOps.Write(ctx, src, offset)
- } else if f.inodeOperations.fileState.hostMappable != nil {
- n, err = f.inodeOperations.fileState.hostMappable.Write(ctx, src, offset)
} else {
- n, err = src.CopyInTo(ctx, f.handles.readWriterAt(ctx, offset))
+ uattr, e := f.UnstableAttr(ctx, file)
+ if e != nil {
+ return 0, e
+ }
+ if f.inodeOperations.fileState.hostMappable != nil {
+ n, err = f.inodeOperations.fileState.hostMappable.Write(ctx, src, offset, uattr)
+ } else {
+ n, err = src.CopyInTo(ctx, f.handles.readWriterAt(ctx, offset))
+ if n > 0 && uattr.Perms.HasSetUIDOrGID() {
+ uattr.Perms.DropSetUIDAndMaybeGID()
+ f.inodeOperations.SetPermissions(ctx, file.Dirent.Inode, uattr.Perms)
+ }
+ }
}
if n == 0 {
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
index b97635ec4..da3178527 100644
--- a/pkg/sentry/fs/gofer/inode.go
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -600,11 +600,25 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length
if i.session().cachePolicy.useCachingInodeOps(inode) {
return i.cachingInodeOps.Truncate(ctx, inode, length)
}
+
+ uattr, err := i.fileState.unstableAttr(ctx)
+ if err != nil {
+ return err
+ }
+
if i.session().cachePolicy == cacheRemoteRevalidating {
- return i.fileState.hostMappable.Truncate(ctx, length)
+ return i.fileState.hostMappable.Truncate(ctx, length, uattr)
+ }
+
+ mask := p9.SetAttrMask{Size: true}
+ attr := p9.SetAttr{Size: uint64(length)}
+ if uattr.Perms.HasSetUIDOrGID() {
+ mask.Permissions = true
+ uattr.Perms.DropSetUIDAndMaybeGID()
+ attr.Permissions = p9.FileMode(uattr.Perms.LinuxMode())
}
- return i.fileState.file.setAttr(ctx, p9.SetAttrMask{Size: true}, p9.SetAttr{Size: uint64(length)})
+ return i.fileState.file.setAttr(ctx, mask, attr)
}
// GetXattr implements fs.InodeOperations.GetXattr.
diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go
index 6b3627813..940838a44 100644
--- a/pkg/sentry/fs/gofer/path.go
+++ b/pkg/sentry/fs/gofer/path.go
@@ -130,7 +130,16 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string
panic(fmt.Sprintf("Create called with unknown or unset open flags: %v", flags))
}
+ // If the parent directory has setgid enabled, change the new file's owner.
owner := fs.FileOwnerFromContext(ctx)
+ parentUattr, err := dir.UnstableAttr(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if parentUattr.Perms.SetGID {
+ owner.GID = parentUattr.Owner.GID
+ }
+
hostFile, err := newFile.create(ctx, name, openFlags, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID))
if err != nil {
// Could not create the file.
@@ -225,7 +234,18 @@ func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s
return syserror.ENAMETOOLONG
}
+ // If the parent directory has setgid enabled, change the new directory's
+ // owner and enable setgid.
owner := fs.FileOwnerFromContext(ctx)
+ parentUattr, err := dir.UnstableAttr(ctx)
+ if err != nil {
+ return err
+ }
+ if parentUattr.Perms.SetGID {
+ owner.GID = parentUattr.Owner.GID
+ perm.SetGID = true
+ }
+
if _, err := i.fileState.file.mkdir(ctx, s, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
return err
}
diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go
index b998fb75d..085aa6d61 100644
--- a/pkg/sentry/fs/proc/sys.go
+++ b/pkg/sentry/fs/proc/sys.go
@@ -77,6 +77,27 @@ func (*overcommitMemory) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandl
}, 0
}
+// +stateify savable
+type maxMapCount struct{}
+
+// NeedsUpdate implements seqfile.SeqSource.
+func (*maxMapCount) NeedsUpdate(int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.
+func (*maxMapCount) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0
+ }
+ return []seqfile.SeqData{
+ {
+ Buf: []byte("2147483647\n"),
+ Handle: (*maxMapCount)(nil),
+ },
+ }, 0
+}
+
func (p *proc) newKernelDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
h := hostname{
SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC),
@@ -96,6 +117,7 @@ func (p *proc) newKernelDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode
func (p *proc) newVMDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
children := map[string]*fs.Inode{
+ "max_map_count": seqfile.NewSeqFileInode(ctx, &maxMapCount{}, msrc),
"mmap_min_addr": seqfile.NewSeqFileInode(ctx, &mmapMinAddrData{p.k}, msrc),
"overcommit_memory": seqfile.NewSeqFileInode(ctx, &overcommitMemory{}, msrc),
}
diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go
index 1d09afdd7..4893af56b 100644
--- a/pkg/sentry/fs/proc/sys_net.go
+++ b/pkg/sentry/fs/proc/sys_net.go
@@ -403,7 +403,7 @@ type ipForwarding struct {
// enabled stores the IPv4 forwarding state on save.
// We must save/restore this here, since a netstack instance
// is created on restore.
- enabled *bool
+ enabled bool
}
func newIPForwardingInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
@@ -461,13 +461,8 @@ func (f *ipForwardingFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
return 0, io.EOF
}
- if f.ipf.enabled == nil {
- enabled := f.stack.Forwarding(ipv4.ProtocolNumber)
- f.ipf.enabled = &enabled
- }
-
val := "0\n"
- if *f.ipf.enabled {
+ if f.ipf.enabled {
// Technically, this is not quite compatible with Linux. Linux
// stores these as an integer, so if you write "2" into
// ip_forward, you should get 2 back.
@@ -494,11 +489,8 @@ func (f *ipForwardingFile) Write(ctx context.Context, _ *fs.File, src usermem.IO
if err != nil {
return n, err
}
- if f.ipf.enabled == nil {
- f.ipf.enabled = new(bool)
- }
- *f.ipf.enabled = v != 0
- return n, f.stack.SetForwarding(ipv4.ProtocolNumber, *f.ipf.enabled)
+ f.ipf.enabled = v != 0
+ return n, f.stack.SetForwarding(ipv4.ProtocolNumber, f.ipf.enabled)
}
// portRangeInode implements fs.InodeOperations. It provides and allows
diff --git a/pkg/sentry/fs/proc/sys_net_state.go b/pkg/sentry/fs/proc/sys_net_state.go
index 4cb4741af..51d2be647 100644
--- a/pkg/sentry/fs/proc/sys_net_state.go
+++ b/pkg/sentry/fs/proc/sys_net_state.go
@@ -47,9 +47,7 @@ func (s *tcpSack) afterLoad() {
// afterLoad is invoked by stateify.
func (ipf *ipForwarding) afterLoad() {
- if ipf.enabled != nil {
- if err := ipf.stack.SetForwarding(ipv4.ProtocolNumber, *ipf.enabled); err != nil {
- panic(fmt.Sprintf("failed to set IPv4 forwarding [%v]: %v", *ipf.enabled, err))
- }
+ if err := ipf.stack.SetForwarding(ipv4.ProtocolNumber, ipf.enabled); err != nil {
+ panic(fmt.Sprintf("ipf.stack.SetForwarding(%d, %t): %s", ipv4.ProtocolNumber, ipf.enabled, err))
}
}
diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go
index bc117ca6a..b48d475ed 100644
--- a/pkg/sentry/fs/tmpfs/fs.go
+++ b/pkg/sentry/fs/tmpfs/fs.go
@@ -151,5 +151,5 @@ func (f *Filesystem) Mount(ctx context.Context, device string, flags fs.MountSou
}
// Construct the tmpfs root.
- return NewDir(ctx, nil, owner, perms, msrc), nil
+ return NewDir(ctx, nil, owner, perms, msrc, nil /* parent */)
}
diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go
index f4de8c968..7faa822f0 100644
--- a/pkg/sentry/fs/tmpfs/inode_file.go
+++ b/pkg/sentry/fs/tmpfs/inode_file.go
@@ -226,6 +226,12 @@ func (f *fileInodeOperations) Truncate(ctx context.Context, _ *fs.Inode, size in
now := ktime.NowFromContext(ctx)
f.attr.ModificationTime = now
f.attr.StatusChangeTime = now
+
+ // Truncating clears privilege bits.
+ f.attr.Perms.SetUID = false
+ if f.attr.Perms.Group.Execute {
+ f.attr.Perms.SetGID = false
+ }
}
f.dataMu.Unlock()
@@ -363,7 +369,14 @@ func (f *fileInodeOperations) write(ctx context.Context, src usermem.IOSequence,
now := ktime.NowFromContext(ctx)
f.attr.ModificationTime = now
f.attr.StatusChangeTime = now
- return src.CopyInTo(ctx, &fileReadWriter{f, offset})
+ nwritten, err := src.CopyInTo(ctx, &fileReadWriter{f, offset})
+
+ // Writing clears privilege bits.
+ if nwritten > 0 {
+ f.attr.Perms.DropSetUIDAndMaybeGID()
+ }
+
+ return nwritten, err
}
type fileReadWriter struct {
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index 577052888..6aa8ff331 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -87,7 +87,20 @@ type Dir struct {
var _ fs.InodeOperations = (*Dir)(nil)
// NewDir returns a new directory.
-func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwner, perms fs.FilePermissions, msrc *fs.MountSource) *fs.Inode {
+func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwner, perms fs.FilePermissions, msrc *fs.MountSource, parent *fs.Inode) (*fs.Inode, error) {
+ // If the parent has setgid enabled, the new directory enables it and changes
+ // its GID.
+ if parent != nil {
+ parentUattr, err := parent.UnstableAttr(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if parentUattr.Perms.SetGID {
+ owner.GID = parentUattr.Owner.GID
+ perms.SetGID = true
+ }
+ }
+
d := &Dir{
ramfsDir: ramfs.NewDir(ctx, contents, owner, perms),
kernel: kernel.KernelFromContext(ctx),
@@ -101,7 +114,7 @@ func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwn
InodeID: tmpfsDevice.NextIno(),
BlockSize: hostarch.PageSize,
Type: fs.Directory,
- })
+ }), nil
}
// afterLoad is invoked by stateify.
@@ -219,11 +232,21 @@ func (d *Dir) SetTimestamps(ctx context.Context, i *fs.Inode, ts fs.TimeSpec) er
func (d *Dir) newCreateOps() *ramfs.CreateOps {
return &ramfs.CreateOps{
NewDir: func(ctx context.Context, dir *fs.Inode, perms fs.FilePermissions) (*fs.Inode, error) {
- return NewDir(ctx, nil, fs.FileOwnerFromContext(ctx), perms, dir.MountSource), nil
+ return NewDir(ctx, nil, fs.FileOwnerFromContext(ctx), perms, dir.MountSource, dir)
},
NewFile: func(ctx context.Context, dir *fs.Inode, perms fs.FilePermissions) (*fs.Inode, error) {
+ // If the parent has setgid enabled, change the GID of the new file.
+ owner := fs.FileOwnerFromContext(ctx)
+ parentUattr, err := dir.UnstableAttr(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if parentUattr.Perms.SetGID {
+ owner.GID = parentUattr.Owner.GID
+ }
+
uattr := fs.WithCurrentTime(ctx, fs.UnstableAttr{
- Owner: fs.FileOwnerFromContext(ctx),
+ Owner: owner,
Perms: perms,
// Always start unlinked.
Links: 0,
diff --git a/pkg/sentry/fs/user/user_test.go b/pkg/sentry/fs/user/user_test.go
index 12b786224..7f8fa8038 100644
--- a/pkg/sentry/fs/user/user_test.go
+++ b/pkg/sentry/fs/user/user_test.go
@@ -104,7 +104,10 @@ func TestGetExecUserHome(t *testing.T) {
t.Run(name, func(t *testing.T) {
ctx := contexttest.Context(t)
msrc := fs.NewPseudoMountSource(ctx)
- rootInode := tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc)
+ rootInode, err := tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc, nil /* parent */)
+ if err != nil {
+ t.Fatalf("tmpfs.NewDir failed: %v", err)
+ }
mns, err := fs.NewMountNamespace(ctx, rootInode)
if err != nil {
diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go
index 0f54888d8..fe9871bdd 100644
--- a/pkg/sentry/fsimpl/cgroupfs/base.go
+++ b/pkg/sentry/fsimpl/cgroupfs/base.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -68,11 +67,6 @@ func (c *controllerCommon) Enabled() bool {
return true
}
-// Filesystem implements kernel.CgroupController.Filesystem.
-func (c *controllerCommon) Filesystem() *vfs.Filesystem {
- return c.fs.VFSFilesystem()
-}
-
// RootCgroup implements kernel.CgroupController.RootCgroup.
func (c *controllerCommon) RootCgroup() kernel.Cgroup {
return c.fs.rootCgroup()
@@ -139,6 +133,17 @@ func (c *cgroupInode) Controllers() []kernel.CgroupController {
return c.fs.kcontrollers
}
+// tasks returns a snapshot of the tasks inside the cgroup.
+func (c *cgroupInode) tasks() []*kernel.Task {
+ c.fs.tasksMu.RLock()
+ defer c.fs.tasksMu.RUnlock()
+ ts := make([]*kernel.Task, 0, len(c.ts))
+ for t := range c.ts {
+ ts = append(ts, t)
+ }
+ return ts
+}
+
// Enter implements kernel.CgroupImpl.Enter.
func (c *cgroupInode) Enter(t *kernel.Task) {
c.fs.tasksMu.Lock()
@@ -169,10 +174,7 @@ func (d *cgroupProcsData) Generate(ctx context.Context, buf *bytes.Buffer) error
pgids := make(map[kernel.ThreadID]struct{})
- d.fs.tasksMu.RLock()
- defer d.fs.tasksMu.RUnlock()
-
- for task := range d.ts {
+ for _, task := range d.tasks() {
// Map dedups pgid, since iterating over all tasks produces multiple
// entries for the group leaders.
if pgid := currPidns.IDOfThreadGroup(task.ThreadGroup()); pgid != 0 {
@@ -211,10 +213,7 @@ func (d *tasksData) Generate(ctx context.Context, buf *bytes.Buffer) error {
var pids []kernel.ThreadID
- d.fs.tasksMu.RLock()
- defer d.fs.tasksMu.RUnlock()
-
- for task := range d.ts {
+ for _, task := range d.tasks() {
if pid := currPidns.IDOfTask(task); pid != 0 {
pids = append(pids, pid)
}
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
index bd3e69757..05d7eb4ce 100644
--- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
+++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
@@ -49,8 +49,9 @@
//
// kernel.CgroupRegistry.mu
// cgroupfs.filesystem.mu
-// Task.mu
-// cgroupfs.filesystem.tasksMu.
+// kernel.TaskSet.mu
+// kernel.Task.mu
+// cgroupfs.filesystem.tasksMu.
package cgroupfs
import (
@@ -109,7 +110,7 @@ type InternalData struct {
DefaultControlValues map[string]int64
}
-// filesystem implements vfs.FilesystemImpl.
+// filesystem implements vfs.FilesystemImpl and kernel.cgroupFS.
//
// +stateify savable
type filesystem struct {
@@ -139,6 +140,11 @@ type filesystem struct {
tasksMu sync.RWMutex `state:"nosave"`
}
+// InitializeHierarchyID implements kernel.cgroupFS.InitializeHierarchyID.
+func (fs *filesystem) InitializeHierarchyID(hid uint32) {
+ fs.hierarchyID = hid
+}
+
// Name implements vfs.FilesystemType.Name.
func (FilesystemType) Name() string {
return Name
@@ -284,14 +290,12 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// Register controllers. The registry may be modified concurrently, so if we
// get an error, we raced with someone else who registered the same
// controllers first.
- hid, err := r.Register(fs.kcontrollers)
- if err != nil {
+ if err := r.Register(fs.kcontrollers, fs); err != nil {
ctx.Infof("cgroupfs.FilesystemType.GetFilesystem: failed to register new hierarchy with controllers %v: %v", wantControllers, err)
rootD.DecRef(ctx)
fs.VFSFilesystem().DecRef(ctx)
return nil, nil, syserror.EBUSY
}
- fs.hierarchyID = hid
// Move all existing tasks to the root of the new hierarchy.
k.PopulateNewCgroupHierarchy(fs.rootCgroup())
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
index 52879f871..368272f12 100644
--- a/pkg/sentry/fsimpl/gofer/BUILD
+++ b/pkg/sentry/fsimpl/gofer/BUILD
@@ -54,6 +54,7 @@ go_library(
"//pkg/fspath",
"//pkg/hostarch",
"//pkg/log",
+ "//pkg/metric",
"//pkg/p9",
"//pkg/refs",
"//pkg/refsvfs2",
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 97ce80853..eb09d54c3 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -961,7 +961,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
}
return &fd.vfsfd, nil
case linux.S_IFLNK:
- // Can't open symlinks without O_PATH (which is unimplemented).
+ // Can't open symlinks without O_PATH, which is handled at the VFS layer.
return nil, syserror.ELOOP
case linux.S_IFSOCK:
if d.isSynthetic() {
@@ -1194,11 +1194,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
// RenameAt implements vfs.FilesystemImpl.RenameAt.
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
- if opts.Flags != 0 {
- // Requires 9P support.
- return syserror.EINVAL
- }
-
+ // Resolve newParent first to verify that it's on this Mount.
var ds *[]*dentry
fs.renameMu.Lock()
defer fs.renameMuUnlockAndCheckCaching(ctx, &ds)
@@ -1206,8 +1202,21 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if err != nil {
return err
}
+
+ if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
+ return syserror.EINVAL
+ }
+ if fs.opts.interop == InteropModeShared && opts.Flags&linux.RENAME_NOREPLACE != 0 {
+ // Requires 9P support to synchronize with other remote filesystem
+ // users.
+ return syserror.EINVAL
+ }
+
newName := rp.Component()
if newName == "." || newName == ".." {
+ if opts.Flags&linux.RENAME_NOREPLACE != 0 {
+ return syserror.EEXIST
+ }
return syserror.EBUSY
}
mnt := rp.Mount()
@@ -1280,6 +1289,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
var replacedVFSD *vfs.Dentry
if replaced != nil {
+ if opts.Flags&linux.RENAME_NOREPLACE != 0 {
+ return syserror.EEXIST
+ }
replacedVFSD = &replaced.vfsd
if replaced.isDir() {
if !renamed.isDir() {
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 21692d2ac..cf69e1b7a 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1282,9 +1282,12 @@ func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes)
}
func (d *dentry) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error {
- // We only support xattrs prefixed with "user." (see b/148380782). Currently,
- // there is no need to expose any other xattrs through a gofer.
- if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
+ // Deny access to the "security" and "system" namespaces since applications
+ // may expect these to affect kernel behavior in unimplemented ways
+ // (b/148380782). Allow all other extended attributes to be passed through
+ // to the remote filesystem. This is inconsistent with Linux's 9p client,
+ // but consistent with other filesystems (e.g. FUSE).
+ if strings.HasPrefix(name, linux.XATTR_SECURITY_PREFIX) || strings.HasPrefix(name, linux.XATTR_SYSTEM_PREFIX) {
return syserror.EOPNOTSUPP
}
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
@@ -1684,7 +1687,7 @@ func (d *dentry) setDeleted() {
}
func (d *dentry) listXattr(ctx context.Context, creds *auth.Credentials, size uint64) ([]string, error) {
- if d.file.isNil() || !d.userXattrSupported() {
+ if d.file.isNil() {
return nil, nil
}
xattrMap, err := d.file.listXattr(ctx, size)
@@ -1693,10 +1696,7 @@ func (d *dentry) listXattr(ctx context.Context, creds *auth.Credentials, size ui
}
xattrs := make([]string, 0, len(xattrMap))
for x := range xattrMap {
- // We only support xattrs in the user.* namespace.
- if strings.HasPrefix(x, linux.XATTR_USER_PREFIX) {
- xattrs = append(xattrs, x)
- }
+ xattrs = append(xattrs, x)
}
return xattrs, nil
}
@@ -1731,13 +1731,6 @@ func (d *dentry) removeXattr(ctx context.Context, creds *auth.Credentials, name
return d.file.removeXattr(ctx, name)
}
-// Extended attributes in the user.* namespace are only supported for regular
-// files and directories.
-func (d *dentry) userXattrSupported() bool {
- filetype := linux.FileMode(atomic.LoadUint32(&d.mode)).FileType()
- return filetype == linux.ModeRegular || filetype == linux.ModeDirectory
-}
-
// Preconditions:
// * !d.isSynthetic().
// * d.isRegularFile() || d.isDir().
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index f0e7bbaf7..eed05e369 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
@@ -59,7 +60,7 @@ func newRegularFileFD(mnt *vfs.Mount, d *dentry, flags uint32) (*regularFileFD,
return nil, err
}
if fd.vfsfd.IsWritable() && (atomic.LoadUint32(&d.mode)&0111 != 0) {
- fsmetric.GoferOpensWX.Increment()
+ metric.SuspiciousOperationsMetric.Increment("opened_write_execute_file")
}
if atomic.LoadInt32(&d.mmapFD) >= 0 {
fsmetric.GoferOpensHost.Increment()
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index ac3b5b621..c12444b7e 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fsmetric"
@@ -100,7 +101,7 @@ func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*speci
d.fs.specialFileFDs[fd] = struct{}{}
d.fs.syncMu.Unlock()
if fd.vfsfd.IsWritable() && (atomic.LoadUint32(&d.mode)&0111 != 0) {
- fsmetric.GoferOpensWX.Increment()
+ metric.SuspiciousOperationsMetric.Increment("opened_write_execute_file")
}
if h.fd >= 0 {
fsmetric.GoferOpensHost.Increment()
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index f50b0fb08..8fac53c60 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -635,12 +635,6 @@ func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
// RenameAt implements vfs.FilesystemImpl.RenameAt.
func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
- // Only RENAME_NOREPLACE is supported.
- if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
- return syserror.EINVAL
- }
- noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0
-
fs.mu.Lock()
defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
@@ -651,6 +645,13 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if err != nil {
return err
}
+
+ // Only RENAME_NOREPLACE is supported.
+ if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
+ return syserror.EINVAL
+ }
+ noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0
+
mnt := rp.Mount()
if mnt != oldParentVD.Mount() {
return syserror.EXDEV
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 46c500427..6b6fa0bd5 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -1017,10 +1017,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
// RenameAt implements vfs.FilesystemImpl.RenameAt.
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
- if opts.Flags != 0 {
- return syserror.EINVAL
- }
-
+ // Resolve newParent first to verify that it's on this Mount.
var ds *[]*dentry
fs.renameMu.Lock()
defer fs.renameMuUnlockAndCheckDrop(ctx, &ds)
@@ -1028,8 +1025,16 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if err != nil {
return err
}
+
+ if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
+ return syserror.EINVAL
+ }
+
newName := rp.Component()
if newName == "." || newName == ".." {
+ if opts.Flags&linux.RENAME_NOREPLACE != 0 {
+ return syserror.EEXIST
+ }
return syserror.EBUSY
}
mnt := rp.Mount()
@@ -1093,6 +1098,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
return err
}
if replaced != nil {
+ if opts.Flags&linux.RENAME_NOREPLACE != 0 {
+ return syserror.EEXIST
+ }
replacedVFSD = &replaced.vfsd
if replaced.isDir() {
if !renamed.isDir() {
diff --git a/pkg/sentry/fsimpl/overlay/regular_file.go b/pkg/sentry/fsimpl/overlay/regular_file.go
index 43bfd69a3..82491a0f8 100644
--- a/pkg/sentry/fsimpl/overlay/regular_file.go
+++ b/pkg/sentry/fsimpl/overlay/regular_file.go
@@ -207,9 +207,10 @@ func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) e
return err
}
- // Changing owners may clear one or both of the setuid and setgid bits,
- // so we may have to update opts before setting d.mode.
- if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
+ // Changing owners or truncating may clear one or both of the setuid and
+ // setgid bits, so we may have to update opts before setting d.mode.
+ inotifyMask := opts.Stat.Mask
+ if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_SIZE) != 0 {
stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{
Mask: linux.STATX_MODE,
})
@@ -218,10 +219,14 @@ func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) e
}
opts.Stat.Mode = stat.Mode
opts.Stat.Mask |= linux.STATX_MODE
+ // Don't generate inotify IN_ATTRIB for size-only changes (truncations).
+ if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
+ inotifyMask |= linux.STATX_MODE
+ }
}
d.updateAfterSetStatLocked(&opts)
- if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
+ if ev := vfs.InotifyEventFromStatMask(inotifyMask); ev != 0 {
d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
}
return nil
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 9b14dd6b9..2bc98a94f 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -55,6 +55,7 @@ func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k *
}),
}),
"vm": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{
+ "max_map_count": fs.newInode(ctx, root, 0444, newStaticFile("2147483647\n")),
"mmap_min_addr": fs.newInode(ctx, root, 0444, &mmapMinAddrData{k: k}),
"overcommit_memory": fs.newInode(ctx, root, 0444, newStaticFile("0\n")),
}),
@@ -365,27 +366,22 @@ func (d *tcpMemData) writeSizeLocked(size inet.TCPBufferSize) error {
}
// ipForwarding implements vfs.WritableDynamicBytesSource for
-// /proc/sys/net/ipv4/ip_forwarding.
+// /proc/sys/net/ipv4/ip_forward.
//
// +stateify savable
type ipForwarding struct {
kernfs.DynamicBytesFile
stack inet.Stack `state:"wait"`
- enabled *bool
+ enabled bool
}
var _ vfs.WritableDynamicBytesSource = (*ipForwarding)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (ipf *ipForwarding) Generate(ctx context.Context, buf *bytes.Buffer) error {
- if ipf.enabled == nil {
- enabled := ipf.stack.Forwarding(ipv4.ProtocolNumber)
- ipf.enabled = &enabled
- }
-
val := "0\n"
- if *ipf.enabled {
+ if ipf.enabled {
// Technically, this is not quite compatible with Linux. Linux stores these
// as an integer, so if you write "2" into tcp_sack, you should get 2 back.
// Tough luck.
@@ -414,11 +410,8 @@ func (ipf *ipForwarding) Write(ctx context.Context, src usermem.IOSequence, offs
if err != nil {
return 0, err
}
- if ipf.enabled == nil {
- ipf.enabled = new(bool)
- }
- *ipf.enabled = v != 0
- if err := ipf.stack.SetForwarding(ipv4.ProtocolNumber, *ipf.enabled); err != nil {
+ ipf.enabled = v != 0
+ if err := ipf.stack.SetForwarding(ipv4.ProtocolNumber, ipf.enabled); err != nil {
return 0, err
}
return n, nil
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys_test.go b/pkg/sentry/fsimpl/proc/tasks_sys_test.go
index 6cee22823..19b012f7d 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys_test.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys_test.go
@@ -132,7 +132,7 @@ func TestConfigureIPForwarding(t *testing.T) {
t.Run(c.comment, func(t *testing.T) {
s.IPForwarding = c.initial
- file := &ipForwarding{stack: s, enabled: &c.initial}
+ file := &ipForwarding{stack: s, enabled: c.initial}
// Write the values.
src := usermem.BytesIOSequence([]byte(c.str))
diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD
index c766164c7..b3f9d1010 100644
--- a/pkg/sentry/fsimpl/testutil/BUILD
+++ b/pkg/sentry/fsimpl/testutil/BUILD
@@ -17,7 +17,6 @@ go_library(
"//pkg/fspath",
"//pkg/hostarch",
"//pkg/memutil",
- "//pkg/metric",
"//pkg/sentry/fsbridge",
"//pkg/sentry/fsimpl/tmpfs",
"//pkg/sentry/kernel",
diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go
index 33e52ce64..97aa20cd1 100644
--- a/pkg/sentry/fsimpl/testutil/kernel.go
+++ b/pkg/sentry/fsimpl/testutil/kernel.go
@@ -25,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/memutil"
- "gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -63,8 +62,6 @@ func Boot() (*kernel.Kernel, error) {
return nil, fmt.Errorf("creating platform: %v", err)
}
- metric.CreateSentryMetrics()
-
kernel.VFS2Enabled = true
k := &kernel.Kernel{
Platform: plat,
@@ -88,6 +85,7 @@ func Boot() (*kernel.Kernel, error) {
return nil, fmt.Errorf("creating timekeeper: %v", err)
}
tk.SetClocks(time.NewCalibratedClocks())
+ k.SetTimekeeper(tk)
creds := auth.NewRootCredentials(auth.NewRootUserNamespace())
@@ -96,7 +94,6 @@ func Boot() (*kernel.Kernel, error) {
if err = k.Init(kernel.InitKernelArgs{
ApplicationCores: uint(runtime.GOMAXPROCS(-1)),
FeatureSet: cpuid.HostFeatureSet(),
- Timekeeper: tk,
RootUserNamespace: creds.UserNamespace,
Vdso: vdso,
RootUTSNamespace: kernel.NewUTSNamespace("hostname", "domain", creds.UserNamespace),
@@ -181,7 +178,7 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
memfile := os.NewFile(uintptr(memfd), memfileName)
mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{})
if err != nil {
- memfile.Close()
+ _ = memfile.Close()
return nil, fmt.Errorf("error creating pgalloc.MemoryFile: %v", err)
}
return mf, nil
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 5fdca1d46..f0f4297ef 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -465,7 +465,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
}
return &fd.vfsfd, nil
case *symlink:
- // TODO(gvisor.dev/issue/2782): Can't open symlinks without O_PATH.
+ // Can't open symlinks without O_PATH, which is handled at the VFS layer.
return nil, syserror.ELOOP
case *namedPipe:
return impl.pipe.Open(ctx, rp.Mount(), &d.vfsd, opts.Flags, &d.inode.locks)
@@ -496,20 +496,24 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
// RenameAt implements vfs.FilesystemImpl.RenameAt.
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
- if opts.Flags != 0 {
- // TODO(b/145974740): Support renameat2 flags.
- return syserror.EINVAL
- }
-
- // Resolve newParent first to verify that it's on this Mount.
+ // Resolve newParentDir first to verify that it's on this Mount.
fs.mu.Lock()
defer fs.mu.Unlock()
newParentDir, err := walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry))
if err != nil {
return err
}
+
+ if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
+ // TODO(b/145974740): Support other renameat2 flags.
+ return syserror.EINVAL
+ }
+
newName := rp.Component()
if newName == "." || newName == ".." {
+ if opts.Flags&linux.RENAME_NOREPLACE != 0 {
+ return syserror.EEXIST
+ }
return syserror.EBUSY
}
mnt := rp.Mount()
@@ -556,6 +560,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
replaced, ok := newParentDir.childMap[newName]
if ok {
+ if opts.Flags&linux.RENAME_NOREPLACE != 0 {
+ return syserror.EEXIST
+ }
replacedDir, ok := replaced.inode.impl.(*directory)
if ok {
if !renamed.inode.isDir() {
@@ -815,7 +822,7 @@ func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
if err != nil {
return nil, err
}
- return d.inode.listXattr(size)
+ return d.inode.listXattr(rp.Credentials(), size)
}
// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 9ae25ce9e..6b4367c42 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -717,44 +717,63 @@ func (i *inode) touchCMtimeLocked() {
atomic.StoreInt64(&i.ctime, now)
}
-func (i *inode) listXattr(size uint64) ([]string, error) {
- return i.xattrs.ListXattr(size)
+func checkXattrName(name string) error {
+ // Linux's tmpfs supports "security" and "trusted" xattr namespaces, and
+ // (depending on build configuration) POSIX ACL xattr namespaces
+ // ("system.posix_acl_access" and "system.posix_acl_default"). We don't
+ // support POSIX ACLs or the "security" namespace (b/148380782).
+ if strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX) {
+ return nil
+ }
+ // We support the "user" namespace because we have tests that depend on
+ // this feature.
+ if strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
+ return nil
+ }
+ return syserror.EOPNOTSUPP
+}
+
+func (i *inode) listXattr(creds *auth.Credentials, size uint64) ([]string, error) {
+ return i.xattrs.ListXattr(creds, size)
}
func (i *inode) getXattr(creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) {
- if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil {
+ if err := checkXattrName(opts.Name); err != nil {
return "", err
}
- return i.xattrs.GetXattr(opts)
+ mode := linux.FileMode(atomic.LoadUint32(&i.mode))
+ kuid := auth.KUID(atomic.LoadUint32(&i.uid))
+ kgid := auth.KGID(atomic.LoadUint32(&i.gid))
+ if err := vfs.GenericCheckPermissions(creds, vfs.MayRead, mode, kuid, kgid); err != nil {
+ return "", err
+ }
+ return i.xattrs.GetXattr(creds, mode, kuid, opts)
}
func (i *inode) setXattr(creds *auth.Credentials, opts *vfs.SetXattrOptions) error {
- if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil {
+ if err := checkXattrName(opts.Name); err != nil {
return err
}
- return i.xattrs.SetXattr(opts)
-}
-
-func (i *inode) removeXattr(creds *auth.Credentials, name string) error {
- if err := i.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil {
+ mode := linux.FileMode(atomic.LoadUint32(&i.mode))
+ kuid := auth.KUID(atomic.LoadUint32(&i.uid))
+ kgid := auth.KGID(atomic.LoadUint32(&i.gid))
+ if err := vfs.GenericCheckPermissions(creds, vfs.MayWrite, mode, kuid, kgid); err != nil {
return err
}
- return i.xattrs.RemoveXattr(name)
+ return i.xattrs.SetXattr(creds, mode, kuid, opts)
}
-func (i *inode) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error {
- // We currently only support extended attributes in the user.* and
- // trusted.* namespaces. See b/148380782.
- if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) && !strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX) {
- return syserror.EOPNOTSUPP
+func (i *inode) removeXattr(creds *auth.Credentials, name string) error {
+ if err := checkXattrName(name); err != nil {
+ return err
}
mode := linux.FileMode(atomic.LoadUint32(&i.mode))
kuid := auth.KUID(atomic.LoadUint32(&i.uid))
kgid := auth.KGID(atomic.LoadUint32(&i.gid))
- if err := vfs.GenericCheckPermissions(creds, ats, mode, kuid, kgid); err != nil {
+ if err := vfs.GenericCheckPermissions(creds, vfs.MayWrite, mode, kuid, kgid); err != nil {
return err
}
- return vfs.CheckXattrPermissions(creds, ats, mode, kuid, name)
+ return i.xattrs.RemoveXattr(creds, mode, kuid, name)
}
// fileDescription is embedded by tmpfs implementations of
@@ -807,7 +826,7 @@ func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) {
// ListXattr implements vfs.FileDescriptionImpl.ListXattr.
func (fd *fileDescription) ListXattr(ctx context.Context, size uint64) ([]string, error) {
- return fd.inode().listXattr(size)
+ return fd.inode().listXattr(auth.CredentialsFromContext(ctx), size)
}
// GetXattr implements vfs.FileDescriptionImpl.GetXattr.
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index 31d34ef60..969003613 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -868,6 +868,10 @@ func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCa
fd.mu.Lock()
defer fd.mu.Unlock()
+ if _, err := fd.lowerFD.Seek(ctx, fd.off, linux.SEEK_SET); err != nil {
+ return err
+ }
+
var ds []vfs.Dirent
err := fd.lowerFD.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error {
// Do not include the Merkle tree files.
@@ -890,8 +894,8 @@ func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCa
return err
}
- // The result should contain all children plus "." and "..".
- if fd.d.verityEnabled() && len(ds) != len(fd.d.childrenNames)+2 {
+ // The result should be a part of all children plus "." and "..", counting from fd.off.
+ if fd.d.verityEnabled() && len(ds) != len(fd.d.childrenNames)+2-int(fd.off) {
return fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Unexpected children number %d", len(ds)))
}
@@ -1299,6 +1303,11 @@ func (fd *fileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapO
return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
}
+// SupportsLocks implements vfs.FileDescriptionImpl.SupportsLocks.
+func (fd *fileDescription) SupportsLocks() bool {
+ return fd.lowerFD.SupportsLocks()
+}
+
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
return fd.lowerFD.LockBSD(ctx, ownerPID, t, block)
diff --git a/pkg/sentry/fsmetric/fsmetric.go b/pkg/sentry/fsmetric/fsmetric.go
index 7e535b527..17d0d5025 100644
--- a/pkg/sentry/fsmetric/fsmetric.go
+++ b/pkg/sentry/fsmetric/fsmetric.go
@@ -42,7 +42,6 @@ var (
// Metrics that only apply to fs/gofer and fsimpl/gofer.
var (
- GoferOpensWX = metric.MustCreateNewUint64Metric("/gofer/opened_write_execute_file", true /* sync */, "Number of times a executable file was opened writably from a gofer.")
GoferOpens9P = metric.MustCreateNewUint64Metric("/gofer/opens_9p", false /* sync */, "Number of times a file was opened from a gofer and did not have a host file descriptor.")
GoferOpensHost = metric.MustCreateNewUint64Metric("/gofer/opens_host", false /* sync */, "Number of times a file was opened from a gofer and did have a host file descriptor.")
GoferReads9P = metric.MustCreateNewUint64Metric("/gofer/reads_9p", false /* sync */, "Number of 9P file reads from a gofer.")
diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go
index 6b71bd3a9..80dda1559 100644
--- a/pkg/sentry/inet/inet.go
+++ b/pkg/sentry/inet/inet.go
@@ -88,9 +88,6 @@ type Stack interface {
// for restoring a stack after a save.
RestoreCleanupEndpoints([]stack.TransportEndpoint)
- // Forwarding returns if packet forwarding between NICs is enabled.
- Forwarding(protocol tcpip.NetworkProtocolNumber) bool
-
// SetForwarding enables or disables packet forwarding between NICs.
SetForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) error
diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go
index 03e2608c2..218d9dafc 100644
--- a/pkg/sentry/inet/test_stack.go
+++ b/pkg/sentry/inet/test_stack.go
@@ -154,11 +154,6 @@ func (s *TestStack) CleanupEndpoints() []stack.TransportEndpoint {
// RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints.
func (s *TestStack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {}
-// Forwarding implements inet.Stack.Forwarding.
-func (s *TestStack) Forwarding(protocol tcpip.NetworkProtocolNumber) bool {
- return s.IPForwarding
-}
-
// SetForwarding implements inet.Stack.SetForwarding.
func (s *TestStack) SetForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) error {
s.IPForwarding = enable
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index a1ec6daab..188c0ebff 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -32,7 +32,7 @@ go_template_instance(
out = "seqatomic_taskgoroutineschedinfo_unsafe.go",
package = "kernel",
suffix = "TaskGoroutineSchedInfo",
- template = "//pkg/sync:generic_seqatomic",
+ template = "//pkg/sync/seqatomic:generic_seqatomic",
types = {
"Value": "TaskGoroutineSchedInfo",
},
diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD
index 869e49ebc..12180351d 100644
--- a/pkg/sentry/kernel/auth/BUILD
+++ b/pkg/sentry/kernel/auth/BUILD
@@ -8,7 +8,7 @@ go_template_instance(
out = "atomicptr_credentials_unsafe.go",
package = "auth",
suffix = "Credentials",
- template = "//pkg/sync:generic_atomicptr",
+ template = "//pkg/sync/atomicptr:generic_atomicptr",
types = {
"Value": "Credentials",
},
diff --git a/pkg/sentry/kernel/auth/credentials.go b/pkg/sentry/kernel/auth/credentials.go
index 6862f2ef5..3325fedcb 100644
--- a/pkg/sentry/kernel/auth/credentials.go
+++ b/pkg/sentry/kernel/auth/credentials.go
@@ -125,7 +125,7 @@ func NewUserCredentials(kuid KUID, kgid KGID, extraKGIDs []KGID, capabilities *T
creds.EffectiveCaps = capabilities.EffectiveCaps
creds.BoundingCaps = capabilities.BoundingCaps
creds.InheritableCaps = capabilities.InheritableCaps
- // TODO(nlacasse): Support ambient capabilities.
+ // TODO(gvisor.dev/issue/3166): Support ambient capabilities.
} else {
// If no capabilities are specified, grant capabilities consistent with
// setresuid + setresgid from NewRootCredentials to the given uid and
diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go
index 1f1c63f37..c93ef6ac1 100644
--- a/pkg/sentry/kernel/cgroup.go
+++ b/pkg/sentry/kernel/cgroup.go
@@ -48,10 +48,6 @@ type CgroupController interface {
// attached to. Returned value is valid for the lifetime of the controller.
HierarchyID() uint32
- // Filesystem returns the filesystem this controller is attached to.
- // Returned value is valid for the lifetime of the controller.
- Filesystem() *vfs.Filesystem
-
// RootCgroup returns the root cgroup for this controller. Returned value is
// valid for the lifetime of the controller.
RootCgroup() Cgroup
@@ -124,6 +120,19 @@ func (h *hierarchy) match(ctypes []CgroupControllerType) bool {
return true
}
+// cgroupFS is the public interface to cgroupfs. This lets the kernel package
+// refer to cgroupfs.filesystem methods without directly depending on the
+// cgroupfs package, which would lead to a circular dependency.
+type cgroupFS interface {
+ // Returns the vfs.Filesystem for the cgroupfs.
+ VFSFilesystem() *vfs.Filesystem
+
+ // InitializeHierarchyID sets the hierarchy ID for this filesystem during
+ // filesystem creation. May only be called before the filesystem is visible
+ // to the vfs layer.
+ InitializeHierarchyID(hid uint32)
+}
+
// CgroupRegistry tracks the active set of cgroup controllers on the system.
//
// +stateify savable
@@ -172,7 +181,23 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files
for _, h := range r.hierarchies {
if h.match(ctypes) {
- h.fs.IncRef()
+ if !h.fs.TryIncRef() {
+ // Racing with filesystem destruction, namely h.fs.Release.
+ // Since we hold r.mu, we know the hierarchy hasn't been
+ // unregistered yet, but its associated filesystem is tearing
+ // down.
+ //
+ // If we simply indicate the hierarchy wasn't found without
+ // cleaning up the registry, the caller can race with the
+ // unregister and find itself temporarily unable to create a new
+ // hierarchy with a subset of the relevant controllers.
+ //
+ // To keep the result of FindHierarchy consistent with the
+ // uniqueness of controllers enforced by Register, drop the
+ // dying hierarchy now. The eventual unregister by the FS
+ // teardown will become a no-op.
+ return nil
+ }
return h.fs
}
}
@@ -182,31 +207,35 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files
// Register registers the provided set of controllers with the registry as a new
// hierarchy. If any controller is already registered, the function returns an
-// error without modifying the registry. The hierarchy can be later referenced
-// by the returned id.
-func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) {
+// error without modifying the registry. Register sets the hierarchy ID for the
+// filesystem on success.
+func (r *CgroupRegistry) Register(cs []CgroupController, fs cgroupFS) error {
r.mu.Lock()
defer r.mu.Unlock()
if len(cs) == 0 {
- return InvalidCgroupHierarchyID, fmt.Errorf("can't register hierarchy with no controllers")
+ return fmt.Errorf("can't register hierarchy with no controllers")
}
for _, c := range cs {
if _, ok := r.controllers[c.Type()]; ok {
- return InvalidCgroupHierarchyID, fmt.Errorf("controllers may only be mounted on a single hierarchy")
+ return fmt.Errorf("controllers may only be mounted on a single hierarchy")
}
}
hid, err := r.nextHierarchyID()
if err != nil {
- return hid, err
+ return err
}
+ // Must not fail below here, once we publish the hierarchy ID.
+
+ fs.InitializeHierarchyID(hid)
+
h := hierarchy{
id: hid,
controllers: make(map[CgroupControllerType]CgroupController),
- fs: cs[0].Filesystem(),
+ fs: fs.VFSFilesystem(),
}
for _, c := range cs {
n := c.Type()
@@ -214,15 +243,20 @@ func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) {
h.controllers[n] = c
}
r.hierarchies[hid] = h
- return hid, nil
+ return nil
}
-// Unregister removes a previously registered hierarchy from the registry. If
-// the controller was not previously registered, Unregister is a no-op.
+// Unregister removes a previously registered hierarchy from the registry. If no
+// such hierarchy is registered, Unregister is a no-op.
func (r *CgroupRegistry) Unregister(hid uint32) {
r.mu.Lock()
- defer r.mu.Unlock()
+ r.unregisterLocked(hid)
+ r.mu.Unlock()
+}
+// Precondition: Caller must hold r.mu.
+// +checklocks:r.mu
+func (r *CgroupRegistry) unregisterLocked(hid uint32) {
if h, ok := r.hierarchies[hid]; ok {
for name, _ := range h.controllers {
delete(r.controllers, name)
@@ -253,6 +287,11 @@ func (r *CgroupRegistry) computeInitialGroups(inherit map[Cgroup]struct{}) map[C
for name, ctl := range r.controllers {
if _, ok := ctlSet[name]; !ok {
cg := ctl.RootCgroup()
+ // Multiple controllers may share the same hierarchy, so may have
+ // the same root cgroup. Grab a single ref per hierarchy root.
+ if _, ok := cgset[cg]; ok {
+ continue
+ }
cg.IncRef() // Ref transferred to caller.
cgset[cg] = struct{}{}
}
diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD
index f855f038b..6224a0cbd 100644
--- a/pkg/sentry/kernel/fasync/BUILD
+++ b/pkg/sentry/kernel/fasync/BUILD
@@ -8,7 +8,6 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
- "//pkg/sentry/arch",
"//pkg/sentry/fs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go
index dbbbaeeb0..5d584dc45 100644
--- a/pkg/sentry/kernel/fasync/fasync.go
+++ b/pkg/sentry/kernel/fasync/fasync.go
@@ -17,7 +17,6 @@ package fasync
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -125,9 +124,9 @@ func (a *FileAsync) Callback(e *waiter.Entry, mask waiter.EventMask) {
if !permCheck {
return
}
- signalInfo := &arch.SignalInfo{
+ signalInfo := &linux.SignalInfo{
Signo: int32(linux.SIGIO),
- Code: arch.SignalInfoKernel,
+ Code: linux.SI_KERNEL,
}
if a.signal != 0 {
signalInfo.Signo = int32(a.signal)
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index 10885688c..62777faa8 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -154,9 +154,11 @@ func (f *FDTable) drop(ctx context.Context, file *fs.File) {
// dropVFS2 drops the table reference.
func (f *FDTable) dropVFS2(ctx context.Context, file *vfs.FileDescription) {
// Release any POSIX lock possibly held by the FDTable.
- err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF})
- if err != nil && err != syserror.ENOLCK {
- panic(fmt.Sprintf("UnlockPOSIX failed: %v", err))
+ if file.SupportsLocks() {
+ err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF})
+ if err != nil && err != syserror.ENOLCK {
+ panic(fmt.Sprintf("UnlockPOSIX failed: %v", err))
+ }
}
// Drop the table's reference.
diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD
index a75686cf3..6c31e082c 100644
--- a/pkg/sentry/kernel/futex/BUILD
+++ b/pkg/sentry/kernel/futex/BUILD
@@ -8,7 +8,7 @@ go_template_instance(
out = "atomicptr_bucket_unsafe.go",
package = "futex",
suffix = "Bucket",
- template = "//pkg/sync:generic_atomicptr",
+ template = "//pkg/sync/atomicptr:generic_atomicptr",
types = {
"Value": "bucket",
},
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index e6e9da898..d537e608a 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -306,9 +306,6 @@ type InitKernelArgs struct {
// FeatureSet is the emulated CPU feature set.
FeatureSet *cpuid.FeatureSet
- // Timekeeper manages time for all tasks in the system.
- Timekeeper *Timekeeper
-
// RootUserNamespace is the root user namespace.
RootUserNamespace *auth.UserNamespace
@@ -348,29 +345,34 @@ type InitKernelArgs struct {
PIDNamespace *PIDNamespace
}
+// SetTimekeeper sets Kernel.timekeeper. SetTimekeeper must be called before
+// Init.
+func (k *Kernel) SetTimekeeper(tk *Timekeeper) {
+ k.timekeeper = tk
+}
+
// Init initialize the Kernel with no tasks.
//
// Callers must manually set Kernel.Platform and call Kernel.SetMemoryFile
-// before calling Init.
+// and Kernel.SetTimekeeper before calling Init.
func (k *Kernel) Init(args InitKernelArgs) error {
if args.FeatureSet == nil {
- return fmt.Errorf("FeatureSet is nil")
+ return fmt.Errorf("args.FeatureSet is nil")
}
- if args.Timekeeper == nil {
- return fmt.Errorf("Timekeeper is nil")
+ if k.timekeeper == nil {
+ return fmt.Errorf("timekeeper is nil")
}
- if args.Timekeeper.clocks == nil {
+ if k.timekeeper.clocks == nil {
return fmt.Errorf("must call Timekeeper.SetClocks() before Kernel.Init()")
}
if args.RootUserNamespace == nil {
- return fmt.Errorf("RootUserNamespace is nil")
+ return fmt.Errorf("args.RootUserNamespace is nil")
}
if args.ApplicationCores == 0 {
- return fmt.Errorf("ApplicationCores is 0")
+ return fmt.Errorf("args.ApplicationCores is 0")
}
k.featureSet = args.FeatureSet
- k.timekeeper = args.Timekeeper
k.tasks = newTaskSet(args.PIDNamespace)
k.rootUserNamespace = args.RootUserNamespace
k.rootUTSNamespace = args.RootUTSNamespace
@@ -395,8 +397,8 @@ func (k *Kernel) Init(args InitKernelArgs) error {
}
k.extraAuxv = args.ExtraAuxv
k.vdso = args.Vdso
- k.realtimeClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Realtime}
- k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic}
+ k.realtimeClock = &timekeeperClock{tk: k.timekeeper, c: sentrytime.Realtime}
+ k.monotonicClock = &timekeeperClock{tk: k.timekeeper, c: sentrytime.Monotonic}
k.futexes = futex.NewManager()
k.netlinkPorts = port.New()
k.ptraceExceptions = make(map[*Task]*Task)
@@ -654,12 +656,12 @@ func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error {
defer k.tasks.mu.RUnlock()
for t := range k.tasks.Root.tids {
// We can skip locking Task.mu here since the kernel is paused.
- if mm := t.image.MemoryManager; mm != nil {
- if _, ok := invalidated[mm]; !ok {
- if err := mm.InvalidateUnsavable(ctx); err != nil {
+ if memMgr := t.image.MemoryManager; memMgr != nil {
+ if _, ok := invalidated[memMgr]; !ok {
+ if err := memMgr.InvalidateUnsavable(ctx); err != nil {
return err
}
- invalidated[mm] = struct{}{}
+ invalidated[memMgr] = struct{}{}
}
}
// I really wish we just had a sync.Map of all MMs...
@@ -1339,7 +1341,7 @@ func (k *Kernel) Unpause() {
// context is used only for debugging to describe how the signal was received.
//
// Preconditions: Kernel must have an init process.
-func (k *Kernel) SendExternalSignal(info *arch.SignalInfo, context string) {
+func (k *Kernel) SendExternalSignal(info *linux.SignalInfo, context string) {
k.extMu.Lock()
defer k.extMu.Unlock()
k.sendExternalSignal(info, context)
@@ -1347,7 +1349,7 @@ func (k *Kernel) SendExternalSignal(info *arch.SignalInfo, context string) {
// SendExternalSignalThreadGroup injects a signal into an specific ThreadGroup.
// This function doesn't skip signals like SendExternalSignal does.
-func (k *Kernel) SendExternalSignalThreadGroup(tg *ThreadGroup, info *arch.SignalInfo) error {
+func (k *Kernel) SendExternalSignalThreadGroup(tg *ThreadGroup, info *linux.SignalInfo) error {
k.extMu.Lock()
defer k.extMu.Unlock()
return tg.SendSignal(info)
@@ -1355,7 +1357,7 @@ func (k *Kernel) SendExternalSignalThreadGroup(tg *ThreadGroup, info *arch.Signa
// SendContainerSignal sends the given signal to all processes inside the
// namespace that match the given container ID.
-func (k *Kernel) SendContainerSignal(cid string, info *arch.SignalInfo) error {
+func (k *Kernel) SendContainerSignal(cid string, info *linux.SignalInfo) error {
k.extMu.Lock()
defer k.extMu.Unlock()
k.tasks.mu.RLock()
@@ -1553,22 +1555,23 @@ func (k *Kernel) SetSaveError(err error) {
var _ tcpip.Clock = (*Kernel)(nil)
-// NowNanoseconds implements tcpip.Clock.NowNanoseconds.
-func (k *Kernel) NowNanoseconds() int64 {
- now, err := k.timekeeper.GetTime(sentrytime.Realtime)
+// Now implements tcpip.Clock.NowNanoseconds.
+func (k *Kernel) Now() time.Time {
+ nsec, err := k.timekeeper.GetTime(sentrytime.Realtime)
if err != nil {
- panic("Kernel.NowNanoseconds: " + err.Error())
+ panic("timekeeper.GetTime(sentrytime.Realtime): " + err.Error())
}
- return now
+ return time.Unix(0, nsec)
}
// NowMonotonic implements tcpip.Clock.NowMonotonic.
-func (k *Kernel) NowMonotonic() int64 {
- now, err := k.timekeeper.GetTime(sentrytime.Monotonic)
+func (k *Kernel) NowMonotonic() tcpip.MonotonicTime {
+ nsec, err := k.timekeeper.GetTime(sentrytime.Monotonic)
if err != nil {
- panic("Kernel.NowMonotonic: " + err.Error())
+ panic("timekeeper.GetTime(sentrytime.Monotonic): " + err.Error())
}
- return now
+ var mt tcpip.MonotonicTime
+ return mt.Add(time.Duration(nsec) * time.Nanosecond)
}
// AfterFunc implements tcpip.Clock.AfterFunc.
@@ -1783,7 +1786,7 @@ func (k *Kernel) EmitUnimplementedEvent(ctx context.Context) {
})
t := TaskFromContext(ctx)
- k.unimplementedSyscallEmitter.Emit(&uspb.UnimplementedSyscall{
+ _, _ = k.unimplementedSyscallEmitter.Emit(&uspb.UnimplementedSyscall{
Tid: int32(t.ThreadID()),
Registers: t.Arch().StateData().Proto(),
})
@@ -1858,7 +1861,9 @@ func (k *Kernel) PopulateNewCgroupHierarchy(root Cgroup) {
return
}
t.mu.Lock()
- t.enterCgroupLocked(root)
+ // A task can be in the cgroup if it has been created after the
+ // cgroup hierarchy was registered.
+ t.enterCgroupIfNotYetLocked(root)
t.mu.Unlock()
})
k.tasks.mu.RUnlock()
@@ -1874,7 +1879,7 @@ func (k *Kernel) ReleaseCgroupHierarchy(hid uint32) {
return
}
t.mu.Lock()
- for cg, _ := range t.cgroups {
+ for cg := range t.cgroups {
if cg.HierarchyID() == hid {
t.leaveCgroupLocked(cg)
}
diff --git a/pkg/sentry/kernel/pending_signals.go b/pkg/sentry/kernel/pending_signals.go
index 77a35b788..af455c434 100644
--- a/pkg/sentry/kernel/pending_signals.go
+++ b/pkg/sentry/kernel/pending_signals.go
@@ -17,7 +17,6 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
- "gvisor.dev/gvisor/pkg/sentry/arch"
)
const (
@@ -65,7 +64,7 @@ type pendingSignalQueue struct {
type pendingSignal struct {
// pendingSignalEntry links into a pendingSignalList.
pendingSignalEntry
- *arch.SignalInfo
+ *linux.SignalInfo
// If timer is not nil, it is the IntervalTimer which sent this signal.
timer *IntervalTimer
@@ -75,7 +74,7 @@ type pendingSignal struct {
// on failure (if the given signal's queue is full).
//
// Preconditions: info represents a valid signal.
-func (p *pendingSignals) enqueue(info *arch.SignalInfo, timer *IntervalTimer) bool {
+func (p *pendingSignals) enqueue(info *linux.SignalInfo, timer *IntervalTimer) bool {
sig := linux.Signal(info.Signo)
q := &p.signals[sig.Index()]
if sig.IsStandard() {
@@ -93,7 +92,7 @@ func (p *pendingSignals) enqueue(info *arch.SignalInfo, timer *IntervalTimer) bo
// dequeue dequeues and returns any pending signal not masked by mask. If no
// unmasked signals are pending, dequeue returns nil.
-func (p *pendingSignals) dequeue(mask linux.SignalSet) *arch.SignalInfo {
+func (p *pendingSignals) dequeue(mask linux.SignalSet) *linux.SignalInfo {
// "Real-time signals are delivered in a guaranteed order. Multiple
// real-time signals of the same type are delivered in the order they were
// sent. If different real-time signals are sent to a process, they are
@@ -111,7 +110,7 @@ func (p *pendingSignals) dequeue(mask linux.SignalSet) *arch.SignalInfo {
return p.dequeueSpecific(linux.Signal(lowestPendingUnblockedBit + 1))
}
-func (p *pendingSignals) dequeueSpecific(sig linux.Signal) *arch.SignalInfo {
+func (p *pendingSignals) dequeueSpecific(sig linux.Signal) *linux.SignalInfo {
q := &p.signals[sig.Index()]
ps := q.pendingSignalList.Front()
if ps == nil {
diff --git a/pkg/sentry/kernel/pending_signals_state.go b/pkg/sentry/kernel/pending_signals_state.go
index ca8b4e164..e77f1a254 100644
--- a/pkg/sentry/kernel/pending_signals_state.go
+++ b/pkg/sentry/kernel/pending_signals_state.go
@@ -14,13 +14,11 @@
package kernel
-import (
- "gvisor.dev/gvisor/pkg/sentry/arch"
-)
+import "gvisor.dev/gvisor/pkg/abi/linux"
// +stateify savable
type savedPendingSignal struct {
- si *arch.SignalInfo
+ si *linux.SignalInfo
timer *IntervalTimer
}
diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go
index 2d89b9ccd..3fa5d1d2f 100644
--- a/pkg/sentry/kernel/pipe/pipe_util.go
+++ b/pkg/sentry/kernel/pipe/pipe_util.go
@@ -86,6 +86,12 @@ func (p *Pipe) Write(ctx context.Context, src usermem.IOSequence) (int64, error)
if n > 0 {
p.Notify(waiter.ReadableEvents)
}
+ if err == unix.EPIPE {
+ // If we are returning EPIPE send SIGPIPE to the task.
+ if sendSig := linux.SignalNoInfoFuncFromContext(ctx); sendSig != nil {
+ sendSig(linux.SIGPIPE)
+ }
+ }
return n, err
}
@@ -129,7 +135,7 @@ func (p *Pipe) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArgume
v = math.MaxInt32 // Silently truncate.
}
// Copy result to userspace.
- iocc := primitive.IOCopyContext{
+ iocc := usermem.IOCopyContext{
IO: io,
Ctx: ctx,
Opts: usermem.IOOpts{
diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go
index 2e861a5a8..d801a3d83 100644
--- a/pkg/sentry/kernel/posixtimer.go
+++ b/pkg/sentry/kernel/posixtimer.go
@@ -18,7 +18,6 @@ import (
"math"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/arch"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -97,7 +96,7 @@ func (it *IntervalTimer) ResumeTimer() {
}
// Preconditions: it.target's signal mutex must be locked.
-func (it *IntervalTimer) updateDequeuedSignalLocked(si *arch.SignalInfo) {
+func (it *IntervalTimer) updateDequeuedSignalLocked(si *linux.SignalInfo) {
it.sigpending = false
if it.sigorphan {
return
@@ -138,9 +137,9 @@ func (it *IntervalTimer) Notify(exp uint64, setting ktime.Setting) (ktime.Settin
it.sigpending = true
it.sigorphan = false
it.overrunCur += exp - 1
- si := &arch.SignalInfo{
+ si := &linux.SignalInfo{
Signo: int32(it.signo),
- Code: arch.SignalInfoTimer,
+ Code: linux.SI_TIMER,
}
si.SetTimerID(it.id)
si.SetSigval(it.sigval)
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index 57c7659e7..a6287fd6a 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -394,7 +393,7 @@ func (t *Task) ptraceTrapLocked(code int32) {
t.trapStopPending = false
t.tg.signalHandlers.mu.Unlock()
t.ptraceCode = code
- t.ptraceSiginfo = &arch.SignalInfo{
+ t.ptraceSiginfo = &linux.SignalInfo{
Signo: int32(linux.SIGTRAP),
Code: code,
}
@@ -402,7 +401,7 @@ func (t *Task) ptraceTrapLocked(code int32) {
t.ptraceSiginfo.SetUID(int32(t.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
if t.beginPtraceStopLocked() {
tracer := t.Tracer()
- tracer.signalStop(t, arch.CLD_TRAPPED, int32(linux.SIGTRAP))
+ tracer.signalStop(t, linux.CLD_TRAPPED, int32(linux.SIGTRAP))
tracer.tg.eventQueue.Notify(EventTraceeStop)
}
}
@@ -542,9 +541,9 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
// "Unlike PTRACE_ATTACH, PTRACE_SEIZE does not stop the process." -
// ptrace(2)
if !seize {
- target.sendSignalLocked(&arch.SignalInfo{
+ target.sendSignalLocked(&linux.SignalInfo{
Signo: int32(linux.SIGSTOP),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
}, false /* group */)
}
// Undocumented Linux feature: If the tracee is already group-stopped (and
@@ -586,7 +585,7 @@ func (t *Task) exitPtrace() {
for target := range t.ptraceTracees {
if target.ptraceOpts.ExitKill {
target.tg.signalHandlers.mu.Lock()
- target.sendSignalLocked(&arch.SignalInfo{
+ target.sendSignalLocked(&linux.SignalInfo{
Signo: int32(linux.SIGKILL),
}, false /* group */)
target.tg.signalHandlers.mu.Unlock()
@@ -652,7 +651,7 @@ func (t *Task) forgetTracerLocked() {
// Preconditions:
// * The signal mutex must be locked.
// * The caller must be running on the task goroutine.
-func (t *Task) ptraceSignalLocked(info *arch.SignalInfo) bool {
+func (t *Task) ptraceSignalLocked(info *linux.SignalInfo) bool {
if linux.Signal(info.Signo) == linux.SIGKILL {
return false
}
@@ -678,7 +677,7 @@ func (t *Task) ptraceSignalLocked(info *arch.SignalInfo) bool {
t.ptraceSiginfo = info
t.Debugf("Entering signal-delivery-stop for signal %d", info.Signo)
if t.beginPtraceStopLocked() {
- tracer.signalStop(t, arch.CLD_TRAPPED, info.Signo)
+ tracer.signalStop(t, linux.CLD_TRAPPED, info.Signo)
tracer.tg.eventQueue.Notify(EventTraceeStop)
}
return true
@@ -829,7 +828,7 @@ func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions
if child.ptraceSeized {
child.trapStopPending = true
} else {
- child.pendingSignals.enqueue(&arch.SignalInfo{
+ child.pendingSignals.enqueue(&linux.SignalInfo{
Signo: int32(linux.SIGSTOP),
}, nil)
}
@@ -893,9 +892,9 @@ func (t *Task) ptraceExec(oldTID ThreadID) {
}
t.tg.signalHandlers.mu.Lock()
defer t.tg.signalHandlers.mu.Unlock()
- t.sendSignalLocked(&arch.SignalInfo{
+ t.sendSignalLocked(&linux.SignalInfo{
Signo: int32(linux.SIGTRAP),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
}, false /* group */)
}
@@ -1228,7 +1227,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
return err
case linux.PTRACE_SETSIGINFO:
- var info arch.SignalInfo
+ var info linux.SignalInfo
if _, err := info.CopyIn(t, data); err != nil {
return err
}
diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go
index a95e174a2..54ca43c2e 100644
--- a/pkg/sentry/kernel/seccomp.go
+++ b/pkg/sentry/kernel/seccomp.go
@@ -39,11 +39,11 @@ func dataAsBPFInput(t *Task, d *linux.SeccompData) bpf.Input {
}
}
-func seccompSiginfo(t *Task, errno, sysno int32, ip hostarch.Addr) *arch.SignalInfo {
- si := &arch.SignalInfo{
+func seccompSiginfo(t *Task, errno, sysno int32, ip hostarch.Addr) *linux.SignalInfo {
+ si := &linux.SignalInfo{
Signo: int32(linux.SIGSYS),
Errno: errno,
- Code: arch.SYS_SECCOMP,
+ Code: linux.SYS_SECCOMP,
}
si.SetCallAddr(uint64(ip))
si.SetSyscall(sysno)
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index fe2ab1662..47bb66b42 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -35,10 +35,10 @@ const (
// Maximum number of semaphore sets.
setsMax = linux.SEMMNI
- // Maximum number of semaphroes in a semaphore set.
+ // Maximum number of semaphores in a semaphore set.
semsMax = linux.SEMMSL
- // Maximum number of semaphores in all semaphroe sets.
+ // Maximum number of semaphores in all semaphore sets.
semsTotalMax = linux.SEMMNS
)
@@ -171,10 +171,10 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu
// Map semaphores and map indexes in a registry are of the same size,
// check map semaphores only here for the system limit.
if len(r.semaphores) >= setsMax {
- return nil, syserror.EINVAL
+ return nil, syserror.ENOSPC
}
if r.totalSems() > int(semsTotalMax-nsems) {
- return nil, syserror.EINVAL
+ return nil, syserror.ENOSPC
}
// Finally create a new set.
@@ -220,7 +220,7 @@ func (r *Registry) HighestIndex() int32 {
defer r.mu.Unlock()
// By default, highest used index is 0 even though
- // there is no semaphroe set.
+ // there is no semaphore set.
var highestIndex int32
for index := range r.indexes {
if index > highestIndex {
@@ -702,7 +702,9 @@ func (s *Set) checkPerms(creds *auth.Credentials, reqPerms fs.PermMask) bool {
return s.checkCapability(creds)
}
-// destroy destroys the set. Caller must hold 's.mu'.
+// destroy destroys the set.
+//
+// Preconditions: Caller must hold 's.mu'.
func (s *Set) destroy() {
// Notify all waiters. They will fail on the next attempt to execute
// operations and return error.
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index 0cd9e2533..ca9076406 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -16,7 +16,6 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -233,7 +232,7 @@ func (pg *ProcessGroup) Session() *Session {
// SendSignal sends a signal to all processes inside the process group. It is
// analagous to kernel/signal.c:kill_pgrp.
-func (pg *ProcessGroup) SendSignal(info *arch.SignalInfo) error {
+func (pg *ProcessGroup) SendSignal(info *linux.SignalInfo) error {
tasks := pg.originator.TaskSet()
tasks.mu.RLock()
defer tasks.mu.RUnlock()
diff --git a/pkg/sentry/kernel/signal.go b/pkg/sentry/kernel/signal.go
index 2488ae7d5..e08474d25 100644
--- a/pkg/sentry/kernel/signal.go
+++ b/pkg/sentry/kernel/signal.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
)
@@ -36,7 +35,7 @@ const SignalPanic = linux.SIGUSR2
// context is used only for debugging to differentiate these cases.
//
// Preconditions: Kernel must have an init process.
-func (k *Kernel) sendExternalSignal(info *arch.SignalInfo, context string) {
+func (k *Kernel) sendExternalSignal(info *linux.SignalInfo, context string) {
switch linux.Signal(info.Signo) {
case linux.SIGURG:
// Sent by the Go 1.14+ runtime for asynchronous goroutine preemption.
@@ -60,18 +59,18 @@ func (k *Kernel) sendExternalSignal(info *arch.SignalInfo, context string) {
}
// SignalInfoPriv returns a SignalInfo equivalent to Linux's SEND_SIG_PRIV.
-func SignalInfoPriv(sig linux.Signal) *arch.SignalInfo {
- return &arch.SignalInfo{
+func SignalInfoPriv(sig linux.Signal) *linux.SignalInfo {
+ return &linux.SignalInfo{
Signo: int32(sig),
- Code: arch.SignalInfoKernel,
+ Code: linux.SI_KERNEL,
}
}
// SignalInfoNoInfo returns a SignalInfo equivalent to Linux's SEND_SIG_NOINFO.
-func SignalInfoNoInfo(sig linux.Signal, sender, receiver *Task) *arch.SignalInfo {
- info := &arch.SignalInfo{
+func SignalInfoNoInfo(sig linux.Signal, sender, receiver *Task) *linux.SignalInfo {
+ info := &linux.SignalInfo{
Signo: int32(sig),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
}
info.SetPID(int32(receiver.tg.pidns.IDOfThreadGroup(sender.tg)))
info.SetUID(int32(sender.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow()))
diff --git a/pkg/sentry/kernel/signal_handlers.go b/pkg/sentry/kernel/signal_handlers.go
index 768fda220..147cc41bb 100644
--- a/pkg/sentry/kernel/signal_handlers.go
+++ b/pkg/sentry/kernel/signal_handlers.go
@@ -16,7 +16,6 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sync"
)
@@ -30,14 +29,14 @@ type SignalHandlers struct {
mu sync.Mutex `state:"nosave"`
// actions is the action to be taken upon receiving each signal.
- actions map[linux.Signal]arch.SignalAct
+ actions map[linux.Signal]linux.SigAction
}
// NewSignalHandlers returns a new SignalHandlers specifying all default
// actions.
func NewSignalHandlers() *SignalHandlers {
return &SignalHandlers{
- actions: make(map[linux.Signal]arch.SignalAct),
+ actions: make(map[linux.Signal]linux.SigAction),
}
}
@@ -59,9 +58,9 @@ func (sh *SignalHandlers) CopyForExec() *SignalHandlers {
sh.mu.Lock()
defer sh.mu.Unlock()
for sig, act := range sh.actions {
- if act.Handler == arch.SignalActIgnore {
- sh2.actions[sig] = arch.SignalAct{
- Handler: arch.SignalActIgnore,
+ if act.Handler == linux.SIG_IGN {
+ sh2.actions[sig] = linux.SigAction{
+ Handler: linux.SIG_IGN,
}
}
}
@@ -73,15 +72,15 @@ func (sh *SignalHandlers) IsIgnored(sig linux.Signal) bool {
sh.mu.Lock()
defer sh.mu.Unlock()
sa, ok := sh.actions[sig]
- return ok && sa.Handler == arch.SignalActIgnore
+ return ok && sa.Handler == linux.SIG_IGN
}
// dequeueActionLocked returns the SignalAct that should be used to handle sig.
//
// Preconditions: sh.mu must be locked.
-func (sh *SignalHandlers) dequeueAction(sig linux.Signal) arch.SignalAct {
+func (sh *SignalHandlers) dequeueAction(sig linux.Signal) linux.SigAction {
act := sh.actions[sig]
- if act.IsResetHandler() {
+ if act.Flags&linux.SA_RESETHAND != 0 {
delete(sh.actions, sig)
}
return act
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index be1371855..2e3b4488a 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
"gvisor.dev/gvisor/pkg/hostarch"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -151,7 +150,7 @@ type Task struct {
// which the SA_ONSTACK flag is set.
//
// signalStack is exclusive to the task goroutine.
- signalStack arch.SignalStack
+ signalStack linux.SignalStack
// signalQueue is a set of registered waiters for signal-related events.
//
@@ -395,7 +394,7 @@ type Task struct {
// ptraceSiginfo is analogous to Linux's task_struct::last_siginfo.
//
// ptraceSiginfo is protected by the TaskSet mutex.
- ptraceSiginfo *arch.SignalInfo
+ ptraceSiginfo *linux.SignalInfo
// ptraceEventMsg is the value set by PTRACE_EVENT stops and returned to
// the tracer by ptrace(PTRACE_GETEVENTMSG).
@@ -853,15 +852,13 @@ func (t *Task) SetOOMScoreAdj(adj int32) error {
return nil
}
-// UID returns t's uid.
-// TODO(gvisor.dev/issue/170): This method is not namespaced yet.
-func (t *Task) UID() uint32 {
+// KUID returns t's kuid.
+func (t *Task) KUID() uint32 {
return uint32(t.Credentials().EffectiveKUID)
}
-// GID returns t's gid.
-// TODO(gvisor.dev/issue/170): This method is not namespaced yet.
-func (t *Task) GID() uint32 {
+// KGID returns t's kgid.
+func (t *Task) KGID() uint32 {
return uint32(t.Credentials().EffectiveKGID)
}
diff --git a/pkg/sentry/kernel/task_cgroup.go b/pkg/sentry/kernel/task_cgroup.go
index 25d2504fa..7c138e80f 100644
--- a/pkg/sentry/kernel/task_cgroup.go
+++ b/pkg/sentry/kernel/task_cgroup.go
@@ -85,6 +85,14 @@ func (t *Task) enterCgroupLocked(c Cgroup) {
c.Enter(t)
}
+// +checklocks:t.mu
+func (t *Task) enterCgroupIfNotYetLocked(c Cgroup) {
+ if _, ok := t.cgroups[c]; ok {
+ return
+ }
+ t.enterCgroupLocked(c)
+}
+
// LeaveCgroups removes t out from all its cgroups.
func (t *Task) LeaveCgroups() {
t.mu.Lock()
diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go
index 70b0699dc..c82d9e82b 100644
--- a/pkg/sentry/kernel/task_context.go
+++ b/pkg/sentry/kernel/task_context.go
@@ -17,6 +17,7 @@ package kernel
import (
"time"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -113,6 +114,10 @@ func (t *Task) contextValue(key interface{}, isTaskGoroutine bool) interface{} {
return t.k.RealtimeClock()
case limits.CtxLimits:
return t.tg.limits
+ case linux.CtxSignalNoInfoFunc:
+ return func(sig linux.Signal) error {
+ return t.SendSignal(SignalInfoNoInfo(sig, t, t))
+ }
case pgalloc.CtxMemoryFile:
return t.k.mf
case pgalloc.CtxMemoryFileProvider:
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index d9897e802..cf8571262 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -66,7 +66,6 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -181,7 +180,7 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
t.tg.signalHandlers = t.tg.signalHandlers.CopyForExec()
t.endStopCond.L = &t.tg.signalHandlers.mu
// "Any alternate signal stack is not preserved (sigaltstack(2))." - execve(2)
- t.signalStack = arch.SignalStack{Flags: arch.SignalStackFlagDisable}
+ t.signalStack = linux.SignalStack{Flags: linux.SS_DISABLE}
// "The termination signal is reset to SIGCHLD (see clone(2))."
t.tg.terminationSignal = linux.SIGCHLD
// execed indicates that the process can no longer join a process group
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index b1af1a7ef..d115b8783 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -28,9 +28,9 @@ import (
"errors"
"fmt"
"strconv"
+ "strings"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
@@ -50,6 +50,23 @@ type ExitStatus struct {
Signo int
}
+func (es ExitStatus) String() string {
+ var b strings.Builder
+ if code := es.Code; code != 0 {
+ if b.Len() != 0 {
+ b.WriteByte(' ')
+ }
+ _, _ = fmt.Fprintf(&b, "Code=%d", code)
+ }
+ if signal := es.Signo; signal != 0 {
+ if b.Len() != 0 {
+ b.WriteByte(' ')
+ }
+ _, _ = fmt.Fprintf(&b, "Signal=%d", signal)
+ }
+ return b.String()
+}
+
// Signaled returns true if the ExitStatus indicates that the exiting task or
// thread group was killed by a signal.
func (es ExitStatus) Signaled() bool {
@@ -122,12 +139,12 @@ func (t *Task) killLocked() {
if t.stop != nil && t.stop.Killable() {
t.endInternalStopLocked()
}
- t.pendingSignals.enqueue(&arch.SignalInfo{
+ t.pendingSignals.enqueue(&linux.SignalInfo{
Signo: int32(linux.SIGKILL),
// Linux just sets SIGKILL in the pending signal bitmask without
// enqueueing an actual siginfo, such that
// kernel/signal.c:collect_signal() initializes si_code to SI_USER.
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
}, nil)
t.interrupt()
}
@@ -332,7 +349,7 @@ func (t *Task) exitThreadGroup() bool {
// signalStop must be called with t's signal mutex unlocked.
t.tg.signalHandlers.mu.Unlock()
if notifyParent && t.tg.leader.parent != nil {
- t.tg.leader.parent.signalStop(t, arch.CLD_STOPPED, int32(sig))
+ t.tg.leader.parent.signalStop(t, linux.CLD_STOPPED, int32(sig))
t.tg.leader.parent.tg.eventQueue.Notify(EventChildGroupStop)
}
return last
@@ -353,7 +370,7 @@ func (t *Task) exitChildren() {
continue
}
other.signalHandlers.mu.Lock()
- other.leader.sendSignalLocked(&arch.SignalInfo{
+ other.leader.sendSignalLocked(&linux.SignalInfo{
Signo: int32(linux.SIGKILL),
}, true /* group */)
other.signalHandlers.mu.Unlock()
@@ -368,9 +385,9 @@ func (t *Task) exitChildren() {
// wait for a parent to reap them.)
for c := range t.children {
if sig := c.ParentDeathSignal(); sig != 0 {
- siginfo := &arch.SignalInfo{
+ siginfo := &linux.SignalInfo{
Signo: int32(sig),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
}
siginfo.SetPID(int32(c.tg.pidns.tids[t]))
siginfo.SetUID(int32(t.Credentials().RealKUID.In(c.UserNamespace()).OrOverflow()))
@@ -652,10 +669,10 @@ func (t *Task) exitNotifyLocked(fromPtraceDetach bool) {
t.parent.tg.signalHandlers.mu.Lock()
if t.tg.terminationSignal == linux.SIGCHLD || fromPtraceDetach {
if act, ok := t.parent.tg.signalHandlers.actions[linux.SIGCHLD]; ok {
- if act.Handler == arch.SignalActIgnore {
+ if act.Handler == linux.SIG_IGN {
t.exitParentAcked = true
signalParent = false
- } else if act.Flags&arch.SignalFlagNoCldWait != 0 {
+ } else if act.Flags&linux.SA_NOCLDWAIT != 0 {
t.exitParentAcked = true
}
}
@@ -705,17 +722,17 @@ func (t *Task) exitNotifyLocked(fromPtraceDetach bool) {
}
// Preconditions: The TaskSet mutex must be locked.
-func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *arch.SignalInfo {
- info := &arch.SignalInfo{
+func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *linux.SignalInfo {
+ info := &linux.SignalInfo{
Signo: int32(sig),
}
info.SetPID(int32(receiver.tg.pidns.tids[t]))
info.SetUID(int32(t.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow()))
if t.exitStatus.Signaled() {
- info.Code = arch.CLD_KILLED
+ info.Code = linux.CLD_KILLED
info.SetStatus(int32(t.exitStatus.Signo))
} else {
- info.Code = arch.CLD_EXITED
+ info.Code = linux.CLD_EXITED
info.SetStatus(int32(t.exitStatus.Code))
}
// TODO(b/72102453): Set utime, stime.
diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go
index 9ba5f8d78..f142feab4 100644
--- a/pkg/sentry/kernel/task_sched.go
+++ b/pkg/sentry/kernel/task_sched.go
@@ -536,7 +536,7 @@ func (tg *ThreadGroup) updateCPUTimersEnabledLocked() {
// appropriate for /proc/[pid]/status.
func (t *Task) StateStatus() string {
switch s := t.TaskGoroutineSchedInfo().State; s {
- case TaskGoroutineNonexistent:
+ case TaskGoroutineNonexistent, TaskGoroutineRunningSys:
t.tg.pidns.owner.mu.RLock()
defer t.tg.pidns.owner.mu.RUnlock()
switch t.exitState {
@@ -546,16 +546,16 @@ func (t *Task) StateStatus() string {
return "X (dead)"
default:
// The task goroutine can't exit before passing through
- // runExitNotify, so this indicates that the task has been created,
- // but the task goroutine hasn't yet started. The Linux equivalent
- // is struct task_struct::state == TASK_NEW
+ // runExitNotify, so if s == TaskGoroutineNonexistent, the task has
+ // been created but the task goroutine hasn't yet started. The
+ // Linux equivalent is struct task_struct::state == TASK_NEW
// (kernel/fork.c:copy_process() =>
// kernel/sched/core.c:sched_fork()), but the TASK_NEW bit is
// masked out by TASK_REPORT for /proc/[pid]/status, leaving only
// TASK_RUNNING.
return "R (running)"
}
- case TaskGoroutineRunningSys, TaskGoroutineRunningApp:
+ case TaskGoroutineRunningApp:
return "R (running)"
case TaskGoroutineBlockedInterruptible:
return "S (sleeping)"
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index c2b9fc08f..8ca61ed48 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -86,7 +86,7 @@ var defaultActions = map[linux.Signal]SignalAction{
}
// computeAction figures out what to do given a signal number
-// and an arch.SignalAct. SIGSTOP always results in a SignalActionStop,
+// and an linux.SigAction. SIGSTOP always results in a SignalActionStop,
// and SIGKILL always results in a SignalActionTerm.
// Signal 0 is always ignored as many programs use it for various internal functions
// and don't expect it to do anything.
@@ -97,7 +97,7 @@ var defaultActions = map[linux.Signal]SignalAction{
// 0, the default action is taken;
// 1, the signal is ignored;
// anything else, the function returns SignalActionHandler.
-func computeAction(sig linux.Signal, act arch.SignalAct) SignalAction {
+func computeAction(sig linux.Signal, act linux.SigAction) SignalAction {
switch sig {
case linux.SIGSTOP:
return SignalActionStop
@@ -108,9 +108,9 @@ func computeAction(sig linux.Signal, act arch.SignalAct) SignalAction {
}
switch act.Handler {
- case arch.SignalActDefault:
+ case linux.SIG_DFL:
return defaultActions[sig]
- case arch.SignalActIgnore:
+ case linux.SIG_IGN:
return SignalActionIgnore
default:
return SignalActionHandler
@@ -127,7 +127,7 @@ var StopSignals = linux.MakeSignalSet(linux.SIGSTOP, linux.SIGTSTP, linux.SIGTTI
// If there are no pending unmasked signals, dequeueSignalLocked returns nil.
//
// Preconditions: t.tg.signalHandlers.mu must be locked.
-func (t *Task) dequeueSignalLocked(mask linux.SignalSet) *arch.SignalInfo {
+func (t *Task) dequeueSignalLocked(mask linux.SignalSet) *linux.SignalInfo {
if info := t.pendingSignals.dequeue(mask); info != nil {
return info
}
@@ -155,7 +155,7 @@ func (t *Task) PendingSignals() linux.SignalSet {
}
// deliverSignal delivers the given signal and returns the following run state.
-func (t *Task) deliverSignal(info *arch.SignalInfo, act arch.SignalAct) taskRunState {
+func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRunState {
sigact := computeAction(linux.Signal(info.Signo), act)
if t.haveSyscallReturn {
@@ -172,7 +172,7 @@ func (t *Task) deliverSignal(info *arch.SignalInfo, act arch.SignalAct) taskRunS
fallthrough
case sre == syserror.ERESTART_RESTARTBLOCK:
fallthrough
- case (sre == syserror.ERESTARTSYS && !act.IsRestart()):
+ case (sre == syserror.ERESTARTSYS && act.Flags&linux.SA_RESTART == 0):
t.Debugf("Not restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo)
t.Arch().SetReturn(uintptr(-ExtractErrno(syserror.EINTR, -1)))
default:
@@ -236,7 +236,7 @@ func (t *Task) deliverSignal(info *arch.SignalInfo, act arch.SignalAct) taskRunS
// deliverSignalToHandler changes the task's userspace state to enter the given
// user-configured handler for the given signal.
-func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct) error {
+func (t *Task) deliverSignalToHandler(info *linux.SignalInfo, act linux.SigAction) error {
// Signal delivery to an application handler interrupts restartable
// sequences.
t.rseqInterrupt()
@@ -248,8 +248,8 @@ func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct)
// N.B. This is a *copy* of the alternate stack that the user's signal
// handler expects to see in its ucontext (even if it's not in use).
alt := t.signalStack
- if act.IsOnStack() && alt.IsEnabled() {
- alt.SetOnStack()
+ if act.Flags&linux.SA_ONSTACK != 0 && alt.IsEnabled() {
+ alt.Flags |= linux.SS_ONSTACK
if !alt.Contains(sp) {
sp = hostarch.Addr(alt.Top())
}
@@ -289,7 +289,7 @@ func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct)
// Add our signal mask.
newMask := t.signalMask | act.Mask
- if !act.IsNoDefer() {
+ if act.Flags&linux.SA_NODEFER == 0 {
newMask |= linux.SignalSetOf(linux.Signal(info.Signo))
}
t.SetSignalMask(newMask)
@@ -326,7 +326,7 @@ func (t *Task) SignalReturn(rt bool) (*SyscallControl, error) {
// Preconditions:
// * The caller must be running on the task goroutine.
// * t.exitState < TaskExitZombie.
-func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*arch.SignalInfo, error) {
+func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux.SignalInfo, error) {
// set is the set of signals we're interested in; invert it to get the set
// of signals to block.
mask := ^(set &^ UnblockableSignals)
@@ -373,7 +373,7 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*arch.S
// syserror.EINVAL - The signal is not valid.
// syserror.EAGAIN - THe signal is realtime, and cannot be queued.
//
-func (t *Task) SendSignal(info *arch.SignalInfo) error {
+func (t *Task) SendSignal(info *linux.SignalInfo) error {
t.tg.pidns.owner.mu.RLock()
defer t.tg.pidns.owner.mu.RUnlock()
t.tg.signalHandlers.mu.Lock()
@@ -382,7 +382,7 @@ func (t *Task) SendSignal(info *arch.SignalInfo) error {
}
// SendGroupSignal sends the given signal to t's thread group.
-func (t *Task) SendGroupSignal(info *arch.SignalInfo) error {
+func (t *Task) SendGroupSignal(info *linux.SignalInfo) error {
t.tg.pidns.owner.mu.RLock()
defer t.tg.pidns.owner.mu.RUnlock()
t.tg.signalHandlers.mu.Lock()
@@ -392,7 +392,7 @@ func (t *Task) SendGroupSignal(info *arch.SignalInfo) error {
// SendSignal sends the given signal to tg, using tg's leader to determine if
// the signal is blocked.
-func (tg *ThreadGroup) SendSignal(info *arch.SignalInfo) error {
+func (tg *ThreadGroup) SendSignal(info *linux.SignalInfo) error {
tg.pidns.owner.mu.RLock()
defer tg.pidns.owner.mu.RUnlock()
tg.signalHandlers.mu.Lock()
@@ -400,11 +400,11 @@ func (tg *ThreadGroup) SendSignal(info *arch.SignalInfo) error {
return tg.leader.sendSignalLocked(info, true /* group */)
}
-func (t *Task) sendSignalLocked(info *arch.SignalInfo, group bool) error {
+func (t *Task) sendSignalLocked(info *linux.SignalInfo, group bool) error {
return t.sendSignalTimerLocked(info, group, nil)
}
-func (t *Task) sendSignalTimerLocked(info *arch.SignalInfo, group bool, timer *IntervalTimer) error {
+func (t *Task) sendSignalTimerLocked(info *linux.SignalInfo, group bool, timer *IntervalTimer) error {
if t.exitState == TaskExitDead {
return syserror.ESRCH
}
@@ -572,9 +572,9 @@ func (t *Task) forceSignal(sig linux.Signal, unconditional bool) {
func (t *Task) forceSignalLocked(sig linux.Signal, unconditional bool) {
blocked := linux.SignalSetOf(sig)&t.signalMask != 0
act := t.tg.signalHandlers.actions[sig]
- ignored := act.Handler == arch.SignalActIgnore
+ ignored := act.Handler == linux.SIG_IGN
if blocked || ignored || unconditional {
- act.Handler = arch.SignalActDefault
+ act.Handler = linux.SIG_DFL
t.tg.signalHandlers.actions[sig] = act
if blocked {
t.setSignalMaskLocked(t.signalMask &^ linux.SignalSetOf(sig))
@@ -641,17 +641,17 @@ func (t *Task) SetSavedSignalMask(mask linux.SignalSet) {
}
// SignalStack returns the task-private signal stack.
-func (t *Task) SignalStack() arch.SignalStack {
+func (t *Task) SignalStack() linux.SignalStack {
t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch())
alt := t.signalStack
if t.onSignalStack(alt) {
- alt.Flags |= arch.SignalStackFlagOnStack
+ alt.Flags |= linux.SS_ONSTACK
}
return alt
}
// onSignalStack returns true if the task is executing on the given signal stack.
-func (t *Task) onSignalStack(alt arch.SignalStack) bool {
+func (t *Task) onSignalStack(alt linux.SignalStack) bool {
sp := hostarch.Addr(t.Arch().Stack())
return alt.Contains(sp)
}
@@ -661,30 +661,30 @@ func (t *Task) onSignalStack(alt arch.SignalStack) bool {
// This value may not be changed if the task is currently executing on the
// signal stack, i.e. if t.onSignalStack returns true. In this case, this
// function will return false. Otherwise, true is returned.
-func (t *Task) SetSignalStack(alt arch.SignalStack) bool {
+func (t *Task) SetSignalStack(alt linux.SignalStack) bool {
// Check that we're not executing on the stack.
if t.onSignalStack(t.signalStack) {
return false
}
- if alt.Flags&arch.SignalStackFlagDisable != 0 {
+ if alt.Flags&linux.SS_DISABLE != 0 {
// Don't record anything beyond the flags.
- t.signalStack = arch.SignalStack{
- Flags: arch.SignalStackFlagDisable,
+ t.signalStack = linux.SignalStack{
+ Flags: linux.SS_DISABLE,
}
} else {
// Mask out irrelevant parts: only disable matters.
- alt.Flags &= arch.SignalStackFlagDisable
+ alt.Flags &= linux.SS_DISABLE
t.signalStack = alt
}
return true
}
-// SetSignalAct atomically sets the thread group's signal action for signal sig
+// SetSigAction atomically sets the thread group's signal action for signal sig
// to *actptr (if actptr is not nil) and returns the old signal action.
-func (tg *ThreadGroup) SetSignalAct(sig linux.Signal, actptr *arch.SignalAct) (arch.SignalAct, error) {
+func (tg *ThreadGroup) SetSigAction(sig linux.Signal, actptr *linux.SigAction) (linux.SigAction, error) {
if !sig.IsValid() {
- return arch.SignalAct{}, syserror.EINVAL
+ return linux.SigAction{}, syserror.EINVAL
}
tg.pidns.owner.mu.RLock()
@@ -718,48 +718,6 @@ func (tg *ThreadGroup) SetSignalAct(sig linux.Signal, actptr *arch.SignalAct) (a
return oldact, nil
}
-// CopyOutSignalAct converts the given SignalAct into an architecture-specific
-// type and then copies it out to task memory.
-func (t *Task) CopyOutSignalAct(addr hostarch.Addr, s *arch.SignalAct) error {
- n := t.Arch().NewSignalAct()
- n.SerializeFrom(s)
- _, err := n.CopyOut(t, addr)
- return err
-}
-
-// CopyInSignalAct copies an architecture-specific sigaction type from task
-// memory and then converts it into a SignalAct.
-func (t *Task) CopyInSignalAct(addr hostarch.Addr) (arch.SignalAct, error) {
- n := t.Arch().NewSignalAct()
- var s arch.SignalAct
- if _, err := n.CopyIn(t, addr); err != nil {
- return s, err
- }
- n.DeserializeTo(&s)
- return s, nil
-}
-
-// CopyOutSignalStack converts the given SignalStack into an
-// architecture-specific type and then copies it out to task memory.
-func (t *Task) CopyOutSignalStack(addr hostarch.Addr, s *arch.SignalStack) error {
- n := t.Arch().NewSignalStack()
- n.SerializeFrom(s)
- _, err := n.CopyOut(t, addr)
- return err
-}
-
-// CopyInSignalStack copies an architecture-specific stack_t from task memory
-// and then converts it into a SignalStack.
-func (t *Task) CopyInSignalStack(addr hostarch.Addr) (arch.SignalStack, error) {
- n := t.Arch().NewSignalStack()
- var s arch.SignalStack
- if _, err := n.CopyIn(t, addr); err != nil {
- return s, err
- }
- n.DeserializeTo(&s)
- return s, nil
-}
-
// groupStop is a TaskStop placed on tasks that have received a stop signal
// (SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU). (The term "group-stop" originates from
// the ptrace man page.)
@@ -774,7 +732,7 @@ func (*groupStop) Killable() bool { return true }
// previously-dequeued stop signal.
//
// Preconditions: The caller must be running on the task goroutine.
-func (t *Task) initiateGroupStop(info *arch.SignalInfo) {
+func (t *Task) initiateGroupStop(info *linux.SignalInfo) {
t.tg.pidns.owner.mu.RLock()
defer t.tg.pidns.owner.mu.RUnlock()
t.tg.signalHandlers.mu.Lock()
@@ -909,8 +867,8 @@ func (t *Task) signalStop(target *Task, code int32, status int32) {
t.tg.signalHandlers.mu.Lock()
defer t.tg.signalHandlers.mu.Unlock()
act, ok := t.tg.signalHandlers.actions[linux.SIGCHLD]
- if !ok || (act.Handler != arch.SignalActIgnore && act.Flags&arch.SignalFlagNoCldStop == 0) {
- sigchld := &arch.SignalInfo{
+ if !ok || (act.Handler != linux.SIG_IGN && act.Flags&linux.SA_NOCLDSTOP == 0) {
+ sigchld := &linux.SignalInfo{
Signo: int32(linux.SIGCHLD),
Code: code,
}
@@ -955,14 +913,14 @@ func (*runInterrupt) execute(t *Task) taskRunState {
// notified its tracer accordingly. But it's consistent with
// Linux...
if intr {
- tracer.signalStop(t.tg.leader, arch.CLD_STOPPED, int32(sig))
+ tracer.signalStop(t.tg.leader, linux.CLD_STOPPED, int32(sig))
if !notifyParent {
tracer.tg.eventQueue.Notify(EventGroupContinue | EventTraceeStop | EventChildGroupStop)
} else {
tracer.tg.eventQueue.Notify(EventGroupContinue | EventTraceeStop)
}
} else {
- tracer.signalStop(t.tg.leader, arch.CLD_CONTINUED, int32(sig))
+ tracer.signalStop(t.tg.leader, linux.CLD_CONTINUED, int32(sig))
tracer.tg.eventQueue.Notify(EventGroupContinue)
}
}
@@ -974,10 +932,10 @@ func (*runInterrupt) execute(t *Task) taskRunState {
// SIGCHLD is a standard signal, so the latter would always be
// dropped. Hence sending only the former is equivalent.
if intr {
- t.tg.leader.parent.signalStop(t.tg.leader, arch.CLD_STOPPED, int32(sig))
+ t.tg.leader.parent.signalStop(t.tg.leader, linux.CLD_STOPPED, int32(sig))
t.tg.leader.parent.tg.eventQueue.Notify(EventGroupContinue | EventChildGroupStop)
} else {
- t.tg.leader.parent.signalStop(t.tg.leader, arch.CLD_CONTINUED, int32(sig))
+ t.tg.leader.parent.signalStop(t.tg.leader, linux.CLD_CONTINUED, int32(sig))
t.tg.leader.parent.tg.eventQueue.Notify(EventGroupContinue)
}
}
@@ -1018,7 +976,7 @@ func (*runInterrupt) execute(t *Task) taskRunState {
// without requiring an extra PTRACE_GETSIGINFO call." -
// "Group-stop", ptrace(2)
t.ptraceCode = int32(sig) | linux.PTRACE_EVENT_STOP<<8
- t.ptraceSiginfo = &arch.SignalInfo{
+ t.ptraceSiginfo = &linux.SignalInfo{
Signo: int32(sig),
Code: t.ptraceCode,
}
@@ -1029,7 +987,7 @@ func (*runInterrupt) execute(t *Task) taskRunState {
t.ptraceSiginfo = nil
}
if t.beginPtraceStopLocked() {
- tracer.signalStop(t, arch.CLD_STOPPED, int32(sig))
+ tracer.signalStop(t, linux.CLD_STOPPED, int32(sig))
// For consistency with Linux, if the parent and tracer are in the
// same thread group, deduplicate notification signals.
if notifyParent && tracer.tg == t.tg.leader.parent.tg {
@@ -1047,7 +1005,7 @@ func (*runInterrupt) execute(t *Task) taskRunState {
t.tg.signalHandlers.mu.Unlock()
}
if notifyParent {
- t.tg.leader.parent.signalStop(t.tg.leader, arch.CLD_STOPPED, int32(sig))
+ t.tg.leader.parent.signalStop(t.tg.leader, linux.CLD_STOPPED, int32(sig))
t.tg.leader.parent.tg.eventQueue.Notify(EventChildGroupStop)
}
t.tg.pidns.owner.mu.RUnlock()
@@ -1101,7 +1059,7 @@ func (*runInterruptAfterSignalDeliveryStop) execute(t *Task) taskRunState {
if sig != linux.Signal(info.Signo) {
info.Signo = int32(sig)
info.Errno = 0
- info.Code = arch.SignalInfoUser
+ info.Code = linux.SI_USER
// pid isn't a valid field for all signal numbers, but Linux
// doesn't care (kernel/signal.c:ptrace_signal()).
//
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 32031cd70..41fd2d471 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -18,7 +18,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/hostarch"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
@@ -131,7 +130,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
runState: (*runApp)(nil),
interruptChan: make(chan struct{}, 1),
signalMask: cfg.SignalMask,
- signalStack: arch.SignalStack{Flags: arch.SignalStackFlagDisable},
+ signalStack: linux.SignalStack{Flags: linux.SS_DISABLE},
image: *image,
fsContext: cfg.FSContext,
fdTable: cfg.FDTable,
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index b92e98fa1..891e2201d 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
@@ -446,10 +445,10 @@ func (tg *ThreadGroup) ReleaseControllingTTY(tty *TTY) error {
othertg.signalHandlers.mu.Lock()
othertg.tty = nil
if othertg.processGroup == tg.processGroup.session.foreground {
- if err := othertg.leader.sendSignalLocked(&arch.SignalInfo{Signo: int32(linux.SIGHUP)}, true /* group */); err != nil {
+ if err := othertg.leader.sendSignalLocked(&linux.SignalInfo{Signo: int32(linux.SIGHUP)}, true /* group */); err != nil {
lastErr = err
}
- if err := othertg.leader.sendSignalLocked(&arch.SignalInfo{Signo: int32(linux.SIGCONT)}, true /* group */); err != nil {
+ if err := othertg.leader.sendSignalLocked(&linux.SignalInfo{Signo: int32(linux.SIGCONT)}, true /* group */); err != nil {
lastErr = err
}
}
@@ -490,10 +489,10 @@ func (tg *ThreadGroup) SetForegroundProcessGroup(tty *TTY, pgid ProcessGroupID)
tg.signalHandlers.mu.Lock()
defer tg.signalHandlers.mu.Unlock()
- // TODO(b/129283598): "If tcsetpgrp() is called by a member of a
- // background process group in its session, and the calling process is
- // not blocking or ignoring SIGTTOU, a SIGTTOU signal is sent to all
- // members of this background process group."
+ // TODO(gvisor.dev/issue/6148): "If tcsetpgrp() is called by a member of a
+ // background process group in its session, and the calling process is not
+ // blocking or ignoring SIGTTOU, a SIGTTOU signal is sent to all members of
+ // this background process group."
// tty must be the controlling terminal.
if tg.tty != tty {
diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go
index f61a8e164..26aa34aa6 100644
--- a/pkg/sentry/kernel/time/time.go
+++ b/pkg/sentry/kernel/time/time.go
@@ -458,25 +458,6 @@ func NewTimer(clock Clock, listener TimerListener) *Timer {
return t
}
-// After waits for the duration to elapse according to clock and then sends a
-// notification on the returned channel. The timer is started immediately and
-// will fire exactly once. The second return value is the start time used with
-// the duration.
-//
-// Callers must call Timer.Destroy.
-func After(clock Clock, duration time.Duration) (*Timer, Time, <-chan struct{}) {
- notifier, tchan := NewChannelNotifier()
- t := NewTimer(clock, notifier)
- now := clock.Now()
-
- t.Swap(Setting{
- Enabled: true,
- Period: 0,
- Next: now.Add(duration),
- })
- return t, now, tchan
-}
-
// init initializes Timer state that is not preserved across save/restore. If
// init has already been called, calling it again is a no-op.
//
diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go
index 3886b4d33..3e302d92c 100644
--- a/pkg/sentry/loader/interpreter.go
+++ b/pkg/sentry/loader/interpreter.go
@@ -59,7 +59,7 @@ func parseInterpreterScript(ctx context.Context, filename string, f fsbridge.Fil
// Linux silently truncates the remainder of the line if it exceeds
// interpMaxLineLength.
i := bytes.IndexByte(line, '\n')
- if i > 0 {
+ if i >= 0 {
line = line[:i]
}
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go
index b81292c46..d1a883da4 100644
--- a/pkg/sentry/pgalloc/pgalloc.go
+++ b/pkg/sentry/pgalloc/pgalloc.go
@@ -1062,10 +1062,20 @@ func (f *MemoryFile) runReclaim() {
break
}
- // If ManualZeroing is in effect, pages will be zeroed on allocation
- // and may not be freed by decommitFile, so calling decommitFile is
- // unnecessary.
- if !f.opts.ManualZeroing {
+ if f.opts.ManualZeroing {
+ // If ManualZeroing is in effect, only hugepage-aligned regions may
+ // be safely passed to decommitFile. Pages will be zeroed on
+ // reallocation, so we don't need to perform any manual zeroing
+ // here, whether or not decommitFile succeeds.
+ if startAddr, ok := hostarch.Addr(fr.Start).HugeRoundUp(); ok {
+ if endAddr := hostarch.Addr(fr.End).HugeRoundDown(); startAddr < endAddr {
+ decommitFR := memmap.FileRange{uint64(startAddr), uint64(endAddr)}
+ if err := f.decommitFile(decommitFR); err != nil {
+ log.Warningf("Reclaim failed to decommit %v: %v", decommitFR, err)
+ }
+ }
+ }
+ } else {
if err := f.decommitFile(fr); err != nil {
log.Warningf("Reclaim failed to decommit %v: %v", fr, err)
// Zero the pages manually. This won't reduce memory usage, but at
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index b307832fd..8a490b3de 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -6,6 +6,8 @@ go_library(
name = "kvm",
srcs = [
"address_space.go",
+ "address_space_amd64.go",
+ "address_space_arm64.go",
"bluepill.go",
"bluepill_allocator.go",
"bluepill_amd64.go",
@@ -77,6 +79,7 @@ go_test(
"requires-kvm",
],
deps = [
+ "//pkg/abi/linux",
"//pkg/hostarch",
"//pkg/ring0",
"//pkg/ring0/pagetables",
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index 5524e8727..9929caebb 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -85,15 +85,6 @@ type addressSpace struct {
dirtySet *dirtySet
}
-// invalidate is the implementation for Invalidate.
-func (as *addressSpace) invalidate() {
- as.dirtySet.forEach(as.machine, func(c *vCPU) {
- if c.active.get() == as { // If this happens to be active,
- c.BounceToKernel() // ... force a kernel transition.
- }
- })
-}
-
// Invalidate interrupts all dirty contexts.
func (as *addressSpace) Invalidate() {
as.mu.Lock()
diff --git a/pkg/sentry/platform/kvm/address_space_amd64.go b/pkg/sentry/platform/kvm/address_space_amd64.go
new file mode 100644
index 000000000..d11d38679
--- /dev/null
+++ b/pkg/sentry/platform/kvm/address_space_amd64.go
@@ -0,0 +1,24 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kvm
+
+// invalidate is the implementation for Invalidate.
+func (as *addressSpace) invalidate() {
+ as.dirtySet.forEach(as.machine, func(c *vCPU) {
+ if c.active.get() == as { // If this happens to be active,
+ c.BounceToKernel() // ... force a kernel transition.
+ }
+ })
+}
diff --git a/pkg/sentry/platform/kvm/address_space_arm64.go b/pkg/sentry/platform/kvm/address_space_arm64.go
new file mode 100644
index 000000000..fb954418b
--- /dev/null
+++ b/pkg/sentry/platform/kvm/address_space_arm64.go
@@ -0,0 +1,25 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kvm
+
+import (
+ "gvisor.dev/gvisor/pkg/ring0"
+)
+
+// invalidate is the implementation for Invalidate.
+func (as *addressSpace) invalidate() {
+ bluepill(as.pageTables.Allocator.(*allocator).cpu)
+ ring0.FlushTlbAll()
+}
diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go
index f4d4473a8..183e741ea 100644
--- a/pkg/sentry/platform/kvm/context.go
+++ b/pkg/sentry/platform/kvm/context.go
@@ -17,6 +17,7 @@ package kvm
import (
"sync/atomic"
+ "gvisor.dev/gvisor/pkg/abi/linux"
pkgcontext "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
@@ -32,15 +33,15 @@ type context struct {
// machine is the parent machine, and is immutable.
machine *machine
- // info is the arch.SignalInfo cached for this context.
- info arch.SignalInfo
+ // info is the linux.SignalInfo cached for this context.
+ info linux.SignalInfo
// interrupt is the interrupt context.
interrupt interrupt.Forwarder
}
// Switch runs the provided context in the given address space.
-func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, _ int32) (*arch.SignalInfo, hostarch.AccessType, error) {
+func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, _ int32) (*linux.SignalInfo, hostarch.AccessType, error) {
as := mm.AddressSpace()
localAS := as.(*addressSpace)
diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go
index b8dd1e4a5..b1cab89a0 100644
--- a/pkg/sentry/platform/kvm/kvm_amd64_test.go
+++ b/pkg/sentry/platform/kvm/kvm_amd64_test.go
@@ -19,6 +19,7 @@ package kvm
import (
"testing"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -30,7 +31,7 @@ func TestSegments(t *testing.T) {
applicationTest(t, true, testutil.TwiddleSegments, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTestSegments(regs)
for {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -55,7 +56,7 @@ func stmxcsr(addr *uint32)
func TestMXCSR(t *testing.T) {
applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
- var si arch.SignalInfo
+ var si linux.SignalInfo
switchOpts := ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index ceff09a60..fe570aff9 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -22,6 +22,7 @@ import (
"time"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
@@ -157,7 +158,7 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func
func TestApplicationSyscall(t *testing.T) {
applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -171,7 +172,7 @@ func TestApplicationSyscall(t *testing.T) {
return false
})
applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -188,7 +189,7 @@ func TestApplicationSyscall(t *testing.T) {
func TestApplicationFault(t *testing.T) {
applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTouchTarget(regs, nil) // Cause fault.
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -203,7 +204,7 @@ func TestApplicationFault(t *testing.T) {
})
applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTouchTarget(regs, nil) // Cause fault.
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -221,7 +222,7 @@ func TestRegistersSyscall(t *testing.T) {
applicationTest(t, true, testutil.TwiddleRegsSyscall, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTestRegs(regs) // Fill values for all registers.
for {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -244,7 +245,7 @@ func TestRegistersFault(t *testing.T) {
applicationTest(t, true, testutil.TwiddleRegsFault, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTestRegs(regs) // Fill values for all registers.
for {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -270,7 +271,7 @@ func TestBounce(t *testing.T) {
time.Sleep(time.Millisecond)
c.BounceToKernel()
}()
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -285,7 +286,7 @@ func TestBounce(t *testing.T) {
time.Sleep(time.Millisecond)
c.BounceToKernel()
}()
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -317,7 +318,7 @@ func TestBounceStress(t *testing.T) {
c.BounceToKernel()
}()
randomSleep()
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -338,7 +339,7 @@ func TestInvalidate(t *testing.T) {
applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTouchTarget(regs, &data) // Read legitimate value.
for {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -353,7 +354,7 @@ func TestInvalidate(t *testing.T) {
// Unmap the page containing data & invalidate.
pt.Unmap(hostarch.Addr(reflect.ValueOf(&data).Pointer() & ^uintptr(hostarch.PageSize-1)), hostarch.PageSize)
for {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -371,13 +372,13 @@ func TestInvalidate(t *testing.T) {
}
// IsFault returns true iff the given signal represents a fault.
-func IsFault(err error, si *arch.SignalInfo) bool {
+func IsFault(err error, si *linux.SignalInfo) bool {
return err == platform.ErrContextSignal && si.Signo == int32(unix.SIGSEGV)
}
func TestEmptyAddressSpace(t *testing.T) {
applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -391,7 +392,7 @@ func TestEmptyAddressSpace(t *testing.T) {
return false
})
applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -467,7 +468,7 @@ func BenchmarkApplicationSyscall(b *testing.B) {
a int // Count for ErrContextInterrupt.
)
applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
@@ -504,7 +505,7 @@ func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) {
a int
)
applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
- var si arch.SignalInfo
+ var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
FloatingPointState: &dummyFPState,
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 9a2337654..7c063c7f5 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -23,11 +23,11 @@ import (
"runtime/debug"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
@@ -264,10 +264,10 @@ func (c *vCPU) setSystemTime() error {
// nonCanonical generates a canonical address return.
//
//go:nosplit
-func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) {
- *info = arch.SignalInfo{
+func nonCanonical(addr uint64, signal int32, info *linux.SignalInfo) (hostarch.AccessType, error) {
+ *info = linux.SignalInfo{
Signo: signal,
- Code: arch.SignalInfoKernel,
+ Code: linux.SI_KERNEL,
}
info.SetAddr(addr) // Include address.
return hostarch.NoAccess, platform.ErrContextSignal
@@ -276,7 +276,7 @@ func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (hostarch.Ac
// fault generates an appropriate fault return.
//
//go:nosplit
-func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) {
+func (c *vCPU) fault(signal int32, info *linux.SignalInfo) (hostarch.AccessType, error) {
bluepill(c) // Probably no-op, but may not be.
faultAddr := ring0.ReadCR2()
code, user := c.ErrorCode()
@@ -287,7 +287,7 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (hostarch.AccessType,
return hostarch.NoAccess, platform.ErrContextInterrupt
}
// Reset the pointed SignalInfo.
- *info = arch.SignalInfo{Signo: signal}
+ *info = linux.SignalInfo{Signo: signal}
info.SetAddr(uint64(faultAddr))
accessType := hostarch.AccessType{
Read: code&(1<<1) == 0,
@@ -325,7 +325,7 @@ func prefaultFloatingPointState(data *fpu.State) {
}
// SwitchToUser unpacks architectural-details.
-func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (hostarch.AccessType, error) {
+func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *linux.SignalInfo) (hostarch.AccessType, error) {
// Check for canonical addresses.
if regs := switchOpts.Registers; !ring0.IsCanonical(regs.Rip) {
return nonCanonical(regs.Rip, int32(unix.SIGSEGV), info)
@@ -371,7 +371,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
return c.fault(int32(unix.SIGSEGV), info)
case ring0.Debug, ring0.Breakpoint:
- *info = arch.SignalInfo{
+ *info = linux.SignalInfo{
Signo: int32(unix.SIGTRAP),
Code: 1, // TRAP_BRKPT (breakpoint).
}
@@ -383,9 +383,9 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
ring0.BoundRangeExceeded,
ring0.InvalidTSS,
ring0.StackSegmentFault:
- *info = arch.SignalInfo{
+ *info = linux.SignalInfo{
Signo: int32(unix.SIGSEGV),
- Code: arch.SignalInfoKernel,
+ Code: linux.SI_KERNEL,
}
info.SetAddr(switchOpts.Registers.Rip) // Include address.
if vector == ring0.GeneralProtectionFault {
@@ -397,7 +397,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.InvalidOpcode:
- *info = arch.SignalInfo{
+ *info = linux.SignalInfo{
Signo: int32(unix.SIGILL),
Code: 1, // ILL_ILLOPC (illegal opcode).
}
@@ -405,7 +405,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.DivideByZero:
- *info = arch.SignalInfo{
+ *info = linux.SignalInfo{
Signo: int32(unix.SIGFPE),
Code: 1, // FPE_INTDIV (divide by zero).
}
@@ -413,7 +413,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.Overflow:
- *info = arch.SignalInfo{
+ *info = linux.SignalInfo{
Signo: int32(unix.SIGFPE),
Code: 2, // FPE_INTOVF (integer overflow).
}
@@ -422,7 +422,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
case ring0.X87FloatingPointException,
ring0.SIMDFloatingPointException:
- *info = arch.SignalInfo{
+ *info = linux.SignalInfo{
Signo: int32(unix.SIGFPE),
Code: 7, // FPE_FLTINV (invalid operation).
}
@@ -433,7 +433,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
return hostarch.NoAccess, platform.ErrContextInterrupt
case ring0.AlignmentCheck:
- *info = arch.SignalInfo{
+ *info = linux.SignalInfo{
Signo: int32(unix.SIGBUS),
Code: 2, // BUS_ADRERR (physical address does not exist).
}
@@ -469,7 +469,7 @@ func availableRegionsForSetMem() (phyRegions []physicalRegion) {
}
func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) {
- // Map all the executible regions so that all the entry functions
+ // Map all the executable regions so that all the entry functions
// are mapped in the upper half.
applyVirtualRegions(func(vr virtualRegion) {
if excludeVirtualRegion(vr) || vr.filename == "[vsyscall]" {
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index 8926b1d9f..edaccf9bc 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -21,10 +21,10 @@ import (
"sync/atomic"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
)
@@ -126,10 +126,10 @@ func availableRegionsForSetMem() (phyRegions []physicalRegion) {
// nonCanonical generates a canonical address return.
//
//go:nosplit
-func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) {
- *info = arch.SignalInfo{
+func nonCanonical(addr uint64, signal int32, info *linux.SignalInfo) (hostarch.AccessType, error) {
+ *info = linux.SignalInfo{
Signo: signal,
- Code: arch.SignalInfoKernel,
+ Code: linux.SI_KERNEL,
}
info.SetAddr(addr) // Include address.
return hostarch.NoAccess, platform.ErrContextSignal
@@ -157,7 +157,7 @@ func isWriteFault(code uint64) bool {
// fault generates an appropriate fault return.
//
//go:nosplit
-func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) {
+func (c *vCPU) fault(signal int32, info *linux.SignalInfo) (hostarch.AccessType, error) {
bluepill(c) // Probably no-op, but may not be.
faultAddr := c.GetFaultAddr()
code, user := c.ErrorCode()
@@ -170,7 +170,7 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (hostarch.AccessType,
}
// Reset the pointed SignalInfo.
- *info = arch.SignalInfo{Signo: signal}
+ *info = linux.SignalInfo{Signo: signal}
info.SetAddr(uint64(faultAddr))
ret := code & _ESR_ELx_FSC
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index 92edc992b..1b0a6e0a7 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -23,10 +23,10 @@ import (
"unsafe"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
@@ -272,7 +272,7 @@ func (c *vCPU) getOneRegister(reg *kvmOneReg) error {
}
// SwitchToUser unpacks architectural-details.
-func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (hostarch.AccessType, error) {
+func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *linux.SignalInfo) (hostarch.AccessType, error) {
// Check for canonical addresses.
if regs := switchOpts.Registers; !ring0.IsCanonical(regs.Pc) {
return nonCanonical(regs.Pc, int32(unix.SIGSEGV), info)
@@ -319,14 +319,14 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
case ring0.El0SyncUndef:
return c.fault(int32(unix.SIGILL), info)
case ring0.El0SyncDbg:
- *info = arch.SignalInfo{
+ *info = linux.SignalInfo{
Signo: int32(unix.SIGTRAP),
Code: 1, // TRAP_BRKPT (breakpoint).
}
info.SetAddr(switchOpts.Registers.Pc) // Include address.
return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.El0SyncSpPc:
- *info = arch.SignalInfo{
+ *info = linux.SignalInfo{
Signo: int32(unix.SIGBUS),
Code: 2, // BUS_ADRERR (physical address does not exist).
}
diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go
index ef7814a6f..a26bc2316 100644
--- a/pkg/sentry/platform/platform.go
+++ b/pkg/sentry/platform/platform.go
@@ -195,8 +195,8 @@ type Context interface {
// - nil: The Context invoked a system call.
//
// - ErrContextSignal: The Context was interrupted by a signal. The
- // returned *arch.SignalInfo contains information about the signal. If
- // arch.SignalInfo.Signo == SIGSEGV, the returned hostarch.AccessType
+ // returned *linux.SignalInfo contains information about the signal. If
+ // linux.SignalInfo.Signo == SIGSEGV, the returned hostarch.AccessType
// contains the access type of the triggering fault. The caller owns
// the returned SignalInfo.
//
@@ -207,7 +207,7 @@ type Context interface {
// concurrent call to Switch().
//
// - ErrContextCPUPreempted: See the definition of that error for details.
- Switch(ctx context.Context, mm MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, hostarch.AccessType, error)
+ Switch(ctx context.Context, mm MemoryManager, ac arch.Context, cpu int32) (*linux.SignalInfo, hostarch.AccessType, error)
// PullFullState() pulls a full state of the application thread.
//
diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go
index 828458ce2..319b0cf1d 100644
--- a/pkg/sentry/platform/ptrace/ptrace.go
+++ b/pkg/sentry/platform/ptrace/ptrace.go
@@ -73,7 +73,7 @@ var (
type context struct {
// signalInfo is the signal info, if and when a signal is received.
- signalInfo arch.SignalInfo
+ signalInfo linux.SignalInfo
// interrupt is the interrupt context.
interrupt interrupt.Forwarder
@@ -96,7 +96,7 @@ type context struct {
}
// Switch runs the provided context in the given address space.
-func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, hostarch.AccessType, error) {
+func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, cpu int32) (*linux.SignalInfo, hostarch.AccessType, error) {
as := mm.AddressSpace()
s := as.(*subprocess)
isSyscall := s.switchToApp(c, ac)
diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
index facb96011..cc93396a9 100644
--- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go
+++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
@@ -101,7 +101,7 @@ func (t *thread) setFPRegs(fpState *fpu.State, fpLen uint64, useXsave bool) erro
}
// getSignalInfo retrieves information about the signal that caused the stop.
-func (t *thread) getSignalInfo(si *arch.SignalInfo) error {
+func (t *thread) getSignalInfo(si *linux.SignalInfo) error {
_, _, errno := unix.RawSyscall6(
unix.SYS_PTRACE,
unix.PTRACE_GETSIGINFO,
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index 9c73a725a..0931795c5 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -20,6 +20,7 @@ import (
"runtime"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/procid"
@@ -524,7 +525,7 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
// Check for interrupts, and ensure that future interrupts will signal t.
if !c.interrupt.Enable(t) {
// Pending interrupt; simulate.
- c.signalInfo = arch.SignalInfo{Signo: int32(platform.SignalInterrupt)}
+ c.signalInfo = linux.SignalInfo{Signo: int32(platform.SignalInterrupt)}
return false
}
defer c.interrupt.Disable()
diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go
index 9252c0bd7..90b1ead56 100644
--- a/pkg/sentry/platform/ptrace/subprocess_amd64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go
@@ -155,7 +155,7 @@ func initChildProcessPPID(initregs *arch.Registers, ppid int32) {
//
// Note that this should only be called after verifying that the signalInfo has
// been generated by the kernel.
-func patchSignalInfo(regs *arch.Registers, signalInfo *arch.SignalInfo) {
+func patchSignalInfo(regs *arch.Registers, signalInfo *linux.SignalInfo) {
if linux.Signal(signalInfo.Signo) == linux.SIGSYS {
signalInfo.Signo = int32(linux.SIGSEGV)
diff --git a/pkg/sentry/platform/ptrace/subprocess_arm64.go b/pkg/sentry/platform/ptrace/subprocess_arm64.go
index c0cbc0686..e4257e3bf 100644
--- a/pkg/sentry/platform/ptrace/subprocess_arm64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_arm64.go
@@ -138,7 +138,7 @@ func initChildProcessPPID(initregs *arch.Registers, ppid int32) {
//
// Note that this should only be called after verifying that the signalInfo has
// been generated by the kernel.
-func patchSignalInfo(regs *arch.Registers, signalInfo *arch.SignalInfo) {
+func patchSignalInfo(regs *arch.Registers, signalInfo *linux.SignalInfo) {
if linux.Signal(signalInfo.Signo) == linux.SIGSYS {
signalInfo.Signo = int32(linux.SIGSEGV)
diff --git a/pkg/sentry/sighandling/sighandling_unsafe.go b/pkg/sentry/sighandling/sighandling_unsafe.go
index d6a2fbe34..3fe5c6770 100644
--- a/pkg/sentry/sighandling/sighandling_unsafe.go
+++ b/pkg/sentry/sighandling/sighandling_unsafe.go
@@ -21,25 +21,16 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
)
-// FIXME(gvisor.dev/issue/214): Move to pkg/abi/linux along with definitions in
-// pkg/sentry/arch.
-type sigaction struct {
- handler uintptr
- flags uint64
- restorer uintptr
- mask uint64
-}
-
// IgnoreChildStop sets the SA_NOCLDSTOP flag, causing child processes to not
// generate SIGCHLD when they stop.
func IgnoreChildStop() error {
- var sa sigaction
+ var sa linux.SigAction
// Get the existing signal handler information, and set the flag.
if _, _, e := unix.RawSyscall6(unix.SYS_RT_SIGACTION, uintptr(unix.SIGCHLD), 0, uintptr(unsafe.Pointer(&sa)), linux.SignalSetSize, 0, 0); e != 0 {
return e
}
- sa.flags |= linux.SA_NOCLDSTOP
+ sa.Flags |= linux.SA_NOCLDSTOP
if _, _, e := unix.RawSyscall6(unix.SYS_RT_SIGACTION, uintptr(unix.SIGCHLD), uintptr(unsafe.Pointer(&sa)), 0, linux.SignalSetSize, 0, 0); e != 0 {
return e
}
diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD
index 2e3064565..3c6511ead 100644
--- a/pkg/sentry/socket/hostinet/BUILD
+++ b/pkg/sentry/socket/hostinet/BUILD
@@ -39,8 +39,6 @@ go_library(
"//pkg/syserr",
"//pkg/syserror",
"//pkg/tcpip",
- "//pkg/tcpip/network/ipv4",
- "//pkg/tcpip/network/ipv6",
"//pkg/tcpip/stack",
"//pkg/usermem",
"//pkg/waiter",
diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go
index 393a1ab3a..cbb1e905d 100644
--- a/pkg/sentry/socket/hostinet/stack.go
+++ b/pkg/sentry/socket/hostinet/stack.go
@@ -35,8 +35,6 @@ import (
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
- "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -66,8 +64,6 @@ type Stack struct {
tcpSACKEnabled bool
netDevFile *os.File
netSNMPFile *os.File
- ipv4Forwarding bool
- ipv6Forwarding bool
}
// NewStack returns an empty Stack containing no configuration.
@@ -127,13 +123,6 @@ func (s *Stack) Configure() error {
s.netSNMPFile = f
}
- s.ipv6Forwarding = false
- if ipForwarding, err := ioutil.ReadFile("/proc/sys/net/ipv6/conf/all/forwarding"); err == nil {
- s.ipv6Forwarding = strings.TrimSpace(string(ipForwarding)) != "0"
- } else {
- log.Warningf("Failed to read if ipv6 forwarding is enabled, setting to false")
- }
-
return nil
}
@@ -492,19 +481,6 @@ func (s *Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil }
// RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints.
func (s *Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {}
-// Forwarding implements inet.Stack.Forwarding.
-func (s *Stack) Forwarding(protocol tcpip.NetworkProtocolNumber) bool {
- switch protocol {
- case ipv4.ProtocolNumber:
- return s.ipv4Forwarding
- case ipv6.ProtocolNumber:
- return s.ipv6Forwarding
- default:
- log.Warningf("Forwarding(%v) failed: unsupported protocol", protocol)
- return false
- }
-}
-
// SetForwarding implements inet.Stack.SetForwarding.
func (s *Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error {
return syserror.EACCES
diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD
index 61b2c9755..608474fa1 100644
--- a/pkg/sentry/socket/netfilter/BUILD
+++ b/pkg/sentry/socket/netfilter/BUILD
@@ -25,6 +25,7 @@ go_library(
"//pkg/log",
"//pkg/marshal",
"//pkg/sentry/kernel",
+ "//pkg/sentry/kernel/auth",
"//pkg/syserr",
"//pkg/tcpip",
"//pkg/tcpip/header",
diff --git a/pkg/sentry/socket/netfilter/extensions.go b/pkg/sentry/socket/netfilter/extensions.go
index 6fc7781ad..3f1b4a17b 100644
--- a/pkg/sentry/socket/netfilter/extensions.go
+++ b/pkg/sentry/socket/netfilter/extensions.go
@@ -19,20 +19,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
-// TODO(gvisor.dev/issue/170): The following per-matcher params should be
-// supported:
-// - Table name
-// - Match size
-// - User size
-// - Hooks
-// - Proto
-// - Family
-
// matchMaker knows how to (un)marshal the matcher named name().
type matchMaker interface {
// name is the matcher name as stored in the xt_entry_match struct.
@@ -43,7 +35,7 @@ type matchMaker interface {
// unmarshal converts from the ABI matcher struct to an
// stack.Matcher.
- unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Matcher, error)
+ unmarshal(task *kernel.Task, buf []byte, filter stack.IPHeaderFilter) (stack.Matcher, error)
}
type matcher interface {
@@ -94,12 +86,12 @@ func marshalEntryMatch(name string, data []byte) []byte {
return buf
}
-func unmarshalMatcher(match linux.XTEntryMatch, filter stack.IPHeaderFilter, buf []byte) (stack.Matcher, error) {
+func unmarshalMatcher(task *kernel.Task, match linux.XTEntryMatch, filter stack.IPHeaderFilter, buf []byte) (stack.Matcher, error) {
matchMaker, ok := matchMakers[match.Name.String()]
if !ok {
return nil, fmt.Errorf("unsupported matcher with name %q", match.Name.String())
}
- return matchMaker.unmarshal(buf, filter)
+ return matchMaker.unmarshal(task, buf, filter)
}
// targetMaker knows how to (un)marshal a target. Once registered,
diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go
index cb78ef60b..d8bd86292 100644
--- a/pkg/sentry/socket/netfilter/ipv4.go
+++ b/pkg/sentry/socket/netfilter/ipv4.go
@@ -18,6 +18,7 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -123,7 +124,7 @@ func getEntries4(table stack.Table, tablename linux.TableName) (linux.KernelIPTG
return entries, info
}
-func modifyEntries4(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) {
+func modifyEntries4(task *kernel.Task, stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) {
nflog("set entries: setting entries in table %q", replace.Name.String())
// Convert input into a list of rules and their offsets.
@@ -148,23 +149,19 @@ func modifyEntries4(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace,
return nil, syserr.ErrInvalidArgument
}
- // TODO(gvisor.dev/issue/170): We should support more IPTIP
- // filtering fields.
filter, err := filterFromIPTIP(entry.IP)
if err != nil {
nflog("bad iptip: %v", err)
return nil, syserr.ErrInvalidArgument
}
- // TODO(gvisor.dev/issue/170): Matchers and targets can specify
- // that they only work for certain protocols, hooks, tables.
// Get matchers.
matchersSize := entry.TargetOffset - linux.SizeOfIPTEntry
if len(optVal) < int(matchersSize) {
nflog("entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal))
return nil, syserr.ErrInvalidArgument
}
- matchers, err := parseMatchers(filter, optVal[:matchersSize])
+ matchers, err := parseMatchers(task, filter, optVal[:matchersSize])
if err != nil {
nflog("failed to parse matchers: %v", err)
return nil, syserr.ErrInvalidArgument
diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go
index 5cb7fe4aa..c68230847 100644
--- a/pkg/sentry/socket/netfilter/ipv6.go
+++ b/pkg/sentry/socket/netfilter/ipv6.go
@@ -18,6 +18,7 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -126,7 +127,7 @@ func getEntries6(table stack.Table, tablename linux.TableName) (linux.KernelIP6T
return entries, info
}
-func modifyEntries6(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) {
+func modifyEntries6(task *kernel.Task, stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) {
nflog("set entries: setting entries in table %q", replace.Name.String())
// Convert input into a list of rules and their offsets.
@@ -151,23 +152,19 @@ func modifyEntries6(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace,
return nil, syserr.ErrInvalidArgument
}
- // TODO(gvisor.dev/issue/170): We should support more IPTIP
- // filtering fields.
filter, err := filterFromIP6TIP(entry.IPv6)
if err != nil {
nflog("bad iptip: %v", err)
return nil, syserr.ErrInvalidArgument
}
- // TODO(gvisor.dev/issue/170): Matchers and targets can specify
- // that they only work for certain protocols, hooks, tables.
// Get matchers.
matchersSize := entry.TargetOffset - linux.SizeOfIP6TEntry
if len(optVal) < int(matchersSize) {
nflog("entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal))
return nil, syserr.ErrInvalidArgument
}
- matchers, err := parseMatchers(filter, optVal[:matchersSize])
+ matchers, err := parseMatchers(task, filter, optVal[:matchersSize])
if err != nil {
nflog("failed to parse matchers: %v", err)
return nil, syserr.ErrInvalidArgument
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index f42d73178..e3eade180 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -58,8 +58,8 @@ var nameToID = map[string]stack.TableID{
// DefaultLinuxTables returns the rules of stack.DefaultTables() wrapped for
// compatibility with netfilter extensions.
-func DefaultLinuxTables() *stack.IPTables {
- tables := stack.DefaultTables()
+func DefaultLinuxTables(seed uint32) *stack.IPTables {
+ tables := stack.DefaultTables(seed)
tables.VisitTargets(func(oldTarget stack.Target) stack.Target {
switch val := oldTarget.(type) {
case *stack.AcceptTarget:
@@ -174,13 +174,12 @@ func setHooksAndUnderflow(info *linux.IPTGetinfo, table stack.Table, offset uint
// SetEntries sets iptables rules for a single table. See
// net/ipv4/netfilter/ip_tables.c:translate_table for reference.
-func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error {
+func SetEntries(task *kernel.Task, stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error {
var replace linux.IPTReplace
replaceBuf := optVal[:linux.SizeOfIPTReplace]
optVal = optVal[linux.SizeOfIPTReplace:]
replace.UnmarshalBytes(replaceBuf)
- // TODO(gvisor.dev/issue/170): Support other tables.
var table stack.Table
switch replace.Name.String() {
case filterTable:
@@ -188,16 +187,16 @@ func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error {
case natTable:
table = stack.EmptyNATTable()
default:
- nflog("we don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String())
+ nflog("unknown iptables table %q", replace.Name.String())
return syserr.ErrInvalidArgument
}
var err *syserr.Error
var offsets map[uint32]int
if ipv6 {
- offsets, err = modifyEntries6(stk, optVal, &replace, &table)
+ offsets, err = modifyEntries6(task, stk, optVal, &replace, &table)
} else {
- offsets, err = modifyEntries4(stk, optVal, &replace, &table)
+ offsets, err = modifyEntries4(task, stk, optVal, &replace, &table)
}
if err != nil {
return err
@@ -272,7 +271,6 @@ func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error {
table.Rules[ruleIdx] = rule
}
- // TODO(gvisor.dev/issue/170): Support other chains.
// Since we don't support FORWARD, yet, make sure all other chains point to
// ACCEPT rules.
for hook, ruleIdx := range table.BuiltinChains {
@@ -287,7 +285,7 @@ func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error {
}
}
- // TODO(gvisor.dev/issue/170): Check the following conditions:
+ // TODO(gvisor.dev/issue/6167): Check the following conditions:
// - There are no loops.
// - There are no chains without an unconditional final rule.
// - There are no chains without an unconditional underflow rule.
@@ -297,7 +295,7 @@ func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error {
// parseMatchers parses 0 or more matchers from optVal. optVal should contain
// only the matchers.
-func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher, error) {
+func parseMatchers(task *kernel.Task, filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher, error) {
nflog("set entries: parsing matchers of size %d", len(optVal))
var matchers []stack.Matcher
for len(optVal) > 0 {
@@ -321,13 +319,13 @@ func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher,
}
// Parse the specific matcher.
- matcher, err := unmarshalMatcher(match, filter, optVal[linux.SizeOfXTEntryMatch:match.MatchSize])
+ matcher, err := unmarshalMatcher(task, match, filter, optVal[linux.SizeOfXTEntryMatch:match.MatchSize])
if err != nil {
return nil, fmt.Errorf("failed to create matcher: %v", err)
}
matchers = append(matchers, matcher)
- // TODO(gvisor.dev/issue/170): Check the revision field.
+ // TODO(gvisor.dev/issue/6167): Check the revision field.
optVal = optVal[match.MatchSize:]
}
diff --git a/pkg/sentry/socket/netfilter/owner_matcher.go b/pkg/sentry/socket/netfilter/owner_matcher.go
index 60845cab3..6eff2ae65 100644
--- a/pkg/sentry/socket/netfilter/owner_matcher.go
+++ b/pkg/sentry/socket/netfilter/owner_matcher.go
@@ -19,6 +19,8 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/marshal"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -40,8 +42,8 @@ func (ownerMarshaler) name() string {
func (ownerMarshaler) marshal(mr matcher) []byte {
matcher := mr.(*OwnerMatcher)
iptOwnerInfo := linux.IPTOwnerInfo{
- UID: matcher.uid,
- GID: matcher.gid,
+ UID: uint32(matcher.uid),
+ GID: uint32(matcher.gid),
}
// Support for UID and GID match.
@@ -63,7 +65,7 @@ func (ownerMarshaler) marshal(mr matcher) []byte {
}
// unmarshal implements matchMaker.unmarshal.
-func (ownerMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Matcher, error) {
+func (ownerMarshaler) unmarshal(task *kernel.Task, buf []byte, filter stack.IPHeaderFilter) (stack.Matcher, error) {
if len(buf) < linux.SizeOfIPTOwnerInfo {
return nil, fmt.Errorf("buf has insufficient size for owner match: %d", len(buf))
}
@@ -72,11 +74,12 @@ func (ownerMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.
// exceed what's strictly necessary to hold matchData.
var matchData linux.IPTOwnerInfo
matchData.UnmarshalUnsafe(buf[:linux.SizeOfIPTOwnerInfo])
- nflog("parseMatchers: parsed IPTOwnerInfo: %+v", matchData)
+ nflog("parsed IPTOwnerInfo: %+v", matchData)
var owner OwnerMatcher
- owner.uid = matchData.UID
- owner.gid = matchData.GID
+ creds := task.Credentials()
+ owner.uid = creds.UserNamespace.MapToKUID(auth.UID(matchData.UID))
+ owner.gid = creds.UserNamespace.MapToKGID(auth.GID(matchData.GID))
// Check flags.
if matchData.Match&linux.XT_OWNER_UID != 0 {
@@ -97,8 +100,8 @@ func (ownerMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.
// OwnerMatcher matches against a UID and/or GID.
type OwnerMatcher struct {
- uid uint32
- gid uint32
+ uid auth.KUID
+ gid auth.KGID
matchUID bool
matchGID bool
invertUID bool
@@ -113,7 +116,6 @@ func (*OwnerMatcher) name() string {
// Match implements Matcher.Match.
func (om *OwnerMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, _, _ string) (bool, bool) {
// Support only for OUTPUT chain.
- // TODO(gvisor.dev/issue/170): Need to support for POSTROUTING chain also.
if hook != stack.Output {
return false, true
}
@@ -126,7 +128,7 @@ func (om *OwnerMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, _, _ str
var matches bool
// Check for UID match.
if om.matchUID {
- if pkt.Owner.UID() == om.uid {
+ if auth.KUID(pkt.Owner.KUID()) == om.uid {
matches = true
}
if matches == om.invertUID {
@@ -137,7 +139,7 @@ func (om *OwnerMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, _, _ str
// Check for GID match.
if om.matchGID {
matches = false
- if pkt.Owner.GID() == om.gid {
+ if auth.KGID(pkt.Owner.KGID()) == om.gid {
matches = true
}
if matches == om.invertGID {
diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go
index fa5456eee..7d83e708f 100644
--- a/pkg/sentry/socket/netfilter/targets.go
+++ b/pkg/sentry/socket/netfilter/targets.go
@@ -331,7 +331,6 @@ func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (
return nil, syserr.ErrInvalidArgument
}
- // TODO(gvisor.dev/issue/170): Check if the flags are valid.
// Also check if we need to map ports or IP.
// For now, redirect target only supports destination port change.
// Port range and IP range are not supported yet.
@@ -340,7 +339,6 @@ func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (
return nil, syserr.ErrInvalidArgument
}
- // TODO(gvisor.dev/issue/170): Port range is not supported yet.
if nfRange.RangeIPV4.MinPort != nfRange.RangeIPV4.MaxPort {
nflog("redirectTargetMaker: MinPort != MaxPort (%d, %d)", nfRange.RangeIPV4.MinPort, nfRange.RangeIPV4.MaxPort)
return nil, syserr.ErrInvalidArgument
@@ -502,7 +500,6 @@ func (*snatTargetMakerV4) unmarshal(buf []byte, filter stack.IPHeaderFilter) (ta
return nil, syserr.ErrInvalidArgument
}
- // TODO(gvisor.dev/issue/170): Port range is not supported yet.
if nfRange.RangeIPV4.MinPort != nfRange.RangeIPV4.MaxPort {
nflog("snatTargetMakerV4: MinPort != MaxPort (%d, %d)", nfRange.RangeIPV4.MinPort, nfRange.RangeIPV4.MaxPort)
return nil, syserr.ErrInvalidArgument
@@ -594,7 +591,6 @@ func (*snatTargetMakerV6) unmarshal(buf []byte, filter stack.IPHeaderFilter) (ta
// translateToStandardTarget translates from the value in a
// linux.XTStandardTarget to an stack.Verdict.
func translateToStandardTarget(val int32, netProto tcpip.NetworkProtocolNumber) (target, *syserr.Error) {
- // TODO(gvisor.dev/issue/170): Support other verdicts.
switch val {
case -linux.NF_ACCEPT - 1:
return &acceptTarget{stack.AcceptTarget{
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
index 95bb9826e..e5b73a976 100644
--- a/pkg/sentry/socket/netfilter/tcp_matcher.go
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/marshal"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -50,7 +51,7 @@ func (tcpMarshaler) marshal(mr matcher) []byte {
}
// unmarshal implements matchMaker.unmarshal.
-func (tcpMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Matcher, error) {
+func (tcpMarshaler) unmarshal(_ *kernel.Task, buf []byte, filter stack.IPHeaderFilter) (stack.Matcher, error) {
if len(buf) < linux.SizeOfXTTCP {
return nil, fmt.Errorf("buf has insufficient size for TCP match: %d", len(buf))
}
@@ -95,8 +96,6 @@ func (*TCPMatcher) name() string {
// Match implements Matcher.Match.
func (tm *TCPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, _, _ string) (bool, bool) {
- // TODO(gvisor.dev/issue/170): Proto checks should ultimately be moved
- // into the stack.Check codepath as matchers are added.
switch pkt.NetworkProtocolNumber {
case header.IPv4ProtocolNumber:
netHeader := header.IPv4(pkt.NetworkHeader().View())
diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go
index fb8be27e6..aa72ee70c 100644
--- a/pkg/sentry/socket/netfilter/udp_matcher.go
+++ b/pkg/sentry/socket/netfilter/udp_matcher.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/marshal"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -50,7 +51,7 @@ func (udpMarshaler) marshal(mr matcher) []byte {
}
// unmarshal implements matchMaker.unmarshal.
-func (udpMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Matcher, error) {
+func (udpMarshaler) unmarshal(_ *kernel.Task, buf []byte, filter stack.IPHeaderFilter) (stack.Matcher, error) {
if len(buf) < linux.SizeOfXTUDP {
return nil, fmt.Errorf("buf has insufficient size for UDP match: %d", len(buf))
}
@@ -92,8 +93,6 @@ func (*UDPMatcher) name() string {
// Match implements Matcher.Match.
func (um *UDPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, _, _ string) (bool, bool) {
- // TODO(gvisor.dev/issue/170): Proto checks should ultimately be moved
- // into the stack.Check codepath as matchers are added.
switch pkt.NetworkProtocolNumber {
case header.IPv4ProtocolNumber:
netHeader := header.IPv4(pkt.NetworkHeader().View())
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 0b64a24c3..11ba80497 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -77,41 +77,59 @@ func mustCreateGauge(name, description string) *tcpip.StatCounter {
// Metrics contains metrics exported by netstack.
var Metrics = tcpip.Stats{
- UnknownProtocolRcvdPackets: mustCreateMetric("/netstack/unknown_protocol_received_packets", "Number of packets received by netstack that were for an unknown or unsupported protocol."),
- MalformedRcvdPackets: mustCreateMetric("/netstack/malformed_received_packets", "Number of packets received by netstack that were deemed malformed."),
- DroppedPackets: mustCreateMetric("/netstack/dropped_packets", "Number of packets dropped by netstack due to full queues."),
+ DroppedPackets: mustCreateMetric("/netstack/dropped_packets", "Number of packets dropped at the transport layer."),
+ NICs: tcpip.NICStats{
+ UnknownL3ProtocolRcvdPackets: mustCreateMetric("/netstack/nic/unknown_l3_protocol_received_packets", "Number of packets received that were for an unknown or unsupported L3 protocol."),
+ UnknownL4ProtocolRcvdPackets: mustCreateMetric("/netstack/nic/unknown_l4_protocol_received_packets", "Number of packets received that were for an unknown or unsupported L4 protocol."),
+ MalformedL4RcvdPackets: mustCreateMetric("/netstack/nic/malformed_l4_received_packets", "Number of packets received that failed L4 header parsing."),
+ Tx: tcpip.NICPacketStats{
+ Packets: mustCreateMetric("/netstack/nic/tx/packets", "Number of packets transmitted."),
+ Bytes: mustCreateMetric("/netstack/nic/tx/bytes", "Number of bytes transmitted."),
+ },
+ Rx: tcpip.NICPacketStats{
+ Packets: mustCreateMetric("/netstack/nic/rx/packets", "Number of packets received."),
+ Bytes: mustCreateMetric("/netstack/nic/rx/bytes", "Number of bytes received."),
+ },
+ DisabledRx: tcpip.NICPacketStats{
+ Packets: mustCreateMetric("/netstack/nic/disabled_rx/packets", "Number of packets received on disabled NICs."),
+ Bytes: mustCreateMetric("/netstack/nic/disabled_rx/bytes", "Number of bytes received on disabled NICs."),
+ },
+ Neighbor: tcpip.NICNeighborStats{
+ UnreachableEntryLookups: mustCreateMetric("/netstack/nic/neighbor/unreachable_entry_loopups", "Number of lookups performed on a neighbor entry in Unreachable state."),
+ },
+ },
ICMP: tcpip.ICMPStats{
V4: tcpip.ICMPv4Stats{
PacketsSent: tcpip.ICMPv4SentPacketStats{
ICMPv4PacketStats: tcpip.ICMPv4PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_request", "Number of ICMPv4 echo request packets sent by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_reply", "Number of ICMPv4 echo reply packets sent by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_sent/dst_unreachable", "Number of ICMPv4 destination unreachable packets sent by netstack."),
- SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_sent/src_quench", "Number of ICMPv4 source quench packets sent by netstack."),
- Redirect: mustCreateMetric("/netstack/icmp/v4/packets_sent/redirect", "Number of ICMPv4 redirect packets sent by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_sent/time_exceeded", "Number of ICMPv4 time exceeded packets sent by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_sent/param_problem", "Number of ICMPv4 parameter problem packets sent by netstack."),
- Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp", "Number of ICMPv4 timestamp packets sent by netstack."),
- TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp_reply", "Number of ICMPv4 timestamp reply packets sent by netstack."),
- InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_request", "Number of ICMPv4 information request packets sent by netstack."),
- InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_reply", "Number of ICMPv4 information reply packets sent by netstack."),
+ EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_request", "Number of ICMPv4 echo request packets sent."),
+ EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_reply", "Number of ICMPv4 echo reply packets sent."),
+ DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_sent/dst_unreachable", "Number of ICMPv4 destination unreachable packets sent."),
+ SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_sent/src_quench", "Number of ICMPv4 source quench packets sent."),
+ Redirect: mustCreateMetric("/netstack/icmp/v4/packets_sent/redirect", "Number of ICMPv4 redirect packets sent."),
+ TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_sent/time_exceeded", "Number of ICMPv4 time exceeded packets sent."),
+ ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_sent/param_problem", "Number of ICMPv4 parameter problem packets sent."),
+ Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp", "Number of ICMPv4 timestamp packets sent."),
+ TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp_reply", "Number of ICMPv4 timestamp reply packets sent."),
+ InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_request", "Number of ICMPv4 information request packets sent."),
+ InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_reply", "Number of ICMPv4 information reply packets sent."),
},
- Dropped: mustCreateMetric("/netstack/icmp/v4/packets_sent/dropped", "Number of ICMPv4 packets dropped by netstack due to link layer errors."),
- RateLimited: mustCreateMetric("/netstack/icmp/v4/packets_sent/rate_limited", "Number of ICMPv4 packets dropped by netstack due to rate limit being exceeded."),
+ Dropped: mustCreateMetric("/netstack/icmp/v4/packets_sent/dropped", "Number of ICMPv4 packets dropped due to link layer errors."),
+ RateLimited: mustCreateMetric("/netstack/icmp/v4/packets_sent/rate_limited", "Number of ICMPv4 packets dropped due to rate limit being exceeded."),
},
PacketsReceived: tcpip.ICMPv4ReceivedPacketStats{
ICMPv4PacketStats: tcpip.ICMPv4PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_request", "Number of ICMPv4 echo request packets received by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_reply", "Number of ICMPv4 echo reply packets received by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_received/dst_unreachable", "Number of ICMPv4 destination unreachable packets received by netstack."),
- SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_received/src_quench", "Number of ICMPv4 source quench packets received by netstack."),
- Redirect: mustCreateMetric("/netstack/icmp/v4/packets_received/redirect", "Number of ICMPv4 redirect packets received by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_received/time_exceeded", "Number of ICMPv4 time exceeded packets received by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_received/param_problem", "Number of ICMPv4 parameter problem packets received by netstack."),
- Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp", "Number of ICMPv4 timestamp packets received by netstack."),
- TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp_reply", "Number of ICMPv4 timestamp reply packets received by netstack."),
- InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/info_request", "Number of ICMPv4 information request packets received by netstack."),
- InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/info_reply", "Number of ICMPv4 information reply packets received by netstack."),
+ EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_request", "Number of ICMPv4 echo request packets received."),
+ EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_reply", "Number of ICMPv4 echo reply packets received."),
+ DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_received/dst_unreachable", "Number of ICMPv4 destination unreachable packets received."),
+ SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_received/src_quench", "Number of ICMPv4 source quench packets received."),
+ Redirect: mustCreateMetric("/netstack/icmp/v4/packets_received/redirect", "Number of ICMPv4 redirect packets received."),
+ TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_received/time_exceeded", "Number of ICMPv4 time exceeded packets received."),
+ ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_received/param_problem", "Number of ICMPv4 parameter problem packets received."),
+ Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp", "Number of ICMPv4 timestamp packets received."),
+ TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp_reply", "Number of ICMPv4 timestamp reply packets received."),
+ InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/info_request", "Number of ICMPv4 information request packets received."),
+ InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/info_reply", "Number of ICMPv4 information reply packets received."),
},
Invalid: mustCreateMetric("/netstack/icmp/v4/packets_received/invalid", "Number of ICMPv4 packets received that the transport layer could not parse."),
},
@@ -119,40 +137,40 @@ var Metrics = tcpip.Stats{
V6: tcpip.ICMPv6Stats{
PacketsSent: tcpip.ICMPv6SentPacketStats{
ICMPv6PacketStats: tcpip.ICMPv6PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_request", "Number of ICMPv6 echo request packets sent by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_reply", "Number of ICMPv6 echo reply packets sent by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_sent/dst_unreachable", "Number of ICMPv6 destination unreachable packets sent by netstack."),
- PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_sent/packet_too_big", "Number of ICMPv6 packet too big packets sent by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_sent/time_exceeded", "Number of ICMPv6 time exceeded packets sent by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_sent/param_problem", "Number of ICMPv6 parameter problem packets sent by netstack."),
- RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_solicit", "Number of ICMPv6 router solicit packets sent by netstack."),
- RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_advert", "Number of ICMPv6 router advert packets sent by netstack."),
- NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets sent by netstack."),
- NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_advert", "Number of ICMPv6 neighbor advert packets sent by netstack."),
- RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_sent/redirect_msg", "Number of ICMPv6 redirect message packets sent by netstack."),
- MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_query", "Number of ICMPv6 multicast listener query packets sent by netstack."),
- MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent by netstack."),
- MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent by netstack."),
+ EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_request", "Number of ICMPv6 echo request packets sent."),
+ EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_reply", "Number of ICMPv6 echo reply packets sent."),
+ DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_sent/dst_unreachable", "Number of ICMPv6 destination unreachable packets sent."),
+ PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_sent/packet_too_big", "Number of ICMPv6 packet too big packets sent."),
+ TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_sent/time_exceeded", "Number of ICMPv6 time exceeded packets sent."),
+ ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_sent/param_problem", "Number of ICMPv6 parameter problem packets sent."),
+ RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_solicit", "Number of ICMPv6 router solicit packets sent."),
+ RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_advert", "Number of ICMPv6 router advert packets sent."),
+ NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets sent."),
+ NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_advert", "Number of ICMPv6 neighbor advert packets sent."),
+ RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_sent/redirect_msg", "Number of ICMPv6 redirect message packets sent."),
+ MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_query", "Number of ICMPv6 multicast listener query packets sent."),
+ MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent."),
+ MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent."),
},
- Dropped: mustCreateMetric("/netstack/icmp/v6/packets_sent/dropped", "Number of ICMPv6 packets dropped by netstack due to link layer errors."),
- RateLimited: mustCreateMetric("/netstack/icmp/v6/packets_sent/rate_limited", "Number of ICMPv6 packets dropped by netstack due to rate limit being exceeded."),
+ Dropped: mustCreateMetric("/netstack/icmp/v6/packets_sent/dropped", "Number of ICMPv6 packets dropped due to link layer errors."),
+ RateLimited: mustCreateMetric("/netstack/icmp/v6/packets_sent/rate_limited", "Number of ICMPv6 packets dropped due to rate limit being exceeded."),
},
PacketsReceived: tcpip.ICMPv6ReceivedPacketStats{
ICMPv6PacketStats: tcpip.ICMPv6PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_request", "Number of ICMPv6 echo request packets received by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_reply", "Number of ICMPv6 echo reply packets received by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_received/dst_unreachable", "Number of ICMPv6 destination unreachable packets received by netstack."),
- PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_received/packet_too_big", "Number of ICMPv6 packet too big packets received by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_received/time_exceeded", "Number of ICMPv6 time exceeded packets received by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_received/param_problem", "Number of ICMPv6 parameter problem packets received by netstack."),
- RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/router_solicit", "Number of ICMPv6 router solicit packets received by netstack."),
- RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/router_advert", "Number of ICMPv6 router advert packets received by netstack."),
- NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets received by netstack."),
- NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_advert", "Number of ICMPv6 neighbor advert packets received by netstack."),
- RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_received/redirect_msg", "Number of ICMPv6 redirect message packets received by netstack."),
- MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_query", "Number of ICMPv6 multicast listener query packets received by netstack."),
- MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent by netstack."),
- MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent by netstack."),
+ EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_request", "Number of ICMPv6 echo request packets received."),
+ EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_reply", "Number of ICMPv6 echo reply packets received."),
+ DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_received/dst_unreachable", "Number of ICMPv6 destination unreachable packets received."),
+ PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_received/packet_too_big", "Number of ICMPv6 packet too big packets received."),
+ TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_received/time_exceeded", "Number of ICMPv6 time exceeded packets received."),
+ ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_received/param_problem", "Number of ICMPv6 parameter problem packets received."),
+ RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/router_solicit", "Number of ICMPv6 router solicit packets received."),
+ RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/router_advert", "Number of ICMPv6 router advert packets received."),
+ NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets received."),
+ NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_advert", "Number of ICMPv6 neighbor advert packets received."),
+ RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_received/redirect_msg", "Number of ICMPv6 redirect message packets received."),
+ MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_query", "Number of ICMPv6 multicast listener query packets received."),
+ MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent."),
+ MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent."),
},
Unrecognized: mustCreateMetric("/netstack/icmp/v6/packets_received/unrecognized", "Number of ICMPv6 packets received that the transport layer does not know how to parse."),
Invalid: mustCreateMetric("/netstack/icmp/v6/packets_received/invalid", "Number of ICMPv6 packets received that the transport layer could not parse."),
@@ -163,23 +181,23 @@ var Metrics = tcpip.Stats{
IGMP: tcpip.IGMPStats{
PacketsSent: tcpip.IGMPSentPacketStats{
IGMPPacketStats: tcpip.IGMPPacketStats{
- MembershipQuery: mustCreateMetric("/netstack/igmp/packets_sent/membership_query", "Number of IGMP Membership Query messages sent by netstack."),
- V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v1_membership_report", "Number of IGMPv1 Membership Report messages sent by netstack."),
- V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v2_membership_report", "Number of IGMPv2 Membership Report messages sent by netstack."),
- LeaveGroup: mustCreateMetric("/netstack/igmp/packets_sent/leave_group", "Number of IGMP Leave Group messages sent by netstack."),
+ MembershipQuery: mustCreateMetric("/netstack/igmp/packets_sent/membership_query", "Number of IGMP Membership Query messages sent."),
+ V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v1_membership_report", "Number of IGMPv1 Membership Report messages sent."),
+ V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v2_membership_report", "Number of IGMPv2 Membership Report messages sent."),
+ LeaveGroup: mustCreateMetric("/netstack/igmp/packets_sent/leave_group", "Number of IGMP Leave Group messages sent."),
},
- Dropped: mustCreateMetric("/netstack/igmp/packets_sent/dropped", "Number of IGMP packets dropped by netstack due to link layer errors."),
+ Dropped: mustCreateMetric("/netstack/igmp/packets_sent/dropped", "Number of IGMP packets dropped due to link layer errors."),
},
PacketsReceived: tcpip.IGMPReceivedPacketStats{
IGMPPacketStats: tcpip.IGMPPacketStats{
- MembershipQuery: mustCreateMetric("/netstack/igmp/packets_received/membership_query", "Number of IGMP Membership Query messages received by netstack."),
- V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v1_membership_report", "Number of IGMPv1 Membership Report messages received by netstack."),
- V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v2_membership_report", "Number of IGMPv2 Membership Report messages received by netstack."),
- LeaveGroup: mustCreateMetric("/netstack/igmp/packets_received/leave_group", "Number of IGMP Leave Group messages received by netstack."),
+ MembershipQuery: mustCreateMetric("/netstack/igmp/packets_received/membership_query", "Number of IGMP Membership Query messages received."),
+ V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v1_membership_report", "Number of IGMPv1 Membership Report messages received."),
+ V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v2_membership_report", "Number of IGMPv2 Membership Report messages received."),
+ LeaveGroup: mustCreateMetric("/netstack/igmp/packets_received/leave_group", "Number of IGMP Leave Group messages received."),
},
- Invalid: mustCreateMetric("/netstack/igmp/packets_received/invalid", "Number of IGMP packets received by netstack that could not be parsed."),
+ Invalid: mustCreateMetric("/netstack/igmp/packets_received/invalid", "Number of IGMP packets received that could not be parsed."),
ChecksumErrors: mustCreateMetric("/netstack/igmp/packets_received/checksum_errors", "Number of received IGMP packets with bad checksums."),
- Unrecognized: mustCreateMetric("/netstack/igmp/packets_received/unrecognized", "Number of unrecognized IGMP packets received by netstack."),
+ Unrecognized: mustCreateMetric("/netstack/igmp/packets_received/unrecognized", "Number of unrecognized IGMP packets received."),
},
},
IP: tcpip.IPStats{
@@ -205,7 +223,8 @@ var Metrics = tcpip.Stats{
LinkLocalSource: mustCreateMetric("/netstack/ip/forwarding/link_local_source_address", "Number of IP packets received which could not be forwarded due to a link-local source address."),
LinkLocalDestination: mustCreateMetric("/netstack/ip/forwarding/link_local_destination_address", "Number of IP packets received which could not be forwarded due to a link-local destination address."),
ExtensionHeaderProblem: mustCreateMetric("/netstack/ip/forwarding/extension_header_problem", "Number of IP packets received which could not be forwarded due to a problem processing their IPv6 extension headers."),
- PacketTooBig: mustCreateMetric("/netstack/ip/forwarding/packet_too_big", "Number of IP packets received which could not fit within the outgoing MTU."),
+ PacketTooBig: mustCreateMetric("/netstack/ip/forwarding/packet_too_big", "Number of IP packets received which could not be forwarded because they could not fit within the outgoing MTU."),
+ HostUnreachable: mustCreateMetric("/netstack/ip/forwarding/host_unreachable", "Number of IP packets received which could not be forwarded due to unresolvable next hop."),
Errors: mustCreateMetric("/netstack/ip/forwarding/errors", "Number of IP packets which couldn't be forwarded."),
},
},
@@ -1126,7 +1145,14 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name,
// TODO(b/64800844): Translate fields once they are added to
// tcpip.TCPInfoOption.
- info := linux.TCPInfo{}
+ info := linux.TCPInfo{
+ State: uint8(v.State),
+ RTO: uint32(v.RTO / time.Microsecond),
+ RTT: uint32(v.RTT / time.Microsecond),
+ RTTVar: uint32(v.RTTVar / time.Microsecond),
+ SndSsthresh: v.SndSsthresh,
+ SndCwnd: v.SndCwnd,
+ }
switch v.CcState {
case tcpip.RTORecovery:
info.CaState = linux.TCP_CA_Loss
@@ -1137,11 +1163,6 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name,
case tcpip.Open:
info.CaState = linux.TCP_CA_Open
}
- info.RTO = uint32(v.RTO / time.Microsecond)
- info.RTT = uint32(v.RTT / time.Microsecond)
- info.RTTVar = uint32(v.RTTVar / time.Microsecond)
- info.SndSsthresh = v.SndSsthresh
- info.SndCwnd = v.SndCwnd
// In netstack reorderSeen is updated only when RACK is enabled.
// We only track whether the reordering is seen, which is
@@ -1777,11 +1798,6 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
}
v := hostarch.ByteOrder.Uint32(optVal)
-
- if v == 0 {
- socket.SetSockOptEmitUnimplementedEvent(t, name)
- }
-
ep.SocketOptions().SetOutOfBandInline(v != 0)
return nil
@@ -2089,10 +2105,10 @@ func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
return syserr.ErrNoDevice
}
// Stack must be a netstack stack.
- return netfilter.SetEntries(stack.(*Stack).Stack, optVal, true)
+ return netfilter.SetEntries(t, stack.(*Stack).Stack, optVal, true)
case linux.IP6T_SO_SET_ADD_COUNTERS:
- // TODO(gvisor.dev/issue/170): Counter support.
+ log.Infof("IP6T_SO_SET_ADD_COUNTERS is not supported")
return nil
default:
@@ -2332,10 +2348,10 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
return syserr.ErrNoDevice
}
// Stack must be a netstack stack.
- return netfilter.SetEntries(stack.(*Stack).Stack, optVal, false)
+ return netfilter.SetEntries(t, stack.(*Stack).Stack, optVal, false)
case linux.IPT_SO_SET_ADD_COUNTERS:
- // TODO(gvisor.dev/issue/170): Counter support.
+ log.Infof("IPT_SO_SET_ADD_COUNTERS is not supported")
return nil
case linux.IP_ADD_SOURCE_MEMBERSHIP,
diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go
index 9cc1c57d7..eef5e6519 100644
--- a/pkg/sentry/socket/netstack/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -458,16 +458,6 @@ func (s *Stack) RestoreCleanupEndpoints(es []stack.TransportEndpoint) {
s.Stack.RestoreCleanupEndpoints(es)
}
-// Forwarding implements inet.Stack.Forwarding.
-func (s *Stack) Forwarding(protocol tcpip.NetworkProtocolNumber) bool {
- switch protocol {
- case ipv4.ProtocolNumber, ipv6.ProtocolNumber:
- return s.Stack.Forwarding(protocol)
- default:
- panic(fmt.Sprintf("Forwarding(%v) failed: unsupported protocol", protocol))
- }
-}
-
// SetForwarding implements inet.Stack.SetForwarding.
func (s *Stack) SetForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) error {
if err := s.Stack.SetForwardingDefaultAndAllNICs(protocol, enable); err != nil {
diff --git a/pkg/sentry/socket/netstack/tun.go b/pkg/sentry/socket/netstack/tun.go
index 288dd0c9e..c7ed52702 100644
--- a/pkg/sentry/socket/netstack/tun.go
+++ b/pkg/sentry/socket/netstack/tun.go
@@ -40,7 +40,7 @@ func LinuxToTUNFlags(flags uint16) (tun.Flags, error) {
// Linux adds IFF_NOFILTER (the same value as IFF_NO_PI unfortunately)
// when there is no sk_filter. See __tun_chr_ioctl() in
// net/drivers/tun.c.
- if flags&^uint16(linux.IFF_TUN|linux.IFF_TAP|linux.IFF_NO_PI) != 0 {
+ if flags&^uint16(linux.IFF_TUN|linux.IFF_TAP|linux.IFF_NO_PI|linux.IFF_ONE_QUEUE) != 0 {
return tun.Flags{}, syserror.EINVAL
}
return tun.Flags{
diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD
index 1fbbd133c..369541c7a 100644
--- a/pkg/sentry/strace/BUILD
+++ b/pkg/sentry/strace/BUILD
@@ -11,6 +11,7 @@ go_library(
"futex.go",
"linux64_amd64.go",
"linux64_arm64.go",
+ "mmap.go",
"open.go",
"poll.go",
"ptrace.go",
@@ -35,7 +36,6 @@ go_library(
"//pkg/sentry/socket",
"//pkg/sentry/socket/netlink",
"//pkg/sentry/syscalls/linux",
- "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/strace/clone.go b/pkg/sentry/strace/clone.go
index ab1060426..bfb4d7f5c 100644
--- a/pkg/sentry/strace/clone.go
+++ b/pkg/sentry/strace/clone.go
@@ -15,98 +15,98 @@
package strace
import (
- "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// CloneFlagSet is the set of clone(2) flags.
var CloneFlagSet = abi.FlagSet{
{
- Flag: unix.CLONE_VM,
+ Flag: linux.CLONE_VM,
Name: "CLONE_VM",
},
{
- Flag: unix.CLONE_FS,
+ Flag: linux.CLONE_FS,
Name: "CLONE_FS",
},
{
- Flag: unix.CLONE_FILES,
+ Flag: linux.CLONE_FILES,
Name: "CLONE_FILES",
},
{
- Flag: unix.CLONE_SIGHAND,
+ Flag: linux.CLONE_SIGHAND,
Name: "CLONE_SIGHAND",
},
{
- Flag: unix.CLONE_PTRACE,
+ Flag: linux.CLONE_PTRACE,
Name: "CLONE_PTRACE",
},
{
- Flag: unix.CLONE_VFORK,
+ Flag: linux.CLONE_VFORK,
Name: "CLONE_VFORK",
},
{
- Flag: unix.CLONE_PARENT,
+ Flag: linux.CLONE_PARENT,
Name: "CLONE_PARENT",
},
{
- Flag: unix.CLONE_THREAD,
+ Flag: linux.CLONE_THREAD,
Name: "CLONE_THREAD",
},
{
- Flag: unix.CLONE_NEWNS,
+ Flag: linux.CLONE_NEWNS,
Name: "CLONE_NEWNS",
},
{
- Flag: unix.CLONE_SYSVSEM,
+ Flag: linux.CLONE_SYSVSEM,
Name: "CLONE_SYSVSEM",
},
{
- Flag: unix.CLONE_SETTLS,
+ Flag: linux.CLONE_SETTLS,
Name: "CLONE_SETTLS",
},
{
- Flag: unix.CLONE_PARENT_SETTID,
+ Flag: linux.CLONE_PARENT_SETTID,
Name: "CLONE_PARENT_SETTID",
},
{
- Flag: unix.CLONE_CHILD_CLEARTID,
+ Flag: linux.CLONE_CHILD_CLEARTID,
Name: "CLONE_CHILD_CLEARTID",
},
{
- Flag: unix.CLONE_DETACHED,
+ Flag: linux.CLONE_DETACHED,
Name: "CLONE_DETACHED",
},
{
- Flag: unix.CLONE_UNTRACED,
+ Flag: linux.CLONE_UNTRACED,
Name: "CLONE_UNTRACED",
},
{
- Flag: unix.CLONE_CHILD_SETTID,
+ Flag: linux.CLONE_CHILD_SETTID,
Name: "CLONE_CHILD_SETTID",
},
{
- Flag: unix.CLONE_NEWUTS,
+ Flag: linux.CLONE_NEWUTS,
Name: "CLONE_NEWUTS",
},
{
- Flag: unix.CLONE_NEWIPC,
+ Flag: linux.CLONE_NEWIPC,
Name: "CLONE_NEWIPC",
},
{
- Flag: unix.CLONE_NEWUSER,
+ Flag: linux.CLONE_NEWUSER,
Name: "CLONE_NEWUSER",
},
{
- Flag: unix.CLONE_NEWPID,
+ Flag: linux.CLONE_NEWPID,
Name: "CLONE_NEWPID",
},
{
- Flag: unix.CLONE_NEWNET,
+ Flag: linux.CLONE_NEWNET,
Name: "CLONE_NEWNET",
},
{
- Flag: unix.CLONE_IO,
+ Flag: linux.CLONE_IO,
Name: "CLONE_IO",
},
}
diff --git a/pkg/sentry/strace/linux64_amd64.go b/pkg/sentry/strace/linux64_amd64.go
index d66befe81..6ce1bb592 100644
--- a/pkg/sentry/strace/linux64_amd64.go
+++ b/pkg/sentry/strace/linux64_amd64.go
@@ -33,7 +33,7 @@ var linuxAMD64 = SyscallMap{
6: makeSyscallInfo("lstat", Path, Stat),
7: makeSyscallInfo("poll", PollFDs, Hex, Hex),
8: makeSyscallInfo("lseek", Hex, Hex, Hex),
- 9: makeSyscallInfo("mmap", Hex, Hex, Hex, Hex, FD, Hex),
+ 9: makeSyscallInfo("mmap", Hex, Hex, MmapProt, MmapFlags, FD, Hex),
10: makeSyscallInfo("mprotect", Hex, Hex, Hex),
11: makeSyscallInfo("munmap", Hex, Hex),
12: makeSyscallInfo("brk", Hex),
diff --git a/pkg/sentry/strace/linux64_arm64.go b/pkg/sentry/strace/linux64_arm64.go
index 1a2d7d75f..ce5594301 100644
--- a/pkg/sentry/strace/linux64_arm64.go
+++ b/pkg/sentry/strace/linux64_arm64.go
@@ -246,7 +246,7 @@ var linuxARM64 = SyscallMap{
219: makeSyscallInfo("keyctl", Hex, Hex, Hex, Hex, Hex),
220: makeSyscallInfo("clone", CloneFlags, Hex, Hex, Hex, Hex),
221: makeSyscallInfo("execve", Path, ExecveStringVector, ExecveStringVector),
- 222: makeSyscallInfo("mmap", Hex, Hex, Hex, Hex, FD, Hex),
+ 222: makeSyscallInfo("mmap", Hex, Hex, MmapProt, MmapFlags, FD, Hex),
223: makeSyscallInfo("fadvise64", FD, Hex, Hex, Hex),
224: makeSyscallInfo("swapon", Hex, Hex),
225: makeSyscallInfo("swapoff", Hex),
diff --git a/pkg/sentry/strace/mmap.go b/pkg/sentry/strace/mmap.go
new file mode 100644
index 000000000..0035be586
--- /dev/null
+++ b/pkg/sentry/strace/mmap.go
@@ -0,0 +1,92 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package strace
+
+import (
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+)
+
+// ProtectionFlagSet represents the protection to mmap(2).
+var ProtectionFlagSet = abi.FlagSet{
+ {
+ Flag: linux.PROT_READ,
+ Name: "PROT_READ",
+ },
+ {
+ Flag: linux.PROT_WRITE,
+ Name: "PROT_WRITE",
+ },
+ {
+ Flag: linux.PROT_EXEC,
+ Name: "PROT_EXEC",
+ },
+}
+
+// MmapFlagSet is the set of mmap(2) flags.
+var MmapFlagSet = abi.FlagSet{
+ {
+ Flag: linux.MAP_SHARED,
+ Name: "MAP_SHARED",
+ },
+ {
+ Flag: linux.MAP_PRIVATE,
+ Name: "MAP_PRIVATE",
+ },
+ {
+ Flag: linux.MAP_FIXED,
+ Name: "MAP_FIXED",
+ },
+ {
+ Flag: linux.MAP_ANONYMOUS,
+ Name: "MAP_ANONYMOUS",
+ },
+ {
+ Flag: linux.MAP_GROWSDOWN,
+ Name: "MAP_GROWSDOWN",
+ },
+ {
+ Flag: linux.MAP_DENYWRITE,
+ Name: "MAP_DENYWRITE",
+ },
+ {
+ Flag: linux.MAP_EXECUTABLE,
+ Name: "MAP_EXECUTABLE",
+ },
+ {
+ Flag: linux.MAP_LOCKED,
+ Name: "MAP_LOCKED",
+ },
+ {
+ Flag: linux.MAP_NORESERVE,
+ Name: "MAP_NORESERVE",
+ },
+ {
+ Flag: linux.MAP_POPULATE,
+ Name: "MAP_POPULATE",
+ },
+ {
+ Flag: linux.MAP_NONBLOCK,
+ Name: "MAP_NONBLOCK",
+ },
+ {
+ Flag: linux.MAP_STACK,
+ Name: "MAP_STACK",
+ },
+ {
+ Flag: linux.MAP_HUGETLB,
+ Name: "MAP_HUGETLB",
+ },
+}
diff --git a/pkg/sentry/strace/open.go b/pkg/sentry/strace/open.go
index 5769360da..e7c7649f4 100644
--- a/pkg/sentry/strace/open.go
+++ b/pkg/sentry/strace/open.go
@@ -15,61 +15,61 @@
package strace
import (
- "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// OpenMode represents the mode to open(2) a file.
var OpenMode = abi.ValueSet{
- unix.O_RDWR: "O_RDWR",
- unix.O_WRONLY: "O_WRONLY",
- unix.O_RDONLY: "O_RDONLY",
+ linux.O_RDWR: "O_RDWR",
+ linux.O_WRONLY: "O_WRONLY",
+ linux.O_RDONLY: "O_RDONLY",
}
// OpenFlagSet is the set of open(2) flags.
var OpenFlagSet = abi.FlagSet{
{
- Flag: unix.O_APPEND,
+ Flag: linux.O_APPEND,
Name: "O_APPEND",
},
{
- Flag: unix.O_ASYNC,
+ Flag: linux.O_ASYNC,
Name: "O_ASYNC",
},
{
- Flag: unix.O_CLOEXEC,
+ Flag: linux.O_CLOEXEC,
Name: "O_CLOEXEC",
},
{
- Flag: unix.O_CREAT,
+ Flag: linux.O_CREAT,
Name: "O_CREAT",
},
{
- Flag: unix.O_DIRECT,
+ Flag: linux.O_DIRECT,
Name: "O_DIRECT",
},
{
- Flag: unix.O_DIRECTORY,
+ Flag: linux.O_DIRECTORY,
Name: "O_DIRECTORY",
},
{
- Flag: unix.O_EXCL,
+ Flag: linux.O_EXCL,
Name: "O_EXCL",
},
{
- Flag: unix.O_NOATIME,
+ Flag: linux.O_NOATIME,
Name: "O_NOATIME",
},
{
- Flag: unix.O_NOCTTY,
+ Flag: linux.O_NOCTTY,
Name: "O_NOCTTY",
},
{
- Flag: unix.O_NOFOLLOW,
+ Flag: linux.O_NOFOLLOW,
Name: "O_NOFOLLOW",
},
{
- Flag: unix.O_NONBLOCK,
+ Flag: linux.O_NONBLOCK,
Name: "O_NONBLOCK",
},
{
@@ -77,18 +77,22 @@ var OpenFlagSet = abi.FlagSet{
Name: "O_PATH",
},
{
- Flag: unix.O_SYNC,
+ Flag: linux.O_SYNC,
Name: "O_SYNC",
},
{
- Flag: unix.O_TRUNC,
+ Flag: linux.O_TMPFILE,
+ Name: "O_TMPFILE",
+ },
+ {
+ Flag: linux.O_TRUNC,
Name: "O_TRUNC",
},
}
func open(val uint64) string {
- s := OpenMode.Parse(val & unix.O_ACCMODE)
- if flags := OpenFlagSet.Parse(val &^ unix.O_ACCMODE); flags != "" {
+ s := OpenMode.Parse(val & linux.O_ACCMODE)
+ if flags := OpenFlagSet.Parse(val &^ linux.O_ACCMODE); flags != "" {
s += "|" + flags
}
return s
diff --git a/pkg/sentry/strace/signal.go b/pkg/sentry/strace/signal.go
index e5b379a20..5afc9525b 100644
--- a/pkg/sentry/strace/signal.go
+++ b/pkg/sentry/strace/signal.go
@@ -130,8 +130,8 @@ func sigAction(t *kernel.Task, addr hostarch.Addr) string {
return "null"
}
- sa, err := t.CopyInSignalAct(addr)
- if err != nil {
+ var sa linux.SigAction
+ if _, err := sa.CopyIn(t, addr); err != nil {
return fmt.Sprintf("%#x (error copying sigaction: %v)", addr, err)
}
diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go
index ec5d5f846..af7088847 100644
--- a/pkg/sentry/strace/strace.go
+++ b/pkg/sentry/strace/strace.go
@@ -489,6 +489,10 @@ func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlo
output = append(output, epollEvents(t, args[arg].Pointer(), 0 /* numEvents */, uint64(maximumBlobSize)))
case SelectFDSet:
output = append(output, fdSet(t, int(args[0].Int()), args[arg].Pointer()))
+ case MmapProt:
+ output = append(output, ProtectionFlagSet.Parse(uint64(args[arg].Uint())))
+ case MmapFlags:
+ output = append(output, MmapFlagSet.Parse(uint64(args[arg].Uint())))
case Oct:
output = append(output, "0o"+strconv.FormatUint(args[arg].Uint64(), 8))
case Hex:
diff --git a/pkg/sentry/strace/syscalls.go b/pkg/sentry/strace/syscalls.go
index 7e69b9279..5893443a7 100644
--- a/pkg/sentry/strace/syscalls.go
+++ b/pkg/sentry/strace/syscalls.go
@@ -238,6 +238,12 @@ const (
// EpollEvents is an array of struct epoll_event. It is the events
// argument in epoll_wait(2)/epoll_pwait(2).
EpollEvents
+
+ // MmapProt is the protection argument in mmap(2).
+ MmapProt
+
+ // MmapFlags is the flags argument in mmap(2).
+ MmapFlags
)
// defaultFormat is the syscall argument format to use if the actual format is
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 9cd238efd..90a719ba2 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -1569,9 +1569,9 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
if uint64(length) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur {
- t.SendSignal(&arch.SignalInfo{
+ t.SendSignal(&linux.SignalInfo{
Signo: int32(linux.SIGXFSZ),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
})
return 0, nil, syserror.EFBIG
}
@@ -1632,9 +1632,9 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
if uint64(length) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur {
- t.SendSignal(&arch.SignalInfo{
+ t.SendSignal(&linux.SignalInfo{
Signo: int32(linux.SIGXFSZ),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
})
return 0, nil, syserror.EFBIG
}
@@ -1673,9 +1673,11 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
if err != nil {
return err
}
+
c := t.Credentials()
hasCap := d.Inode.CheckCapability(t, linux.CAP_CHOWN)
isOwner := uattr.Owner.UID == c.EffectiveKUID
+ var clearPrivilege bool
if uid.Ok() {
kuid := c.UserNamespace.MapToKUID(uid)
// Valid UID must be supplied if UID is to be changed.
@@ -1693,6 +1695,11 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
return syserror.EPERM
}
+ // The setuid and setgid bits are cleared during a chown.
+ if uattr.Owner.UID != kuid {
+ clearPrivilege = true
+ }
+
owner.UID = kuid
}
if gid.Ok() {
@@ -1711,6 +1718,11 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
return syserror.EPERM
}
+ // The setuid and setgid bits are cleared during a chown.
+ if uattr.Owner.GID != kgid {
+ clearPrivilege = true
+ }
+
owner.GID = kgid
}
@@ -1721,10 +1733,14 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
if err := d.Inode.SetOwner(t, d, owner); err != nil {
return err
}
+ // Clear privilege bits if needed and they are set.
+ if clearPrivilege && uattr.Perms.HasSetUIDOrGID() && !fs.IsDir(d.Inode.StableAttr) {
+ uattr.Perms.DropSetUIDAndMaybeGID()
+ if !d.Inode.SetPermissions(t, d, uattr.Perms) {
+ return syserror.EPERM
+ }
+ }
- // When the owner or group are changed by an unprivileged user,
- // chown(2) also clears the set-user-ID and set-group-ID bits, but
- // we do not support them.
return nil
}
@@ -2124,9 +2140,9 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, syserror.EFBIG
}
if uint64(size) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur {
- t.SendSignal(&arch.SignalInfo{
+ t.SendSignal(&linux.SignalInfo{
Signo: int32(linux.SIGXFSZ),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
})
return 0, nil, syserror.EFBIG
}
diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go
index 53b12dc41..27a7f7fe1 100644
--- a/pkg/sentry/syscalls/linux/sys_signal.go
+++ b/pkg/sentry/syscalls/linux/sys_signal.go
@@ -84,9 +84,9 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
if !mayKill(t, target, sig) {
return 0, nil, syserror.EPERM
}
- info := &arch.SignalInfo{
+ info := &linux.SignalInfo{
Signo: int32(sig),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
}
info.SetPID(int32(target.PIDNamespace().IDOfTask(t)))
info.SetUID(int32(t.Credentials().RealKUID.In(target.UserNamespace()).OrOverflow()))
@@ -123,9 +123,9 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// depend on the iteration order. We at least implement the
// semantics documented by the man page: "On success (at least
// one signal was sent), zero is returned."
- info := &arch.SignalInfo{
+ info := &linux.SignalInfo{
Signo: int32(sig),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
}
info.SetPID(int32(tg.PIDNamespace().IDOfTask(t)))
info.SetUID(int32(t.Credentials().RealKUID.In(tg.Leader().UserNamespace()).OrOverflow()))
@@ -167,9 +167,9 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
continue
}
- info := &arch.SignalInfo{
+ info := &linux.SignalInfo{
Signo: int32(sig),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
}
info.SetPID(int32(tg.PIDNamespace().IDOfTask(t)))
info.SetUID(int32(t.Credentials().RealKUID.In(tg.Leader().UserNamespace()).OrOverflow()))
@@ -184,10 +184,10 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
}
}
-func tkillSigInfo(sender, receiver *kernel.Task, sig linux.Signal) *arch.SignalInfo {
- info := &arch.SignalInfo{
+func tkillSigInfo(sender, receiver *kernel.Task, sig linux.Signal) *linux.SignalInfo {
+ info := &linux.SignalInfo{
Signo: int32(sig),
- Code: arch.SignalInfoTkill,
+ Code: linux.SI_TKILL,
}
info.SetPID(int32(receiver.PIDNamespace().IDOfThreadGroup(sender.ThreadGroup())))
info.SetUID(int32(sender.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow()))
@@ -251,20 +251,20 @@ func RtSigaction(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
return 0, nil, syserror.EINVAL
}
- var newactptr *arch.SignalAct
+ var newactptr *linux.SigAction
if newactarg != 0 {
- newact, err := t.CopyInSignalAct(newactarg)
- if err != nil {
+ var newact linux.SigAction
+ if _, err := newact.CopyIn(t, newactarg); err != nil {
return 0, nil, err
}
newactptr = &newact
}
- oldact, err := t.ThreadGroup().SetSignalAct(sig, newactptr)
+ oldact, err := t.ThreadGroup().SetSigAction(sig, newactptr)
if err != nil {
return 0, nil, err
}
if oldactarg != 0 {
- if err := t.CopyOutSignalAct(oldactarg, &oldact); err != nil {
+ if _, err := oldact.CopyOut(t, oldactarg); err != nil {
return 0, nil, err
}
}
@@ -325,13 +325,12 @@ func Sigaltstack(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
alt := t.SignalStack()
if oldaddr != 0 {
- if err := t.CopyOutSignalStack(oldaddr, &alt); err != nil {
+ if _, err := alt.CopyOut(t, oldaddr); err != nil {
return 0, nil, err
}
}
if setaddr != 0 {
- alt, err := t.CopyInSignalStack(setaddr)
- if err != nil {
+ if _, err := alt.CopyIn(t, setaddr); err != nil {
return 0, nil, err
}
// The signal stack cannot be changed if the task is currently
@@ -410,7 +409,7 @@ func RtSigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
// We must ensure that the Signo is set (Linux overrides this in the
// same way), and that the code is in the allowed set. This same logic
// appears below in RtSigtgqueueinfo and should be kept in sync.
- var info arch.SignalInfo
+ var info linux.SignalInfo
if _, err := info.CopyIn(t, infoAddr); err != nil {
return 0, nil, err
}
@@ -426,7 +425,7 @@ func RtSigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
// If the sender is not the receiver, it can't use si_codes used by the
// kernel or SI_TKILL.
- if (info.Code >= 0 || info.Code == arch.SignalInfoTkill) && target != t {
+ if (info.Code >= 0 || info.Code == linux.SI_TKILL) && target != t {
return 0, nil, syserror.EPERM
}
@@ -454,7 +453,7 @@ func RtTgsigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ker
}
// Copy in the info. See RtSigqueueinfo above.
- var info arch.SignalInfo
+ var info linux.SignalInfo
if _, err := info.CopyIn(t, infoAddr); err != nil {
return 0, nil, err
}
@@ -469,7 +468,7 @@ func RtTgsigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ker
// If the sender is not the receiver, it can't use si_codes used by the
// kernel or SI_TKILL.
- if (info.Code >= 0 || info.Code == arch.SignalInfoTkill) && target != t {
+ if (info.Code >= 0 || info.Code == linux.SI_TKILL) && target != t {
return 0, nil, syserror.EPERM
}
diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go
index 3185ea527..0d5056303 100644
--- a/pkg/sentry/syscalls/linux/sys_thread.go
+++ b/pkg/sentry/syscalls/linux/sys_thread.go
@@ -398,7 +398,7 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// out the fields it would set for a successful waitid in this case
// as well.
if infop != 0 {
- var si arch.SignalInfo
+ var si linux.SignalInfo
_, err = si.CopyOut(t, infop)
}
}
@@ -413,7 +413,7 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
if infop == 0 {
return 0, nil, nil
}
- si := arch.SignalInfo{
+ si := linux.SignalInfo{
Signo: int32(linux.SIGCHLD),
}
si.SetPID(int32(wr.TID))
@@ -423,24 +423,24 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
s := unix.WaitStatus(wr.Status)
switch {
case s.Exited():
- si.Code = arch.CLD_EXITED
+ si.Code = linux.CLD_EXITED
si.SetStatus(int32(s.ExitStatus()))
case s.Signaled():
- si.Code = arch.CLD_KILLED
+ si.Code = linux.CLD_KILLED
si.SetStatus(int32(s.Signal()))
case s.CoreDump():
- si.Code = arch.CLD_DUMPED
+ si.Code = linux.CLD_DUMPED
si.SetStatus(int32(s.Signal()))
case s.Stopped():
if wr.Event == kernel.EventTraceeStop {
- si.Code = arch.CLD_TRAPPED
+ si.Code = linux.CLD_TRAPPED
si.SetStatus(int32(s.TrapCause()))
} else {
- si.Code = arch.CLD_STOPPED
+ si.Code = linux.CLD_STOPPED
si.SetStatus(int32(s.StopSignal()))
}
case s.Continued():
- si.Code = arch.CLD_CONTINUED
+ si.Code = linux.CLD_CONTINUED
si.SetStatus(int32(linux.SIGCONT))
default:
t.Warningf("waitid got incomprehensible wait status %d", s)
diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go
index 83b777bbd..5c3b3dee2 100644
--- a/pkg/sentry/syscalls/linux/sys_time.go
+++ b/pkg/sentry/syscalls/linux/sys_time.go
@@ -180,21 +180,21 @@ func Time(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
//
// +stateify savable
type clockNanosleepRestartBlock struct {
- c ktime.Clock
- duration time.Duration
- rem hostarch.Addr
+ c ktime.Clock
+ end ktime.Time
+ rem hostarch.Addr
}
// Restart implements kernel.SyscallRestartBlock.Restart.
func (n *clockNanosleepRestartBlock) Restart(t *kernel.Task) (uintptr, error) {
- return 0, clockNanosleepFor(t, n.c, n.duration, n.rem)
+ return 0, clockNanosleepUntil(t, n.c, n.end, n.rem, true)
}
// clockNanosleepUntil blocks until a specified time.
//
// If blocking is interrupted, the syscall is restarted with the original
// arguments.
-func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, ts linux.Timespec) error {
+func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, end ktime.Time, rem hostarch.Addr, needRestartBlock bool) error {
notifier, tchan := ktime.NewChannelNotifier()
timer := ktime.NewTimer(c, notifier)
@@ -202,43 +202,22 @@ func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, ts linux.Timespec) error
timer.Swap(ktime.Setting{
Period: 0,
Enabled: true,
- Next: ktime.FromTimespec(ts),
+ Next: end,
})
err := t.BlockWithTimer(nil, tchan)
timer.Destroy()
- // Did we just block until the timeout happened?
- if err == syserror.ETIMEDOUT {
- return nil
- }
-
- return syserror.ConvertIntr(err, syserror.ERESTARTNOHAND)
-}
-
-// clockNanosleepFor blocks for a specified duration.
-//
-// If blocking is interrupted, the syscall is restarted with the remaining
-// duration timeout.
-func clockNanosleepFor(t *kernel.Task, c ktime.Clock, dur time.Duration, rem hostarch.Addr) error {
- timer, start, tchan := ktime.After(c, dur)
-
- err := t.BlockWithTimer(nil, tchan)
-
- after := c.Now()
-
- timer.Destroy()
-
switch err {
case syserror.ETIMEDOUT:
// Slept for entire timeout.
return nil
case syserror.ErrInterrupted:
// Interrupted.
- remaining := dur - after.Sub(start)
- if remaining < 0 {
- remaining = time.Duration(0)
+ remaining := end.Sub(c.Now())
+ if remaining <= 0 {
+ return nil
}
// Copy out remaining time.
@@ -248,14 +227,16 @@ func clockNanosleepFor(t *kernel.Task, c ktime.Clock, dur time.Duration, rem hos
return err
}
}
-
- // Arrange for a restart with the remaining duration.
- t.SetSyscallRestartBlock(&clockNanosleepRestartBlock{
- c: c,
- duration: remaining,
- rem: rem,
- })
- return syserror.ERESTART_RESTARTBLOCK
+ if needRestartBlock {
+ // Arrange for a restart with the remaining duration.
+ t.SetSyscallRestartBlock(&clockNanosleepRestartBlock{
+ c: c,
+ end: end,
+ rem: rem,
+ })
+ return syserror.ERESTART_RESTARTBLOCK
+ }
+ return syserror.ERESTARTNOHAND
default:
panic(fmt.Sprintf("Impossible BlockWithTimer error %v", err))
}
@@ -278,7 +259,8 @@ func Nanosleep(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
// Just like linux, we cap the timeout with the max number that int64 can
// represent which is roughly 292 years.
dur := time.Duration(ts.ToNsecCapped()) * time.Nanosecond
- return 0, nil, clockNanosleepFor(t, t.Kernel().MonotonicClock(), dur, rem)
+ c := t.Kernel().MonotonicClock()
+ return 0, nil, clockNanosleepUntil(t, c, c.Now().Add(dur), rem, true)
}
// ClockNanosleep implements linux syscall clock_nanosleep(2).
@@ -312,11 +294,11 @@ func ClockNanosleep(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
}
if flags&linux.TIMER_ABSTIME != 0 {
- return 0, nil, clockNanosleepUntil(t, c, req)
+ return 0, nil, clockNanosleepUntil(t, c, ktime.FromTimespec(req), 0, false)
}
dur := time.Duration(req.ToNsecCapped()) * time.Nanosecond
- return 0, nil, clockNanosleepFor(t, c, dur, rem)
+ return 0, nil, clockNanosleepUntil(t, c, c.Now().Add(dur), rem, true)
}
// Gettimeofday implements linux syscall gettimeofday(2).
diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go
index c6330c21a..647e089d0 100644
--- a/pkg/sentry/syscalls/linux/vfs2/setstat.go
+++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go
@@ -242,9 +242,9 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
limit := limits.FromContext(t).Get(limits.FileSize).Cur
if uint64(size) >= limit {
- t.SendSignal(&arch.SignalInfo{
+ t.SendSignal(&linux.SignalInfo{
Signo: int32(linux.SIGXFSZ),
- Code: arch.SignalInfoUser,
+ Code: linux.SI_USER,
})
return 0, nil, syserror.EFBIG
}
diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD
index bc75a47fc..202486a1e 100644
--- a/pkg/sentry/time/BUILD
+++ b/pkg/sentry/time/BUILD
@@ -8,7 +8,7 @@ go_template_instance(
out = "seqatomic_parameters_unsafe.go",
package = "time",
suffix = "Parameters",
- template = "//pkg/sync:generic_seqatomic",
+ template = "//pkg/sync/seqatomic:generic_seqatomic",
types = {
"Value": "Parameters",
},
diff --git a/pkg/sentry/time/sampler_amd64.go b/pkg/sentry/time/sampler_amd64.go
index 72cb74be3..9f1b4b2fb 100644
--- a/pkg/sentry/time/sampler_amd64.go
+++ b/pkg/sentry/time/sampler_amd64.go
@@ -16,7 +16,7 @@
package time
-const(
+const (
// defaultOverheadTSC is the default estimated syscall overhead in TSC cycles.
// It is further refined as syscalls are made.
defaultOverheadCycles = 1 * 1000
diff --git a/pkg/sentry/time/sampler_arm64.go b/pkg/sentry/time/sampler_arm64.go
index b9d0273b7..4c8d33ae4 100644
--- a/pkg/sentry/time/sampler_arm64.go
+++ b/pkg/sentry/time/sampler_arm64.go
@@ -30,7 +30,7 @@ func getDefaultArchOverheadCycles() TSCValue {
// x86 devided by frqRatio
cntfrq := getCNTFRQ()
frqRatio := 1000000000 / cntfrq
- overheadCycles := ( 1 * 1000 ) / frqRatio
+ overheadCycles := (1 * 1000) / frqRatio
return overheadCycles
}
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 176bcc242..ef8d8a813 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -454,6 +454,9 @@ type FileDescriptionImpl interface {
// RemoveXattr removes the given extended attribute from the file.
RemoveXattr(ctx context.Context, name string) error
+ // SupportsLocks indicates whether file locks are supported.
+ SupportsLocks() bool
+
// LockBSD tries to acquire a BSD-style advisory file lock.
LockBSD(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, block lock.Blocker) error
@@ -818,6 +821,11 @@ func (fd *FileDescription) Msync(ctx context.Context, mr memmap.MappableRange) e
return fd.Sync(ctx)
}
+// SupportsLocks indicates whether file locks are supported.
+func (fd *FileDescription) SupportsLocks() bool {
+ return fd.impl.SupportsLocks()
+}
+
// LockBSD tries to acquire a BSD-style advisory file lock.
func (fd *FileDescription) LockBSD(ctx context.Context, ownerPID int32, lockType lock.LockType, blocker lock.Blocker) error {
atomic.StoreUint32(&fd.usedLockBSD, 1)
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index b87d9690a..2b6f47b4b 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -413,6 +413,11 @@ type LockFD struct {
locks *FileLocks
}
+// SupportsLocks implements FileDescriptionImpl.SupportsLocks.
+func (LockFD) SupportsLocks() bool {
+ return true
+}
+
// Init initializes fd with FileLocks to use.
func (fd *LockFD) Init(locks *FileLocks) {
fd.locks = locks
@@ -423,28 +428,28 @@ func (fd *LockFD) Locks() *FileLocks {
return fd.locks
}
-// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
+// LockBSD implements FileDescriptionImpl.LockBSD.
func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
return fd.locks.LockBSD(ctx, uid, ownerPID, t, block)
}
-// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD.
+// UnlockBSD implements FileDescriptionImpl.UnlockBSD.
func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
fd.locks.UnlockBSD(uid)
return nil
}
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+// LockPOSIX implements FileDescriptionImpl.LockPOSIX.
func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
return fd.locks.LockPOSIX(ctx, uid, ownerPID, t, r, block)
}
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+// UnlockPOSIX implements FileDescriptionImpl.UnlockPOSIX.
func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
return fd.locks.UnlockPOSIX(ctx, uid, r)
}
-// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX.
+// TestPOSIX implements FileDescriptionImpl.TestPOSIX.
func (fd *LockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
return fd.locks.TestPOSIX(ctx, uid, t, r)
}
@@ -455,27 +460,68 @@ func (fd *LockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.L
// +stateify savable
type NoLockFD struct{}
-// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
+// SupportsLocks implements FileDescriptionImpl.SupportsLocks.
+func (NoLockFD) SupportsLocks() bool {
+ return false
+}
+
+// LockBSD implements FileDescriptionImpl.LockBSD.
func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
return syserror.ENOLCK
}
-// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD.
+// UnlockBSD implements FileDescriptionImpl.UnlockBSD.
func (NoLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
return syserror.ENOLCK
}
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+// LockPOSIX implements FileDescriptionImpl.LockPOSIX.
func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
return syserror.ENOLCK
}
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+// UnlockPOSIX implements FileDescriptionImpl.UnlockPOSIX.
func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
return syserror.ENOLCK
}
-// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX.
+// TestPOSIX implements FileDescriptionImpl.TestPOSIX.
func (NoLockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
return linux.Flock{}, syserror.ENOLCK
}
+
+// BadLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface
+// returning EBADF.
+//
+// +stateify savable
+type BadLockFD struct{}
+
+// SupportsLocks implements FileDescriptionImpl.SupportsLocks.
+func (BadLockFD) SupportsLocks() bool {
+ return false
+}
+
+// LockBSD implements FileDescriptionImpl.LockBSD.
+func (BadLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
+ return syserror.EBADF
+}
+
+// UnlockBSD implements FileDescriptionImpl.UnlockBSD.
+func (BadLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
+ return syserror.EBADF
+}
+
+// LockPOSIX implements FileDescriptionImpl.LockPOSIX.
+func (BadLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
+ return syserror.EBADF
+}
+
+// UnlockPOSIX implements FileDescriptionImpl.UnlockPOSIX.
+func (BadLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
+ return syserror.EBADF
+}
+
+// TestPOSIX implements FileDescriptionImpl.TestPOSIX.
+func (BadLockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
+ return linux.Flock{}, syserror.EBADF
+}
diff --git a/pkg/sentry/vfs/memxattr/BUILD b/pkg/sentry/vfs/memxattr/BUILD
index d8c4d27b9..ea82f4987 100644
--- a/pkg/sentry/vfs/memxattr/BUILD
+++ b/pkg/sentry/vfs/memxattr/BUILD
@@ -8,6 +8,7 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserror",
diff --git a/pkg/sentry/vfs/memxattr/xattr.go b/pkg/sentry/vfs/memxattr/xattr.go
index 638b5d830..9b7953fa3 100644
--- a/pkg/sentry/vfs/memxattr/xattr.go
+++ b/pkg/sentry/vfs/memxattr/xattr.go
@@ -17,7 +17,10 @@
package memxattr
import (
+ "strings"
+
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
@@ -26,6 +29,9 @@ import (
// SimpleExtendedAttributes implements extended attributes using a map of
// names to values.
//
+// SimpleExtendedAttributes calls vfs.CheckXattrPermissions, so callers are not
+// required to do so.
+//
// +stateify savable
type SimpleExtendedAttributes struct {
// mu protects the below fields.
@@ -34,7 +40,11 @@ type SimpleExtendedAttributes struct {
}
// GetXattr returns the value at 'name'.
-func (x *SimpleExtendedAttributes) GetXattr(opts *vfs.GetXattrOptions) (string, error) {
+func (x *SimpleExtendedAttributes) GetXattr(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, opts *vfs.GetXattrOptions) (string, error) {
+ if err := vfs.CheckXattrPermissions(creds, vfs.MayRead, mode, kuid, opts.Name); err != nil {
+ return "", err
+ }
+
x.mu.RLock()
value, ok := x.xattrs[opts.Name]
x.mu.RUnlock()
@@ -50,7 +60,11 @@ func (x *SimpleExtendedAttributes) GetXattr(opts *vfs.GetXattrOptions) (string,
}
// SetXattr sets 'value' at 'name'.
-func (x *SimpleExtendedAttributes) SetXattr(opts *vfs.SetXattrOptions) error {
+func (x *SimpleExtendedAttributes) SetXattr(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, opts *vfs.SetXattrOptions) error {
+ if err := vfs.CheckXattrPermissions(creds, vfs.MayWrite, mode, kuid, opts.Name); err != nil {
+ return err
+ }
+
x.mu.Lock()
defer x.mu.Unlock()
if x.xattrs == nil {
@@ -73,12 +87,19 @@ func (x *SimpleExtendedAttributes) SetXattr(opts *vfs.SetXattrOptions) error {
}
// ListXattr returns all names in xattrs.
-func (x *SimpleExtendedAttributes) ListXattr(size uint64) ([]string, error) {
+func (x *SimpleExtendedAttributes) ListXattr(creds *auth.Credentials, size uint64) ([]string, error) {
// Keep track of the size of the buffer needed in listxattr(2) for the list.
listSize := 0
x.mu.RLock()
names := make([]string, 0, len(x.xattrs))
+ haveCap := creds.HasCapability(linux.CAP_SYS_ADMIN)
for n := range x.xattrs {
+ // Hide extended attributes in the "trusted" namespace from
+ // non-privileged users. This is consistent with Linux's
+ // fs/xattr.c:simple_xattr_list().
+ if !haveCap && strings.HasPrefix(n, linux.XATTR_TRUSTED_PREFIX) {
+ continue
+ }
names = append(names, n)
// Add one byte per null terminator.
listSize += len(n) + 1
@@ -91,7 +112,11 @@ func (x *SimpleExtendedAttributes) ListXattr(size uint64) ([]string, error) {
}
// RemoveXattr removes the xattr at 'name'.
-func (x *SimpleExtendedAttributes) RemoveXattr(name string) error {
+func (x *SimpleExtendedAttributes) RemoveXattr(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, name string) error {
+ if err := vfs.CheckXattrPermissions(creds, vfs.MayWrite, mode, kuid, name); err != nil {
+ return err
+ }
+
x.mu.Lock()
defer x.mu.Unlock()
if _, ok := x.xattrs[name]; !ok {
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 82fd382c2..f93da3af1 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -220,7 +220,7 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr
vdDentry := vd.dentry
vdDentry.mu.Lock()
for {
- if vdDentry.dead {
+ if vd.mount.umounted || vdDentry.dead {
vdDentry.mu.Unlock()
vfs.mountMu.Unlock()
vd.DecRef(ctx)
diff --git a/pkg/sentry/vfs/opath.go b/pkg/sentry/vfs/opath.go
index 47848c76b..e9651b631 100644
--- a/pkg/sentry/vfs/opath.go
+++ b/pkg/sentry/vfs/opath.go
@@ -24,96 +24,96 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-// opathFD implements vfs.FileDescriptionImpl for a file description opened with O_PATH.
+// opathFD implements FileDescriptionImpl for a file description opened with O_PATH.
//
// +stateify savable
type opathFD struct {
vfsfd FileDescription
FileDescriptionDefaultImpl
- NoLockFD
+ BadLockFD
}
-// Release implements vfs.FileDescriptionImpl.Release.
+// Release implements FileDescriptionImpl.Release.
func (fd *opathFD) Release(context.Context) {
// noop
}
-// Allocate implements vfs.FileDescriptionImpl.Allocate.
+// Allocate implements FileDescriptionImpl.Allocate.
func (fd *opathFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
return syserror.EBADF
}
-// PRead implements vfs.FileDescriptionImpl.PRead.
+// PRead implements FileDescriptionImpl.PRead.
func (fd *opathFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
return 0, syserror.EBADF
}
-// Read implements vfs.FileDescriptionImpl.Read.
+// Read implements FileDescriptionImpl.Read.
func (fd *opathFD) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
return 0, syserror.EBADF
}
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
+// PWrite implements FileDescriptionImpl.PWrite.
func (fd *opathFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) {
return 0, syserror.EBADF
}
-// Write implements vfs.FileDescriptionImpl.Write.
+// Write implements FileDescriptionImpl.Write.
func (fd *opathFD) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
return 0, syserror.EBADF
}
-// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
+// Ioctl implements FileDescriptionImpl.Ioctl.
func (fd *opathFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
return 0, syserror.EBADF
}
-// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
+// IterDirents implements FileDescriptionImpl.IterDirents.
func (fd *opathFD) IterDirents(ctx context.Context, cb IterDirentsCallback) error {
return syserror.EBADF
}
-// Seek implements vfs.FileDescriptionImpl.Seek.
+// Seek implements FileDescriptionImpl.Seek.
func (fd *opathFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
return 0, syserror.EBADF
}
-// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+// ConfigureMMap implements FileDescriptionImpl.ConfigureMMap.
func (fd *opathFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
return syserror.EBADF
}
-// ListXattr implements vfs.FileDescriptionImpl.ListXattr.
+// ListXattr implements FileDescriptionImpl.ListXattr.
func (fd *opathFD) ListXattr(ctx context.Context, size uint64) ([]string, error) {
return nil, syserror.EBADF
}
-// GetXattr implements vfs.FileDescriptionImpl.GetXattr.
+// GetXattr implements FileDescriptionImpl.GetXattr.
func (fd *opathFD) GetXattr(ctx context.Context, opts GetXattrOptions) (string, error) {
return "", syserror.EBADF
}
-// SetXattr implements vfs.FileDescriptionImpl.SetXattr.
+// SetXattr implements FileDescriptionImpl.SetXattr.
func (fd *opathFD) SetXattr(ctx context.Context, opts SetXattrOptions) error {
return syserror.EBADF
}
-// RemoveXattr implements vfs.FileDescriptionImpl.RemoveXattr.
+// RemoveXattr implements FileDescriptionImpl.RemoveXattr.
func (fd *opathFD) RemoveXattr(ctx context.Context, name string) error {
return syserror.EBADF
}
-// Sync implements vfs.FileDescriptionImpl.Sync.
+// Sync implements FileDescriptionImpl.Sync.
func (fd *opathFD) Sync(ctx context.Context) error {
return syserror.EBADF
}
-// SetStat implements vfs.FileDescriptionImpl.SetStat.
+// SetStat implements FileDescriptionImpl.SetStat.
func (fd *opathFD) SetStat(ctx context.Context, opts SetStatOptions) error {
return syserror.EBADF
}
-// Stat implements vfs.FileDescriptionImpl.Stat.
+// Stat implements FileDescriptionImpl.Stat.
func (fd *opathFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) {
vfsObj := fd.vfsfd.vd.mount.vfs
rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go
index 8e3146d8d..8d563d53a 100644
--- a/pkg/sentry/watchdog/watchdog.go
+++ b/pkg/sentry/watchdog/watchdog.go
@@ -115,14 +115,14 @@ func (a *Action) Get() interface{} {
}
// String returns Action's string representation.
-func (a *Action) String() string {
- switch *a {
+func (a Action) String() string {
+ switch a {
case LogWarning:
return "logWarning"
case Panic:
return "panic"
default:
- panic(fmt.Sprintf("Invalid watchdog action: %d", *a))
+ panic(fmt.Sprintf("Invalid watchdog action: %d", a))
}
}
@@ -243,6 +243,7 @@ func (w *Watchdog) waitForStart() {
}
stuckStartup.Increment()
+ metric.WeirdnessMetric.Increment("watchdog_stuck_startup")
var buf bytes.Buffer
buf.WriteString(fmt.Sprintf("Watchdog.Start() not called within %s", w.StartupTimeout))
@@ -312,10 +313,11 @@ func (w *Watchdog) runTurn() {
// New stuck task detected.
//
// Note that tasks blocked doing IO may be considered stuck in kernel,
- // unless they are surrounded b
+ // unless they are surrounded by
// Task.UninterruptibleSleepStart/Finish.
tc = &offender{lastUpdateTime: lastUpdateTime}
stuckTasks.Increment()
+ metric.WeirdnessMetric.Increment("watchdog_stuck_tasks")
newTaskFound = true
}
newOffenders[t] = tc