summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/compat.go12
-rw-r--r--runsc/boot/compat_amd64.go4
-rw-r--r--runsc/boot/controller.go4
-rw-r--r--runsc/boot/filter/config.go379
-rw-r--r--runsc/boot/filter/config_amd64.go33
-rw-r--r--runsc/boot/filter/config_arm64.go17
-rw-r--r--runsc/boot/filter/config_profile.go7
-rw-r--r--runsc/boot/filter/extra_filters_msan.go11
-rw-r--r--runsc/boot/filter/extra_filters_race.go25
-rw-r--r--runsc/boot/fs.go12
-rw-r--r--runsc/boot/limits.go8
-rw-r--r--runsc/boot/loader_test.go7
-rw-r--r--runsc/boot/network.go4
-rw-r--r--runsc/cgroup/cgroup.go9
-rw-r--r--runsc/cli/BUILD1
-rw-r--r--runsc/cli/main.go8
-rw-r--r--runsc/cmd/BUILD3
-rw-r--r--runsc/cmd/boot.go5
-rw-r--r--runsc/cmd/checkpoint.go4
-rw-r--r--runsc/cmd/chroot.go16
-rw-r--r--runsc/cmd/cmd.go10
-rw-r--r--runsc/cmd/debug.go6
-rw-r--r--runsc/cmd/do.go6
-rw-r--r--runsc/cmd/exec.go12
-rw-r--r--runsc/cmd/gofer.go43
-rw-r--r--runsc/cmd/kill.go7
-rw-r--r--runsc/cmd/mitigate.go122
-rw-r--r--runsc/cmd/mitigate_test.go169
-rw-r--r--runsc/cmd/restore.go4
-rw-r--r--runsc/cmd/run.go4
-rw-r--r--runsc/cmd/wait.go6
-rw-r--r--runsc/config/config.go3
-rw-r--r--runsc/config/flags.go1
-rw-r--r--runsc/container/BUILD1
-rw-r--r--runsc/container/console_test.go15
-rw-r--r--runsc/container/container.go35
-rw-r--r--runsc/container/container_test.go50
-rw-r--r--runsc/container/multi_container_test.go34
-rw-r--r--runsc/container/state_file.go6
-rw-r--r--runsc/fsgofer/filter/config.go153
-rw-r--r--runsc/fsgofer/filter/config_amd64.go35
-rw-r--r--runsc/fsgofer/filter/config_arm64.go19
-rw-r--r--runsc/fsgofer/filter/extra_filters_msan.go7
-rw-r--r--runsc/fsgofer/filter/extra_filters_race.go27
-rw-r--r--runsc/fsgofer/fsgofer.go21
-rw-r--r--runsc/fsgofer/fsgofer_test.go32
-rw-r--r--runsc/mitigate/BUILD22
-rw-r--r--runsc/mitigate/cpu.go423
-rw-r--r--runsc/mitigate/cpu_test.go605
-rw-r--r--runsc/mitigate/mitigate.go467
-rw-r--r--runsc/mitigate/mitigate_conf.go37
-rw-r--r--runsc/mitigate/mitigate_test.go579
-rw-r--r--runsc/mitigate/mock/BUILD11
-rw-r--r--runsc/mitigate/mock/mock.go141
-rw-r--r--runsc/sandbox/network.go25
-rw-r--r--runsc/sandbox/network_unsafe.go3
-rw-r--r--runsc/sandbox/sandbox.go35
-rw-r--r--runsc/specutils/fs.go78
-rw-r--r--runsc/specutils/namespace.go12
-rw-r--r--runsc/specutils/seccomp/BUILD2
-rw-r--r--runsc/specutils/seccomp/seccomp.go6
-rw-r--r--runsc/specutils/seccomp/seccomp_test.go40
-rw-r--r--runsc/specutils/specutils.go14
63 files changed, 1997 insertions, 1900 deletions
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index a3a76b609..28e82e117 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -17,8 +17,8 @@ package boot
import (
"fmt"
"os"
- "syscall"
+ "golang.org/x/sys/unix"
"google.golang.org/protobuf/proto"
"gvisor.dev/gvisor/pkg/eventchannel"
"gvisor.dev/gvisor/pkg/log"
@@ -93,19 +93,19 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
tr := c.trackers[sysnr]
if tr == nil {
switch sysnr {
- case syscall.SYS_PRCTL:
+ case unix.SYS_PRCTL:
// args: cmd, ...
tr = newArgsTracker(0)
- case syscall.SYS_IOCTL, syscall.SYS_EPOLL_CTL, syscall.SYS_SHMCTL, syscall.SYS_FUTEX, syscall.SYS_FALLOCATE:
+ case unix.SYS_IOCTL, unix.SYS_EPOLL_CTL, unix.SYS_SHMCTL, unix.SYS_FUTEX, unix.SYS_FALLOCATE:
// args: fd/addr, cmd, ...
tr = newArgsTracker(1)
- case syscall.SYS_GETSOCKOPT, syscall.SYS_SETSOCKOPT:
+ case unix.SYS_GETSOCKOPT, unix.SYS_SETSOCKOPT:
// args: fd, level, name, ...
tr = newArgsTracker(1, 2)
- case syscall.SYS_SEMCTL:
+ case unix.SYS_SEMCTL:
// args: semid, semnum, cmd, ...
tr = newArgsTracker(2)
@@ -131,7 +131,7 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
}
func (c *compatEmitter) emitUncaughtSignal(msg *ucspb.UncaughtSignal) {
- sig := syscall.Signal(msg.SignalNumber)
+ sig := unix.Signal(msg.SignalNumber)
c.sink.Infof(
"Uncaught signal: %q (%d), PID: %d, TID: %d, fault addr: %#x",
sig, msg.SignalNumber, msg.Pid, msg.Tid, msg.FaultAddr)
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 8eb76b2ba..7e13ff87c 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -16,8 +16,8 @@ package boot
import (
"fmt"
- "syscall"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/sentry/arch"
rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
@@ -92,7 +92,7 @@ func syscallNum(regs *rpb.Registers) uint64 {
func newArchArgsTracker(sysnr uint64) syscallTracker {
switch sysnr {
- case syscall.SYS_ARCH_PRCTL:
+ case unix.SYS_ARCH_PRCTL:
// args: cmd, ...
return newArgsTracker(0)
}
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 5e849cb37..1cd5fba5c 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -18,9 +18,9 @@ import (
"errors"
"fmt"
"os"
- "syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/control/server"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
@@ -366,7 +366,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
case 2:
// The device file is donated to the platform.
// Can't take ownership away from os.File. dup them to get a new FD.
- fd, err := syscall.Dup(int(o.Files[1].Fd()))
+ fd, err := unix.Dup(int(o.Files[1].Fd()))
if err != nil {
return fmt.Errorf("failed to dup file: %v", err)
}
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 2a8c916d5..49b503f99 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -16,7 +16,6 @@ package filter
import (
"os"
- "syscall"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -26,19 +25,19 @@ import (
// allowedSyscalls is the set of syscalls executed by the Sentry to the host OS.
var allowedSyscalls = seccomp.SyscallRules{
- syscall.SYS_CLOCK_GETTIME: {},
- syscall.SYS_CLOSE: {},
- syscall.SYS_DUP: {},
- syscall.SYS_DUP3: []seccomp.Rule{
+ unix.SYS_CLOCK_GETTIME: {},
+ unix.SYS_CLOSE: {},
+ unix.SYS_DUP: {},
+ unix.SYS_DUP3: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.O_CLOEXEC),
+ seccomp.EqualTo(unix.O_CLOEXEC),
},
},
- syscall.SYS_EPOLL_CREATE1: {},
- syscall.SYS_EPOLL_CTL: {},
- syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
+ unix.SYS_EPOLL_CREATE1: {},
+ unix.SYS_EPOLL_CTL: {},
+ unix.SYS_EPOLL_PWAIT: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
@@ -47,34 +46,34 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.EqualTo(0),
},
},
- syscall.SYS_EVENTFD2: []seccomp.Rule{
+ unix.SYS_EVENTFD2: []seccomp.Rule{
{
seccomp.EqualTo(0),
seccomp.EqualTo(0),
},
},
- syscall.SYS_EXIT: {},
- syscall.SYS_EXIT_GROUP: {},
- syscall.SYS_FALLOCATE: {},
- syscall.SYS_FCHMOD: {},
- syscall.SYS_FCNTL: []seccomp.Rule{
+ unix.SYS_EXIT: {},
+ unix.SYS_EXIT_GROUP: {},
+ unix.SYS_FALLOCATE: {},
+ unix.SYS_FCHMOD: {},
+ unix.SYS_FCNTL: []seccomp.Rule{
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.F_GETFL),
+ seccomp.EqualTo(unix.F_GETFL),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.F_SETFL),
+ seccomp.EqualTo(unix.F_SETFL),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.F_GETFD),
+ seccomp.EqualTo(unix.F_GETFD),
},
},
- syscall.SYS_FSTAT: {},
- syscall.SYS_FSYNC: {},
- syscall.SYS_FTRUNCATE: {},
- syscall.SYS_FUTEX: []seccomp.Rule{
+ unix.SYS_FSTAT: {},
+ unix.SYS_FSYNC: {},
+ unix.SYS_FTRUNCATE: {},
+ unix.SYS_FUTEX: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
@@ -109,35 +108,35 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.EqualTo(0),
},
},
- syscall.SYS_GETPID: {},
+ unix.SYS_GETPID: {},
unix.SYS_GETRANDOM: {},
- syscall.SYS_GETSOCKOPT: []seccomp.Rule{
+ unix.SYS_GETSOCKOPT: []seccomp.Rule{
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_DOMAIN),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_DOMAIN),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_TYPE),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_TYPE),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_ERROR),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_ERROR),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_SNDBUF),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_SNDBUF),
},
},
- syscall.SYS_GETTID: {},
- syscall.SYS_GETTIMEOFDAY: {},
+ unix.SYS_GETTID: {},
+ unix.SYS_GETTIMEOFDAY: {},
// SYS_IOCTL is needed for terminal support, but we only allow
// setting/getting termios and winsize.
- syscall.SYS_IOCTL: []seccomp.Rule{
+ unix.SYS_IOCTL: []seccomp.Rule{
{
seccomp.MatchAny{}, /* fd */
seccomp.EqualTo(linux.TCGETS),
@@ -169,94 +168,94 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.MatchAny{}, /* winsize struct */
},
},
- syscall.SYS_LSEEK: {},
- syscall.SYS_MADVISE: {},
+ unix.SYS_LSEEK: {},
+ unix.SYS_MADVISE: {},
unix.SYS_MEMBARRIER: []seccomp.Rule{
{
seccomp.EqualTo(linux.MEMBARRIER_CMD_GLOBAL),
seccomp.EqualTo(0),
},
},
- syscall.SYS_MINCORE: {},
+ unix.SYS_MINCORE: {},
// Used by the Go runtime as a temporarily workaround for a Linux
// 5.2-5.4 bug.
//
// See src/runtime/os_linux_x86.go.
//
// TODO(b/148688965): Remove once this is gone from Go.
- syscall.SYS_MLOCK: []seccomp.Rule{
+ unix.SYS_MLOCK: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.EqualTo(4096),
},
},
- syscall.SYS_MMAP: []seccomp.Rule{
+ unix.SYS_MMAP: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MAP_SHARED),
+ seccomp.EqualTo(unix.MAP_SHARED),
},
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MAP_PRIVATE),
+ seccomp.EqualTo(unix.MAP_PRIVATE),
},
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS),
+ seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS),
},
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK),
+ seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_STACK),
},
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE),
+ seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_NORESERVE),
},
{
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.PROT_WRITE | syscall.PROT_READ),
- seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED),
+ seccomp.EqualTo(unix.PROT_WRITE | unix.PROT_READ),
+ seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_FIXED),
},
},
- syscall.SYS_MPROTECT: {},
- syscall.SYS_MUNMAP: {},
- syscall.SYS_NANOSLEEP: {},
- syscall.SYS_PPOLL: {},
- syscall.SYS_PREAD64: {},
- syscall.SYS_PREADV: {},
- unix.SYS_PREADV2: {},
- syscall.SYS_PWRITE64: {},
- syscall.SYS_PWRITEV: {},
- unix.SYS_PWRITEV2: {},
- syscall.SYS_READ: {},
- syscall.SYS_RECVMSG: []seccomp.Rule{
+ unix.SYS_MPROTECT: {},
+ unix.SYS_MUNMAP: {},
+ unix.SYS_NANOSLEEP: {},
+ unix.SYS_PPOLL: {},
+ unix.SYS_PREAD64: {},
+ unix.SYS_PREADV: {},
+ unix.SYS_PREADV2: {},
+ unix.SYS_PWRITE64: {},
+ unix.SYS_PWRITEV: {},
+ unix.SYS_PWRITEV2: {},
+ unix.SYS_READ: {},
+ unix.SYS_RECVMSG: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC),
+ seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_TRUNC),
},
{
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK),
+ seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_TRUNC | unix.MSG_PEEK),
},
},
- syscall.SYS_RECVMMSG: []seccomp.Rule{
+ unix.SYS_RECVMMSG: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.EqualTo(fdbased.MaxMsgsPerRecv),
- seccomp.EqualTo(syscall.MSG_DONTWAIT),
+ seccomp.EqualTo(unix.MSG_DONTWAIT),
seccomp.EqualTo(0),
},
},
@@ -265,34 +264,34 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MSG_DONTWAIT),
+ seccomp.EqualTo(unix.MSG_DONTWAIT),
seccomp.EqualTo(0),
},
},
- syscall.SYS_RESTART_SYSCALL: {},
- syscall.SYS_RT_SIGACTION: {},
- syscall.SYS_RT_SIGPROCMASK: {},
- syscall.SYS_RT_SIGRETURN: {},
- syscall.SYS_SCHED_YIELD: {},
- syscall.SYS_SENDMSG: []seccomp.Rule{
+ unix.SYS_RESTART_SYSCALL: {},
+ unix.SYS_RT_SIGACTION: {},
+ unix.SYS_RT_SIGPROCMASK: {},
+ unix.SYS_RT_SIGRETURN: {},
+ unix.SYS_SCHED_YIELD: {},
+ unix.SYS_SENDMSG: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL),
+ seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_NOSIGNAL),
},
},
- syscall.SYS_SETITIMER: {},
- syscall.SYS_SHUTDOWN: []seccomp.Rule{
+ unix.SYS_SETITIMER: {},
+ unix.SYS_SHUTDOWN: []seccomp.Rule{
// Used by fs/host to shutdown host sockets.
- {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RD)},
- {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_WR)},
+ {seccomp.MatchAny{}, seccomp.EqualTo(unix.SHUT_RD)},
+ {seccomp.MatchAny{}, seccomp.EqualTo(unix.SHUT_WR)},
// Used by unet to shutdown connections.
- {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)},
+ {seccomp.MatchAny{}, seccomp.EqualTo(unix.SHUT_RDWR)},
},
- syscall.SYS_SIGALTSTACK: {},
- unix.SYS_STATX: {},
- syscall.SYS_SYNC_FILE_RANGE: {},
- syscall.SYS_TEE: []seccomp.Rule{
+ unix.SYS_SIGALTSTACK: {},
+ unix.SYS_STATX: {},
+ unix.SYS_SYNC_FILE_RANGE: {},
+ unix.SYS_TEE: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
@@ -300,12 +299,12 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.EqualTo(unix.SPLICE_F_NONBLOCK), /* flags */
},
},
- syscall.SYS_TGKILL: []seccomp.Rule{
+ unix.SYS_TGKILL: []seccomp.Rule{
{
seccomp.EqualTo(uint64(os.Getpid())),
},
},
- syscall.SYS_UTIMENSAT: []seccomp.Rule{
+ unix.SYS_UTIMENSAT: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.EqualTo(0), /* null pathname */
@@ -313,9 +312,9 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.EqualTo(0), /* flags */
},
},
- syscall.SYS_WRITE: {},
+ unix.SYS_WRITE: {},
// For rawfile.NonBlockingWriteIovec.
- syscall.SYS_WRITEV: []seccomp.Rule{
+ unix.SYS_WRITEV: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
@@ -327,313 +326,313 @@ var allowedSyscalls = seccomp.SyscallRules{
// hostInetFilters contains syscalls that are needed by sentry/socket/hostinet.
func hostInetFilters() seccomp.SyscallRules {
return seccomp.SyscallRules{
- syscall.SYS_ACCEPT4: []seccomp.Rule{
+ unix.SYS_ACCEPT4: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC),
},
},
- syscall.SYS_BIND: {},
- syscall.SYS_CONNECT: {},
- syscall.SYS_GETPEERNAME: {},
- syscall.SYS_GETSOCKNAME: {},
- syscall.SYS_GETSOCKOPT: []seccomp.Rule{
+ unix.SYS_BIND: {},
+ unix.SYS_CONNECT: {},
+ unix.SYS_GETPEERNAME: {},
+ unix.SYS_GETSOCKNAME: {},
+ unix.SYS_GETSOCKOPT: []seccomp.Rule{
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_TOS),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_TOS),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_RECVTOS),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_RECVTOS),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_PKTINFO),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_PKTINFO),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_RECVORIGDSTADDR),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_RECVORIGDSTADDR),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_RECVERR),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_RECVERR),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
- seccomp.EqualTo(syscall.IPV6_TCLASS),
+ seccomp.EqualTo(unix.SOL_IPV6),
+ seccomp.EqualTo(unix.IPV6_TCLASS),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
- seccomp.EqualTo(syscall.IPV6_RECVTCLASS),
+ seccomp.EqualTo(unix.SOL_IPV6),
+ seccomp.EqualTo(unix.IPV6_RECVTCLASS),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
- seccomp.EqualTo(syscall.IPV6_RECVERR),
+ seccomp.EqualTo(unix.SOL_IPV6),
+ seccomp.EqualTo(unix.IPV6_RECVERR),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
- seccomp.EqualTo(syscall.IPV6_V6ONLY),
+ seccomp.EqualTo(unix.SOL_IPV6),
+ seccomp.EqualTo(unix.IPV6_V6ONLY),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
+ seccomp.EqualTo(unix.SOL_IPV6),
seccomp.EqualTo(linux.IPV6_RECVORIGDSTADDR),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_ERROR),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_ERROR),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_KEEPALIVE),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_KEEPALIVE),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_SNDBUF),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_SNDBUF),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_RCVBUF),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_RCVBUF),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_REUSEADDR),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_REUSEADDR),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_TYPE),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_TYPE),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_LINGER),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_LINGER),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_TIMESTAMP),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_TIMESTAMP),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_TCP),
- seccomp.EqualTo(syscall.TCP_NODELAY),
+ seccomp.EqualTo(unix.SOL_TCP),
+ seccomp.EqualTo(unix.TCP_NODELAY),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_TCP),
- seccomp.EqualTo(syscall.TCP_INFO),
+ seccomp.EqualTo(unix.SOL_TCP),
+ seccomp.EqualTo(unix.TCP_INFO),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_TCP),
+ seccomp.EqualTo(unix.SOL_TCP),
seccomp.EqualTo(linux.TCP_INQ),
},
},
- syscall.SYS_IOCTL: []seccomp.Rule{
+ unix.SYS_IOCTL: []seccomp.Rule{
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.TIOCOUTQ),
+ seccomp.EqualTo(unix.TIOCOUTQ),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.TIOCINQ),
+ seccomp.EqualTo(unix.TIOCINQ),
},
},
- syscall.SYS_LISTEN: {},
- syscall.SYS_READV: {},
- syscall.SYS_RECVFROM: {},
- syscall.SYS_RECVMSG: {},
- syscall.SYS_SENDMSG: {},
- syscall.SYS_SENDTO: {},
- syscall.SYS_SETSOCKOPT: []seccomp.Rule{
+ unix.SYS_LISTEN: {},
+ unix.SYS_READV: {},
+ unix.SYS_RECVFROM: {},
+ unix.SYS_RECVMSG: {},
+ unix.SYS_SENDMSG: {},
+ unix.SYS_SENDTO: {},
+ unix.SYS_SETSOCKOPT: []seccomp.Rule{
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_SNDBUF),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_SNDBUF),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_RCVBUF),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_RCVBUF),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_REUSEADDR),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_REUSEADDR),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_TIMESTAMP),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_TIMESTAMP),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_TCP),
- seccomp.EqualTo(syscall.TCP_NODELAY),
+ seccomp.EqualTo(unix.SOL_TCP),
+ seccomp.EqualTo(unix.TCP_NODELAY),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_TCP),
+ seccomp.EqualTo(unix.SOL_TCP),
seccomp.EqualTo(linux.TCP_INQ),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_TOS),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_TOS),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_RECVTOS),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_RECVTOS),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_PKTINFO),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_PKTINFO),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_RECVORIGDSTADDR),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_RECVORIGDSTADDR),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IP),
- seccomp.EqualTo(syscall.IP_RECVERR),
+ seccomp.EqualTo(unix.SOL_IP),
+ seccomp.EqualTo(unix.IP_RECVERR),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
- seccomp.EqualTo(syscall.IPV6_TCLASS),
+ seccomp.EqualTo(unix.SOL_IPV6),
+ seccomp.EqualTo(unix.IPV6_TCLASS),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
- seccomp.EqualTo(syscall.IPV6_RECVTCLASS),
+ seccomp.EqualTo(unix.SOL_IPV6),
+ seccomp.EqualTo(unix.IPV6_RECVTCLASS),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
+ seccomp.EqualTo(unix.SOL_IPV6),
seccomp.EqualTo(linux.IPV6_RECVORIGDSTADDR),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
- seccomp.EqualTo(syscall.IPV6_RECVERR),
+ seccomp.EqualTo(unix.SOL_IPV6),
+ seccomp.EqualTo(unix.IPV6_RECVERR),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_IPV6),
- seccomp.EqualTo(syscall.IPV6_V6ONLY),
+ seccomp.EqualTo(unix.SOL_IPV6),
+ seccomp.EqualTo(unix.IPV6_V6ONLY),
seccomp.MatchAny{},
seccomp.EqualTo(4),
},
},
- syscall.SYS_SHUTDOWN: []seccomp.Rule{
+ unix.SYS_SHUTDOWN: []seccomp.Rule{
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SHUT_RD),
+ seccomp.EqualTo(unix.SHUT_RD),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SHUT_WR),
+ seccomp.EqualTo(unix.SHUT_WR),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SHUT_RDWR),
+ seccomp.EqualTo(unix.SHUT_RDWR),
},
},
- syscall.SYS_SOCKET: []seccomp.Rule{
+ unix.SYS_SOCKET: []seccomp.Rule{
{
- seccomp.EqualTo(syscall.AF_INET),
- seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(unix.AF_INET),
+ seccomp.EqualTo(unix.SOCK_STREAM | unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC),
seccomp.EqualTo(0),
},
{
- seccomp.EqualTo(syscall.AF_INET),
- seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(unix.AF_INET),
+ seccomp.EqualTo(unix.SOCK_DGRAM | unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC),
seccomp.EqualTo(0),
},
{
- seccomp.EqualTo(syscall.AF_INET6),
- seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(unix.AF_INET6),
+ seccomp.EqualTo(unix.SOCK_STREAM | unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC),
seccomp.EqualTo(0),
},
{
- seccomp.EqualTo(syscall.AF_INET6),
- seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(unix.AF_INET6),
+ seccomp.EqualTo(unix.SOCK_DGRAM | unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC),
seccomp.EqualTo(0),
},
},
- syscall.SYS_WRITEV: {},
+ unix.SYS_WRITEV: {},
}
}
func controlServerFilters(fd int) seccomp.SyscallRules {
return seccomp.SyscallRules{
- syscall.SYS_ACCEPT: []seccomp.Rule{
+ unix.SYS_ACCEPT: []seccomp.Rule{
{
seccomp.EqualTo(fd),
},
},
- syscall.SYS_LISTEN: []seccomp.Rule{
+ unix.SYS_LISTEN: []seccomp.Rule{
{
seccomp.EqualTo(fd),
seccomp.EqualTo(16 /* unet.backlog */),
},
},
- syscall.SYS_GETSOCKOPT: []seccomp.Rule{
+ unix.SYS_GETSOCKOPT: []seccomp.Rule{
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.SOL_SOCKET),
- seccomp.EqualTo(syscall.SO_PEERCRED),
+ seccomp.EqualTo(unix.SOL_SOCKET),
+ seccomp.EqualTo(unix.SO_PEERCRED),
},
},
}
diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go
index cea5613b8..42cb8ed3a 100644
--- a/runsc/boot/filter/config_amd64.go
+++ b/runsc/boot/filter/config_amd64.go
@@ -17,30 +17,29 @@
package filter
import (
- "syscall"
-
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/seccomp"
)
func init() {
- allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{
+ allowedSyscalls[unix.SYS_ARCH_PRCTL] = []seccomp.Rule{
// TODO(b/168828518): No longer used in Go 1.16+.
{seccomp.EqualTo(linux.ARCH_SET_FS)},
}
- allowedSyscalls[syscall.SYS_CLONE] = []seccomp.Rule{
+ allowedSyscalls[unix.SYS_CLONE] = []seccomp.Rule{
// parent_tidptr and child_tidptr are always 0 because neither
// CLONE_PARENT_SETTID nor CLONE_CHILD_SETTID are used.
{
seccomp.EqualTo(
- syscall.CLONE_VM |
- syscall.CLONE_FS |
- syscall.CLONE_FILES |
- syscall.CLONE_SETTLS |
- syscall.CLONE_SIGHAND |
- syscall.CLONE_SYSVSEM |
- syscall.CLONE_THREAD),
+ unix.CLONE_VM |
+ unix.CLONE_FS |
+ unix.CLONE_FILES |
+ unix.CLONE_SETTLS |
+ unix.CLONE_SIGHAND |
+ unix.CLONE_SYSVSEM |
+ unix.CLONE_THREAD),
seccomp.MatchAny{}, // newsp
seccomp.EqualTo(0), // parent_tidptr
seccomp.EqualTo(0), // child_tidptr
@@ -49,12 +48,12 @@ func init() {
{
// TODO(b/168828518): No longer used in Go 1.16+ (on amd64).
seccomp.EqualTo(
- syscall.CLONE_VM |
- syscall.CLONE_FS |
- syscall.CLONE_FILES |
- syscall.CLONE_SIGHAND |
- syscall.CLONE_SYSVSEM |
- syscall.CLONE_THREAD),
+ unix.CLONE_VM |
+ unix.CLONE_FS |
+ unix.CLONE_FILES |
+ unix.CLONE_SIGHAND |
+ unix.CLONE_SYSVSEM |
+ unix.CLONE_THREAD),
seccomp.MatchAny{}, // newsp
seccomp.EqualTo(0), // parent_tidptr
seccomp.EqualTo(0), // child_tidptr
diff --git a/runsc/boot/filter/config_arm64.go b/runsc/boot/filter/config_arm64.go
index 37313f97f..f162f87ff 100644
--- a/runsc/boot/filter/config_arm64.go
+++ b/runsc/boot/filter/config_arm64.go
@@ -17,21 +17,20 @@
package filter
import (
- "syscall"
-
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/seccomp"
)
func init() {
- allowedSyscalls[syscall.SYS_CLONE] = []seccomp.Rule{
+ allowedSyscalls[unix.SYS_CLONE] = []seccomp.Rule{
{
seccomp.EqualTo(
- syscall.CLONE_VM |
- syscall.CLONE_FS |
- syscall.CLONE_FILES |
- syscall.CLONE_SIGHAND |
- syscall.CLONE_SYSVSEM |
- syscall.CLONE_THREAD),
+ unix.CLONE_VM |
+ unix.CLONE_FS |
+ unix.CLONE_FILES |
+ unix.CLONE_SIGHAND |
+ unix.CLONE_SYSVSEM |
+ unix.CLONE_THREAD),
seccomp.MatchAny{}, // newsp
// These arguments are left uninitialized by the Go
// runtime, so they may be anything (and are unused by
diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go
index 7b8669595..89b66a6da 100644
--- a/runsc/boot/filter/config_profile.go
+++ b/runsc/boot/filter/config_profile.go
@@ -15,19 +15,18 @@
package filter
import (
- "syscall"
-
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/seccomp"
)
// profileFilters returns extra syscalls made by runtime/pprof package.
func profileFilters() seccomp.SyscallRules {
return seccomp.SyscallRules{
- syscall.SYS_OPENAT: []seccomp.Rule{
+ unix.SYS_OPENAT: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC),
+ seccomp.EqualTo(unix.O_RDONLY | unix.O_LARGEFILE | unix.O_CLOEXEC),
},
},
}
diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go
index 209e646a7..41baa78cd 100644
--- a/runsc/boot/filter/extra_filters_msan.go
+++ b/runsc/boot/filter/extra_filters_msan.go
@@ -17,8 +17,7 @@
package filter
import (
- "syscall"
-
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/seccomp"
)
@@ -26,9 +25,9 @@ import (
func instrumentationFilters() seccomp.SyscallRules {
Report("MSAN is enabled: syscall filters less restrictive!")
return seccomp.SyscallRules{
- syscall.SYS_CLONE: {},
- syscall.SYS_MMAP: {},
- syscall.SYS_SCHED_GETAFFINITY: {},
- syscall.SYS_SET_ROBUST_LIST: {},
+ unix.SYS_CLONE: {},
+ unix.SYS_MMAP: {},
+ unix.SYS_SCHED_GETAFFINITY: {},
+ unix.SYS_SET_ROBUST_LIST: {},
}
}
diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go
index 5b99eb8cd..79b2104f0 100644
--- a/runsc/boot/filter/extra_filters_race.go
+++ b/runsc/boot/filter/extra_filters_race.go
@@ -17,8 +17,7 @@
package filter
import (
- "syscall"
-
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/seccomp"
)
@@ -26,17 +25,17 @@ import (
func instrumentationFilters() seccomp.SyscallRules {
Report("TSAN is enabled: syscall filters less restrictive!")
return seccomp.SyscallRules{
- syscall.SYS_BRK: {},
- syscall.SYS_CLOCK_NANOSLEEP: {},
- syscall.SYS_CLONE: {},
- syscall.SYS_FUTEX: {},
- syscall.SYS_MMAP: {},
- syscall.SYS_MUNLOCK: {},
- syscall.SYS_NANOSLEEP: {},
- syscall.SYS_OPEN: {},
- syscall.SYS_OPENAT: {},
- syscall.SYS_SET_ROBUST_LIST: {},
+ unix.SYS_BRK: {},
+ unix.SYS_CLOCK_NANOSLEEP: {},
+ unix.SYS_CLONE: {},
+ unix.SYS_FUTEX: {},
+ unix.SYS_MMAP: {},
+ unix.SYS_MUNLOCK: {},
+ unix.SYS_NANOSLEEP: {},
+ unix.SYS_OPEN: {},
+ unix.SYS_OPENAT: {},
+ unix.SYS_SET_ROBUST_LIST: {},
// Used within glibc's malloc.
- syscall.SYS_TIME: {},
+ unix.SYS_TIME: {},
}
}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 2b0d2cd51..77f632bb9 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -20,9 +20,9 @@ import (
"sort"
"strconv"
"strings"
- "syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fd"
@@ -312,11 +312,11 @@ func setupContainerFS(ctx context.Context, conf *config.Config, mntr *containerM
}
func adjustDirentCache(k *kernel.Kernel) error {
- var hl syscall.Rlimit
- if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &hl); err != nil {
+ var hl unix.Rlimit
+ if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &hl); err != nil {
return fmt.Errorf("getting RLIMIT_NOFILE: %v", err)
}
- if int64(hl.Cur) != syscall.RLIM_INFINITY {
+ if hl.Cur != unix.RLIM_INFINITY {
newSize := hl.Cur / 2
if newSize < gofer.DefaultDirentCacheSize {
log.Infof("Setting gofer dirent cache size to %d", newSize)
@@ -844,10 +844,10 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Confi
// than simply printed to the logs for the 'runsc boot' command.
//
// We check the error message string rather than type because the
- // actual error types (syscall.EIO, syscall.EPIPE) are lost by file system
+ // actual error types (unix.EIO, unix.EPIPE) are lost by file system
// implementation (e.g. p9).
// TODO(gvisor.dev/issue/1765): Remove message when bug is resolved.
- if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) {
+ if strings.Contains(err.Error(), unix.EIO.Error()) || strings.Contains(err.Error(), unix.EPIPE.Error()) {
return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug"))
}
return err
diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go
index ce62236e5..3d2b3506d 100644
--- a/runsc/boot/limits.go
+++ b/runsc/boot/limits.go
@@ -16,9 +16,9 @@ package boot
import (
"fmt"
- "syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sync"
@@ -104,9 +104,9 @@ func (d *defs) initDefaults() error {
// Read host limits that directly affect the sandbox and adjust the defaults
// based on them.
- for _, res := range []int{syscall.RLIMIT_FSIZE, syscall.RLIMIT_NOFILE} {
- var hl syscall.Rlimit
- if err := syscall.Getrlimit(res, &hl); err != nil {
+ for _, res := range []int{unix.RLIMIT_FSIZE, unix.RLIMIT_NOFILE} {
+ var hl unix.Rlimit
+ if err := unix.Getrlimit(res, &hl); err != nil {
return err
}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index b77b4762e..3121ca6eb 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -19,7 +19,6 @@ import (
"math/rand"
"os"
"reflect"
- "syscall"
"testing"
"time"
@@ -78,7 +77,7 @@ func testSpec() *specs.Spec {
// sandbox side of the connection, and a function that when called will stop the
// gofer.
func startGofer(root string) (int, func(), error) {
- fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
+ fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return 0, nil, err
}
@@ -86,8 +85,8 @@ func startGofer(root string) (int, func(), error) {
socket, err := unet.NewSocket(goferEnd)
if err != nil {
- syscall.Close(sandboxEnd)
- syscall.Close(goferEnd)
+ unix.Close(sandboxEnd)
+ unix.Close(goferEnd)
return 0, nil, fmt.Errorf("error creating server on FD %d: %v", goferEnd, err)
}
at, err := fsgofer.NewAttachPoint(root, fsgofer.Config{ROMount: true})
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 3d3a813df..7e627e4c6 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -19,8 +19,8 @@ import (
"net"
"runtime"
"strings"
- "syscall"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
@@ -195,7 +195,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
for j := 0; j < link.NumChannels; j++ {
// Copy the underlying FD.
oldFD := args.FilePayload.Files[fdOffset].Fd()
- newFD, err := syscall.Dup(int(oldFD))
+ newFD, err := unix.Dup(int(oldFD))
if err != nil {
return fmt.Errorf("failed to dup FD %v: %v", oldFD, err)
}
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index ac9e4e3a8..438b7ef3e 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -27,7 +27,6 @@ import (
"path/filepath"
"strconv"
"strings"
- "syscall"
"time"
"github.com/cenkalti/backoff"
@@ -111,7 +110,7 @@ func setValue(path, name, data string) error {
err := ioutil.WriteFile(fullpath, []byte(data), 0700)
if err == nil {
return nil
- } else if !errors.Is(err, syscall.EINTR) {
+ } else if !errors.Is(err, unix.EINTR) {
return err
}
}
@@ -161,7 +160,7 @@ func fillFromAncestor(path string) (string, error) {
err := ioutil.WriteFile(path, []byte(val), 0700)
if err == nil {
break
- } else if !errors.Is(err, syscall.EINTR) {
+ } else if !errors.Is(err, unix.EINTR) {
return "", err
}
}
@@ -337,7 +336,7 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error {
c.Own[key] = true
if err := os.MkdirAll(path, 0755); err != nil {
- if cfg.optional && errors.Is(err, syscall.EROFS) {
+ if cfg.optional && errors.Is(err, unix.EROFS) {
log.Infof("Skipping cgroup %q", key)
continue
}
@@ -370,7 +369,7 @@ func (c *Cgroup) Uninstall() error {
defer cancel()
b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
fn := func() error {
- err := syscall.Rmdir(path)
+ err := unix.Rmdir(path)
if os.IsNotExist(err) {
return nil
}
diff --git a/runsc/cli/BUILD b/runsc/cli/BUILD
index 32cce2a18..f1e3cce68 100644
--- a/runsc/cli/BUILD
+++ b/runsc/cli/BUILD
@@ -18,5 +18,6 @@ go_library(
"//runsc/flag",
"//runsc/specutils",
"@com_github_google_subcommands//:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/cli/main.go b/runsc/cli/main.go
index bf6928941..a3c515f4b 100644
--- a/runsc/cli/main.go
+++ b/runsc/cli/main.go
@@ -23,10 +23,10 @@ import (
"os"
"os/signal"
"runtime"
- "syscall"
"time"
"github.com/google/subcommands"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/platform"
@@ -198,7 +198,7 @@ func Main(version string) {
// want with them. Since Docker and Containerd both eat boot's stderr, we
// dup our stderr to the provided log FD so that panics will appear in the
// logs, rather than just disappear.
- if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil {
+ if err := unix.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil {
cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err)
}
} else if conf.AlsoLogToStderr {
@@ -227,11 +227,11 @@ func Main(version string) {
// SIGTERM is sent to all processes if a test exceeds its
// timeout and this case is handled by syscall_test_runner.
log.Warningf("Block the TERM signal. This is only safe in tests!")
- signal.Ignore(syscall.SIGTERM)
+ signal.Ignore(unix.SIGTERM)
}
// Call the subcommand and pass in the configuration.
- var ws syscall.WaitStatus
+ var ws unix.WaitStatus
subcmdCode := subcommands.Execute(context.Background(), conf, &ws)
if subcmdCode == subcommands.ExitSuccess {
log.Infof("Exiting with status: %v", ws)
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index e3e289da3..2c3b4058b 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -77,6 +77,7 @@ go_test(
"delete_test.go",
"exec_test.go",
"gofer_test.go",
+ "mitigate_test.go",
],
data = [
"//runsc",
@@ -91,6 +92,8 @@ go_test(
"//pkg/urpc",
"//runsc/config",
"//runsc/container",
+ "//runsc/mitigate",
+ "//runsc/mitigate/mock",
"//runsc/specutils",
"@com_github_google_go_cmp//cmp:go_default_library",
"@com_github_google_go_cmp//cmp/cmpopts:go_default_library",
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 2c92e3067..a14249641 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -19,7 +19,6 @@ import (
"os"
"runtime/debug"
"strings"
- "syscall"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -259,8 +258,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
ws := l.WaitExit()
log.Infof("application exiting with %+v", ws)
- waitStatus := args[1].(*syscall.WaitStatus)
- *waitStatus = syscall.WaitStatus(ws.Status())
+ waitStatus := args[1].(*unix.WaitStatus)
+ *waitStatus = unix.WaitStatus(ws.Status())
l.Destroy()
return subcommands.ExitSuccess
}
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go
index 124198239..a9dbe86de 100644
--- a/runsc/cmd/checkpoint.go
+++ b/runsc/cmd/checkpoint.go
@@ -18,9 +18,9 @@ import (
"context"
"os"
"path/filepath"
- "syscall"
"github.com/google/subcommands"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/config"
"gvisor.dev/gvisor/runsc/container"
@@ -73,7 +73,7 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa
id := f.Arg(0)
conf := args[0].(*config.Config)
- waitStatus := args[1].(*syscall.WaitStatus)
+ waitStatus := args[1].(*unix.WaitStatus)
cont, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{})
if err != nil {
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
index 189244765..e988247da 100644
--- a/runsc/cmd/chroot.go
+++ b/runsc/cmd/chroot.go
@@ -18,8 +18,8 @@ import (
"fmt"
"os"
"path/filepath"
- "syscall"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/specutils"
)
@@ -49,11 +49,11 @@ func pivotRoot(root string) error {
// will be moved to "/" too. The parent mount of the old_root will be
// new_root, so after umounting the old_root, we will see only
// the new_root in "/".
- if err := syscall.PivotRoot(".", "."); err != nil {
+ if err := unix.PivotRoot(".", "."); err != nil {
return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err)
}
- if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
+ if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
return fmt.Errorf("error umounting the old root file system: %v", err)
}
return nil
@@ -70,26 +70,26 @@ func setUpChroot(pidns bool) error {
// Convert all shared mounts into slave to be sure that nothing will be
// propagated outside of our namespace.
- if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
+ if err := unix.Mount("", "/", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
return fmt.Errorf("error converting mounts: %v", err)
}
- if err := syscall.Mount("runsc-root", chroot, "tmpfs", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC, ""); err != nil {
+ if err := unix.Mount("runsc-root", chroot, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, ""); err != nil {
return fmt.Errorf("error mounting tmpfs in choot: %v", err)
}
if pidns {
- flags := uint32(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC | syscall.MS_RDONLY)
+ flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY)
if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil {
return fmt.Errorf("error mounting proc in chroot: %v", err)
}
} else {
- if err := mountInChroot(chroot, "/proc", "/proc", "bind", syscall.MS_BIND|syscall.MS_RDONLY|syscall.MS_REC); err != nil {
+ if err := mountInChroot(chroot, "/proc", "/proc", "bind", unix.MS_BIND|unix.MS_RDONLY|unix.MS_REC); err != nil {
return fmt.Errorf("error mounting proc in chroot: %v", err)
}
}
- if err := syscall.Mount("", chroot, "", syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_BIND, ""); err != nil {
+ if err := unix.Mount("", chroot, "", unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_BIND, ""); err != nil {
return fmt.Errorf("error remounting chroot in read-only: %v", err)
}
diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go
index f1a4887ef..4dd55cc33 100644
--- a/runsc/cmd/cmd.go
+++ b/runsc/cmd/cmd.go
@@ -19,9 +19,9 @@ import (
"fmt"
"runtime"
"strconv"
- "syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/specutils"
)
@@ -71,7 +71,7 @@ func setCapsAndCallSelf(args []string, caps *specs.LinuxCapabilities) error {
binPath := specutils.ExePath
log.Infof("Execve %q again, bye!", binPath)
- err := syscall.Exec(binPath, args, []string{})
+ err := unix.Exec(binPath, args, []string{})
return fmt.Errorf("error executing %s: %v", binPath, err)
}
@@ -83,16 +83,16 @@ func callSelfAsNobody(args []string) error {
const nobody = 65534
- if _, _, err := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(nobody), 0, 0); err != 0 {
+ if _, _, err := unix.RawSyscall(unix.SYS_SETGID, uintptr(nobody), 0, 0); err != 0 {
return fmt.Errorf("error setting uid: %v", err)
}
- if _, _, err := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(nobody), 0, 0); err != 0 {
+ if _, _, err := unix.RawSyscall(unix.SYS_SETUID, uintptr(nobody), 0, 0); err != 0 {
return fmt.Errorf("error setting gid: %v", err)
}
binPath := specutils.ExePath
log.Infof("Execve %q again, bye!", binPath)
- err := syscall.Exec(binPath, args, []string{})
+ err := unix.Exec(binPath, args, []string{})
return fmt.Errorf("error executing %s: %v", binPath, err)
}
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index b84142b0d..6212ffb2e 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -21,10 +21,10 @@ import (
"strconv"
"strings"
"sync"
- "syscall"
"time"
"github.com/google/subcommands"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/runsc/config"
@@ -135,7 +135,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Perform synchronous actions.
if d.signal > 0 {
log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid)
- if err := syscall.Kill(c.Sandbox.Pid, syscall.Signal(d.signal)); err != nil {
+ if err := unix.Kill(c.Sandbox.Pid, unix.Signal(d.signal)); err != nil {
return Errorf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid)
}
}
@@ -317,7 +317,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
wg.Wait()
}()
signals := make(chan os.Signal, 1)
- signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT)
+ signal.Notify(signals, unix.SIGTERM, unix.SIGINT)
select {
case <-readyChan:
break // Safe to proceed.
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index 8a8d9f752..22c1dfeb8 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -26,10 +26,10 @@ import (
"path/filepath"
"strconv"
"strings"
- "syscall"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/config"
"gvisor.dev/gvisor/runsc/container"
@@ -86,7 +86,7 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
}
conf := args[0].(*config.Config)
- waitStatus := args[1].(*syscall.WaitStatus)
+ waitStatus := args[1].(*unix.WaitStatus)
if conf.Rootless {
if err := specutils.MaybeRunAsRoot(); err != nil {
@@ -225,7 +225,7 @@ func resolvePath(path string) (string, error) {
return "", fmt.Errorf("resolving %q: %v", path, err)
}
path = filepath.Clean(path)
- if err := syscall.Access(path, 0); err != nil {
+ if err := unix.Access(path, 0); err != nil {
return "", fmt.Errorf("unable to access %q: %v", path, err)
}
return path, nil
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index e9726401a..242d474b8 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -24,11 +24,11 @@ import (
"path/filepath"
"strconv"
"strings"
- "syscall"
"time"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -110,7 +110,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
if err != nil {
Fatalf("parsing process spec: %v", err)
}
- waitStatus := args[1].(*syscall.WaitStatus)
+ waitStatus := args[1].(*unix.WaitStatus)
c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{})
if err != nil {
@@ -149,7 +149,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
return ex.exec(c, e, waitStatus)
}
-func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
+func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus {
// Start the new process and get its pid.
pid, err := c.Execute(e)
if err != nil {
@@ -189,7 +189,7 @@ func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *sy
return subcommands.ExitSuccess
}
-func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
+func (ex *Exec) execChildAndWait(waitStatus *unix.WaitStatus) subcommands.ExitStatus {
var args []string
for _, a := range os.Args[1:] {
if !strings.Contains(a, "detach") {
@@ -233,7 +233,7 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi
cmd.Stdin = tty
cmd.Stdout = tty
cmd.Stderr = tty
- cmd.SysProcAttr = &syscall.SysProcAttr{
+ cmd.SysProcAttr = &unix.SysProcAttr{
Setsid: true,
Setctty: true,
// The Ctty FD must be the FD in the child process's FD
@@ -263,7 +263,7 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi
}
return pid == cmd.Process.Pid, nil
}
- if pe, ok := err.(*os.PathError); !ok || pe.Err != syscall.ENOENT {
+ if pe, ok := err.(*os.PathError); !ok || pe.Err != unix.ENOENT {
return false, err
}
// No file yet, continue to wait...
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 371fcc0ae..639b2219c 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -21,7 +21,6 @@ import (
"os"
"path/filepath"
"strings"
- "syscall"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -149,16 +148,16 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// fsgofer should run with a umask of 0, because we want to preserve file
// modes exactly as sent by the sandbox, which will have applied its own umask.
- syscall.Umask(0)
+ unix.Umask(0)
if err := fsgofer.OpenProcSelfFD(); err != nil {
Fatalf("failed to open /proc/self/fd: %v", err)
}
- if err := syscall.Chroot(root); err != nil {
+ if err := unix.Chroot(root); err != nil {
Fatalf("failed to chroot to %q: %v", root, err)
}
- if err := syscall.Chdir("/"); err != nil {
+ if err := unix.Chdir("/"); err != nil {
Fatalf("changing working dir: %v", err)
}
log.Infof("Process chroot'd to %q", root)
@@ -166,7 +165,8 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Start with root mount, then add any other additional mount as needed.
ats := make([]p9.Attacher, 0, len(spec.Mounts)+1)
ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{
- ROMount: spec.Root.Readonly || conf.Overlay,
+ ROMount: spec.Root.Readonly || conf.Overlay,
+ EnableXattr: conf.Verity,
})
if err != nil {
Fatalf("creating attach point: %v", err)
@@ -178,8 +178,9 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
for _, m := range spec.Mounts {
if specutils.Is9PMount(m) {
cfg := fsgofer.Config{
- ROMount: isReadonlyMount(m.Options) || conf.Overlay,
- HostUDS: conf.FSGoferHostUDS,
+ ROMount: isReadonlyMount(m.Options) || conf.Overlay,
+ HostUDS: conf.FSGoferHostUDS,
+ EnableXattr: conf.Verity,
}
ap, err := fsgofer.NewAttachPoint(m.Destination, cfg)
if err != nil {
@@ -262,7 +263,7 @@ func isReadonlyMount(opts []string) bool {
func setupRootFS(spec *specs.Spec, conf *config.Config) error {
// Convert all shared mounts into slaves to be sure that nothing will be
// propagated outside of our namespace.
- if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
+ if err := unix.Mount("", "/", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
Fatalf("error converting mounts: %v", err)
}
@@ -274,30 +275,30 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error {
//
// We need a directory to construct a new root and we know that
// runsc can't start without /proc, so we can use it for this.
- flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC)
- if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil {
+ flags := uintptr(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC)
+ if err := unix.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil {
Fatalf("error mounting tmpfs: %v", err)
}
// Prepare tree structure for pivot_root(2).
os.Mkdir("/proc/proc", 0755)
os.Mkdir("/proc/root", 0755)
- if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil {
+ if err := unix.Mount("runsc-proc", "/proc/proc", "proc", flags|unix.MS_RDONLY, ""); err != nil {
Fatalf("error mounting proc: %v", err)
}
root = "/proc/root"
}
// Mount root path followed by submounts.
- if err := syscall.Mount(spec.Root.Path, root, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
+ if err := unix.Mount(spec.Root.Path, root, "bind", unix.MS_BIND|unix.MS_REC, ""); err != nil {
return fmt.Errorf("mounting root on root (%q) err: %v", root, err)
}
- flags := uint32(syscall.MS_SLAVE | syscall.MS_REC)
+ flags := uint32(unix.MS_SLAVE | unix.MS_REC)
if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation})
}
- if err := syscall.Mount("", root, "", uintptr(flags), ""); err != nil {
+ if err := unix.Mount("", root, "", uintptr(flags), ""); err != nil {
return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err)
}
@@ -323,8 +324,8 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error {
// If root is a mount point but not read-only, we can change mount options
// to make it read-only for extra safety.
log.Infof("Remounting root as readonly: %q", root)
- flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC)
- if err := syscall.Mount(root, root, "bind", flags, ""); err != nil {
+ flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY | unix.MS_REC)
+ if err := unix.Mount(root, root, "bind", flags, ""); err != nil {
return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err)
}
}
@@ -354,10 +355,10 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error {
return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
}
- flags := specutils.OptionsToFlags(m.Options) | syscall.MS_BIND
+ flags := specutils.OptionsToFlags(m.Options) | unix.MS_BIND
if conf.Overlay {
// Force mount read-only if writes are not going to be sent to it.
- flags |= syscall.MS_RDONLY
+ flags |= unix.MS_RDONLY
}
log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags)
@@ -368,7 +369,7 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error {
// Set propagation options that cannot be set together with other options.
flags = specutils.PropOptionsToFlags(m.Options)
if flags != 0 {
- if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil {
+ if err := unix.Mount("", dst, "", uintptr(flags), ""); err != nil {
return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err)
}
}
@@ -469,8 +470,8 @@ func adjustMountOptions(conf *config.Config, path string, opts []string) ([]stri
copy(rv, opts)
if conf.OverlayfsStaleRead {
- statfs := syscall.Statfs_t{}
- if err := syscall.Statfs(path, &statfs); err != nil {
+ statfs := unix.Statfs_t{}
+ if err := unix.Statfs(path, &statfs); err != nil {
return nil, err
}
if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC {
diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go
index e0df39266..239fc7ac2 100644
--- a/runsc/cmd/kill.go
+++ b/runsc/cmd/kill.go
@@ -19,7 +19,6 @@ import (
"fmt"
"strconv"
"strings"
- "syscall"
"github.com/google/subcommands"
"golang.org/x/sys/unix"
@@ -99,10 +98,10 @@ func (k *Kill) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
return subcommands.ExitSuccess
}
-func parseSignal(s string) (syscall.Signal, error) {
+func parseSignal(s string) (unix.Signal, error) {
n, err := strconv.Atoi(s)
if err == nil {
- sig := syscall.Signal(n)
+ sig := unix.Signal(n)
for _, msig := range signalMap {
if sig == msig {
return sig, nil
@@ -116,7 +115,7 @@ func parseSignal(s string) (syscall.Signal, error) {
return -1, fmt.Errorf("unknown signal %q", s)
}
-var signalMap = map[string]syscall.Signal{
+var signalMap = map[string]unix.Signal{
"ABRT": unix.SIGABRT,
"ALRM": unix.SIGALRM,
"BUS": unix.SIGBUS,
diff --git a/runsc/cmd/mitigate.go b/runsc/cmd/mitigate.go
index 822af1917..fddf0e0dd 100644
--- a/runsc/cmd/mitigate.go
+++ b/runsc/cmd/mitigate.go
@@ -16,6 +16,8 @@ package cmd
import (
"context"
+ "fmt"
+ "io/ioutil"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/log"
@@ -23,9 +25,23 @@ import (
"gvisor.dev/gvisor/runsc/mitigate"
)
+const (
+ // cpuInfo is the path used to parse CPU info.
+ cpuInfo = "/proc/cpuinfo"
+ // allPossibleCPUs is the path used to enable CPUs.
+ allPossibleCPUs = "/sys/devices/system/cpu/possible"
+)
+
// Mitigate implements subcommands.Command for the "mitigate" command.
type Mitigate struct {
- mitigate mitigate.Mitigate
+ // Run the command without changing the underlying system.
+ dryRun bool
+ // Reverse mitigate by turning on all CPU cores.
+ reverse bool
+ // Path to file to read to create CPUSet.
+ path string
+ // Callback to check if a given thread is vulnerable.
+ vulnerable func(other mitigate.Thread) bool
}
// Name implements subcommands.command.name.
@@ -38,14 +54,19 @@ func (*Mitigate) Synopsis() string {
return "mitigate mitigates the underlying system against side channel attacks"
}
-// Usage implements subcommands.Command.Usage.
-func (m *Mitigate) Usage() string {
- return m.mitigate.Usage()
+// Usage implments Usage for cmd.Mitigate.
+func (m Mitigate) Usage() string {
+ return `mitigate [flags]
+
+mitigate mitigates a system to the "MDS" vulnerability by implementing a manual shutdown of SMT. The command checks /proc/cpuinfo for cpus having the MDS vulnerability, and if found, shutdown all but one CPU per hyperthread pair via /sys/devices/system/cpu/cpu{N}/online. CPUs can be restored by writing "2" to each file in /sys/devices/system/cpu/cpu{N}/online or performing a system reboot.
+
+The command can be reversed with --reverse, which reads the total CPUs from /sys/devices/system/cpu/possible and enables all with /sys/devices/system/cpu/cpu{N}/online.`
}
-// SetFlags implements subcommands.Command.SetFlags.
+// SetFlags sets flags for the command Mitigate.
func (m *Mitigate) SetFlags(f *flag.FlagSet) {
- m.mitigate.SetFlags(f)
+ f.BoolVar(&m.dryRun, "dryrun", false, "run the command without changing system")
+ f.BoolVar(&m.reverse, "reverse", false, "reverse mitigate by enabling all CPUs")
}
// Execute implements subcommands.Command.Execute.
@@ -55,10 +76,97 @@ func (m *Mitigate) Execute(_ context.Context, f *flag.FlagSet, args ...interface
return subcommands.ExitUsageError
}
- if err := m.mitigate.Execute(); err != nil {
+ m.path = cpuInfo
+ if m.reverse {
+ m.path = allPossibleCPUs
+ }
+
+ m.vulnerable = func(other mitigate.Thread) bool {
+ return other.IsVulnerable()
+ }
+
+ if _, err := m.doExecute(); err != nil {
log.Warningf("Execute failed: %v", err)
return subcommands.ExitFailure
}
return subcommands.ExitSuccess
}
+
+// Execute executes the Mitigate command.
+func (m *Mitigate) doExecute() (mitigate.CPUSet, error) {
+ if m.dryRun {
+ log.Infof("Running with DryRun. No cpu settings will be changed.")
+ }
+ if m.reverse {
+ data, err := ioutil.ReadFile(m.path)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read %s: %v", m.path, err)
+ }
+
+ set, err := m.doReverse(data)
+ if err != nil {
+ return nil, fmt.Errorf("reverse operation failed: %v", err)
+ }
+ return set, nil
+ }
+
+ data, err := ioutil.ReadFile(m.path)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read %s: %v", m.path, err)
+ }
+ set, err := m.doMitigate(data)
+ if err != nil {
+ return nil, fmt.Errorf("mitigate operation failed: %v", err)
+ }
+ return set, nil
+}
+
+func (m *Mitigate) doMitigate(data []byte) (mitigate.CPUSet, error) {
+ set, err := mitigate.NewCPUSet(data, m.vulnerable)
+ if err != nil {
+ return nil, err
+ }
+
+ log.Infof("Mitigate found the following CPUs...")
+ log.Infof("%s", set)
+
+ disableList := set.GetShutdownList()
+ log.Infof("Disabling threads on thread pairs.")
+ for _, t := range disableList {
+ log.Infof("Disable thread: %s", t)
+ if m.dryRun {
+ continue
+ }
+ if err := t.Disable(); err != nil {
+ return nil, fmt.Errorf("error disabling thread: %s err: %v", t, err)
+ }
+ }
+ log.Infof("Shutdown successful.")
+ return set, nil
+}
+
+func (m *Mitigate) doReverse(data []byte) (mitigate.CPUSet, error) {
+ set, err := mitigate.NewCPUSetFromPossible(data)
+ if err != nil {
+ return nil, err
+ }
+
+ log.Infof("Reverse mitigate found the following CPUs...")
+ log.Infof("%s", set)
+
+ enableList := set.GetRemainingList()
+
+ log.Infof("Enabling all CPUs...")
+ for _, t := range enableList {
+ log.Infof("Enabling thread: %s", t)
+ if m.dryRun {
+ continue
+ }
+ if err := t.Enable(); err != nil {
+ return nil, fmt.Errorf("error enabling thread: %s err: %v", t, err)
+ }
+ }
+ log.Infof("Enable successful.")
+ return set, nil
+}
diff --git a/runsc/cmd/mitigate_test.go b/runsc/cmd/mitigate_test.go
new file mode 100644
index 000000000..163fece42
--- /dev/null
+++ b/runsc/cmd/mitigate_test.go
@@ -0,0 +1,169 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "strings"
+ "testing"
+
+ "gvisor.dev/gvisor/runsc/mitigate"
+ "gvisor.dev/gvisor/runsc/mitigate/mock"
+)
+
+type executeTestCase struct {
+ name string
+ mitigateData string
+ mitigateError error
+ mitigateCPU int
+ reverseData string
+ reverseError error
+ reverseCPU int
+}
+
+func TestExecute(t *testing.T) {
+
+ partial := `processor : 1
+vendor_id : AuthenticAMD
+cpu family : 23
+model : 49
+model name : AMD EPYC 7B12
+physical id : 0
+bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+power management:
+`
+
+ for _, tc := range []executeTestCase{
+ {
+ name: "CascadeLake4",
+ mitigateData: mock.CascadeLake4.MakeCPUString(),
+ mitigateCPU: 2,
+ reverseData: mock.CascadeLake4.MakeSysPossibleString(),
+ reverseCPU: 4,
+ },
+ {
+ name: "Empty",
+ mitigateData: "",
+ mitigateError: fmt.Errorf(`mitigate operation failed: no cpus found for: ""`),
+ reverseData: "",
+ reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from possible: ""`),
+ },
+ {
+ name: "Partial",
+ mitigateData: `processor : 0
+vendor_id : AuthenticAMD
+cpu family : 23
+model : 49
+model name : AMD EPYC 7B12
+physical id : 0
+core id : 0
+cpu cores : 1
+bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+power management::84
+
+` + partial,
+ mitigateError: fmt.Errorf(`mitigate operation failed: failed to match key "core id": %q`, partial),
+ reverseData: "1-",
+ reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from possible: %q`, "1-"),
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ m := &Mitigate{
+ dryRun: true,
+ vulnerable: func(other mitigate.Thread) bool {
+ return other.IsVulnerable()
+ },
+ }
+ m.doExecuteTest(t, "Mitigate", tc.mitigateData, tc.mitigateCPU, tc.mitigateError)
+
+ m.reverse = true
+ m.doExecuteTest(t, "Reverse", tc.reverseData, tc.reverseCPU, tc.reverseError)
+ })
+ }
+}
+
+func TestExecuteSmoke(t *testing.T) {
+ smokeMitigate, err := ioutil.ReadFile(cpuInfo)
+ if err != nil {
+ t.Fatalf("Failed to read %s: %v", cpuInfo, err)
+ }
+
+ m := &Mitigate{
+ dryRun: true,
+ vulnerable: func(other mitigate.Thread) bool {
+ return other.IsVulnerable()
+ },
+ }
+
+ m.doExecuteTest(t, "Mitigate", string(smokeMitigate), 0, nil)
+
+ smokeReverse, err := ioutil.ReadFile(allPossibleCPUs)
+ if err != nil {
+ t.Fatalf("Failed to read %s: %v", allPossibleCPUs, err)
+ }
+
+ m.reverse = true
+ m.doExecuteTest(t, "Reverse", string(smokeReverse), 0, nil)
+}
+
+// doExecuteTest runs Execute with the mitigate operation and reverse operation.
+func (m *Mitigate) doExecuteTest(t *testing.T, name, data string, want int, wantErr error) {
+ t.Run(name, func(t *testing.T) {
+ file, err := ioutil.TempFile("", "outfile.txt")
+ if err != nil {
+ t.Fatalf("Failed to create tmpfile: %v", err)
+ }
+ defer os.Remove(file.Name())
+
+ if _, err := file.WriteString(data); err != nil {
+ t.Fatalf("Failed to write to file: %v", err)
+ }
+
+ // Set fields for mitigate and dryrun to keep test hermetic.
+ m.path = file.Name()
+
+ set, err := m.doExecute()
+ if err = checkErr(wantErr, err); err != nil {
+ t.Fatalf("Mitigate error mismatch: %v", err)
+ }
+
+ // case where test should end in error or we don't care
+ // about how many cpus are returned.
+ if wantErr != nil || want < 1 {
+ return
+ }
+ got := len(set.GetRemainingList())
+ if want != got {
+ t.Fatalf("Failed wrong number of remaining CPUs: want %d, got %d", want, got)
+ }
+
+ })
+}
+
+// checkErr checks error for equality.
+func checkErr(want, got error) error {
+ switch {
+ case want == nil && got == nil:
+ case want != nil && got == nil:
+ fallthrough
+ case want == nil && got != nil:
+ fallthrough
+ case want.Error() != strings.Trim(got.Error(), " "):
+ return fmt.Errorf("got: %v want: %v", got, want)
+ }
+ return nil
+}
diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go
index 096ec814c..b21f05921 100644
--- a/runsc/cmd/restore.go
+++ b/runsc/cmd/restore.go
@@ -17,9 +17,9 @@ package cmd
import (
"context"
"path/filepath"
- "syscall"
"github.com/google/subcommands"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/runsc/config"
"gvisor.dev/gvisor/runsc/container"
"gvisor.dev/gvisor/runsc/flag"
@@ -78,7 +78,7 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{
id := f.Arg(0)
conf := args[0].(*config.Config)
- waitStatus := args[1].(*syscall.WaitStatus)
+ waitStatus := args[1].(*unix.WaitStatus)
if conf.Rootless {
return Errorf("Rootless mode not supported with %q", r.Name())
diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go
index c48cbe4cd..722181aff 100644
--- a/runsc/cmd/run.go
+++ b/runsc/cmd/run.go
@@ -16,9 +16,9 @@ package cmd
import (
"context"
- "syscall"
"github.com/google/subcommands"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/runsc/config"
"gvisor.dev/gvisor/runsc/container"
"gvisor.dev/gvisor/runsc/flag"
@@ -65,7 +65,7 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
id := f.Arg(0)
conf := args[0].(*config.Config)
- waitStatus := args[1].(*syscall.WaitStatus)
+ waitStatus := args[1].(*unix.WaitStatus)
if conf.Rootless {
return Errorf("Rootless mode not supported with %q", r.Name())
diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go
index 5d55422c7..d7a783b88 100644
--- a/runsc/cmd/wait.go
+++ b/runsc/cmd/wait.go
@@ -18,9 +18,9 @@ import (
"context"
"encoding/json"
"os"
- "syscall"
"github.com/google/subcommands"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/runsc/config"
"gvisor.dev/gvisor/runsc/container"
"gvisor.dev/gvisor/runsc/flag"
@@ -77,7 +77,7 @@ func (wt *Wait) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("loading container: %v", err)
}
- var waitStatus syscall.WaitStatus
+ var waitStatus unix.WaitStatus
switch {
// Wait on the whole container.
case wt.rootPID == unsetPID && wt.pid == unsetPID:
@@ -119,7 +119,7 @@ type waitResult struct {
// exitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly.
-func exitStatus(status syscall.WaitStatus) int {
+func exitStatus(status unix.WaitStatus) int {
if status.Signaled() {
return 128 + int(status.Signal())
}
diff --git a/runsc/config/config.go b/runsc/config/config.go
index e9fd7708f..34ef48825 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -64,6 +64,9 @@ type Config struct {
// Overlay is whether to wrap the root filesystem in an overlay.
Overlay bool `flag:"overlay"`
+ // Verity is whether there's one or more verity file system to mount.
+ Verity bool `flag:"verity"`
+
// FSGoferHostUDS enables the gofer to mount a host UDS.
FSGoferHostUDS bool `flag:"fsgofer-host-uds"`
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index 7e738dfdf..adbee506c 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -69,6 +69,7 @@ func RegisterFlags() {
// Flags that control sandbox runtime behavior: FS related.
flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
+ flag.Bool("verity", false, "specifies whether a verity file system will be mounted.")
flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem")
flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.")
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 8793c8916..3620dc8c3 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -30,6 +30,7 @@ go_library(
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_gofrs_flock//:go_default_library",
"@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index 7a3d5a523..79b056fce 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -21,7 +21,6 @@ import (
"math/rand"
"os"
"path/filepath"
- "syscall"
"testing"
"time"
@@ -320,7 +319,7 @@ func TestJobControlSignalExec(t *testing.T) {
// Send a SIGTERM to the foreground process for the exec PID. Note that
// although we pass in the PID of "bash", it should actually terminate
// "sleep", since that is the foreground process.
- if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.SIGTERM, true /* fgProcess */); err != nil {
+ if err := c.Sandbox.SignalProcess(c.ID, pid, unix.SIGTERM, true /* fgProcess */); err != nil {
t.Fatalf("error signaling container: %v", err)
}
@@ -340,7 +339,7 @@ func TestJobControlSignalExec(t *testing.T) {
// Send a SIGKILL to the foreground process again. This time "bash"
// should be killed. We use SIGKILL instead of SIGTERM or SIGINT
// because bash ignores those.
- if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.SIGKILL, true /* fgProcess */); err != nil {
+ if err := c.Sandbox.SignalProcess(c.ID, pid, unix.SIGKILL, true /* fgProcess */); err != nil {
t.Fatalf("error signaling container: %v", err)
}
expectedPL = expectedPL[:1]
@@ -356,7 +355,7 @@ func TestJobControlSignalExec(t *testing.T) {
if !ws.Signaled() {
t.Error("ws.Signaled() got false, want true")
}
- if got, want := ws.Signal(), syscall.SIGKILL; got != want {
+ if got, want := ws.Signal(), unix.SIGKILL; got != want {
t.Errorf("ws.Signal() got %v, want %v", got, want)
}
}
@@ -423,7 +422,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
// very early, otherwise it might exit before we have a chance to call
// Wait.
var (
- ws syscall.WaitStatus
+ ws unix.WaitStatus
wg sync.WaitGroup
)
wg.Add(1)
@@ -459,7 +458,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
// Send a SIGTERM to the foreground process. We pass PID=0, indicating
// that the root process should be killed. However, by setting
// fgProcess=true, the signal should actually be sent to sleep.
- if err := c.Sandbox.SignalProcess(c.ID, 0 /* PID */, syscall.SIGTERM, true /* fgProcess */); err != nil {
+ if err := c.Sandbox.SignalProcess(c.ID, 0 /* PID */, unix.SIGTERM, true /* fgProcess */); err != nil {
t.Fatalf("error signaling container: %v", err)
}
@@ -479,7 +478,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
// Send a SIGKILL to the foreground process again. This time "bash"
// should be killed. We use SIGKILL instead of SIGTERM or SIGINT
// because bash ignores those.
- if err := c.Sandbox.SignalProcess(c.ID, 0 /* PID */, syscall.SIGKILL, true /* fgProcess */); err != nil {
+ if err := c.Sandbox.SignalProcess(c.ID, 0 /* PID */, unix.SIGKILL, true /* fgProcess */); err != nil {
t.Fatalf("error signaling container: %v", err)
}
@@ -488,7 +487,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
if !ws.Signaled() {
t.Error("ws.Signaled() got false, want true")
}
- if got, want := ws.Signal(), syscall.SIGKILL; got != want {
+ if got, want := ws.Signal(), unix.SIGKILL; got != want {
t.Errorf("ws.Signal() got %v, want %v", got, want)
}
}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 40812efb8..f9d83c118 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -30,6 +30,7 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/log"
@@ -244,7 +245,7 @@ func New(conf *config.Config, args Args) (*Container, error) {
// If there is cgroup config, install it before creating sandbox process.
if err := cg.Install(args.Spec.Linux.Resources); err != nil {
switch {
- case errors.Is(err, syscall.EACCES) && conf.Rootless:
+ case errors.Is(err, unix.EACCES) && conf.Rootless:
log.Warningf("Skipping cgroup configuration in rootless mode: %v", err)
cg = nil
default:
@@ -447,7 +448,7 @@ func (c *Container) Restore(spec *specs.Spec, conf *config.Config, restoreFile s
}
// Run is a helper that calls Create + Start + Wait.
-func Run(conf *config.Config, args Args) (syscall.WaitStatus, error) {
+func Run(conf *config.Config, args Args) (unix.WaitStatus, error) {
log.Debugf("Run container, cid: %s, rootDir: %q", args.ID, conf.RootDir)
c, err := New(conf, args)
if err != nil {
@@ -517,7 +518,7 @@ func (c *Container) SandboxPid() int {
// Wait waits for the container to exit, and returns its WaitStatus.
// Call to wait on a stopped container is needed to retrieve the exit status
// and wait returns immediately.
-func (c *Container) Wait() (syscall.WaitStatus, error) {
+func (c *Container) Wait() (unix.WaitStatus, error) {
log.Debugf("Wait on container, cid: %s", c.ID)
ws, err := c.Sandbox.Wait(c.ID)
if err == nil {
@@ -529,7 +530,7 @@ func (c *Container) Wait() (syscall.WaitStatus, error) {
// WaitRootPID waits for process 'pid' in the sandbox's PID namespace and
// returns its WaitStatus.
-func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) {
+func (c *Container) WaitRootPID(pid int32) (unix.WaitStatus, error) {
log.Debugf("Wait on process %d in sandbox, cid: %s", pid, c.Sandbox.ID)
if !c.IsSandboxRunning() {
return 0, fmt.Errorf("sandbox is not running")
@@ -539,7 +540,7 @@ func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) {
// WaitPID waits for process 'pid' in the container's PID namespace and returns
// its WaitStatus.
-func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) {
+func (c *Container) WaitPID(pid int32) (unix.WaitStatus, error) {
log.Debugf("Wait on process %d in container, cid: %s", pid, c.ID)
if !c.IsSandboxRunning() {
return 0, fmt.Errorf("sandbox is not running")
@@ -551,7 +552,7 @@ func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) {
// is SIGKILL, then waits for all processes to exit before returning.
// SignalContainer returns an error if the container is already stopped.
// TODO(b/113680494): Distinguish different error types.
-func (c *Container) SignalContainer(sig syscall.Signal, all bool) error {
+func (c *Container) SignalContainer(sig unix.Signal, all bool) error {
log.Debugf("Signal container, cid: %s, signal: %v (%d)", c.ID, sig, sig)
// Signaling container in Stopped state is allowed. When all=false,
// an error will be returned anyway; when all=true, this allows
@@ -568,7 +569,7 @@ func (c *Container) SignalContainer(sig syscall.Signal, all bool) error {
}
// SignalProcess sends sig to a specific process in the container.
-func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error {
+func (c *Container) SignalProcess(sig unix.Signal, pid int32) error {
log.Debugf("Signal process %d in container, cid: %s, signal: %v (%d)", pid, c.ID, sig, sig)
if err := c.requireStatus("signal a process inside", Running); err != nil {
return err
@@ -586,7 +587,7 @@ func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() {
log.Debugf("Forwarding all signals to container, cid: %s, PIDPID: %d, fgProcess: %t", c.ID, pid, fgProcess)
stop := sighandling.StartSignalForwarding(func(sig linux.Signal) {
log.Debugf("Forwarding signal %d to container, cid: %s, PID: %d, fgProcess: %t", sig, c.ID, pid, fgProcess)
- if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.Signal(sig), fgProcess); err != nil {
+ if err := c.Sandbox.SignalProcess(c.ID, pid, unix.Signal(sig), fgProcess); err != nil {
log.Warningf("error forwarding signal %d to container %q: %v", sig, c.ID, err)
}
})
@@ -768,9 +769,9 @@ func (c *Container) stop() error {
// Try killing gofer if it does not exit with container.
if c.GoferPid != 0 {
log.Debugf("Killing gofer for container, cid: %s, PID: %d", c.ID, c.GoferPid)
- if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
+ if err := unix.Kill(c.GoferPid, unix.SIGKILL); err != nil {
// The gofer may already be stopped, log the error.
- log.Warningf("Error sending signal %d to gofer %d: %v", syscall.SIGKILL, c.GoferPid, err)
+ log.Warningf("Error sending signal %d to gofer %d: %v", unix.SIGKILL, c.GoferPid, err)
}
}
@@ -793,7 +794,7 @@ func (c *Container) waitForStopped() error {
b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
op := func() error {
if c.IsSandboxRunning() {
- if err := c.SignalContainer(syscall.Signal(0), false); err == nil {
+ if err := c.SignalContainer(unix.Signal(0), false); err == nil {
return fmt.Errorf("container is still running")
}
}
@@ -803,7 +804,7 @@ func (c *Container) waitForStopped() error {
if c.goferIsChild {
// The gofer process is a child of the current process,
// so we can wait it and collect its zombie.
- wpid, err := syscall.Wait4(int(c.GoferPid), nil, syscall.WNOHANG, nil)
+ wpid, err := unix.Wait4(int(c.GoferPid), nil, unix.WNOHANG, nil)
if err != nil {
return fmt.Errorf("error waiting the gofer process: %v", err)
}
@@ -811,7 +812,7 @@ func (c *Container) waitForStopped() error {
return fmt.Errorf("gofer is still running")
}
- } else if err := syscall.Kill(c.GoferPid, 0); err == nil {
+ } else if err := unix.Kill(c.GoferPid, 0); err == nil {
return fmt.Errorf("gofer is still running")
}
c.GoferPid = 0
@@ -892,7 +893,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
sandEnds := make([]*os.File, 0, mountCount)
for i := 0; i < mountCount; i++ {
- fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
+ fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
@@ -914,8 +915,8 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
if attached {
// The gofer is attached to the lifetime of this process, so it
// should synchronously die when this process dies.
- cmd.SysProcAttr = &syscall.SysProcAttr{
- Pdeathsig: syscall.SIGKILL,
+ cmd.SysProcAttr = &unix.SysProcAttr{
+ Pdeathsig: unix.SIGKILL,
}
}
@@ -1113,7 +1114,7 @@ func setOOMScoreAdj(pid int, scoreAdj int) error {
}
defer f.Close()
if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil {
- if errors.Is(err, syscall.ESRCH) {
+ if errors.Is(err, unix.ESRCH) {
log.Warningf("Process (%d) exited while setting oom_score_adj", pid)
return nil
}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 862d9444d..5a0c468a4 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -27,12 +27,12 @@ import (
"reflect"
"strconv"
"strings"
- "syscall"
"testing"
"time"
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
"gvisor.dev/gvisor/pkg/log"
@@ -103,7 +103,7 @@ func waitForProcessCount(cont *Container, want int) error {
func blockUntilWaitable(pid int) error {
_, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) {
var err error
- _, _, err1 := syscall.Syscall6(syscall.SYS_WAITID, 1, uintptr(pid), 0, syscall.WEXITED|syscall.WNOWAIT, 0, 0)
+ _, _, err1 := unix.Syscall6(unix.SYS_WAITID, 1, uintptr(pid), 0, unix.WEXITED|unix.WNOWAIT, 0, 0)
if err1 != 0 {
err = err1
}
@@ -468,7 +468,7 @@ func TestLifecycle(t *testing.T) {
if err != nil {
ch <- err
}
- if got, want := ws.Signal(), syscall.SIGTERM; got != want {
+ if got, want := ws.Signal(), unix.SIGTERM; got != want {
ch <- fmt.Errorf("got signal %v, want %v", got, want)
}
ch <- nil
@@ -479,8 +479,8 @@ func TestLifecycle(t *testing.T) {
time.Sleep(time.Second)
// Send the container a SIGTERM which will cause it to stop.
- if err := c.SignalContainer(syscall.SIGTERM, false); err != nil {
- t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err)
+ if err := c.SignalContainer(unix.SIGTERM, false); err != nil {
+ t.Fatalf("error sending signal %v to container: %v", unix.SIGTERM, err)
}
// Wait for it to die.
@@ -815,11 +815,11 @@ func TestExec(t *testing.T) {
t.Run("nonexist", func(t *testing.T) {
// b/179114837 found by Syzkaller that causes nil pointer panic when
// trying to dec-ref an unix socket FD.
- fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0)
+ fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0)
if err != nil {
t.Fatal(err)
}
- defer syscall.Close(fds[0])
+ defer unix.Close(fds[0])
_, err = cont.executeSync(&control.ExecArgs{
Argv: []string{"/nonexist"},
@@ -956,7 +956,7 @@ func TestKillPid(t *testing.T) {
pid = int32(p.PID)
}
}
- if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil {
+ if err := cont.SignalProcess(unix.SIGKILL, pid); err != nil {
t.Fatalf("failed to signal process %d: %v", pid, err)
}
@@ -1601,12 +1601,12 @@ func TestReadonlyRoot(t *testing.T) {
}
// Read mounts to check that root is readonly.
- out, err := executeCombinedOutput(c, "/bin/sh", "-c", "mount | grep ' / '")
+ out, err := executeCombinedOutput(c, "/bin/sh", "-c", "mount | grep ' / ' | grep -o -e '(.*)'")
if err != nil {
t.Fatalf("exec failed: %v", err)
}
- t.Logf("root mount: %q", out)
- if !strings.Contains(string(out), "(ro)") {
+ t.Logf("root mount options: %q", out)
+ if !strings.Contains(string(out), "ro") {
t.Errorf("root not mounted readonly: %q", out)
}
@@ -1615,7 +1615,7 @@ func TestReadonlyRoot(t *testing.T) {
if err != nil {
t.Fatalf("touch file in ro mount: %v", err)
}
- if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
+ if !ws.Exited() || unix.Errno(ws.ExitStatus()) != unix.EPERM {
t.Fatalf("wrong waitStatus: %v", ws)
}
})
@@ -1659,13 +1659,13 @@ func TestReadonlyMount(t *testing.T) {
}
// Read mounts to check that volume is readonly.
- cmd := fmt.Sprintf("mount | grep ' %s '", dir)
+ cmd := fmt.Sprintf("mount | grep ' %s ' | grep -o -e '(.*)'", dir)
out, err := executeCombinedOutput(c, "/bin/sh", "-c", cmd)
if err != nil {
t.Fatalf("exec failed, err: %v", err)
}
- t.Logf("mount: %q", out)
- if !strings.Contains(string(out), "(ro)") {
+ t.Logf("mount options: %q", out)
+ if !strings.Contains(string(out), "ro") {
t.Errorf("volume not mounted readonly: %q", out)
}
@@ -1674,7 +1674,7 @@ func TestReadonlyMount(t *testing.T) {
if err != nil {
t.Fatalf("touch file in ro mount: %v", err)
}
- if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
+ if !ws.Exited() || unix.Errno(ws.ExitStatus()) != unix.EPERM {
t.Fatalf("wrong WaitStatus: %v", ws)
}
})
@@ -1750,8 +1750,8 @@ func TestUIDMap(t *testing.T) {
if !ws.Exited() || ws.ExitStatus() != 0 {
t.Fatalf("container failed, waitStatus: %v", ws)
}
- st := syscall.Stat_t{}
- if err := syscall.Stat(testFile, &st); err != nil {
+ st := unix.Stat_t{}
+ if err := unix.Stat(testFile, &st); err != nil {
t.Fatalf("error stat /testfile: %v", err)
}
@@ -1880,7 +1880,7 @@ func doGoferExitTest(t *testing.T, vfs2 bool) {
}
err = blockUntilWaitable(c.GoferPid)
- if err != nil && err != syscall.ECHILD {
+ if err != nil && err != unix.ECHILD {
t.Errorf("error waiting for gofer to exit: %v", err)
}
}
@@ -1929,7 +1929,7 @@ func TestUserLog(t *testing.T) {
}
// sched_rr_get_interval - not implemented in gvisor.
- num := strconv.Itoa(syscall.SYS_SCHED_RR_GET_INTERVAL)
+ num := strconv.Itoa(unix.SYS_SCHED_RR_GET_INTERVAL)
spec := testutil.NewSpecWithArgs(app, "syscall", "--syscall="+num)
conf := testutil.TestConfig(t)
_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
@@ -2159,10 +2159,10 @@ func TestMountPropagation(t *testing.T) {
f.Close()
// Setup src as a shared mount.
- if err := syscall.Mount(src, src, "bind", syscall.MS_BIND, ""); err != nil {
+ if err := unix.Mount(src, src, "bind", unix.MS_BIND, ""); err != nil {
t.Fatalf("mount(%q, %q, MS_BIND): %v", dir, srcMnt, err)
}
- if err := syscall.Mount("", src, "", syscall.MS_SHARED, ""); err != nil {
+ if err := unix.Mount("", src, "", unix.MS_SHARED, ""); err != nil {
t.Fatalf("mount(%q, MS_SHARED): %v", srcMnt, err)
}
@@ -2209,7 +2209,7 @@ func TestMountPropagation(t *testing.T) {
// After the container is started, mount dir inside source and check what
// happens to both destinations.
- if err := syscall.Mount(dir, srcMnt, "bind", syscall.MS_BIND, ""); err != nil {
+ if err := unix.Mount(dir, srcMnt, "bind", unix.MS_BIND, ""); err != nil {
t.Fatalf("mount(%q, %q, MS_BIND): %v", dir, srcMnt, err)
}
@@ -2449,7 +2449,7 @@ func TestCreateWithCorruptedStateFile(t *testing.T) {
}
}
-func execute(cont *Container, name string, arg ...string) (syscall.WaitStatus, error) {
+func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
args := &control.ExecArgs{
Filename: name,
Argv: append([]string{name}, arg...),
@@ -2483,7 +2483,7 @@ func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte,
}
// executeSync synchronously executes a new process.
-func (c *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) {
+func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) {
pid, err := c.Execute(args)
if err != nil {
return 0, fmt.Errorf("error executing: %v", err)
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index b434cdb23..0f0a223ce 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -22,11 +22,11 @@ import (
"path"
"path/filepath"
"strings"
- "syscall"
"testing"
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -403,7 +403,7 @@ func TestMultiPIDNSKill(t *testing.T) {
t.Logf("Container %q procs: %s", c.ID, procListToString(procs))
pidToKill := procs[processes-1].PID
t.Logf("PID to kill: %d", pidToKill)
- if err := c.SignalProcess(syscall.SIGKILL, int32(pidToKill)); err != nil {
+ if err := c.SignalProcess(unix.SIGKILL, int32(pidToKill)); err != nil {
t.Errorf("container.SignalProcess: %v", err)
}
// Wait for the process to get killed.
@@ -432,7 +432,7 @@ func TestMultiPIDNSKill(t *testing.T) {
pidToKill = procs[len(procs)-1].PID
t.Logf("PID that should not be killed: %d", pidToKill)
- err = c.SignalProcess(syscall.SIGKILL, int32(pidToKill))
+ err = c.SignalProcess(unix.SIGKILL, int32(pidToKill))
if err == nil {
t.Fatalf("killing another container's process should fail")
}
@@ -640,7 +640,7 @@ func TestMultiContainerSignal(t *testing.T) {
}
// Kill process 2.
- if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil {
+ if err := containers[1].SignalContainer(unix.SIGKILL, false); err != nil {
t.Errorf("failed to kill process 2: %v", err)
}
@@ -660,10 +660,10 @@ func TestMultiContainerSignal(t *testing.T) {
t.Errorf("failed to destroy container: %v", err)
}
_, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) {
- cpid, err := syscall.Wait4(goferPid, nil, 0, nil)
+ cpid, err := unix.Wait4(goferPid, nil, 0, nil)
return uintptr(cpid), 0, err
})
- if err != syscall.ECHILD {
+ if err != unix.ECHILD {
t.Errorf("error waiting for gofer to exit: %v", err)
}
// Make sure process 1 is still running.
@@ -673,28 +673,28 @@ func TestMultiContainerSignal(t *testing.T) {
// Now that process 2 is gone, ensure we get an error trying to
// signal it again.
- if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil {
+ if err := containers[1].SignalContainer(unix.SIGKILL, false); err == nil {
t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID)
}
// Kill process 1.
- if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil {
+ if err := containers[0].SignalContainer(unix.SIGKILL, false); err != nil {
t.Errorf("failed to kill process 1: %v", err)
}
// Ensure that container's gofer and sandbox process are no more.
err = blockUntilWaitable(containers[0].GoferPid)
- if err != nil && err != syscall.ECHILD {
+ if err != nil && err != unix.ECHILD {
t.Errorf("error waiting for gofer to exit: %v", err)
}
err = blockUntilWaitable(containers[0].Sandbox.Pid)
- if err != nil && err != syscall.ECHILD {
+ if err != nil && err != unix.ECHILD {
t.Errorf("error waiting for sandbox to exit: %v", err)
}
// The sentry should be gone, so signaling should yield an error.
- if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil {
+ if err := containers[0].SignalContainer(unix.SIGKILL, false); err == nil {
t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID)
}
@@ -893,7 +893,7 @@ func TestMultiContainerKillAll(t *testing.T) {
if tc.killContainer {
// First kill the init process to make the container be stopped with
// processes still running inside.
- containers[1].SignalContainer(syscall.SIGKILL, false)
+ containers[1].SignalContainer(unix.SIGKILL, false)
op := func() error {
c, err := Load(conf.RootDir, FullID{ContainerID: ids[1]}, LoadOpts{})
if err != nil {
@@ -914,7 +914,7 @@ func TestMultiContainerKillAll(t *testing.T) {
t.Fatalf("failed to load child container %q: %v", c.ID, err)
}
// Kill'Em All
- if err := c.SignalContainer(syscall.SIGKILL, true); err != nil {
+ if err := c.SignalContainer(unix.SIGKILL, true); err != nil {
t.Fatalf("failed to send SIGKILL to container %q: %v", c.ID, err)
}
@@ -1640,8 +1640,8 @@ func TestMultiContainerGoferKilled(t *testing.T) {
}
// Kill container's gofer.
- if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
- t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
+ if err := unix.Kill(c.GoferPid, unix.SIGKILL); err != nil {
+ t.Fatalf("unix.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
}
// Wait until container stops.
@@ -1672,8 +1672,8 @@ func TestMultiContainerGoferKilled(t *testing.T) {
// Kill root container's gofer to bring entire sandbox down.
c = containers[0]
- if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
- t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
+ if err := unix.Kill(c.GoferPid, unix.SIGKILL); err != nil {
+ t.Fatalf("unix.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
}
// Wait until sandbox stops. waitForProcessList will loop until sandbox exits
diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go
index c46322ba4..0399903a0 100644
--- a/runsc/container/state_file.go
+++ b/runsc/container/state_file.go
@@ -22,9 +22,9 @@ import (
"path/filepath"
"regexp"
"strings"
- "syscall"
"github.com/gofrs/flock"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sync"
)
@@ -89,7 +89,7 @@ func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) {
c.changeStatus(Stopped)
}
case Running:
- if err := c.SignalContainer(syscall.Signal(0), false); err != nil {
+ if err := c.SignalContainer(unix.Signal(0), false); err != nil {
c.changeStatus(Stopped)
}
}
@@ -245,7 +245,7 @@ type StateFile struct {
// lock globally locks all locking operations for the container.
func (s *StateFile) lock() error {
s.once.Do(func() {
- s.flock = flock.NewFlock(s.lockPath())
+ s.flock = flock.New(s.lockPath())
})
if err := s.flock.Lock(); err != nil {
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index d1af539cb..fd72414ce 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -16,7 +16,6 @@ package filter
import (
"os"
- "syscall"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -25,12 +24,12 @@ import (
// allowedSyscalls is the set of syscalls executed by the gofer.
var allowedSyscalls = seccomp.SyscallRules{
- syscall.SYS_ACCEPT: {},
- syscall.SYS_CLOCK_GETTIME: {},
- syscall.SYS_CLOSE: {},
- syscall.SYS_DUP: {},
- syscall.SYS_EPOLL_CTL: {},
- syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
+ unix.SYS_ACCEPT: {},
+ unix.SYS_CLOCK_GETTIME: {},
+ unix.SYS_CLOSE: {},
+ unix.SYS_DUP: {},
+ unix.SYS_EPOLL_CTL: {},
+ unix.SYS_EPOLL_PWAIT: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
@@ -39,34 +38,34 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.EqualTo(0),
},
},
- syscall.SYS_EVENTFD2: []seccomp.Rule{
+ unix.SYS_EVENTFD2: []seccomp.Rule{
{
seccomp.EqualTo(0),
seccomp.EqualTo(0),
},
},
- syscall.SYS_EXIT: {},
- syscall.SYS_EXIT_GROUP: {},
- syscall.SYS_FALLOCATE: []seccomp.Rule{
+ unix.SYS_EXIT: {},
+ unix.SYS_EXIT_GROUP: {},
+ unix.SYS_FALLOCATE: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.EqualTo(0),
},
},
- syscall.SYS_FCHMOD: {},
- syscall.SYS_FCHOWNAT: {},
- syscall.SYS_FCNTL: []seccomp.Rule{
+ unix.SYS_FCHMOD: {},
+ unix.SYS_FCHOWNAT: {},
+ unix.SYS_FCNTL: []seccomp.Rule{
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.F_GETFL),
+ seccomp.EqualTo(unix.F_GETFL),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.F_SETFL),
+ seccomp.EqualTo(unix.F_SETFL),
},
{
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.F_GETFD),
+ seccomp.EqualTo(unix.F_GETFD),
},
// Used by flipcall.PacketWindowAllocator.Init().
{
@@ -74,11 +73,11 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.EqualTo(unix.F_ADD_SEALS),
},
},
- syscall.SYS_FSTAT: {},
- syscall.SYS_FSTATFS: {},
- syscall.SYS_FSYNC: {},
- syscall.SYS_FTRUNCATE: {},
- syscall.SYS_FUTEX: {
+ unix.SYS_FSTAT: {},
+ unix.SYS_FSTATFS: {},
+ unix.SYS_FSYNC: {},
+ unix.SYS_FTRUNCATE: {},
+ unix.SYS_FUTEX: {
seccomp.Rule{
seccomp.MatchAny{},
seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
@@ -116,78 +115,78 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.EqualTo(0),
},
},
- syscall.SYS_GETDENTS64: {},
- syscall.SYS_GETPID: {},
- unix.SYS_GETRANDOM: {},
- syscall.SYS_GETTID: {},
- syscall.SYS_GETTIMEOFDAY: {},
- syscall.SYS_LINKAT: {},
- syscall.SYS_LSEEK: {},
- syscall.SYS_MADVISE: {},
- unix.SYS_MEMFD_CREATE: {}, /// Used by flipcall.PacketWindowAllocator.Init().
- syscall.SYS_MKDIRAT: {},
- syscall.SYS_MKNODAT: {},
+ unix.SYS_GETDENTS64: {},
+ unix.SYS_GETPID: {},
+ unix.SYS_GETRANDOM: {},
+ unix.SYS_GETTID: {},
+ unix.SYS_GETTIMEOFDAY: {},
+ unix.SYS_LINKAT: {},
+ unix.SYS_LSEEK: {},
+ unix.SYS_MADVISE: {},
+ unix.SYS_MEMFD_CREATE: {}, /// Used by flipcall.PacketWindowAllocator.Init().
+ unix.SYS_MKDIRAT: {},
+ unix.SYS_MKNODAT: {},
// Used by the Go runtime as a temporarily workaround for a Linux
// 5.2-5.4 bug.
//
// See src/runtime/os_linux_x86.go.
//
// TODO(b/148688965): Remove once this is gone from Go.
- syscall.SYS_MLOCK: []seccomp.Rule{
+ unix.SYS_MLOCK: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.EqualTo(4096),
},
},
- syscall.SYS_MMAP: []seccomp.Rule{
+ unix.SYS_MMAP: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MAP_SHARED),
+ seccomp.EqualTo(unix.MAP_SHARED),
},
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS),
+ seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS),
},
{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED),
+ seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_FIXED),
},
},
- syscall.SYS_MPROTECT: {},
- syscall.SYS_MUNMAP: {},
- syscall.SYS_NANOSLEEP: {},
- syscall.SYS_OPENAT: {},
- syscall.SYS_PPOLL: {},
- syscall.SYS_PREAD64: {},
- syscall.SYS_PWRITE64: {},
- syscall.SYS_READ: {},
- syscall.SYS_READLINKAT: {},
- syscall.SYS_RECVMSG: []seccomp.Rule{
+ unix.SYS_MPROTECT: {},
+ unix.SYS_MUNMAP: {},
+ unix.SYS_NANOSLEEP: {},
+ unix.SYS_OPENAT: {},
+ unix.SYS_PPOLL: {},
+ unix.SYS_PREAD64: {},
+ unix.SYS_PWRITE64: {},
+ unix.SYS_READ: {},
+ unix.SYS_READLINKAT: {},
+ unix.SYS_RECVMSG: []seccomp.Rule{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC),
+ seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_TRUNC),
},
{
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK),
+ seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_TRUNC | unix.MSG_PEEK),
},
},
- syscall.SYS_RENAMEAT: {},
- syscall.SYS_RESTART_SYSCALL: {},
+ unix.SYS_RENAMEAT: {},
+ unix.SYS_RESTART_SYSCALL: {},
// May be used by the runtime during panic().
- syscall.SYS_RT_SIGACTION: {},
- syscall.SYS_RT_SIGPROCMASK: {},
- syscall.SYS_RT_SIGRETURN: {},
- syscall.SYS_SCHED_YIELD: {},
- syscall.SYS_SENDMSG: []seccomp.Rule{
+ unix.SYS_RT_SIGACTION: {},
+ unix.SYS_RT_SIGPROCMASK: {},
+ unix.SYS_RT_SIGRETURN: {},
+ unix.SYS_SCHED_YIELD: {},
+ unix.SYS_SENDMSG: []seccomp.Rule{
// Used by fdchannel.Endpoint.SendFD().
{
seccomp.MatchAny{},
@@ -198,51 +197,51 @@ var allowedSyscalls = seccomp.SyscallRules{
{
seccomp.MatchAny{},
seccomp.MatchAny{},
- seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL),
+ seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_NOSIGNAL),
},
},
- syscall.SYS_SHUTDOWN: []seccomp.Rule{
- {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)},
+ unix.SYS_SHUTDOWN: []seccomp.Rule{
+ {seccomp.MatchAny{}, seccomp.EqualTo(unix.SHUT_RDWR)},
},
- syscall.SYS_SIGALTSTACK: {},
+ unix.SYS_SIGALTSTACK: {},
// Used by fdchannel.NewConnectedSockets().
- syscall.SYS_SOCKETPAIR: {
+ unix.SYS_SOCKETPAIR: {
{
- seccomp.EqualTo(syscall.AF_UNIX),
- seccomp.EqualTo(syscall.SOCK_SEQPACKET | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(unix.AF_UNIX),
+ seccomp.EqualTo(unix.SOCK_SEQPACKET | unix.SOCK_CLOEXEC),
seccomp.EqualTo(0),
},
},
- syscall.SYS_SYMLINKAT: {},
- syscall.SYS_TGKILL: []seccomp.Rule{
+ unix.SYS_SYMLINKAT: {},
+ unix.SYS_TGKILL: []seccomp.Rule{
{
seccomp.EqualTo(uint64(os.Getpid())),
},
},
- syscall.SYS_UNLINKAT: {},
- syscall.SYS_UTIMENSAT: {},
- syscall.SYS_WRITE: {},
+ unix.SYS_UNLINKAT: {},
+ unix.SYS_UTIMENSAT: {},
+ unix.SYS_WRITE: {},
}
var udsSyscalls = seccomp.SyscallRules{
- syscall.SYS_SOCKET: []seccomp.Rule{
+ unix.SYS_SOCKET: []seccomp.Rule{
{
- seccomp.EqualTo(syscall.AF_UNIX),
- seccomp.EqualTo(syscall.SOCK_STREAM),
+ seccomp.EqualTo(unix.AF_UNIX),
+ seccomp.EqualTo(unix.SOCK_STREAM),
seccomp.EqualTo(0),
},
{
- seccomp.EqualTo(syscall.AF_UNIX),
- seccomp.EqualTo(syscall.SOCK_DGRAM),
+ seccomp.EqualTo(unix.AF_UNIX),
+ seccomp.EqualTo(unix.SOCK_DGRAM),
seccomp.EqualTo(0),
},
{
- seccomp.EqualTo(syscall.AF_UNIX),
- seccomp.EqualTo(syscall.SOCK_SEQPACKET),
+ seccomp.EqualTo(unix.AF_UNIX),
+ seccomp.EqualTo(unix.SOCK_SEQPACKET),
seccomp.EqualTo(0),
},
},
- syscall.SYS_CONNECT: []seccomp.Rule{
+ unix.SYS_CONNECT: []seccomp.Rule{
{
seccomp.MatchAny{},
},
diff --git a/runsc/fsgofer/filter/config_amd64.go b/runsc/fsgofer/filter/config_amd64.go
index 686753d96..2d0151dcc 100644
--- a/runsc/fsgofer/filter/config_amd64.go
+++ b/runsc/fsgofer/filter/config_amd64.go
@@ -17,30 +17,29 @@
package filter
import (
- "syscall"
-
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/seccomp"
)
func init() {
- allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{
+ allowedSyscalls[unix.SYS_ARCH_PRCTL] = []seccomp.Rule{
// TODO(b/168828518): No longer used in Go 1.16+.
{seccomp.EqualTo(linux.ARCH_SET_FS)},
}
- allowedSyscalls[syscall.SYS_CLONE] = []seccomp.Rule{
+ allowedSyscalls[unix.SYS_CLONE] = []seccomp.Rule{
// parent_tidptr and child_tidptr are always 0 because neither
// CLONE_PARENT_SETTID nor CLONE_CHILD_SETTID are used.
{
seccomp.EqualTo(
- syscall.CLONE_VM |
- syscall.CLONE_FS |
- syscall.CLONE_FILES |
- syscall.CLONE_SETTLS |
- syscall.CLONE_SIGHAND |
- syscall.CLONE_SYSVSEM |
- syscall.CLONE_THREAD),
+ unix.CLONE_VM |
+ unix.CLONE_FS |
+ unix.CLONE_FILES |
+ unix.CLONE_SETTLS |
+ unix.CLONE_SIGHAND |
+ unix.CLONE_SYSVSEM |
+ unix.CLONE_THREAD),
seccomp.MatchAny{}, // newsp
seccomp.EqualTo(0), // parent_tidptr
seccomp.EqualTo(0), // child_tidptr
@@ -49,12 +48,12 @@ func init() {
{
// TODO(b/168828518): No longer used in Go 1.16+ (on amd64).
seccomp.EqualTo(
- syscall.CLONE_VM |
- syscall.CLONE_FS |
- syscall.CLONE_FILES |
- syscall.CLONE_SIGHAND |
- syscall.CLONE_SYSVSEM |
- syscall.CLONE_THREAD),
+ unix.CLONE_VM |
+ unix.CLONE_FS |
+ unix.CLONE_FILES |
+ unix.CLONE_SIGHAND |
+ unix.CLONE_SYSVSEM |
+ unix.CLONE_THREAD),
seccomp.MatchAny{}, // newsp
seccomp.EqualTo(0), // parent_tidptr
seccomp.EqualTo(0), // child_tidptr
@@ -62,5 +61,5 @@ func init() {
},
}
- allowedSyscalls[syscall.SYS_NEWFSTATAT] = []seccomp.Rule{}
+ allowedSyscalls[unix.SYS_NEWFSTATAT] = []seccomp.Rule{}
}
diff --git a/runsc/fsgofer/filter/config_arm64.go b/runsc/fsgofer/filter/config_arm64.go
index ff0cf77a0..7d458c02d 100644
--- a/runsc/fsgofer/filter/config_arm64.go
+++ b/runsc/fsgofer/filter/config_arm64.go
@@ -17,23 +17,22 @@
package filter
import (
- "syscall"
-
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/seccomp"
)
func init() {
- allowedSyscalls[syscall.SYS_CLONE] = []seccomp.Rule{
+ allowedSyscalls[unix.SYS_CLONE] = []seccomp.Rule{
// parent_tidptr and child_tidptr are always 0 because neither
// CLONE_PARENT_SETTID nor CLONE_CHILD_SETTID are used.
{
seccomp.EqualTo(
- syscall.CLONE_VM |
- syscall.CLONE_FS |
- syscall.CLONE_FILES |
- syscall.CLONE_SIGHAND |
- syscall.CLONE_SYSVSEM |
- syscall.CLONE_THREAD),
+ unix.CLONE_VM |
+ unix.CLONE_FS |
+ unix.CLONE_FILES |
+ unix.CLONE_SIGHAND |
+ unix.CLONE_SYSVSEM |
+ unix.CLONE_THREAD),
seccomp.MatchAny{}, // newsp
// These arguments are left uninitialized by the Go
// runtime, so they may be anything (and are unused by
@@ -44,5 +43,5 @@ func init() {
},
}
- allowedSyscalls[syscall.SYS_FSTATAT] = []seccomp.Rule{}
+ allowedSyscalls[unix.SYS_FSTATAT] = []seccomp.Rule{}
}
diff --git a/runsc/fsgofer/filter/extra_filters_msan.go b/runsc/fsgofer/filter/extra_filters_msan.go
index 8c6179c8f..d768ed0bb 100644
--- a/runsc/fsgofer/filter/extra_filters_msan.go
+++ b/runsc/fsgofer/filter/extra_filters_msan.go
@@ -17,8 +17,7 @@
package filter
import (
- "syscall"
-
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/seccomp"
)
@@ -27,7 +26,7 @@ import (
func instrumentationFilters() seccomp.SyscallRules {
log.Warningf("*** SECCOMP WARNING: MSAN is enabled: syscall filters less restrictive!")
return seccomp.SyscallRules{
- syscall.SYS_SCHED_GETAFFINITY: {},
- syscall.SYS_SET_ROBUST_LIST: {},
+ unix.SYS_SCHED_GETAFFINITY: {},
+ unix.SYS_SET_ROBUST_LIST: {},
}
}
diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go
index cbd5c487e..9e75c025d 100644
--- a/runsc/fsgofer/filter/extra_filters_race.go
+++ b/runsc/fsgofer/filter/extra_filters_race.go
@@ -17,8 +17,7 @@
package filter
import (
- "syscall"
-
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/seccomp"
)
@@ -27,18 +26,18 @@ import (
func instrumentationFilters() seccomp.SyscallRules {
log.Warningf("*** SECCOMP WARNING: TSAN is enabled: syscall filters less restrictive!")
return seccomp.SyscallRules{
- syscall.SYS_BRK: {},
- syscall.SYS_CLOCK_NANOSLEEP: {},
- syscall.SYS_CLONE: {},
- syscall.SYS_FUTEX: {},
- syscall.SYS_MADVISE: {},
- syscall.SYS_MMAP: {},
- syscall.SYS_MUNLOCK: {},
- syscall.SYS_NANOSLEEP: {},
- syscall.SYS_OPEN: {},
- syscall.SYS_OPENAT: {},
- syscall.SYS_SET_ROBUST_LIST: {},
+ unix.SYS_BRK: {},
+ unix.SYS_CLOCK_NANOSLEEP: {},
+ unix.SYS_CLONE: {},
+ unix.SYS_FUTEX: {},
+ unix.SYS_MADVISE: {},
+ unix.SYS_MMAP: {},
+ unix.SYS_MUNLOCK: {},
+ unix.SYS_NANOSLEEP: {},
+ unix.SYS_OPEN: {},
+ unix.SYS_OPENAT: {},
+ unix.SYS_SET_ROBUST_LIST: {},
// Used within glibc's malloc.
- syscall.SYS_TIME: {},
+ unix.SYS_TIME: {},
}
}
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index cfa3796b1..1e80a634d 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -66,6 +66,9 @@ type Config struct {
// HostUDS signals whether the gofer can mount a host's UDS.
HostUDS bool
+
+ // enableXattr allows Get/SetXattr for the mounted file systems.
+ EnableXattr bool
}
type attachPoint struct {
@@ -795,12 +798,22 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
return err
}
-func (*localFile) GetXattr(string, uint64) (string, error) {
- return "", unix.EOPNOTSUPP
+func (l *localFile) GetXattr(name string, size uint64) (string, error) {
+ if !l.attachPoint.conf.EnableXattr {
+ return "", unix.EOPNOTSUPP
+ }
+ buffer := make([]byte, size)
+ if _, err := unix.Fgetxattr(l.file.FD(), name, buffer); err != nil {
+ return "", err
+ }
+ return string(buffer), nil
}
-func (*localFile) SetXattr(string, string, uint32) error {
- return unix.EOPNOTSUPP
+func (l *localFile) SetXattr(name string, value string, flags uint32) error {
+ if !l.attachPoint.conf.EnableXattr {
+ return unix.EOPNOTSUPP
+ }
+ return unix.Fsetxattr(l.file.FD(), name, []byte(value), int(flags))
}
func (*localFile) ListXattr(uint64) (map[string]struct{}, error) {
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index 99ea9bd32..a5f09f88f 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -565,6 +565,38 @@ func TestSetAttrOwner(t *testing.T) {
})
}
+func SetGetXattr(l *localFile, name string, value string) error {
+ if err := l.SetXattr(name, value, 0 /* flags */); err != nil {
+ return err
+ }
+ ret, err := l.GetXattr(name, uint64(len(value)))
+ if err != nil {
+ return err
+ }
+ if ret != value {
+ return fmt.Errorf("Got value %s, want %s", ret, value)
+ }
+ return nil
+}
+
+func TestSetGetXattr(t *testing.T) {
+ xattrConfs := []Config{{ROMount: false, EnableXattr: false}, {ROMount: false, EnableXattr: true}}
+ runCustom(t, []uint32{unix.S_IFREG}, xattrConfs, func(t *testing.T, s state) {
+ name := "user.test"
+ value := "tmp"
+ err := SetGetXattr(s.file, name, value)
+ if s.conf.EnableXattr {
+ if err != nil {
+ t.Fatalf("%v: SetGetXattr failed, err: %v", s, err)
+ }
+ } else {
+ if err == nil {
+ t.Fatalf("%v: SetGetXattr should have failed", s)
+ }
+ }
+ })
+}
+
func TestLink(t *testing.T) {
if !specutils.HasCapabilities(capability.CAP_DAC_READ_SEARCH) {
t.Skipf("Link test requires CAP_DAC_READ_SEARCH, running as %d", os.Getuid())
diff --git a/runsc/mitigate/BUILD b/runsc/mitigate/BUILD
index 561854e66..1238890fc 100644
--- a/runsc/mitigate/BUILD
+++ b/runsc/mitigate/BUILD
@@ -4,28 +4,20 @@ package(licenses = ["notice"])
go_library(
name = "mitigate",
- srcs = [
- "cpu.go",
- "mitigate.go",
- "mitigate_conf.go",
- ],
+ srcs = ["mitigate.go"],
visibility = [
"//runsc:__subpackages__",
],
- deps = [
- "//pkg/log",
- "//runsc/flag",
- "@in_gopkg_yaml_v2//:go_default_library",
- ],
+ deps = ["@in_gopkg_yaml_v2//:go_default_library"],
)
go_test(
name = "mitigate_test",
size = "small",
- srcs = [
- "cpu_test.go",
- "mitigate_test.go",
- ],
+ srcs = ["mitigate_test.go"],
library = ":mitigate",
- deps = ["@com_github_google_go_cmp//cmp:go_default_library"],
+ deps = [
+ "//runsc/mitigate/mock",
+ "@com_github_google_go_cmp//cmp:go_default_library",
+ ],
)
diff --git a/runsc/mitigate/cpu.go b/runsc/mitigate/cpu.go
deleted file mode 100644
index 4b2aa351f..000000000
--- a/runsc/mitigate/cpu.go
+++ /dev/null
@@ -1,423 +0,0 @@
-// Copyright 2021 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package mitigate
-
-import (
- "fmt"
- "io/ioutil"
- "regexp"
- "strconv"
- "strings"
-)
-
-const (
- // mds is the only bug we care about.
- mds = "mds"
-
- // Constants for parsing /proc/cpuinfo.
- processorKey = "processor"
- vendorIDKey = "vendor_id"
- cpuFamilyKey = "cpu family"
- modelKey = "model"
- physicalIDKey = "physical id"
- coreIDKey = "core id"
- bugsKey = "bugs"
-
- // Path to shutdown a CPU.
- cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online"
-)
-
-// cpuSet contains a map of all CPUs on the system, mapped
-// by Physical ID and CoreIDs. threads with the same
-// Core and Physical ID are Hyperthread pairs.
-type cpuSet map[cpuID]*threadGroup
-
-// newCPUSet creates a CPUSet from data read from /proc/cpuinfo.
-func newCPUSet(data []byte, vulnerable func(thread) bool) (cpuSet, error) {
- processors, err := getThreads(string(data))
- if err != nil {
- return nil, err
- }
-
- set := make(cpuSet)
- for _, p := range processors {
- // Each ID is of the form physicalID:coreID. Hyperthread pairs
- // have identical physical and core IDs. We need to match
- // Hyperthread pairs so that we can shutdown all but one per
- // pair.
- core, ok := set[p.id]
- if !ok {
- core = &threadGroup{}
- set[p.id] = core
- }
- core.isVulnerable = core.isVulnerable || vulnerable(p)
- core.threads = append(core.threads, p)
- }
- return set, nil
-}
-
-// newCPUSetFromPossible makes a cpuSet data read from
-// /sys/devices/system/cpu/possible. This is used in enable operations
-// where the caller simply wants to enable all CPUS.
-func newCPUSetFromPossible(data []byte) (cpuSet, error) {
- threads, err := getThreadsFromPossible(data)
- if err != nil {
- return nil, err
- }
-
- // We don't care if a CPU is vulnerable or not, we just
- // want to return a list of all CPUs on the host.
- set := cpuSet{
- threads[0].id: &threadGroup{
- threads: threads,
- isVulnerable: false,
- },
- }
- return set, nil
-}
-
-// String implements the String method for CPUSet.
-func (c cpuSet) String() string {
- ret := ""
- for _, tg := range c {
- ret += fmt.Sprintf("%s\n", tg)
- }
- return ret
-}
-
-// getRemainingList returns the list of threads that will remain active
-// after mitigation.
-func (c cpuSet) getRemainingList() []thread {
- threads := make([]thread, 0, len(c))
- for _, core := range c {
- // If we're vulnerable, take only one thread from the pair.
- if core.isVulnerable {
- threads = append(threads, core.threads[0])
- continue
- }
- // Otherwise don't shutdown anything.
- threads = append(threads, core.threads...)
- }
- return threads
-}
-
-// getShutdownList returns the list of threads that will be shutdown on
-// mitigation.
-func (c cpuSet) getShutdownList() []thread {
- threads := make([]thread, 0)
- for _, core := range c {
- // Only if we're vulnerable do shutdown anything. In this case,
- // shutdown all but the first entry.
- if core.isVulnerable && len(core.threads) > 1 {
- threads = append(threads, core.threads[1:]...)
- }
- }
- return threads
-}
-
-// threadGroup represents Hyperthread pairs on the same physical/core ID.
-type threadGroup struct {
- threads []thread
- isVulnerable bool
-}
-
-// String implements the String method for threadGroup.
-func (c threadGroup) String() string {
- ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable)
- for _, processor := range c.threads {
- ret += fmt.Sprintf("%s\n", processor)
- }
- return ret
-}
-
-// getThreads returns threads structs from reading /proc/cpuinfo.
-func getThreads(data string) ([]thread, error) {
- // Each processor entry should start with the
- // processor key. Find the beginings of each.
- r := buildRegex(processorKey, `\d+`)
- indices := r.FindAllStringIndex(data, -1)
- if len(indices) < 1 {
- return nil, fmt.Errorf("no cpus found for: %q", data)
- }
-
- // Add the ending index for last entry.
- indices = append(indices, []int{len(data), -1})
-
- // Valid cpus are now defined by strings in between
- // indexes (e.g. data[index[i], index[i+1]]).
- // There should be len(indicies) - 1 CPUs
- // since the last index is the end of the string.
- cpus := make([]thread, 0, len(indices))
- // Find each string that represents a CPU. These begin "processor".
- for i := 1; i < len(indices); i++ {
- start := indices[i-1][0]
- end := indices[i][0]
- // Parse the CPU entry, which should be between start/end.
- c, err := newThread(data[start:end])
- if err != nil {
- return nil, err
- }
- cpus = append(cpus, c)
- }
- return cpus, nil
-}
-
-// getThreadsFromPossible makes threads from data read from /sys/devices/system/cpu/possible.
-func getThreadsFromPossible(data []byte) ([]thread, error) {
- possibleRegex := regexp.MustCompile(`(?m)^(\d+)(-(\d+))?$`)
- matches := possibleRegex.FindStringSubmatch(string(data))
- if len(matches) != 4 {
- return nil, fmt.Errorf("mismatch regex from %s: %q", allPossibleCPUs, string(data))
- }
-
- // If matches[3] is empty, we only have one cpu entry.
- if matches[3] == "" {
- matches[3] = matches[1]
- }
-
- begin, err := strconv.ParseInt(matches[1], 10, 64)
- if err != nil {
- return nil, fmt.Errorf("failed to parse begin: %v", err)
- }
- end, err := strconv.ParseInt(matches[3], 10, 64)
- if err != nil {
- return nil, fmt.Errorf("failed to parse end: %v", err)
- }
- if begin > end || begin < 0 || end < 0 {
- return nil, fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", begin, end)
- }
-
- ret := make([]thread, 0, end-begin)
- for i := begin; i <= end; i++ {
- ret = append(ret, thread{
- processorNumber: i,
- id: cpuID{
- physicalID: 0, // we don't care about id for enable ops.
- coreID: 0,
- },
- })
- }
-
- return ret, nil
-}
-
-// cpuID for each thread is defined by the physical and
-// core IDs. If equal, two threads are Hyperthread pairs.
-type cpuID struct {
- physicalID int64
- coreID int64
-}
-
-// type cpu represents pertinent info about a cpu.
-type thread struct {
- processorNumber int64 // the processor number of this CPU.
- vendorID string // the vendorID of CPU (e.g. AuthenticAMD).
- cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake).
- model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake).
- id cpuID // id for this thread
- bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field.
-}
-
-// newThread parses a CPU from a single cpu entry from /proc/cpuinfo.
-func newThread(data string) (thread, error) {
- empty := thread{}
- processor, err := parseProcessor(data)
- if err != nil {
- return empty, err
- }
-
- vendorID, err := parseVendorID(data)
- if err != nil {
- return empty, err
- }
-
- cpuFamily, err := parseCPUFamily(data)
- if err != nil {
- return empty, err
- }
-
- model, err := parseModel(data)
- if err != nil {
- return empty, err
- }
-
- physicalID, err := parsePhysicalID(data)
- if err != nil {
- return empty, err
- }
-
- coreID, err := parseCoreID(data)
- if err != nil {
- return empty, err
- }
-
- bugs, err := parseBugs(data)
- if err != nil {
- return empty, err
- }
-
- return thread{
- processorNumber: processor,
- vendorID: vendorID,
- cpuFamily: cpuFamily,
- model: model,
- id: cpuID{
- physicalID: physicalID,
- coreID: coreID,
- },
- bugs: bugs,
- }, nil
-}
-
-// String implements the String method for thread.
-func (t thread) String() string {
- template := `CPU: %d
-CPU ID: %+v
-Vendor: %s
-Family/Model: %d/%d
-Bugs: %s
-`
- bugs := make([]string, 0)
- for bug := range t.bugs {
- bugs = append(bugs, bug)
- }
-
- return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ","))
-}
-
-// enable turns on the CPU by writing 1 to /sys/devices/cpu/cpu{N}/online.
-func (t thread) enable() error {
- cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber)
- return ioutil.WriteFile(cpuPath, []byte{'1'}, 0644)
-}
-
-// disable turns off the CPU by writing 0 to /sys/devices/cpu/cpu{N}/online.
-func (t thread) disable() error {
- cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber)
- return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644)
-}
-
-// isVulnerable checks if a CPU is vulnerable to mds.
-func (t thread) isVulnerable() bool {
- _, ok := t.bugs[mds]
- return ok
-}
-
-// isActive checks if a CPU is active from /sys/devices/system/cpu/cpu{N}/online
-// If the file does not exist (ioutil returns in error), we assume the CPU is on.
-func (t thread) isActive() bool {
- cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber)
- data, err := ioutil.ReadFile(cpuPath)
- if err != nil {
- return true
- }
- return len(data) > 0 && data[0] != '0'
-}
-
-// similarTo checks family/model/bugs fields for equality of two
-// processors.
-func (t thread) similarTo(other thread) bool {
- if t.vendorID != other.vendorID {
- return false
- }
-
- if other.cpuFamily != t.cpuFamily {
- return false
- }
-
- if other.model != t.model {
- return false
- }
-
- if len(other.bugs) != len(t.bugs) {
- return false
- }
-
- for bug := range t.bugs {
- if _, ok := other.bugs[bug]; !ok {
- return false
- }
- }
- return true
-}
-
-// parseProcessor grabs the processor field from /proc/cpuinfo output.
-func parseProcessor(data string) (int64, error) {
- return parseIntegerResult(data, processorKey)
-}
-
-// parseVendorID grabs the vendor_id field from /proc/cpuinfo output.
-func parseVendorID(data string) (string, error) {
- return parseRegex(data, vendorIDKey, `[\w\d]+`)
-}
-
-// parseCPUFamily grabs the cpu family field from /proc/cpuinfo output.
-func parseCPUFamily(data string) (int64, error) {
- return parseIntegerResult(data, cpuFamilyKey)
-}
-
-// parseModel grabs the model field from /proc/cpuinfo output.
-func parseModel(data string) (int64, error) {
- return parseIntegerResult(data, modelKey)
-}
-
-// parsePhysicalID parses the physical id field.
-func parsePhysicalID(data string) (int64, error) {
- return parseIntegerResult(data, physicalIDKey)
-}
-
-// parseCoreID parses the core id field.
-func parseCoreID(data string) (int64, error) {
- return parseIntegerResult(data, coreIDKey)
-}
-
-// parseBugs grabs the bugs field from /proc/cpuinfo output.
-func parseBugs(data string) (map[string]struct{}, error) {
- result, err := parseRegex(data, bugsKey, `[\d\w\s]*`)
- if err != nil {
- return nil, err
- }
- bugs := strings.Split(result, " ")
- ret := make(map[string]struct{}, len(bugs))
- for _, bug := range bugs {
- ret[bug] = struct{}{}
- }
- return ret, nil
-}
-
-// parseIntegerResult parses fields expecting an integer.
-func parseIntegerResult(data, key string) (int64, error) {
- result, err := parseRegex(data, key, `\d+`)
- if err != nil {
- return 0, err
- }
- return strconv.ParseInt(result, 0, 64)
-}
-
-// buildRegex builds a regex for parsing each CPU field.
-func buildRegex(key, match string) *regexp.Regexp {
- reg := fmt.Sprintf(`(?m)^%s\s*:\s*(.*)$`, key)
- return regexp.MustCompile(reg)
-}
-
-// parseRegex parses data with key inserted into a standard regex template.
-func parseRegex(data, key, match string) (string, error) {
- r := buildRegex(key, match)
- matches := r.FindStringSubmatch(data)
- if len(matches) < 2 {
- return "", fmt.Errorf("failed to match key %q: %q", key, data)
- }
- return matches[1], nil
-}
diff --git a/runsc/mitigate/cpu_test.go b/runsc/mitigate/cpu_test.go
deleted file mode 100644
index 374333465..000000000
--- a/runsc/mitigate/cpu_test.go
+++ /dev/null
@@ -1,605 +0,0 @@
-// Copyright 2021 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package mitigate
-
-import (
- "fmt"
- "io/ioutil"
- "strings"
- "testing"
-)
-
-// mockCPU represents data from CPUs that will be mitigated.
-type mockCPU struct {
- name string
- vendorID string
- family int
- model int
- modelName string
- bugs string
- physicalCores int
- cores int
- threadsPerCore int
-}
-
-var cascadeLake4 = mockCPU{
- name: "CascadeLake",
- vendorID: "GenuineIntel",
- family: 6,
- model: 85,
- modelName: "Intel(R) Xeon(R) CPU",
- bugs: "spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa",
- physicalCores: 1,
- cores: 2,
- threadsPerCore: 2,
-}
-
-var haswell2 = mockCPU{
- name: "Haswell",
- vendorID: "GenuineIntel",
- family: 6,
- model: 63,
- modelName: "Intel(R) Xeon(R) CPU",
- bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs",
- physicalCores: 1,
- cores: 1,
- threadsPerCore: 2,
-}
-
-var haswell2core = mockCPU{
- name: "Haswell2Physical",
- vendorID: "GenuineIntel",
- family: 6,
- model: 63,
- modelName: "Intel(R) Xeon(R) CPU",
- bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs",
- physicalCores: 2,
- cores: 1,
- threadsPerCore: 1,
-}
-
-var amd8 = mockCPU{
- name: "AMD",
- vendorID: "AuthenticAMD",
- family: 23,
- model: 49,
- modelName: "AMD EPYC 7B12",
- bugs: "sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass",
- physicalCores: 4,
- cores: 1,
- threadsPerCore: 2,
-}
-
-// makeCPUString makes a string formated like /proc/cpuinfo for each cpuTestCase
-func (tc mockCPU) makeCPUString() string {
- template := `processor : %d
-vendor_id : %s
-cpu family : %d
-model : %d
-model name : %s
-physical id : %d
-core id : %d
-cpu cores : %d
-bugs : %s
-`
- ret := ``
- for i := 0; i < tc.physicalCores; i++ {
- for j := 0; j < tc.cores; j++ {
- for k := 0; k < tc.threadsPerCore; k++ {
- processorNum := (i*tc.cores+j)*tc.threadsPerCore + k
- ret += fmt.Sprintf(template,
- processorNum, /*processor*/
- tc.vendorID, /*vendor_id*/
- tc.family, /*cpu family*/
- tc.model, /*model*/
- tc.modelName, /*model name*/
- i, /*physical id*/
- j, /*core id*/
- tc.cores*tc.physicalCores, /*cpu cores*/
- tc.bugs /*bugs*/)
- }
- }
- }
- return ret
-}
-
-func (tc mockCPU) makeSysPossibleString() string {
- max := tc.physicalCores * tc.cores * tc.threadsPerCore
- if max == 1 {
- return "0"
- }
- return fmt.Sprintf("0-%d", max-1)
-}
-
-// TestMockCPUSet tests mock cpu test cases against the cpuSet functions.
-func TestMockCPUSet(t *testing.T) {
- for _, tc := range []struct {
- testCase mockCPU
- isVulnerable bool
- }{
- {
- testCase: amd8,
- isVulnerable: false,
- },
- {
- testCase: haswell2,
- isVulnerable: true,
- },
- {
- testCase: haswell2core,
- isVulnerable: true,
- },
-
- {
- testCase: cascadeLake4,
- isVulnerable: true,
- },
- } {
- t.Run(tc.testCase.name, func(t *testing.T) {
- data := tc.testCase.makeCPUString()
- vulnerable := func(t thread) bool {
- return t.isVulnerable()
- }
- set, err := newCPUSet([]byte(data), vulnerable)
- if err != nil {
- t.Fatalf("Failed to ")
- }
- remaining := set.getRemainingList()
- // In the non-vulnerable case, no cores should be shutdown so all should remain.
- want := tc.testCase.physicalCores * tc.testCase.cores * tc.testCase.threadsPerCore
- if tc.isVulnerable {
- want = tc.testCase.physicalCores * tc.testCase.cores
- }
-
- if want != len(remaining) {
- t.Fatalf("Failed to shutdown the correct number of cores: want: %d got: %d", want, len(remaining))
- }
-
- if !tc.isVulnerable {
- return
- }
-
- // If the set is vulnerable, we expect only 1 thread per hyperthread pair.
- for _, r := range remaining {
- if _, ok := set[r.id]; !ok {
- t.Fatalf("Entry %+v not in map, there must be two entries in the same thread group.", r)
- }
- delete(set, r.id)
- }
-
- possible := tc.testCase.makeSysPossibleString()
- set, err = newCPUSetFromPossible([]byte(possible))
- if err != nil {
- t.Fatalf("Failed to make cpuSet: %v", err)
- }
-
- want = tc.testCase.physicalCores * tc.testCase.cores * tc.testCase.threadsPerCore
- got := len(set.getRemainingList())
- if got != want {
- t.Fatalf("Returned the wrong number of CPUs want: %d got: %d", want, got)
- }
- })
- }
-}
-
-// TestGetCPU tests basic parsing of single CPU strings from reading
-// /proc/cpuinfo.
-func TestGetCPU(t *testing.T) {
- data := `processor : 0
-vendor_id : GenuineIntel
-cpu family : 6
-model : 85
-physical id: 0
-core id : 0
-bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa itlb_multihit
-`
- want := thread{
- processorNumber: 0,
- vendorID: "GenuineIntel",
- cpuFamily: 6,
- model: 85,
- id: cpuID{
- physicalID: 0,
- coreID: 0,
- },
- bugs: map[string]struct{}{
- "cpu_meltdown": struct{}{},
- "spectre_v1": struct{}{},
- "spectre_v2": struct{}{},
- "spec_store_bypass": struct{}{},
- "l1tf": struct{}{},
- "mds": struct{}{},
- "swapgs": struct{}{},
- "taa": struct{}{},
- "itlb_multihit": struct{}{},
- },
- }
-
- got, err := newThread(data)
- if err != nil {
- t.Fatalf("getCpu failed with error: %v", err)
- }
-
- if !want.similarTo(got) {
- t.Fatalf("Failed cpus not similar: got: %+v, want: %+v", got, want)
- }
-
- if !got.isVulnerable() {
- t.Fatalf("Failed: cpu should be vulnerable.")
- }
-}
-
-func TestInvalid(t *testing.T) {
- result, err := getThreads(`something not a processor`)
- if err == nil {
- t.Fatalf("getCPU set didn't return an error: %+v", result)
- }
-
- if !strings.Contains(err.Error(), "no cpus") {
- t.Fatalf("Incorrect error returned: %v", err)
- }
-}
-
-// TestCPUSet tests getting the right number of CPUs from
-// parsing full output of /proc/cpuinfo.
-func TestCPUSet(t *testing.T) {
- data := `processor : 0
-vendor_id : GenuineIntel
-cpu family : 6
-model : 63
-model name : Intel(R) Xeon(R) CPU @ 2.30GHz
-stepping : 0
-microcode : 0x1
-cpu MHz : 2299.998
-cache size : 46080 KB
-physical id : 0
-siblings : 2
-core id : 0
-cpu cores : 1
-apicid : 0
-initial apicid : 0
-fpu : yes
-fpu_exception : yes
-cpuid level : 13
-wp : yes
-flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities
-bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs
-bogomips : 4599.99
-clflush size : 64
-cache_alignment : 64
-address sizes : 46 bits physical, 48 bits virtual
-power management:
-
-processor : 1
-vendor_id : GenuineIntel
-cpu family : 6
-model : 63
-model name : Intel(R) Xeon(R) CPU @ 2.30GHz
-stepping : 0
-microcode : 0x1
-cpu MHz : 2299.998
-cache size : 46080 KB
-physical id : 0
-siblings : 2
-core id : 0
-cpu cores : 1
-apicid : 1
-initial apicid : 1
-fpu : yes
-fpu_exception : yes
-cpuid level : 13
-wp : yes
-flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities
-bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs
-bogomips : 4599.99
-clflush size : 64
-cache_alignment : 64
-address sizes : 46 bits physical, 48 bits virtual
-power management:
-`
- cpuSet, err := getThreads(data)
- if err != nil {
- t.Fatalf("getCPUSet failed: %v", err)
- }
-
- wantCPULen := 2
- if len(cpuSet) != wantCPULen {
- t.Fatalf("Num CPU mismatch: want: %d, got: %d", wantCPULen, len(cpuSet))
- }
-
- wantCPU := thread{
- vendorID: "GenuineIntel",
- cpuFamily: 6,
- model: 63,
- bugs: map[string]struct{}{
- "cpu_meltdown": struct{}{},
- "spectre_v1": struct{}{},
- "spectre_v2": struct{}{},
- "spec_store_bypass": struct{}{},
- "l1tf": struct{}{},
- "mds": struct{}{},
- "swapgs": struct{}{},
- },
- }
-
- for _, c := range cpuSet {
- if !wantCPU.similarTo(c) {
- t.Fatalf("Failed cpus not equal: got: %+v, want: %+v", c, wantCPU)
- }
- }
-}
-
-// TestReadFile is a smoke test for parsing methods.
-func TestReadFile(t *testing.T) {
- data, err := ioutil.ReadFile("/proc/cpuinfo")
- if err != nil {
- t.Fatalf("Failed to read cpuinfo: %v", err)
- }
-
- vulnerable := func(t thread) bool {
- return t.isVulnerable()
- }
-
- set, err := newCPUSet(data, vulnerable)
- if err != nil {
- t.Fatalf("Failed to parse CPU data %v\n%s", err, data)
- }
-
- if len(set) < 1 {
- t.Fatalf("Failed to parse any CPUs: %d", len(set))
- }
-
- t.Log(set)
-}
-
-// TestVulnerable tests if the isVulnerable method is correct
-// among known CPUs in GCP.
-func TestVulnerable(t *testing.T) {
- const haswell = `processor : 0
-vendor_id : GenuineIntel
-cpu family : 6
-model : 63
-model name : Intel(R) Xeon(R) CPU @ 2.30GHz
-stepping : 0
-microcode : 0x1
-cpu MHz : 2299.998
-cache size : 46080 KB
-physical id : 0
-siblings : 4
-core id : 0
-cpu cores : 2
-apicid : 0
-initial apicid : 0
-fpu : yes
-fpu_exception : yes
-cpuid level : 13
-wp : yes
-flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities
-bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs
-bogomips : 4599.99
-clflush size : 64
-cache_alignment : 64
-address sizes : 46 bits physical, 48 bits virtual
-power management:`
-
- const skylake = `processor : 0
-vendor_id : GenuineIntel
-cpu family : 6
-model : 85
-model name : Intel(R) Xeon(R) CPU @ 2.00GHz
-stepping : 3
-microcode : 0x1
-cpu MHz : 2000.180
-cache size : 39424 KB
-physical id : 0
-siblings : 2
-core id : 0
-cpu cores : 1
-apicid : 0
-initial apicid : 0
-fpu : yes
-fpu_exception : yes
-cpuid level : 13
-wp : yes
-flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat md_clear arch_capabilities
-bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa
-bogomips : 4000.36
-clflush size : 64
-cache_alignment : 64
-address sizes : 46 bits physical, 48 bits virtual
-power management:`
-
- const cascade = `processor : 0
-vendor_id : GenuineIntel
-cpu family : 6
-model : 85
-model name : Intel(R) Xeon(R) CPU
-stepping : 7
-microcode : 0x1
-cpu MHz : 2800.198
-cache size : 33792 KB
-physical id : 0
-siblings : 2
-core id : 0
-cpu cores : 1
-apicid : 0
-initial apicid : 0
-fpu : yes
-fpu_exception : yes
-cpuid level : 13
-wp : yes
-flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2
- ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmu
-lqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowpr
-efetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid r
-tm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves a
-rat avx512_vnni md_clear arch_capabilities
-bugs : spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa
-bogomips : 5600.39
-clflush size : 64
-cache_alignment : 64
-address sizes : 46 bits physical, 48 bits virtual
-power management:`
-
- const amd = `processor : 0
-vendor_id : AuthenticAMD
-cpu family : 23
-model : 49
-model name : AMD EPYC 7B12
-stepping : 0
-microcode : 0x1000065
-cpu MHz : 2250.000
-cache size : 512 KB
-physical id : 0
-siblings : 2
-core id : 0
-cpu cores : 1
-apicid : 0
-initial apicid : 0
-fpu : yes
-fpu_exception : yes
-cpuid level : 13
-wp : yes
-flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid extd_apicid tsc_known_freq pni pclmulqdq ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm cmp_legacy cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw topoext ssbd ibrs ibpb stibp vmmcall fsgsbase tsc_adjust bmi1 avx2 smep bmi2 rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 clzero xsaveerptr arat npt nrip_save umip rdpid
-bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
-bogomips : 4500.00
-TLB size : 3072 4K pages
-clflush size : 64
-cache_alignment : 64
-address sizes : 48 bits physical, 48 bits virtual
-power management:`
-
- for _, tc := range []struct {
- name string
- cpuString string
- vulnerable bool
- }{
- {
- name: "haswell",
- cpuString: haswell,
- vulnerable: true,
- }, {
- name: "skylake",
- cpuString: skylake,
- vulnerable: true,
- }, {
- name: "amd",
- cpuString: amd,
- vulnerable: false,
- },
- } {
- t.Run(tc.name, func(t *testing.T) {
- set, err := getThreads(tc.cpuString)
- if err != nil {
- t.Fatalf("Failed to getCPUSet:%v\n %s", err, tc.cpuString)
- }
-
- if len(set) < 1 {
- t.Fatalf("Returned empty cpu set: %v", set)
- }
-
- for _, c := range set {
- got := func() bool {
- return c.isVulnerable()
- }()
-
- if got != tc.vulnerable {
- t.Fatalf("Mismatch vulnerable for cpu %+s: got %t want: %t", tc.name, tc.vulnerable, got)
- }
- }
- })
- }
-}
-
-func TestReverse(t *testing.T) {
- const noParse = "-1-"
- for _, tc := range []struct {
- name string
- output string
- wantErr error
- wantCount int
- }{
- {
- name: "base",
- output: "0-7",
- wantErr: nil,
- wantCount: 8,
- },
- {
- name: "huge",
- output: "0-111",
- wantErr: nil,
- wantCount: 112,
- },
- {
- name: "not zero",
- output: "50-53",
- wantErr: nil,
- wantCount: 4,
- },
- {
- name: "small",
- output: "0",
- wantErr: nil,
- wantCount: 1,
- },
- {
- name: "invalid order",
- output: "10-6",
- wantErr: fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", 10, 6),
- },
- {
- name: "no parse",
- output: noParse,
- wantErr: fmt.Errorf(`mismatch regex from /sys/devices/system/cpu/possible: %q`, noParse),
- },
- } {
- t.Run(tc.name, func(t *testing.T) {
- threads, err := getThreadsFromPossible([]byte(tc.output))
-
- switch {
- case tc.wantErr == nil:
- if err != nil {
- t.Fatalf("Wanted nil err, got: %v", err)
- }
- case err == nil:
- t.Fatalf("Want error: %v got: %v", tc.wantErr, err)
- default:
- if tc.wantErr.Error() != err.Error() {
- t.Fatalf("Want error: %v got error: %v", tc.wantErr, err)
- }
- }
-
- if len(threads) != tc.wantCount {
- t.Fatalf("Want count: %d got: %d", tc.wantCount, len(threads))
- }
- })
- }
-}
-
-func TestReverseSmoke(t *testing.T) {
- data, err := ioutil.ReadFile(allPossibleCPUs)
- if err != nil {
- t.Fatalf("Failed to read from possible: %v", err)
- }
- threads, err := getThreadsFromPossible(data)
- if err != nil {
- t.Fatalf("Could not parse possible output: %v", err)
- }
-
- if len(threads) <= 0 {
- t.Fatalf("Didn't get any CPU cores: %d", len(threads))
- }
-}
diff --git a/runsc/mitigate/mitigate.go b/runsc/mitigate/mitigate.go
index 91de623e3..24f67414c 100644
--- a/runsc/mitigate/mitigate.go
+++ b/runsc/mitigate/mitigate.go
@@ -14,121 +14,440 @@
// Package mitigate provides libraries for the mitigate command. The
// mitigate command mitigates side channel attacks such as MDS. Mitigate
-// shuts down CPUs via /sys/devices/system/cpu/cpu{N}/online. In addition,
-// the mitigate also handles computing available CPU in kubernetes kube_config
-// files.
+// shuts down CPUs via /sys/devices/system/cpu/cpu{N}/online.
package mitigate
import (
"fmt"
"io/ioutil"
-
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/runsc/flag"
+ "os"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
)
const (
- cpuInfo = "/proc/cpuinfo"
- allPossibleCPUs = "/sys/devices/system/cpu/possible"
+ // mds is the only bug we care about.
+ mds = "mds"
+
+ // Constants for parsing /proc/cpuinfo.
+ processorKey = "processor"
+ vendorIDKey = "vendor_id"
+ cpuFamilyKey = "cpu family"
+ modelKey = "model"
+ physicalIDKey = "physical id"
+ coreIDKey = "core id"
+ bugsKey = "bugs"
+
+ // Path to shutdown a CPU.
+ cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online"
)
-// Mitigate handles high level mitigate operations provided to runsc.
-type Mitigate struct {
- dryRun bool // Run the command without changing the underlying system.
- reverse bool // Reverse mitigate by turning on all CPU cores.
- other mitigate // Struct holds extra mitigate logic.
- path string // path to read for each operation (e.g. /proc/cpuinfo).
+// CPUSet contains a map of all CPUs on the system, mapped
+// by Physical ID and CoreIDs. threads with the same
+// Core and Physical ID are Hyperthread pairs.
+type CPUSet map[threadID]*ThreadGroup
+
+// NewCPUSet creates a CPUSet from data read from /proc/cpuinfo.
+func NewCPUSet(data []byte, vulnerable func(Thread) bool) (CPUSet, error) {
+ processors, err := getThreads(string(data))
+ if err != nil {
+ return nil, err
+ }
+
+ set := make(CPUSet)
+ for _, p := range processors {
+ // Each ID is of the form physicalID:coreID. Hyperthread pairs
+ // have identical physical and core IDs. We need to match
+ // Hyperthread pairs so that we can shutdown all but one per
+ // pair.
+ core, ok := set[p.id]
+ if !ok {
+ core = &ThreadGroup{}
+ set[p.id] = core
+ }
+ core.isVulnerable = core.isVulnerable || vulnerable(p)
+ core.threads = append(core.threads, p)
+ }
+
+ // We need to make sure we shutdown the lowest number processor per
+ // thread group.
+ for _, tg := range set {
+ sort.Slice(tg.threads, func(i, j int) bool {
+ return tg.threads[i].processorNumber < tg.threads[j].processorNumber
+ })
+ }
+ return set, nil
}
-// Usage implments Usage for cmd.Mitigate.
-func (m Mitigate) Usage() string {
- usageString := `mitigate [flags]
+// NewCPUSetFromPossible makes a cpuSet data read from
+// /sys/devices/system/cpu/possible. This is used in enable operations
+// where the caller simply wants to enable all CPUS.
+func NewCPUSetFromPossible(data []byte) (CPUSet, error) {
+ threads, err := GetThreadsFromPossible(data)
+ if err != nil {
+ return nil, err
+ }
+
+ // We don't care if a CPU is vulnerable or not, we just
+ // want to return a list of all CPUs on the host.
+ set := CPUSet{
+ threads[0].id: &ThreadGroup{
+ threads: threads,
+ isVulnerable: false,
+ },
+ }
+ return set, nil
+}
-Mitigate mitigates a system to the "MDS" vulnerability by implementing a manual shutdown of SMT. The command checks /proc/cpuinfo for cpus having the MDS vulnerability, and if found, shutdown all but one CPU per hyperthread pair via /sys/devices/system/cpu/cpu{N}/online. CPUs can be restored by writing "2" to each file in /sys/devices/system/cpu/cpu{N}/online or performing a system reboot.
+// String implements the String method for CPUSet.
+func (c CPUSet) String() string {
+ ret := ""
+ for _, tg := range c {
+ ret += fmt.Sprintf("%s\n", tg)
+ }
+ return ret
+}
-The command can be reversed with --reverse, which reads the total CPUs from /sys/devices/system/cpu/possible and enables all with /sys/devices/system/cpu/cpu{N}/online.
-`
- return usageString + m.other.usage()
+// GetRemainingList returns the list of threads that will remain active
+// after mitigation.
+func (c CPUSet) GetRemainingList() []Thread {
+ threads := make([]Thread, 0, len(c))
+ for _, core := range c {
+ // If we're vulnerable, take only one thread from the pair.
+ if core.isVulnerable {
+ threads = append(threads, core.threads[0])
+ continue
+ }
+ // Otherwise don't shutdown anything.
+ threads = append(threads, core.threads...)
+ }
+ return threads
}
-// SetFlags sets flags for the command Mitigate.
-func (m Mitigate) SetFlags(f *flag.FlagSet) {
- f.BoolVar(&m.dryRun, "dryrun", false, "run the command without changing system")
- f.BoolVar(&m.reverse, "reverse", false, "reverse mitigate by enabling all CPUs")
- m.other.setFlags(f)
- m.path = cpuInfo
- if m.reverse {
- m.path = allPossibleCPUs
+// GetShutdownList returns the list of threads that will be shutdown on
+// mitigation.
+func (c CPUSet) GetShutdownList() []Thread {
+ threads := make([]Thread, 0)
+ for _, core := range c {
+ // Only if we're vulnerable do shutdown anything. In this case,
+ // shutdown all but the first entry.
+ if core.isVulnerable && len(core.threads) > 1 {
+ threads = append(threads, core.threads[1:]...)
+ }
}
+ return threads
}
-// Execute executes the Mitigate command.
-func (m Mitigate) Execute() error {
- data, err := ioutil.ReadFile(m.path)
- if err != nil {
- return fmt.Errorf("failed to read %s: %v", m.path, err)
+// ThreadGroup represents Hyperthread pairs on the same physical/core ID.
+type ThreadGroup struct {
+ threads []Thread
+ isVulnerable bool
+}
+
+// String implements the String method for threadGroup.
+func (c ThreadGroup) String() string {
+ ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable)
+ for _, processor := range c.threads {
+ ret += fmt.Sprintf("%s\n", processor)
}
+ return ret
+}
- if m.reverse {
- err := m.doReverse(data)
+// getThreads returns threads structs from reading /proc/cpuinfo.
+func getThreads(data string) ([]Thread, error) {
+ // Each processor entry should start with the
+ // processor key. Find the beginings of each.
+ r := buildRegex(processorKey, `\d+`)
+ indices := r.FindAllStringIndex(data, -1)
+ if len(indices) < 1 {
+ return nil, fmt.Errorf("no cpus found for: %q", data)
+ }
+
+ // Add the ending index for last entry.
+ indices = append(indices, []int{len(data), -1})
+
+ // Valid cpus are now defined by strings in between
+ // indexes (e.g. data[index[i], index[i+1]]).
+ // There should be len(indicies) - 1 CPUs
+ // since the last index is the end of the string.
+ cpus := make([]Thread, 0, len(indices))
+ // Find each string that represents a CPU. These begin "processor".
+ for i := 1; i < len(indices); i++ {
+ start := indices[i-1][0]
+ end := indices[i][0]
+ // Parse the CPU entry, which should be between start/end.
+ c, err := newThread(data[start:end])
if err != nil {
- return fmt.Errorf("reverse operation failed: %v", err)
+ return nil, err
}
- return nil
+ cpus = append(cpus, c)
+ }
+ return cpus, nil
+}
+
+// GetThreadsFromPossible makes threads from data read from /sys/devices/system/cpu/possible.
+func GetThreadsFromPossible(data []byte) ([]Thread, error) {
+ possibleRegex := regexp.MustCompile(`(?m)^(\d+)(-(\d+))?$`)
+ matches := possibleRegex.FindStringSubmatch(string(data))
+ if len(matches) != 4 {
+ return nil, fmt.Errorf("mismatch regex from possible: %q", string(data))
+ }
+
+ // If matches[3] is empty, we only have one cpu entry.
+ if matches[3] == "" {
+ matches[3] = matches[1]
}
- set, err := m.doMitigate(data)
+ begin, err := strconv.ParseInt(matches[1], 10, 64)
if err != nil {
- return fmt.Errorf("mitigate operation failed: %v", err)
+ return nil, fmt.Errorf("failed to parse begin: %v", err)
}
- return m.other.execute(set, m.dryRun)
+ end, err := strconv.ParseInt(matches[3], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse end: %v", err)
+ }
+ if begin > end || begin < 0 || end < 0 {
+ return nil, fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", begin, end)
+ }
+
+ ret := make([]Thread, 0, end-begin)
+ for i := begin; i <= end; i++ {
+ ret = append(ret, Thread{
+ processorNumber: i,
+ id: threadID{
+ physicalID: 0, // we don't care about id for enable ops.
+ coreID: 0,
+ },
+ })
+ }
+
+ return ret, nil
+}
+
+// threadID for each thread is defined by the physical and
+// core IDs. If equal, two threads are Hyperthread pairs.
+type threadID struct {
+ physicalID int64
+ coreID int64
}
-func (m Mitigate) doMitigate(data []byte) (cpuSet, error) {
- set, err := newCPUSet(data, m.other.vulnerable)
+// Thread represents pertinent info about a single hyperthread in a pair.
+type Thread struct {
+ processorNumber int64 // the processor number of this CPU.
+ vendorID string // the vendorID of CPU (e.g. AuthenticAMD).
+ cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake).
+ model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake).
+ id threadID // id for this thread
+ bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field.
+}
+
+// newThread parses a CPU from a single cpu entry from /proc/cpuinfo.
+func newThread(data string) (Thread, error) {
+ empty := Thread{}
+ processor, err := parseProcessor(data)
if err != nil {
- return nil, err
+ return empty, err
}
- log.Infof("Mitigate found the following CPUs...")
- log.Infof("%s", set)
+ vendorID, err := parseVendorID(data)
+ if err != nil {
+ return empty, err
+ }
- disableList := set.getShutdownList()
- log.Infof("Disabling threads on thread pairs.")
- for _, t := range disableList {
- log.Infof("Disable thread: %s", t)
- if m.dryRun {
- continue
- }
- if err := t.disable(); err != nil {
- return nil, fmt.Errorf("error disabling thread: %s err: %v", t, err)
- }
+ cpuFamily, err := parseCPUFamily(data)
+ if err != nil {
+ return empty, err
}
- log.Infof("Shutdown successful.")
- return set, nil
+
+ model, err := parseModel(data)
+ if err != nil {
+ return empty, err
+ }
+
+ physicalID, err := parsePhysicalID(data)
+ if err != nil {
+ return empty, err
+ }
+
+ coreID, err := parseCoreID(data)
+ if err != nil {
+ return empty, err
+ }
+
+ bugs, err := parseBugs(data)
+ if err != nil {
+ return empty, err
+ }
+
+ return Thread{
+ processorNumber: processor,
+ vendorID: vendorID,
+ cpuFamily: cpuFamily,
+ model: model,
+ id: threadID{
+ physicalID: physicalID,
+ coreID: coreID,
+ },
+ bugs: bugs,
+ }, nil
+}
+
+// String implements the String method for thread.
+func (t Thread) String() string {
+ template := `CPU: %d
+CPU ID: %+v
+Vendor: %s
+Family/Model: %d/%d
+Bugs: %s
+`
+ bugs := make([]string, 0)
+ for bug := range t.bugs {
+ bugs = append(bugs, bug)
+ }
+
+ return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ","))
+}
+
+// Enable turns on the CPU by writing 1 to /sys/devices/cpu/cpu{N}/online.
+func (t Thread) Enable() error {
+ // Linux ensures that "cpu0" is always online.
+ if t.processorNumber == 0 {
+ return nil
+ }
+ cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber)
+ f, err := os.OpenFile(cpuPath, os.O_WRONLY|os.O_CREATE, 0644)
+ if err != nil {
+ return fmt.Errorf("failed to open file %s: %v", cpuPath, err)
+ }
+ if _, err = f.Write([]byte{'1'}); err != nil {
+ return fmt.Errorf("failed to write '1' to %s: %v", cpuPath, err)
+ }
+ return nil
+}
+
+// Disable turns off the CPU by writing 0 to /sys/devices/cpu/cpu{N}/online.
+func (t Thread) Disable() error {
+ // The core labeled "cpu0" can never be taken offline via this method.
+ // Linux will return EPERM if the user even creates a file at the /sys
+ // path above.
+ if t.processorNumber == 0 {
+ return fmt.Errorf("invalid shutdown operation: cpu0 cannot be disabled")
+ }
+ cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber)
+ return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644)
}
-func (m Mitigate) doReverse(data []byte) error {
- set, err := newCPUSetFromPossible(data)
+// IsVulnerable checks if a CPU is vulnerable to mds.
+func (t Thread) IsVulnerable() bool {
+ _, ok := t.bugs[mds]
+ return ok
+}
+
+// isActive checks if a CPU is active from /sys/devices/system/cpu/cpu{N}/online
+// If the file does not exist (ioutil returns in error), we assume the CPU is on.
+func (t Thread) isActive() bool {
+ cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber)
+ data, err := ioutil.ReadFile(cpuPath)
if err != nil {
- return err
+ return true
}
+ return len(data) > 0 && data[0] != '0'
+}
- log.Infof("Reverse mitigate found the following CPUs...")
- log.Infof("%s", set)
+// SimilarTo checks family/model/bugs fields for equality of two
+// processors.
+func (t Thread) SimilarTo(other Thread) bool {
+ if t.vendorID != other.vendorID {
+ return false
+ }
- enableList := set.getRemainingList()
+ if other.cpuFamily != t.cpuFamily {
+ return false
+ }
- log.Infof("Enabling all CPUs...")
- for _, t := range enableList {
- log.Infof("Enabling thread: %s", t)
- if m.dryRun {
- continue
- }
- if err := t.enable(); err != nil {
- return fmt.Errorf("error enabling thread: %s err: %v", t, err)
+ if other.model != t.model {
+ return false
+ }
+
+ if len(other.bugs) != len(t.bugs) {
+ return false
+ }
+
+ for bug := range t.bugs {
+ if _, ok := other.bugs[bug]; !ok {
+ return false
}
}
- log.Infof("Enable successful.")
- return nil
+ return true
+}
+
+// parseProcessor grabs the processor field from /proc/cpuinfo output.
+func parseProcessor(data string) (int64, error) {
+ return parseIntegerResult(data, processorKey)
+}
+
+// parseVendorID grabs the vendor_id field from /proc/cpuinfo output.
+func parseVendorID(data string) (string, error) {
+ return parseRegex(data, vendorIDKey, `[\w\d]+`)
+}
+
+// parseCPUFamily grabs the cpu family field from /proc/cpuinfo output.
+func parseCPUFamily(data string) (int64, error) {
+ return parseIntegerResult(data, cpuFamilyKey)
+}
+
+// parseModel grabs the model field from /proc/cpuinfo output.
+func parseModel(data string) (int64, error) {
+ return parseIntegerResult(data, modelKey)
+}
+
+// parsePhysicalID parses the physical id field.
+func parsePhysicalID(data string) (int64, error) {
+ return parseIntegerResult(data, physicalIDKey)
+}
+
+// parseCoreID parses the core id field.
+func parseCoreID(data string) (int64, error) {
+ return parseIntegerResult(data, coreIDKey)
+}
+
+// parseBugs grabs the bugs field from /proc/cpuinfo output.
+func parseBugs(data string) (map[string]struct{}, error) {
+ result, err := parseRegex(data, bugsKey, `[\d\w\s]*`)
+ if err != nil {
+ return nil, err
+ }
+ bugs := strings.Split(result, " ")
+ ret := make(map[string]struct{}, len(bugs))
+ for _, bug := range bugs {
+ ret[bug] = struct{}{}
+ }
+ return ret, nil
+}
+
+// parseIntegerResult parses fields expecting an integer.
+func parseIntegerResult(data, key string) (int64, error) {
+ result, err := parseRegex(data, key, `\d+`)
+ if err != nil {
+ return 0, err
+ }
+ return strconv.ParseInt(result, 0, 64)
+}
+
+// buildRegex builds a regex for parsing each CPU field.
+func buildRegex(key, match string) *regexp.Regexp {
+ reg := fmt.Sprintf(`(?m)^%s\s*:\s*(.*)$`, key)
+ return regexp.MustCompile(reg)
+}
+
+// parseRegex parses data with key inserted into a standard regex template.
+func parseRegex(data, key, match string) (string, error) {
+ r := buildRegex(key, match)
+ matches := r.FindStringSubmatch(data)
+ if len(matches) < 2 {
+ return "", fmt.Errorf("failed to match key %q: %q", key, data)
+ }
+ return matches[1], nil
}
diff --git a/runsc/mitigate/mitigate_conf.go b/runsc/mitigate/mitigate_conf.go
deleted file mode 100644
index ee326324b..000000000
--- a/runsc/mitigate/mitigate_conf.go
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2021 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package mitigate
-
-import (
- "gvisor.dev/gvisor/runsc/flag"
-)
-
-type mitigate struct {
-}
-
-// usage returns the usage string portion for the mitigate.
-func (m mitigate) usage() string { return "" }
-
-// setFlags sets additional flags for the Mitigate command.
-func (m mitigate) setFlags(f *flag.FlagSet) {}
-
-// execute performs additional parts of Execute for Mitigate.
-func (m mitigate) execute(set cpuSet, dryrun bool) error {
- return nil
-}
-
-func (m mitigate) vulnerable(other thread) bool {
- return other.isVulnerable()
-}
diff --git a/runsc/mitigate/mitigate_test.go b/runsc/mitigate/mitigate_test.go
index b3a9a9b18..fbd8eb886 100644
--- a/runsc/mitigate/mitigate_test.go
+++ b/runsc/mitigate/mitigate_test.go
@@ -17,138 +17,519 @@ package mitigate
import (
"fmt"
"io/ioutil"
- "os"
"strings"
"testing"
+
+ "gvisor.dev/gvisor/runsc/mitigate/mock"
)
-type executeTestCase struct {
- name string
- mitigateData string
- mitigateError error
- reverseData string
- reverseError error
+// TestMockCPUSet tests mock cpu test cases against the cpuSet functions.
+func TestMockCPUSet(t *testing.T) {
+ for _, tc := range []struct {
+ testCase mock.CPU
+ isVulnerable bool
+ }{
+ {
+ testCase: mock.AMD8,
+ isVulnerable: false,
+ },
+ {
+ testCase: mock.Haswell2,
+ isVulnerable: true,
+ },
+ {
+ testCase: mock.Haswell2core,
+ isVulnerable: true,
+ },
+ {
+ testCase: mock.CascadeLake2,
+ isVulnerable: true,
+ },
+ {
+ testCase: mock.CascadeLake4,
+ isVulnerable: true,
+ },
+ } {
+ t.Run(tc.testCase.Name, func(t *testing.T) {
+ data := tc.testCase.MakeCPUString()
+ vulnerable := func(t Thread) bool {
+ return t.IsVulnerable()
+ }
+ set, err := NewCPUSet([]byte(data), vulnerable)
+ if err != nil {
+ t.Fatalf("Failed to create cpuSet: %v", err)
+ }
+
+ for _, tg := range set {
+ if err := checkSorted(tg.threads); err != nil {
+ t.Fatalf("Failed to sort cpuSet: %v", err)
+ }
+ }
+
+ remaining := set.GetRemainingList()
+ // In the non-vulnerable case, no cores should be shutdown so all should remain.
+ want := tc.testCase.PhysicalCores * tc.testCase.Cores * tc.testCase.ThreadsPerCore
+ if tc.isVulnerable {
+ want = tc.testCase.PhysicalCores * tc.testCase.Cores
+ }
+
+ if want != len(remaining) {
+ t.Fatalf("Failed to shutdown the correct number of cores: want: %d got: %d", want, len(remaining))
+ }
+
+ if !tc.isVulnerable {
+ return
+ }
+
+ // If the set is vulnerable, we expect only 1 thread per hyperthread pair.
+ for _, r := range remaining {
+ if _, ok := set[r.id]; !ok {
+ t.Fatalf("Entry %+v not in map, there must be two entries in the same thread group.", r)
+ }
+ delete(set, r.id)
+ }
+
+ possible := tc.testCase.MakeSysPossibleString()
+ set, err = NewCPUSetFromPossible([]byte(possible))
+ if err != nil {
+ t.Fatalf("Failed to make cpuSet: %v", err)
+ }
+
+ want = tc.testCase.PhysicalCores * tc.testCase.Cores * tc.testCase.ThreadsPerCore
+ got := len(set.GetRemainingList())
+ if got != want {
+ t.Fatalf("Returned the wrong number of CPUs want: %d got: %d", want, got)
+ }
+ })
+ }
}
-func TestExecute(t *testing.T) {
+// TestGetCPU tests basic parsing of single CPU strings from reading
+// /proc/cpuinfo.
+func TestGetCPU(t *testing.T) {
+ data := `processor : 0
+vendor_id : GenuineIntel
+cpu family : 6
+model : 85
+physical id: 0
+core id : 0
+bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa itlb_multihit
+`
+ want := Thread{
+ processorNumber: 0,
+ vendorID: "GenuineIntel",
+ cpuFamily: 6,
+ model: 85,
+ id: threadID{
+ physicalID: 0,
+ coreID: 0,
+ },
+ bugs: map[string]struct{}{
+ "cpu_meltdown": struct{}{},
+ "spectre_v1": struct{}{},
+ "spectre_v2": struct{}{},
+ "spec_store_bypass": struct{}{},
+ "l1tf": struct{}{},
+ "mds": struct{}{},
+ "swapgs": struct{}{},
+ "taa": struct{}{},
+ "itlb_multihit": struct{}{},
+ },
+ }
- partial := `processor : 1
-vendor_id : AuthenticAMD
-cpu family : 23
-model : 49
-model name : AMD EPYC 7B12
-physical id : 0
-bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+ got, err := newThread(data)
+ if err != nil {
+ t.Fatalf("getCpu failed with error: %v", err)
+ }
+
+ if !want.SimilarTo(got) {
+ t.Fatalf("Failed cpus not similar: got: %+v, want: %+v", got, want)
+ }
+
+ if !got.IsVulnerable() {
+ t.Fatalf("Failed: cpu should be vulnerable.")
+ }
+}
+
+func TestInvalid(t *testing.T) {
+ result, err := getThreads(`something not a processor`)
+ if err == nil {
+ t.Fatalf("getCPU set didn't return an error: %+v", result)
+ }
+
+ if !strings.Contains(err.Error(), "no cpus") {
+ t.Fatalf("Incorrect error returned: %v", err)
+ }
+}
+
+// TestCPUSet tests getting the right number of CPUs from
+// parsing full output of /proc/cpuinfo.
+func TestCPUSet(t *testing.T) {
+ data := `processor : 0
+vendor_id : GenuineIntel
+cpu family : 6
+model : 63
+model name : Intel(R) Xeon(R) CPU @ 2.30GHz
+stepping : 0
+microcode : 0x1
+cpu MHz : 2299.998
+cache size : 46080 KB
+physical id : 0
+siblings : 2
+core id : 0
+cpu cores : 1
+apicid : 0
+initial apicid : 0
+fpu : yes
+fpu_exception : yes
+cpuid level : 13
+wp : yes
+flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities
+bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs
+bogomips : 4599.99
+clflush size : 64
+cache_alignment : 64
+address sizes : 46 bits physical, 48 bits virtual
+power management:
+
+processor : 1
+vendor_id : GenuineIntel
+cpu family : 6
+model : 63
+model name : Intel(R) Xeon(R) CPU @ 2.30GHz
+stepping : 0
+microcode : 0x1
+cpu MHz : 2299.998
+cache size : 46080 KB
+physical id : 0
+siblings : 2
+core id : 0
+cpu cores : 1
+apicid : 1
+initial apicid : 1
+fpu : yes
+fpu_exception : yes
+cpuid level : 13
+wp : yes
+flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities
+bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs
+bogomips : 4599.99
+clflush size : 64
+cache_alignment : 64
+address sizes : 46 bits physical, 48 bits virtual
power management:
`
+ cpuSet, err := getThreads(data)
+ if err != nil {
+ t.Fatalf("getCPUSet failed: %v", err)
+ }
- for _, tc := range []executeTestCase{
- {
- name: "CascadeLake4",
- mitigateData: cascadeLake4.makeCPUString(),
- reverseData: cascadeLake4.makeSysPossibleString(),
- },
- {
- name: "Empty",
- mitigateData: "",
- mitigateError: fmt.Errorf(`mitigate operation failed: no cpus found for: ""`),
- reverseData: "",
- reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from %s: ""`, allPossibleCPUs),
+ wantCPULen := 2
+ if len(cpuSet) != wantCPULen {
+ t.Fatalf("Num CPU mismatch: want: %d, got: %d", wantCPULen, len(cpuSet))
+ }
+
+ wantCPU := Thread{
+ vendorID: "GenuineIntel",
+ cpuFamily: 6,
+ model: 63,
+ bugs: map[string]struct{}{
+ "cpu_meltdown": struct{}{},
+ "spectre_v1": struct{}{},
+ "spectre_v2": struct{}{},
+ "spec_store_bypass": struct{}{},
+ "l1tf": struct{}{},
+ "mds": struct{}{},
+ "swapgs": struct{}{},
},
- {
- name: "Partial",
- mitigateData: `processor : 0
+ }
+
+ for _, c := range cpuSet {
+ if !wantCPU.SimilarTo(c) {
+ t.Fatalf("Failed cpus not equal: got: %+v, want: %+v", c, wantCPU)
+ }
+ }
+}
+
+// TestReadFile is a smoke test for parsing methods.
+func TestReadFile(t *testing.T) {
+ data, err := ioutil.ReadFile("/proc/cpuinfo")
+ if err != nil {
+ t.Fatalf("Failed to read cpuinfo: %v", err)
+ }
+
+ vulnerable := func(t Thread) bool {
+ return t.IsVulnerable()
+ }
+
+ set, err := NewCPUSet(data, vulnerable)
+ if err != nil {
+ t.Fatalf("Failed to parse CPU data %v\n%s", err, data)
+ }
+
+ for _, tg := range set {
+ if err := checkSorted(tg.threads); err != nil {
+ t.Fatalf("Failed to sort cpuSet: %v", err)
+ }
+ }
+
+ if len(set) < 1 {
+ t.Fatalf("Failed to parse any CPUs: %d", len(set))
+ }
+
+ t.Log(set)
+}
+
+// TestVulnerable tests if the isVulnerable method is correct
+// among known CPUs in GCP.
+func TestVulnerable(t *testing.T) {
+ const haswell = `processor : 0
+vendor_id : GenuineIntel
+cpu family : 6
+model : 63
+model name : Intel(R) Xeon(R) CPU @ 2.30GHz
+stepping : 0
+microcode : 0x1
+cpu MHz : 2299.998
+cache size : 46080 KB
+physical id : 0
+siblings : 4
+core id : 0
+cpu cores : 2
+apicid : 0
+initial apicid : 0
+fpu : yes
+fpu_exception : yes
+cpuid level : 13
+wp : yes
+flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities
+bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs
+bogomips : 4599.99
+clflush size : 64
+cache_alignment : 64
+address sizes : 46 bits physical, 48 bits virtual
+power management:`
+
+ const skylake = `processor : 0
+vendor_id : GenuineIntel
+cpu family : 6
+model : 85
+model name : Intel(R) Xeon(R) CPU @ 2.00GHz
+stepping : 3
+microcode : 0x1
+cpu MHz : 2000.180
+cache size : 39424 KB
+physical id : 0
+siblings : 2
+core id : 0
+cpu cores : 1
+apicid : 0
+initial apicid : 0
+fpu : yes
+fpu_exception : yes
+cpuid level : 13
+wp : yes
+flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat md_clear arch_capabilities
+bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa
+bogomips : 4000.36
+clflush size : 64
+cache_alignment : 64
+address sizes : 46 bits physical, 48 bits virtual
+power management:`
+
+ const cascade = `processor : 0
+vendor_id : GenuineIntel
+cpu family : 6
+model : 85
+model name : Intel(R) Xeon(R) CPU
+stepping : 7
+microcode : 0x1
+cpu MHz : 2800.198
+cache size : 33792 KB
+physical id : 0
+siblings : 2
+core id : 0
+cpu cores : 1
+apicid : 0
+initial apicid : 0
+fpu : yes
+fpu_exception : yes
+cpuid level : 13
+wp : yes
+flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2
+ ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmu
+lqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowpr
+efetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid r
+tm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves a
+rat avx512_vnni md_clear arch_capabilities
+bugs : spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa
+bogomips : 5600.39
+clflush size : 64
+cache_alignment : 64
+address sizes : 46 bits physical, 48 bits virtual
+power management:`
+
+ const amd = `processor : 0
vendor_id : AuthenticAMD
cpu family : 23
model : 49
model name : AMD EPYC 7B12
+stepping : 0
+microcode : 0x1000065
+cpu MHz : 2250.000
+cache size : 512 KB
physical id : 0
+siblings : 2
core id : 0
cpu cores : 1
+apicid : 0
+initial apicid : 0
+fpu : yes
+fpu_exception : yes
+cpuid level : 13
+wp : yes
+flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid extd_apicid tsc_known_freq pni pclmulqdq ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm cmp_legacy cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw topoext ssbd ibrs ibpb stibp vmmcall fsgsbase tsc_adjust bmi1 avx2 smep bmi2 rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 clzero xsaveerptr arat npt nrip_save umip rdpid
bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
-power management:
+bogomips : 4500.00
+TLB size : 3072 4K pages
+clflush size : 64
+cache_alignment : 64
+address sizes : 48 bits physical, 48 bits virtual
+power management:`
-` + partial,
- mitigateError: fmt.Errorf(`mitigate operation failed: failed to match key "core id": %q`, partial),
- reverseData: "1-",
- reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from %s: %q`, allPossibleCPUs, "1-"),
+ for _, tc := range []struct {
+ name string
+ cpuString string
+ vulnerable bool
+ }{
+ {
+ name: "haswell",
+ cpuString: haswell,
+ vulnerable: true,
+ }, {
+ name: "skylake",
+ cpuString: skylake,
+ vulnerable: true,
+ }, {
+ name: "amd",
+ cpuString: amd,
+ vulnerable: false,
},
} {
- doExecuteTest(t, Mitigate{}, tc)
+ t.Run(tc.name, func(t *testing.T) {
+ set, err := getThreads(tc.cpuString)
+ if err != nil {
+ t.Fatalf("Failed to getCPUSet:%v\n %s", err, tc.cpuString)
+ }
+
+ if len(set) < 1 {
+ t.Fatalf("Returned empty cpu set: %v", set)
+ }
+
+ for _, c := range set {
+ got := func() bool {
+ return c.IsVulnerable()
+ }()
+
+ if got != tc.vulnerable {
+ t.Fatalf("Mismatch vulnerable for cpu %+s: got %t want: %t", tc.name, tc.vulnerable, got)
+ }
+ }
+ })
}
}
-func TestExecuteSmoke(t *testing.T) {
- smokeMitigate, err := ioutil.ReadFile(cpuInfo)
+func TestReverse(t *testing.T) {
+ const noParse = "-1-"
+ for _, tc := range []struct {
+ name string
+ output string
+ wantErr error
+ wantCount int
+ }{
+ {
+ name: "base",
+ output: "0-7",
+ wantErr: nil,
+ wantCount: 8,
+ },
+ {
+ name: "huge",
+ output: "0-111",
+ wantErr: nil,
+ wantCount: 112,
+ },
+ {
+ name: "not zero",
+ output: "50-53",
+ wantErr: nil,
+ wantCount: 4,
+ },
+ {
+ name: "small",
+ output: "0",
+ wantErr: nil,
+ wantCount: 1,
+ },
+ {
+ name: "invalid order",
+ output: "10-6",
+ wantErr: fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", 10, 6),
+ },
+ {
+ name: "no parse",
+ output: noParse,
+ wantErr: fmt.Errorf(`mismatch regex from possible: %q`, noParse),
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ threads, err := GetThreadsFromPossible([]byte(tc.output))
+
+ switch {
+ case tc.wantErr == nil:
+ if err != nil {
+ t.Fatalf("Wanted nil err, got: %v", err)
+ }
+ case err == nil:
+ t.Fatalf("Want error: %v got: %v", tc.wantErr, err)
+ default:
+ if tc.wantErr.Error() != err.Error() {
+ t.Fatalf("Want error: %v got error: %v", tc.wantErr, err)
+ }
+ }
+
+ if len(threads) != tc.wantCount {
+ t.Fatalf("Want count: %d got: %d", tc.wantCount, len(threads))
+ }
+ })
+ }
+}
+
+func TestReverseSmoke(t *testing.T) {
+ data, err := ioutil.ReadFile("/sys/devices/system/cpu/possible")
if err != nil {
- t.Fatalf("Failed to read %s: %v", cpuInfo, err)
+ t.Fatalf("Failed to read from possible: %v", err)
}
- smokeReverse, err := ioutil.ReadFile(allPossibleCPUs)
+ threads, err := GetThreadsFromPossible(data)
if err != nil {
- t.Fatalf("Failed to read %s: %v", allPossibleCPUs, err)
+ t.Fatalf("Could not parse possible output: %v", err)
}
- doExecuteTest(t, Mitigate{}, executeTestCase{
- name: "SmokeTest",
- mitigateData: string(smokeMitigate),
- reverseData: string(smokeReverse),
- })
+ if len(threads) <= 0 {
+ t.Fatalf("Didn't get any CPU cores: %d", len(threads))
+ }
}
-// doExecuteTest runs Execute with the mitigate operation and reverse operation.
-func doExecuteTest(t *testing.T, m Mitigate, tc executeTestCase) {
- t.Run("Mitigate"+tc.name, func(t *testing.T) {
- m.dryRun = true
- file, err := ioutil.TempFile("", "outfile.txt")
- if err != nil {
- t.Fatalf("Failed to create tmpfile: %v", err)
- }
- defer os.Remove(file.Name())
-
- if _, err := file.WriteString(tc.mitigateData); err != nil {
- t.Fatalf("Failed to write to file: %v", err)
- }
-
- m.path = file.Name()
-
- got := m.Execute()
- if err = checkErr(tc.mitigateError, got); err != nil {
- t.Fatalf("Mitigate error mismatch: %v", err)
- }
- })
- t.Run("Reverse"+tc.name, func(t *testing.T) {
- m.dryRun = true
- m.reverse = true
-
- file, err := ioutil.TempFile("", "outfile.txt")
- if err != nil {
- t.Fatalf("Failed to create tmpfile: %v", err)
- }
- defer os.Remove(file.Name())
-
- if _, err := file.WriteString(tc.reverseData); err != nil {
- t.Fatalf("Failed to write to file: %v", err)
- }
-
- m.path = file.Name()
- got := m.Execute()
- if err = checkErr(tc.reverseError, got); err != nil {
- t.Fatalf("Mitigate error mismatch: %v", err)
+func checkSorted(threads []Thread) error {
+ if len(threads) < 2 {
+ return nil
+ }
+ last := threads[0].processorNumber
+ for _, t := range threads[1:] {
+ if last >= t.processorNumber {
+ return fmt.Errorf("threads out of order: thread %d before %d", t.processorNumber, last)
}
- })
-
-}
-
-// checkErr checks error for equality.
-func checkErr(want, got error) error {
- switch {
- case want == nil && got == nil:
- case want != nil && got == nil:
- fallthrough
- case want == nil && got != nil:
- fallthrough
- case want.Error() != strings.Trim(got.Error(), " "):
- return fmt.Errorf("got: %v want: %v", got, want)
+ last = t.processorNumber
}
return nil
}
diff --git a/runsc/mitigate/mock/BUILD b/runsc/mitigate/mock/BUILD
new file mode 100644
index 000000000..5019ff9ee
--- /dev/null
+++ b/runsc/mitigate/mock/BUILD
@@ -0,0 +1,11 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "mock",
+ srcs = ["mock.go"],
+ visibility = [
+ "//runsc:__subpackages__",
+ ],
+)
diff --git a/runsc/mitigate/mock/mock.go b/runsc/mitigate/mock/mock.go
new file mode 100644
index 000000000..2db718cb9
--- /dev/null
+++ b/runsc/mitigate/mock/mock.go
@@ -0,0 +1,141 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package mock contains mock CPUs for mitigate tests.
+package mock
+
+import "fmt"
+
+// CPU represents data from CPUs that will be mitigated.
+type CPU struct {
+ Name string
+ VendorID string
+ Family int
+ Model int
+ ModelName string
+ Bugs string
+ PhysicalCores int
+ Cores int
+ ThreadsPerCore int
+}
+
+// CascadeLake2 is a two core Intel CascadeLake machine.
+var CascadeLake2 = CPU{
+ Name: "CascadeLake",
+ VendorID: "GenuineIntel",
+ Family: 6,
+ Model: 85,
+ ModelName: "Intel(R) Xeon(R) CPU",
+ Bugs: "spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa",
+ PhysicalCores: 1,
+ Cores: 1,
+ ThreadsPerCore: 2,
+}
+
+// CascadeLake4 is a four core Intel CascadeLake machine.
+var CascadeLake4 = CPU{
+ Name: "CascadeLake",
+ VendorID: "GenuineIntel",
+ Family: 6,
+ Model: 85,
+ ModelName: "Intel(R) Xeon(R) CPU",
+ Bugs: "spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa",
+ PhysicalCores: 1,
+ Cores: 2,
+ ThreadsPerCore: 2,
+}
+
+// Haswell2 is a two core Intel Haswell machine.
+var Haswell2 = CPU{
+ Name: "Haswell",
+ VendorID: "GenuineIntel",
+ Family: 6,
+ Model: 63,
+ ModelName: "Intel(R) Xeon(R) CPU",
+ Bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs",
+ PhysicalCores: 1,
+ Cores: 1,
+ ThreadsPerCore: 2,
+}
+
+// Haswell2core is a 2 core Intel Haswell machine with no hyperthread pairs.
+var Haswell2core = CPU{
+ Name: "Haswell2Physical",
+ VendorID: "GenuineIntel",
+ Family: 6,
+ Model: 63,
+ ModelName: "Intel(R) Xeon(R) CPU",
+ Bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs",
+ PhysicalCores: 2,
+ Cores: 1,
+ ThreadsPerCore: 1,
+}
+
+// AMD8 is an eight core AMD machine.
+var AMD8 = CPU{
+ Name: "AMD",
+ VendorID: "AuthenticAMD",
+ Family: 23,
+ Model: 49,
+ ModelName: "AMD EPYC 7B12",
+ Bugs: "sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass",
+ PhysicalCores: 4,
+ Cores: 1,
+ ThreadsPerCore: 2,
+}
+
+// MakeCPUString makes a string formated like /proc/cpuinfo for each cpuTestCase
+func (tc CPU) MakeCPUString() string {
+ template := `processor : %d
+vendor_id : %s
+cpu family : %d
+model : %d
+model name : %s
+physical id : %d
+core id : %d
+cpu cores : %d
+bugs : %s
+
+`
+
+ ret := ``
+ for i := 0; i < tc.PhysicalCores; i++ {
+ for j := 0; j < tc.Cores; j++ {
+ for k := 0; k < tc.ThreadsPerCore; k++ {
+ processorNum := (i*tc.Cores+j)*tc.ThreadsPerCore + k
+ ret += fmt.Sprintf(template,
+ processorNum, /*processor*/
+ tc.VendorID, /*vendor_id*/
+ tc.Family, /*cpu family*/
+ tc.Model, /*model*/
+ tc.ModelName, /*model name*/
+ i, /*physical id*/
+ j, /*core id*/
+ tc.Cores*tc.PhysicalCores, /*cpu cores*/
+ tc.Bugs, /*bugs*/
+ )
+ }
+ }
+ }
+ return ret
+}
+
+// MakeSysPossibleString makes a string representing a the contents of /sys/devices/system/cpu/possible.
+func (tc CPU) MakeSysPossibleString() string {
+ max := tc.PhysicalCores * tc.Cores * tc.ThreadsPerCore
+ if max == 1 {
+ return "0"
+ }
+ return fmt.Sprintf("0-%d", max-1)
+}
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 9e429f7d5..f69558021 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -21,7 +21,6 @@ import (
"path/filepath"
"runtime"
"strconv"
- "syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/vishvananda/netlink"
@@ -102,11 +101,11 @@ func joinNetNS(nsPath string) (func(), error) {
// isRootNS determines whether we are running in the root net namespace.
// /proc/sys/net/core/rmem_default only exists in root network namespace.
func isRootNS() (bool, error) {
- err := syscall.Access("/proc/sys/net/core/rmem_default", syscall.F_OK)
+ err := unix.Access("/proc/sys/net/core/rmem_default", unix.F_OK)
switch err {
case nil:
return true, nil
- case syscall.ENOENT:
+ case unix.ENOENT:
return false, nil
default:
return false, fmt.Errorf("failed to access /proc/sys/net/core/rmem_default: %v", err)
@@ -270,17 +269,17 @@ type socketEntry struct {
func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) (*socketEntry, error) {
// Create the socket.
const protocol = 0x0300 // htons(ETH_P_ALL)
- fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol)
+ fd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, protocol)
if err != nil {
return nil, fmt.Errorf("unable to create raw socket: %v", err)
}
deviceFile := os.NewFile(uintptr(fd), "raw-device-fd")
// Bind to the appropriate device.
- ll := syscall.SockaddrLinklayer{
+ ll := unix.SockaddrLinklayer{
Protocol: protocol,
Ifindex: iface.Index,
}
- if err := syscall.Bind(fd, &ll); err != nil {
+ if err := unix.Bind(fd, &ll); err != nil {
return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err)
}
@@ -291,7 +290,7 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) (
return nil, fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err)
}
if gso {
- if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil {
+ if err := unix.SetsockoptInt(fd, unix.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil {
return nil, fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err)
}
gsoMaxSize = ifaceLink.Attrs().GSOMaxSize
@@ -307,18 +306,18 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) (
// incurring packet drops.
const bufSize = 4 << 20 // 4MB.
- if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, bufSize); err != nil {
- syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF, bufSize)
- sz, _ := syscall.GetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF)
+ if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, bufSize); err != nil {
+ unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, bufSize)
+ sz, _ := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF)
if sz < bufSize {
log.Warningf("Failed to increase rcv buffer to %d on SOCK_RAW on %s. Current buffer %d: %v", bufSize, iface.Name, sz, err)
}
}
- if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUFFORCE, bufSize); err != nil {
- syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF, bufSize)
- sz, _ := syscall.GetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF)
+ if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, bufSize); err != nil {
+ unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF, bufSize)
+ sz, _ := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF)
if sz < bufSize {
log.Warningf("Failed to increase snd buffer to %d on SOCK_RAW on %s. Curent buffer %d: %v", bufSize, iface.Name, sz, err)
}
diff --git a/runsc/sandbox/network_unsafe.go b/runsc/sandbox/network_unsafe.go
index 2a2a0fb7e..1b808a8a0 100644
--- a/runsc/sandbox/network_unsafe.go
+++ b/runsc/sandbox/network_unsafe.go
@@ -15,7 +15,6 @@
package sandbox
import (
- "syscall"
"unsafe"
"golang.org/x/sys/unix"
@@ -48,7 +47,7 @@ func isGSOEnabled(fd int, intf string) (bool, error) {
ifrData: &val,
}
- if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), unix.SIOCETHTOOL, uintptr(unsafe.Pointer(&ifr))); err != 0 {
+ if _, _, err := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), unix.SIOCETHTOOL, uintptr(unsafe.Pointer(&ifr))); err != 0 {
return false, err
}
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 7fe65c7ba..450f92645 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -30,6 +30,7 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/control/client"
"gvisor.dev/gvisor/pkg/control/server"
@@ -83,7 +84,7 @@ type Sandbox struct {
// child==true and the sandbox was waited on. This field allows for multiple
// threads to wait on sandbox and get the exit code, since Linux will return
// WaitStatus to one of the waiters only.
- status syscall.WaitStatus
+ status unix.WaitStatus
}
// Args is used to configure a new sandbox.
@@ -383,7 +384,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
binPath := specutils.ExePath
cmd := exec.Command(binPath, conf.ToFlags()...)
- cmd.SysProcAttr = &syscall.SysProcAttr{}
+ cmd.SysProcAttr = &unix.SysProcAttr{}
// Open the log files to pass to the sandbox as FDs.
//
@@ -739,7 +740,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
if args.Attached {
// Kill sandbox if parent process exits in attached mode.
- cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
+ cmd.SysProcAttr.Pdeathsig = unix.SIGKILL
// Tells boot that any process it creates must have pdeathsig set.
cmd.Args = append(cmd.Args, "--attached")
}
@@ -762,7 +763,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
//
// NOTE: The error message is checked because error types are lost over
// rpc calls.
- if strings.Contains(err.Error(), syscall.EACCES.Error()) {
+ if strings.Contains(err.Error(), unix.EACCES.Error()) {
if permsErr := checkBinaryPermissions(conf); permsErr != nil {
return fmt.Errorf("%v: %v", err, permsErr)
}
@@ -782,7 +783,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
}
// Wait waits for the containerized process to exit, and returns its WaitStatus.
-func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
+func (s *Sandbox) Wait(cid string) (unix.WaitStatus, error) {
log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID)
if conn, err := s.sandboxConnect(); err != nil {
@@ -790,14 +791,14 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
// There is nothing we can do for subcontainers. For the init container, we
// can try to get the sandbox exit code.
if !s.IsRootContainer(cid) {
- return syscall.WaitStatus(0), err
+ return unix.WaitStatus(0), err
}
log.Warningf("Wait on container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err)
} else {
defer conn.Close()
// Try the Wait RPC to the sandbox.
- var ws syscall.WaitStatus
+ var ws unix.WaitStatus
err = conn.Call(boot.ContainerWait, &cid, &ws)
if err == nil {
// It worked!
@@ -805,7 +806,7 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
}
// See comment above.
if !s.IsRootContainer(cid) {
- return syscall.WaitStatus(0), err
+ return unix.WaitStatus(0), err
}
// The sandbox may have exited after we connected, but before
@@ -817,10 +818,10 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
// The best we can do is ask Linux what the sandbox exit status was, since in
// most cases that will be the same as the container exit status.
if err := s.waitForStopped(); err != nil {
- return syscall.WaitStatus(0), err
+ return unix.WaitStatus(0), err
}
if !s.child {
- return syscall.WaitStatus(0), fmt.Errorf("sandbox no longer running and its exit status is unavailable")
+ return unix.WaitStatus(0), fmt.Errorf("sandbox no longer running and its exit status is unavailable")
}
s.statusMu.Lock()
@@ -830,9 +831,9 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
// WaitPID waits for process 'pid' in the container's sandbox and returns its
// WaitStatus.
-func (s *Sandbox) WaitPID(cid string, pid int32) (syscall.WaitStatus, error) {
+func (s *Sandbox) WaitPID(cid string, pid int32) (unix.WaitStatus, error) {
log.Debugf("Waiting for PID %d in sandbox %q", pid, s.ID)
- var ws syscall.WaitStatus
+ var ws unix.WaitStatus
conn, err := s.sandboxConnect()
if err != nil {
return ws, err
@@ -861,7 +862,7 @@ func (s *Sandbox) destroy() error {
log.Debugf("Destroy sandbox %q", s.ID)
if s.Pid != 0 {
log.Debugf("Killing sandbox %q", s.ID)
- if err := syscall.Kill(s.Pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH {
+ if err := unix.Kill(s.Pid, unix.SIGKILL); err != nil && err != unix.ESRCH {
return fmt.Errorf("killing sandbox %q PID %q: %v", s.ID, s.Pid, err)
}
if err := s.waitForStopped(); err != nil {
@@ -875,7 +876,7 @@ func (s *Sandbox) destroy() error {
// SignalContainer sends the signal to a container in the sandbox. If all is
// true and signal is SIGKILL, then waits for all processes to exit before
// returning.
-func (s *Sandbox) SignalContainer(cid string, sig syscall.Signal, all bool) error {
+func (s *Sandbox) SignalContainer(cid string, sig unix.Signal, all bool) error {
log.Debugf("Signal sandbox %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
@@ -903,7 +904,7 @@ func (s *Sandbox) SignalContainer(cid string, sig syscall.Signal, all bool) erro
// fgProcess is true, then the signal is sent to the foreground process group
// in the same session that PID belongs to. This is only valid if the process
// is attached to a host TTY.
-func (s *Sandbox) SignalProcess(cid string, pid int32, sig syscall.Signal, fgProcess bool) error {
+func (s *Sandbox) SignalProcess(cid string, pid int32, sig unix.Signal, fgProcess bool) error {
log.Debugf("Signal sandbox %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
@@ -984,7 +985,7 @@ func (s *Sandbox) Resume(cid string) error {
func (s *Sandbox) IsRunning() bool {
if s.Pid != 0 {
// Send a signal 0 to the sandbox process.
- if err := syscall.Kill(s.Pid, 0); err == nil {
+ if err := unix.Kill(s.Pid, 0); err == nil {
// Succeeded, process is running.
return true
}
@@ -1147,7 +1148,7 @@ func (s *Sandbox) waitForStopped() error {
}
// The sandbox process is a child of the current process,
// so we can wait it and collect its zombie.
- wpid, err := syscall.Wait4(int(s.Pid), &s.status, syscall.WNOHANG, nil)
+ wpid, err := unix.Wait4(int(s.Pid), &s.status, unix.WNOHANG, nil)
if err != nil {
return fmt.Errorf("error waiting the sandbox process: %v", err)
}
diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go
index 138aa4dd1..b62504a8c 100644
--- a/runsc/specutils/fs.go
+++ b/runsc/specutils/fs.go
@@ -18,9 +18,9 @@ import (
"fmt"
"math/bits"
"path"
- "syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
)
type mapping struct {
@@ -31,48 +31,48 @@ type mapping struct {
// optionsMap maps mount propagation-related OCI filesystem options to mount(2)
// syscall flags.
var optionsMap = map[string]mapping{
- "acl": {set: true, val: syscall.MS_POSIXACL},
- "async": {set: false, val: syscall.MS_SYNCHRONOUS},
- "atime": {set: false, val: syscall.MS_NOATIME},
- "bind": {set: true, val: syscall.MS_BIND},
+ "acl": {set: true, val: unix.MS_POSIXACL},
+ "async": {set: false, val: unix.MS_SYNCHRONOUS},
+ "atime": {set: false, val: unix.MS_NOATIME},
+ "bind": {set: true, val: unix.MS_BIND},
"defaults": {set: true, val: 0},
- "dev": {set: false, val: syscall.MS_NODEV},
- "diratime": {set: false, val: syscall.MS_NODIRATIME},
- "dirsync": {set: true, val: syscall.MS_DIRSYNC},
- "exec": {set: false, val: syscall.MS_NOEXEC},
- "noexec": {set: true, val: syscall.MS_NOEXEC},
- "iversion": {set: true, val: syscall.MS_I_VERSION},
- "loud": {set: false, val: syscall.MS_SILENT},
- "mand": {set: true, val: syscall.MS_MANDLOCK},
- "noacl": {set: false, val: syscall.MS_POSIXACL},
- "noatime": {set: true, val: syscall.MS_NOATIME},
- "nodev": {set: true, val: syscall.MS_NODEV},
- "nodiratime": {set: true, val: syscall.MS_NODIRATIME},
- "noiversion": {set: false, val: syscall.MS_I_VERSION},
- "nomand": {set: false, val: syscall.MS_MANDLOCK},
- "norelatime": {set: false, val: syscall.MS_RELATIME},
- "nostrictatime": {set: false, val: syscall.MS_STRICTATIME},
- "nosuid": {set: true, val: syscall.MS_NOSUID},
- "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC},
- "relatime": {set: true, val: syscall.MS_RELATIME},
- "remount": {set: true, val: syscall.MS_REMOUNT},
- "ro": {set: true, val: syscall.MS_RDONLY},
- "rw": {set: false, val: syscall.MS_RDONLY},
- "silent": {set: true, val: syscall.MS_SILENT},
- "strictatime": {set: true, val: syscall.MS_STRICTATIME},
- "suid": {set: false, val: syscall.MS_NOSUID},
- "sync": {set: true, val: syscall.MS_SYNCHRONOUS},
+ "dev": {set: false, val: unix.MS_NODEV},
+ "diratime": {set: false, val: unix.MS_NODIRATIME},
+ "dirsync": {set: true, val: unix.MS_DIRSYNC},
+ "exec": {set: false, val: unix.MS_NOEXEC},
+ "noexec": {set: true, val: unix.MS_NOEXEC},
+ "iversion": {set: true, val: unix.MS_I_VERSION},
+ "loud": {set: false, val: unix.MS_SILENT},
+ "mand": {set: true, val: unix.MS_MANDLOCK},
+ "noacl": {set: false, val: unix.MS_POSIXACL},
+ "noatime": {set: true, val: unix.MS_NOATIME},
+ "nodev": {set: true, val: unix.MS_NODEV},
+ "nodiratime": {set: true, val: unix.MS_NODIRATIME},
+ "noiversion": {set: false, val: unix.MS_I_VERSION},
+ "nomand": {set: false, val: unix.MS_MANDLOCK},
+ "norelatime": {set: false, val: unix.MS_RELATIME},
+ "nostrictatime": {set: false, val: unix.MS_STRICTATIME},
+ "nosuid": {set: true, val: unix.MS_NOSUID},
+ "rbind": {set: true, val: unix.MS_BIND | unix.MS_REC},
+ "relatime": {set: true, val: unix.MS_RELATIME},
+ "remount": {set: true, val: unix.MS_REMOUNT},
+ "ro": {set: true, val: unix.MS_RDONLY},
+ "rw": {set: false, val: unix.MS_RDONLY},
+ "silent": {set: true, val: unix.MS_SILENT},
+ "strictatime": {set: true, val: unix.MS_STRICTATIME},
+ "suid": {set: false, val: unix.MS_NOSUID},
+ "sync": {set: true, val: unix.MS_SYNCHRONOUS},
}
// propOptionsMap is similar to optionsMap, but it lists propagation options
// that cannot be used together with other flags.
var propOptionsMap = map[string]mapping{
- "private": {set: true, val: syscall.MS_PRIVATE},
- "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC},
- "slave": {set: true, val: syscall.MS_SLAVE},
- "rslave": {set: true, val: syscall.MS_SLAVE | syscall.MS_REC},
- "unbindable": {set: true, val: syscall.MS_UNBINDABLE},
- "runbindable": {set: true, val: syscall.MS_UNBINDABLE | syscall.MS_REC},
+ "private": {set: true, val: unix.MS_PRIVATE},
+ "rprivate": {set: true, val: unix.MS_PRIVATE | unix.MS_REC},
+ "slave": {set: true, val: unix.MS_SLAVE},
+ "rslave": {set: true, val: unix.MS_SLAVE | unix.MS_REC},
+ "unbindable": {set: true, val: unix.MS_UNBINDABLE},
+ "runbindable": {set: true, val: unix.MS_UNBINDABLE | unix.MS_REC},
}
// invalidOptions list options not allowed.
@@ -139,7 +139,7 @@ func ValidateMountOptions(opts []string) error {
// correct.
func validateRootfsPropagation(opt string) error {
flags := PropOptionsToFlags([]string{opt})
- if flags&(syscall.MS_SLAVE|syscall.MS_PRIVATE) == 0 {
+ if flags&(unix.MS_SLAVE|unix.MS_PRIVATE) == 0 {
return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt)
}
return validatePropagation(opt)
@@ -147,7 +147,7 @@ func validateRootfsPropagation(opt string) error {
func validatePropagation(opt string) error {
flags := PropOptionsToFlags([]string{opt})
- exclusive := flags & (syscall.MS_SLAVE | syscall.MS_PRIVATE | syscall.MS_SHARED | syscall.MS_UNBINDABLE)
+ exclusive := flags & (unix.MS_SLAVE | unix.MS_PRIVATE | unix.MS_SHARED | unix.MS_UNBINDABLE)
if bits.OnesCount32(exclusive) > 1 {
return fmt.Errorf("mount propagation options are mutually exclusive: %q", opt)
}
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index 23001d67c..69d7ba5c4 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -109,7 +109,7 @@ func FilterNS(filter []specs.LinuxNamespaceType, s *specs.Spec) []specs.LinuxNam
// setNS sets the namespace of the given type. It must be called with
// OSThreadLocked.
func setNS(fd, nsType uintptr) error {
- if _, _, err := syscall.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 {
+ if _, _, err := unix.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 {
return err
}
return nil
@@ -158,7 +158,7 @@ func StartInNS(cmd *exec.Cmd, nss []specs.LinuxNamespace) error {
defer runtime.UnlockOSThread()
if cmd.SysProcAttr == nil {
- cmd.SysProcAttr = &syscall.SysProcAttr{}
+ cmd.SysProcAttr = &unix.SysProcAttr{}
}
for _, ns := range nss {
@@ -185,7 +185,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) {
return
}
if cmd.SysProcAttr == nil {
- cmd.SysProcAttr = &syscall.SysProcAttr{}
+ cmd.SysProcAttr = &unix.SysProcAttr{}
}
for _, idMap := range s.Linux.UIDMappings {
log.Infof("Mapping host uid %d to container uid %d (size=%d)", idMap.HostID, idMap.ContainerID, idMap.Size)
@@ -241,8 +241,8 @@ func MaybeRunAsRoot() error {
cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
- cmd.SysProcAttr = &syscall.SysProcAttr{
- Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
+ cmd.SysProcAttr = &unix.SysProcAttr{
+ Cloneflags: unix.CLONE_NEWUSER | unix.CLONE_NEWNS,
// Set current user/group as root inside the namespace. Since we may not
// have CAP_SETUID/CAP_SETGID, just map root to the current user/group.
UidMappings: []syscall.SysProcIDMap{
@@ -255,7 +255,7 @@ func MaybeRunAsRoot() error {
GidMappingsEnableSetgroups: false,
// Make sure child is killed when the parent terminates.
- Pdeathsig: syscall.SIGKILL,
+ Pdeathsig: unix.SIGKILL,
}
cmd.Env = os.Environ()
diff --git a/runsc/specutils/seccomp/BUILD b/runsc/specutils/seccomp/BUILD
index 3520f2d6d..e9e647d82 100644
--- a/runsc/specutils/seccomp/BUILD
+++ b/runsc/specutils/seccomp/BUILD
@@ -18,6 +18,7 @@ go_library(
"//pkg/sentry/kernel",
"//pkg/sentry/syscalls/linux",
"@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
@@ -30,5 +31,6 @@ go_test(
"//pkg/binary",
"//pkg/bpf",
"@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/specutils/seccomp/seccomp.go b/runsc/specutils/seccomp/seccomp.go
index 5932f7a41..0ef7a4d54 100644
--- a/runsc/specutils/seccomp/seccomp.go
+++ b/runsc/specutils/seccomp/seccomp.go
@@ -18,9 +18,9 @@ package seccomp
import (
"fmt"
- "syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
"gvisor.dev/gvisor/pkg/log"
@@ -33,9 +33,9 @@ var (
killThreadAction = linux.SECCOMP_RET_KILL_THREAD
trapAction = linux.SECCOMP_RET_TRAP
// runc always returns EPERM as the errorcode for SECCOMP_RET_ERRNO
- errnoAction = linux.SECCOMP_RET_ERRNO.WithReturnCode(uint16(syscall.EPERM))
+ errnoAction = linux.SECCOMP_RET_ERRNO.WithReturnCode(uint16(unix.EPERM))
// runc always returns EPERM as the errorcode for SECCOMP_RET_TRACE
- traceAction = linux.SECCOMP_RET_TRACE.WithReturnCode(uint16(syscall.EPERM))
+ traceAction = linux.SECCOMP_RET_TRACE.WithReturnCode(uint16(unix.EPERM))
allowAction = linux.SECCOMP_RET_ALLOW
)
diff --git a/runsc/specutils/seccomp/seccomp_test.go b/runsc/specutils/seccomp/seccomp_test.go
index 850c237ba..11a6c8daa 100644
--- a/runsc/specutils/seccomp/seccomp_test.go
+++ b/runsc/specutils/seccomp/seccomp_test.go
@@ -16,10 +16,10 @@ package seccomp
import (
"fmt"
- "syscall"
"testing"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/bpf"
)
@@ -184,7 +184,7 @@ var (
Args: []specs.LinuxSeccompArg{
{
Index: 0,
- Value: syscall.CLONE_FS,
+ Value: unix.CLONE_FS,
Op: specs.OpEqualTo,
},
},
@@ -192,7 +192,7 @@ var (
},
},
},
- input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}),
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_FS}),
expected: uint32(errnoAction),
},
{
@@ -207,12 +207,12 @@ var (
Args: []specs.LinuxSeccompArg{
{
Index: 0,
- Value: syscall.CLONE_FS,
+ Value: unix.CLONE_FS,
Op: specs.OpEqualTo,
},
{
Index: 0,
- Value: syscall.CLONE_VM,
+ Value: unix.CLONE_VM,
Op: specs.OpEqualTo,
},
},
@@ -220,7 +220,7 @@ var (
},
},
},
- input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}),
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_FS}),
expected: uint32(errnoAction),
},
{
@@ -235,12 +235,12 @@ var (
Args: []specs.LinuxSeccompArg{
{
Index: 1,
- Value: syscall.SOL_SOCKET,
+ Value: unix.SOL_SOCKET,
Op: specs.OpEqualTo,
},
{
Index: 2,
- Value: syscall.SO_PEERCRED,
+ Value: unix.SO_PEERCRED,
Op: specs.OpEqualTo,
},
},
@@ -248,7 +248,7 @@ var (
},
},
},
- input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET, syscall.SO_PEERCRED}),
+ input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, unix.SOL_SOCKET, unix.SO_PEERCRED}),
expected: uint32(errnoAction),
},
{
@@ -263,12 +263,12 @@ var (
Args: []specs.LinuxSeccompArg{
{
Index: 1,
- Value: syscall.SOL_SOCKET,
+ Value: unix.SOL_SOCKET,
Op: specs.OpEqualTo,
},
{
Index: 2,
- Value: syscall.SO_PEERCRED,
+ Value: unix.SO_PEERCRED,
Op: specs.OpEqualTo,
},
},
@@ -276,7 +276,7 @@ var (
},
},
},
- input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET}),
+ input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, unix.SOL_SOCKET}),
expected: uint32(allowAction),
},
{
@@ -291,7 +291,7 @@ var (
Args: []specs.LinuxSeccompArg{
{
Index: 0,
- Value: syscall.CLONE_FS,
+ Value: unix.CLONE_FS,
Op: specs.OpEqualTo,
},
},
@@ -299,7 +299,7 @@ var (
},
},
},
- input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_VM}),
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_VM}),
expected: uint32(allowAction),
},
{
@@ -314,8 +314,8 @@ var (
Args: []specs.LinuxSeccompArg{
{
Index: 0,
- Value: syscall.CLONE_FS,
- ValueTwo: syscall.CLONE_FS,
+ Value: unix.CLONE_FS,
+ ValueTwo: unix.CLONE_FS,
Op: specs.OpMaskedEqual,
},
},
@@ -323,7 +323,7 @@ var (
},
},
},
- input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS | syscall.CLONE_VM}),
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_FS | unix.CLONE_VM}),
expected: uint32(errnoAction),
},
{
@@ -338,8 +338,8 @@ var (
Args: []specs.LinuxSeccompArg{
{
Index: 0,
- Value: syscall.CLONE_FS | syscall.CLONE_VM,
- ValueTwo: syscall.CLONE_FS | syscall.CLONE_VM,
+ Value: unix.CLONE_FS | unix.CLONE_VM,
+ ValueTwo: unix.CLONE_FS | unix.CLONE_VM,
Op: specs.OpMaskedEqual,
},
},
@@ -347,7 +347,7 @@ var (
},
},
},
- input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}),
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_FS}),
expected: uint32(allowAction),
},
{
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index ea55bbc7d..5ba38bfe4 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -26,12 +26,12 @@ import (
"path/filepath"
"strconv"
"strings"
- "syscall"
"time"
"github.com/cenkalti/backoff"
"github.com/mohae/deepcopy"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
"gvisor.dev/gvisor/pkg/log"
@@ -375,9 +375,9 @@ func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) er
// Check if the process is still running.
// If the process is alive, child is 0 because of the NOHANG option.
// If the process has terminated, child equals the process id.
- var ws syscall.WaitStatus
- var ru syscall.Rusage
- child, err := syscall.Wait4(pid, &ws, syscall.WNOHANG, &ru)
+ var ws unix.WaitStatus
+ var ru unix.Rusage
+ child, err := unix.Wait4(pid, &ws, unix.WNOHANG, &ru)
if err != nil {
return backoff.Permanent(fmt.Errorf("error waiting for process: %v", err))
} else if child == pid {
@@ -437,7 +437,7 @@ func Mount(src, dst, typ string, flags uint32) error {
return fmt.Errorf("mkdir(%q) failed: %v", parent, err)
}
// Create the destination file if it does not exist.
- f, err := os.OpenFile(dst, syscall.O_CREAT, 0777)
+ f, err := os.OpenFile(dst, unix.O_CREAT, 0777)
if err != nil {
return fmt.Errorf("open(%q) failed: %v", dst, err)
}
@@ -445,7 +445,7 @@ func Mount(src, dst, typ string, flags uint32) error {
}
// Do the mount.
- if err := syscall.Mount(src, dst, typ, uintptr(flags), ""); err != nil {
+ if err := unix.Mount(src, dst, typ, uintptr(flags), ""); err != nil {
return fmt.Errorf("mount(%q, %q, %d) failed: %v", src, dst, flags, err)
}
return nil
@@ -466,7 +466,7 @@ func ContainsStr(strs []string, str string) bool {
func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
for {
r1, r2, err := f()
- if err != syscall.EINTR {
+ if err != unix.EINTR {
return r1, r2, err
}
}