diff options
Diffstat (limited to 'runsc')
63 files changed, 1997 insertions, 1900 deletions
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index a3a76b609..28e82e117 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -17,8 +17,8 @@ package boot import ( "fmt" "os" - "syscall" + "golang.org/x/sys/unix" "google.golang.org/protobuf/proto" "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/log" @@ -93,19 +93,19 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) { tr := c.trackers[sysnr] if tr == nil { switch sysnr { - case syscall.SYS_PRCTL: + case unix.SYS_PRCTL: // args: cmd, ... tr = newArgsTracker(0) - case syscall.SYS_IOCTL, syscall.SYS_EPOLL_CTL, syscall.SYS_SHMCTL, syscall.SYS_FUTEX, syscall.SYS_FALLOCATE: + case unix.SYS_IOCTL, unix.SYS_EPOLL_CTL, unix.SYS_SHMCTL, unix.SYS_FUTEX, unix.SYS_FALLOCATE: // args: fd/addr, cmd, ... tr = newArgsTracker(1) - case syscall.SYS_GETSOCKOPT, syscall.SYS_SETSOCKOPT: + case unix.SYS_GETSOCKOPT, unix.SYS_SETSOCKOPT: // args: fd, level, name, ... tr = newArgsTracker(1, 2) - case syscall.SYS_SEMCTL: + case unix.SYS_SEMCTL: // args: semid, semnum, cmd, ... 
tr = newArgsTracker(2) @@ -131,7 +131,7 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) { } func (c *compatEmitter) emitUncaughtSignal(msg *ucspb.UncaughtSignal) { - sig := syscall.Signal(msg.SignalNumber) + sig := unix.Signal(msg.SignalNumber) c.sink.Infof( "Uncaught signal: %q (%d), PID: %d, TID: %d, fault addr: %#x", sig, msg.SignalNumber, msg.Pid, msg.Tid, msg.FaultAddr) diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go index 8eb76b2ba..7e13ff87c 100644 --- a/runsc/boot/compat_amd64.go +++ b/runsc/boot/compat_amd64.go @@ -16,8 +16,8 @@ package boot import ( "fmt" - "syscall" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/sentry/arch" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" @@ -92,7 +92,7 @@ func syscallNum(regs *rpb.Registers) uint64 { func newArchArgsTracker(sysnr uint64) syscallTracker { switch sysnr { - case syscall.SYS_ARCH_PRCTL: + case unix.SYS_ARCH_PRCTL: // args: cmd, ... return newArgsTracker(0) } diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 5e849cb37..1cd5fba5c 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -18,9 +18,9 @@ import ( "errors" "fmt" "os" - "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/control/server" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" @@ -366,7 +366,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { case 2: // The device file is donated to the platform. // Can't take ownership away from os.File. dup them to get a new FD. 
- fd, err := syscall.Dup(int(o.Files[1].Fd())) + fd, err := unix.Dup(int(o.Files[1].Fd())) if err != nil { return fmt.Errorf("failed to dup file: %v", err) } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 2a8c916d5..49b503f99 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -16,7 +16,6 @@ package filter import ( "os" - "syscall" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" @@ -26,19 +25,19 @@ import ( // allowedSyscalls is the set of syscalls executed by the Sentry to the host OS. var allowedSyscalls = seccomp.SyscallRules{ - syscall.SYS_CLOCK_GETTIME: {}, - syscall.SYS_CLOSE: {}, - syscall.SYS_DUP: {}, - syscall.SYS_DUP3: []seccomp.Rule{ + unix.SYS_CLOCK_GETTIME: {}, + unix.SYS_CLOSE: {}, + unix.SYS_DUP: {}, + unix.SYS_DUP3: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.O_CLOEXEC), + seccomp.EqualTo(unix.O_CLOEXEC), }, }, - syscall.SYS_EPOLL_CREATE1: {}, - syscall.SYS_EPOLL_CTL: {}, - syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{ + unix.SYS_EPOLL_CREATE1: {}, + unix.SYS_EPOLL_CTL: {}, + unix.SYS_EPOLL_PWAIT: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, @@ -47,34 +46,34 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.EqualTo(0), }, }, - syscall.SYS_EVENTFD2: []seccomp.Rule{ + unix.SYS_EVENTFD2: []seccomp.Rule{ { seccomp.EqualTo(0), seccomp.EqualTo(0), }, }, - syscall.SYS_EXIT: {}, - syscall.SYS_EXIT_GROUP: {}, - syscall.SYS_FALLOCATE: {}, - syscall.SYS_FCHMOD: {}, - syscall.SYS_FCNTL: []seccomp.Rule{ + unix.SYS_EXIT: {}, + unix.SYS_EXIT_GROUP: {}, + unix.SYS_FALLOCATE: {}, + unix.SYS_FCHMOD: {}, + unix.SYS_FCNTL: []seccomp.Rule{ { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.F_GETFL), + seccomp.EqualTo(unix.F_GETFL), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.F_SETFL), + seccomp.EqualTo(unix.F_SETFL), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.F_GETFD), + seccomp.EqualTo(unix.F_GETFD), }, }, - syscall.SYS_FSTAT: {}, - 
syscall.SYS_FSYNC: {}, - syscall.SYS_FTRUNCATE: {}, - syscall.SYS_FUTEX: []seccomp.Rule{ + unix.SYS_FSTAT: {}, + unix.SYS_FSYNC: {}, + unix.SYS_FTRUNCATE: {}, + unix.SYS_FUTEX: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), @@ -109,35 +108,35 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.EqualTo(0), }, }, - syscall.SYS_GETPID: {}, + unix.SYS_GETPID: {}, unix.SYS_GETRANDOM: {}, - syscall.SYS_GETSOCKOPT: []seccomp.Rule{ + unix.SYS_GETSOCKOPT: []seccomp.Rule{ { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_DOMAIN), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_DOMAIN), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_TYPE), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_TYPE), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_ERROR), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_ERROR), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_SNDBUF), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_SNDBUF), }, }, - syscall.SYS_GETTID: {}, - syscall.SYS_GETTIMEOFDAY: {}, + unix.SYS_GETTID: {}, + unix.SYS_GETTIMEOFDAY: {}, // SYS_IOCTL is needed for terminal support, but we only allow // setting/getting termios and winsize. 
- syscall.SYS_IOCTL: []seccomp.Rule{ + unix.SYS_IOCTL: []seccomp.Rule{ { seccomp.MatchAny{}, /* fd */ seccomp.EqualTo(linux.TCGETS), @@ -169,94 +168,94 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.MatchAny{}, /* winsize struct */ }, }, - syscall.SYS_LSEEK: {}, - syscall.SYS_MADVISE: {}, + unix.SYS_LSEEK: {}, + unix.SYS_MADVISE: {}, unix.SYS_MEMBARRIER: []seccomp.Rule{ { seccomp.EqualTo(linux.MEMBARRIER_CMD_GLOBAL), seccomp.EqualTo(0), }, }, - syscall.SYS_MINCORE: {}, + unix.SYS_MINCORE: {}, // Used by the Go runtime as a temporarily workaround for a Linux // 5.2-5.4 bug. // // See src/runtime/os_linux_x86.go. // // TODO(b/148688965): Remove once this is gone from Go. - syscall.SYS_MLOCK: []seccomp.Rule{ + unix.SYS_MLOCK: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.EqualTo(4096), }, }, - syscall.SYS_MMAP: []seccomp.Rule{ + unix.SYS_MMAP: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MAP_SHARED), + seccomp.EqualTo(unix.MAP_SHARED), }, { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MAP_PRIVATE), + seccomp.EqualTo(unix.MAP_PRIVATE), }, { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), + seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS), }, { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK), + seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_STACK), }, { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE), + seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_NORESERVE), }, { seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.PROT_WRITE | syscall.PROT_READ), - seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), + 
seccomp.EqualTo(unix.PROT_WRITE | unix.PROT_READ), + seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_FIXED), }, }, - syscall.SYS_MPROTECT: {}, - syscall.SYS_MUNMAP: {}, - syscall.SYS_NANOSLEEP: {}, - syscall.SYS_PPOLL: {}, - syscall.SYS_PREAD64: {}, - syscall.SYS_PREADV: {}, - unix.SYS_PREADV2: {}, - syscall.SYS_PWRITE64: {}, - syscall.SYS_PWRITEV: {}, - unix.SYS_PWRITEV2: {}, - syscall.SYS_READ: {}, - syscall.SYS_RECVMSG: []seccomp.Rule{ + unix.SYS_MPROTECT: {}, + unix.SYS_MUNMAP: {}, + unix.SYS_NANOSLEEP: {}, + unix.SYS_PPOLL: {}, + unix.SYS_PREAD64: {}, + unix.SYS_PREADV: {}, + unix.SYS_PREADV2: {}, + unix.SYS_PWRITE64: {}, + unix.SYS_PWRITEV: {}, + unix.SYS_PWRITEV2: {}, + unix.SYS_READ: {}, + unix.SYS_RECVMSG: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), + seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_TRUNC), }, { seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), + seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_TRUNC | unix.MSG_PEEK), }, }, - syscall.SYS_RECVMMSG: []seccomp.Rule{ + unix.SYS_RECVMMSG: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.EqualTo(fdbased.MaxMsgsPerRecv), - seccomp.EqualTo(syscall.MSG_DONTWAIT), + seccomp.EqualTo(unix.MSG_DONTWAIT), seccomp.EqualTo(0), }, }, @@ -265,34 +264,34 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MSG_DONTWAIT), + seccomp.EqualTo(unix.MSG_DONTWAIT), seccomp.EqualTo(0), }, }, - syscall.SYS_RESTART_SYSCALL: {}, - syscall.SYS_RT_SIGACTION: {}, - syscall.SYS_RT_SIGPROCMASK: {}, - syscall.SYS_RT_SIGRETURN: {}, - syscall.SYS_SCHED_YIELD: {}, - syscall.SYS_SENDMSG: []seccomp.Rule{ + unix.SYS_RESTART_SYSCALL: {}, + unix.SYS_RT_SIGACTION: {}, + unix.SYS_RT_SIGPROCMASK: {}, + unix.SYS_RT_SIGRETURN: {}, + unix.SYS_SCHED_YIELD: {}, + unix.SYS_SENDMSG: 
[]seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), + seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_NOSIGNAL), }, }, - syscall.SYS_SETITIMER: {}, - syscall.SYS_SHUTDOWN: []seccomp.Rule{ + unix.SYS_SETITIMER: {}, + unix.SYS_SHUTDOWN: []seccomp.Rule{ // Used by fs/host to shutdown host sockets. - {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RD)}, - {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_WR)}, + {seccomp.MatchAny{}, seccomp.EqualTo(unix.SHUT_RD)}, + {seccomp.MatchAny{}, seccomp.EqualTo(unix.SHUT_WR)}, // Used by unet to shutdown connections. - {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)}, + {seccomp.MatchAny{}, seccomp.EqualTo(unix.SHUT_RDWR)}, }, - syscall.SYS_SIGALTSTACK: {}, - unix.SYS_STATX: {}, - syscall.SYS_SYNC_FILE_RANGE: {}, - syscall.SYS_TEE: []seccomp.Rule{ + unix.SYS_SIGALTSTACK: {}, + unix.SYS_STATX: {}, + unix.SYS_SYNC_FILE_RANGE: {}, + unix.SYS_TEE: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, @@ -300,12 +299,12 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.EqualTo(unix.SPLICE_F_NONBLOCK), /* flags */ }, }, - syscall.SYS_TGKILL: []seccomp.Rule{ + unix.SYS_TGKILL: []seccomp.Rule{ { seccomp.EqualTo(uint64(os.Getpid())), }, }, - syscall.SYS_UTIMENSAT: []seccomp.Rule{ + unix.SYS_UTIMENSAT: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.EqualTo(0), /* null pathname */ @@ -313,9 +312,9 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.EqualTo(0), /* flags */ }, }, - syscall.SYS_WRITE: {}, + unix.SYS_WRITE: {}, // For rawfile.NonBlockingWriteIovec. - syscall.SYS_WRITEV: []seccomp.Rule{ + unix.SYS_WRITEV: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, @@ -327,313 +326,313 @@ var allowedSyscalls = seccomp.SyscallRules{ // hostInetFilters contains syscalls that are needed by sentry/socket/hostinet. 
func hostInetFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ - syscall.SYS_ACCEPT4: []seccomp.Rule{ + unix.SYS_ACCEPT4: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC), }, }, - syscall.SYS_BIND: {}, - syscall.SYS_CONNECT: {}, - syscall.SYS_GETPEERNAME: {}, - syscall.SYS_GETSOCKNAME: {}, - syscall.SYS_GETSOCKOPT: []seccomp.Rule{ + unix.SYS_BIND: {}, + unix.SYS_CONNECT: {}, + unix.SYS_GETPEERNAME: {}, + unix.SYS_GETSOCKNAME: {}, + unix.SYS_GETSOCKOPT: []seccomp.Rule{ { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_TOS), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_TOS), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_RECVTOS), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_RECVTOS), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_PKTINFO), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_PKTINFO), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_RECVORIGDSTADDR), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_RECVORIGDSTADDR), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_RECVERR), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_RECVERR), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), - seccomp.EqualTo(syscall.IPV6_TCLASS), + seccomp.EqualTo(unix.SOL_IPV6), + seccomp.EqualTo(unix.IPV6_TCLASS), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), - seccomp.EqualTo(syscall.IPV6_RECVTCLASS), + seccomp.EqualTo(unix.SOL_IPV6), + seccomp.EqualTo(unix.IPV6_RECVTCLASS), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), - seccomp.EqualTo(syscall.IPV6_RECVERR), + seccomp.EqualTo(unix.SOL_IPV6), + 
seccomp.EqualTo(unix.IPV6_RECVERR), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), - seccomp.EqualTo(syscall.IPV6_V6ONLY), + seccomp.EqualTo(unix.SOL_IPV6), + seccomp.EqualTo(unix.IPV6_V6ONLY), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(unix.SOL_IPV6), seccomp.EqualTo(linux.IPV6_RECVORIGDSTADDR), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_ERROR), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_ERROR), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_KEEPALIVE), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_KEEPALIVE), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_SNDBUF), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_SNDBUF), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_RCVBUF), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_RCVBUF), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_REUSEADDR), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_REUSEADDR), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_TYPE), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_TYPE), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_LINGER), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_LINGER), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_TIMESTAMP), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_TIMESTAMP), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_TCP), - seccomp.EqualTo(syscall.TCP_NODELAY), + seccomp.EqualTo(unix.SOL_TCP), + seccomp.EqualTo(unix.TCP_NODELAY), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_TCP), - 
seccomp.EqualTo(syscall.TCP_INFO), + seccomp.EqualTo(unix.SOL_TCP), + seccomp.EqualTo(unix.TCP_INFO), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(unix.SOL_TCP), seccomp.EqualTo(linux.TCP_INQ), }, }, - syscall.SYS_IOCTL: []seccomp.Rule{ + unix.SYS_IOCTL: []seccomp.Rule{ { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.TIOCOUTQ), + seccomp.EqualTo(unix.TIOCOUTQ), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.TIOCINQ), + seccomp.EqualTo(unix.TIOCINQ), }, }, - syscall.SYS_LISTEN: {}, - syscall.SYS_READV: {}, - syscall.SYS_RECVFROM: {}, - syscall.SYS_RECVMSG: {}, - syscall.SYS_SENDMSG: {}, - syscall.SYS_SENDTO: {}, - syscall.SYS_SETSOCKOPT: []seccomp.Rule{ + unix.SYS_LISTEN: {}, + unix.SYS_READV: {}, + unix.SYS_RECVFROM: {}, + unix.SYS_RECVMSG: {}, + unix.SYS_SENDMSG: {}, + unix.SYS_SENDTO: {}, + unix.SYS_SETSOCKOPT: []seccomp.Rule{ { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_SNDBUF), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_SNDBUF), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_RCVBUF), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_RCVBUF), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_REUSEADDR), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_REUSEADDR), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_TIMESTAMP), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_TIMESTAMP), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_TCP), - seccomp.EqualTo(syscall.TCP_NODELAY), + seccomp.EqualTo(unix.SOL_TCP), + seccomp.EqualTo(unix.TCP_NODELAY), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - 
seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(unix.SOL_TCP), seccomp.EqualTo(linux.TCP_INQ), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_TOS), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_TOS), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_RECVTOS), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_RECVTOS), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_PKTINFO), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_PKTINFO), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_RECVORIGDSTADDR), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_RECVORIGDSTADDR), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IP), - seccomp.EqualTo(syscall.IP_RECVERR), + seccomp.EqualTo(unix.SOL_IP), + seccomp.EqualTo(unix.IP_RECVERR), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), - seccomp.EqualTo(syscall.IPV6_TCLASS), + seccomp.EqualTo(unix.SOL_IPV6), + seccomp.EqualTo(unix.IPV6_TCLASS), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), - seccomp.EqualTo(syscall.IPV6_RECVTCLASS), + seccomp.EqualTo(unix.SOL_IPV6), + seccomp.EqualTo(unix.IPV6_RECVTCLASS), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(unix.SOL_IPV6), seccomp.EqualTo(linux.IPV6_RECVORIGDSTADDR), seccomp.MatchAny{}, seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), - seccomp.EqualTo(syscall.IPV6_RECVERR), + seccomp.EqualTo(unix.SOL_IPV6), + seccomp.EqualTo(unix.IPV6_RECVERR), seccomp.MatchAny{}, 
seccomp.EqualTo(4), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), - seccomp.EqualTo(syscall.IPV6_V6ONLY), + seccomp.EqualTo(unix.SOL_IPV6), + seccomp.EqualTo(unix.IPV6_V6ONLY), seccomp.MatchAny{}, seccomp.EqualTo(4), }, }, - syscall.SYS_SHUTDOWN: []seccomp.Rule{ + unix.SYS_SHUTDOWN: []seccomp.Rule{ { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SHUT_RD), + seccomp.EqualTo(unix.SHUT_RD), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SHUT_WR), + seccomp.EqualTo(unix.SHUT_WR), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SHUT_RDWR), + seccomp.EqualTo(unix.SHUT_RDWR), }, }, - syscall.SYS_SOCKET: []seccomp.Rule{ + unix.SYS_SOCKET: []seccomp.Rule{ { - seccomp.EqualTo(syscall.AF_INET), - seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(unix.AF_INET), + seccomp.EqualTo(unix.SOCK_STREAM | unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC), seccomp.EqualTo(0), }, { - seccomp.EqualTo(syscall.AF_INET), - seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(unix.AF_INET), + seccomp.EqualTo(unix.SOCK_DGRAM | unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC), seccomp.EqualTo(0), }, { - seccomp.EqualTo(syscall.AF_INET6), - seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(unix.AF_INET6), + seccomp.EqualTo(unix.SOCK_STREAM | unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC), seccomp.EqualTo(0), }, { - seccomp.EqualTo(syscall.AF_INET6), - seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(unix.AF_INET6), + seccomp.EqualTo(unix.SOCK_DGRAM | unix.SOCK_NONBLOCK | unix.SOCK_CLOEXEC), seccomp.EqualTo(0), }, }, - syscall.SYS_WRITEV: {}, + unix.SYS_WRITEV: {}, } } func controlServerFilters(fd int) seccomp.SyscallRules { return seccomp.SyscallRules{ - syscall.SYS_ACCEPT: []seccomp.Rule{ + unix.SYS_ACCEPT: []seccomp.Rule{ { seccomp.EqualTo(fd), }, }, - syscall.SYS_LISTEN: []seccomp.Rule{ + 
unix.SYS_LISTEN: []seccomp.Rule{ { seccomp.EqualTo(fd), seccomp.EqualTo(16 /* unet.backlog */), }, }, - syscall.SYS_GETSOCKOPT: []seccomp.Rule{ + unix.SYS_GETSOCKOPT: []seccomp.Rule{ { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_SOCKET), - seccomp.EqualTo(syscall.SO_PEERCRED), + seccomp.EqualTo(unix.SOL_SOCKET), + seccomp.EqualTo(unix.SO_PEERCRED), }, }, } diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go index cea5613b8..42cb8ed3a 100644 --- a/runsc/boot/filter/config_amd64.go +++ b/runsc/boot/filter/config_amd64.go @@ -17,30 +17,29 @@ package filter import ( - "syscall" - + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/seccomp" ) func init() { - allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{ + allowedSyscalls[unix.SYS_ARCH_PRCTL] = []seccomp.Rule{ // TODO(b/168828518): No longer used in Go 1.16+. {seccomp.EqualTo(linux.ARCH_SET_FS)}, } - allowedSyscalls[syscall.SYS_CLONE] = []seccomp.Rule{ + allowedSyscalls[unix.SYS_CLONE] = []seccomp.Rule{ // parent_tidptr and child_tidptr are always 0 because neither // CLONE_PARENT_SETTID nor CLONE_CHILD_SETTID are used. { seccomp.EqualTo( - syscall.CLONE_VM | - syscall.CLONE_FS | - syscall.CLONE_FILES | - syscall.CLONE_SETTLS | - syscall.CLONE_SIGHAND | - syscall.CLONE_SYSVSEM | - syscall.CLONE_THREAD), + unix.CLONE_VM | + unix.CLONE_FS | + unix.CLONE_FILES | + unix.CLONE_SETTLS | + unix.CLONE_SIGHAND | + unix.CLONE_SYSVSEM | + unix.CLONE_THREAD), seccomp.MatchAny{}, // newsp seccomp.EqualTo(0), // parent_tidptr seccomp.EqualTo(0), // child_tidptr @@ -49,12 +48,12 @@ func init() { { // TODO(b/168828518): No longer used in Go 1.16+ (on amd64). 
seccomp.EqualTo( - syscall.CLONE_VM | - syscall.CLONE_FS | - syscall.CLONE_FILES | - syscall.CLONE_SIGHAND | - syscall.CLONE_SYSVSEM | - syscall.CLONE_THREAD), + unix.CLONE_VM | + unix.CLONE_FS | + unix.CLONE_FILES | + unix.CLONE_SIGHAND | + unix.CLONE_SYSVSEM | + unix.CLONE_THREAD), seccomp.MatchAny{}, // newsp seccomp.EqualTo(0), // parent_tidptr seccomp.EqualTo(0), // child_tidptr diff --git a/runsc/boot/filter/config_arm64.go b/runsc/boot/filter/config_arm64.go index 37313f97f..f162f87ff 100644 --- a/runsc/boot/filter/config_arm64.go +++ b/runsc/boot/filter/config_arm64.go @@ -17,21 +17,20 @@ package filter import ( - "syscall" - + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/seccomp" ) func init() { - allowedSyscalls[syscall.SYS_CLONE] = []seccomp.Rule{ + allowedSyscalls[unix.SYS_CLONE] = []seccomp.Rule{ { seccomp.EqualTo( - syscall.CLONE_VM | - syscall.CLONE_FS | - syscall.CLONE_FILES | - syscall.CLONE_SIGHAND | - syscall.CLONE_SYSVSEM | - syscall.CLONE_THREAD), + unix.CLONE_VM | + unix.CLONE_FS | + unix.CLONE_FILES | + unix.CLONE_SIGHAND | + unix.CLONE_SYSVSEM | + unix.CLONE_THREAD), seccomp.MatchAny{}, // newsp // These arguments are left uninitialized by the Go // runtime, so they may be anything (and are unused by diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go index 7b8669595..89b66a6da 100644 --- a/runsc/boot/filter/config_profile.go +++ b/runsc/boot/filter/config_profile.go @@ -15,19 +15,18 @@ package filter import ( - "syscall" - + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/seccomp" ) // profileFilters returns extra syscalls made by runtime/pprof package. 
func profileFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ - syscall.SYS_OPENAT: []seccomp.Rule{ + unix.SYS_OPENAT: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), + seccomp.EqualTo(unix.O_RDONLY | unix.O_LARGEFILE | unix.O_CLOEXEC), }, }, } diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go index 209e646a7..41baa78cd 100644 --- a/runsc/boot/filter/extra_filters_msan.go +++ b/runsc/boot/filter/extra_filters_msan.go @@ -17,8 +17,7 @@ package filter import ( - "syscall" - + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/seccomp" ) @@ -26,9 +25,9 @@ import ( func instrumentationFilters() seccomp.SyscallRules { Report("MSAN is enabled: syscall filters less restrictive!") return seccomp.SyscallRules{ - syscall.SYS_CLONE: {}, - syscall.SYS_MMAP: {}, - syscall.SYS_SCHED_GETAFFINITY: {}, - syscall.SYS_SET_ROBUST_LIST: {}, + unix.SYS_CLONE: {}, + unix.SYS_MMAP: {}, + unix.SYS_SCHED_GETAFFINITY: {}, + unix.SYS_SET_ROBUST_LIST: {}, } } diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go index 5b99eb8cd..79b2104f0 100644 --- a/runsc/boot/filter/extra_filters_race.go +++ b/runsc/boot/filter/extra_filters_race.go @@ -17,8 +17,7 @@ package filter import ( - "syscall" - + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/seccomp" ) @@ -26,17 +25,17 @@ import ( func instrumentationFilters() seccomp.SyscallRules { Report("TSAN is enabled: syscall filters less restrictive!") return seccomp.SyscallRules{ - syscall.SYS_BRK: {}, - syscall.SYS_CLOCK_NANOSLEEP: {}, - syscall.SYS_CLONE: {}, - syscall.SYS_FUTEX: {}, - syscall.SYS_MMAP: {}, - syscall.SYS_MUNLOCK: {}, - syscall.SYS_NANOSLEEP: {}, - syscall.SYS_OPEN: {}, - syscall.SYS_OPENAT: {}, - syscall.SYS_SET_ROBUST_LIST: {}, + unix.SYS_BRK: {}, + unix.SYS_CLOCK_NANOSLEEP: {}, + unix.SYS_CLONE: {}, + unix.SYS_FUTEX: {}, + unix.SYS_MMAP: {}, + 
unix.SYS_MUNLOCK: {}, + unix.SYS_NANOSLEEP: {}, + unix.SYS_OPEN: {}, + unix.SYS_OPENAT: {}, + unix.SYS_SET_ROBUST_LIST: {}, // Used within glibc's malloc. - syscall.SYS_TIME: {}, + unix.SYS_TIME: {}, } } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 2b0d2cd51..77f632bb9 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -20,9 +20,9 @@ import ( "sort" "strconv" "strings" - "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" @@ -312,11 +312,11 @@ func setupContainerFS(ctx context.Context, conf *config.Config, mntr *containerM } func adjustDirentCache(k *kernel.Kernel) error { - var hl syscall.Rlimit - if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &hl); err != nil { + var hl unix.Rlimit + if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &hl); err != nil { return fmt.Errorf("getting RLIMIT_NOFILE: %v", err) } - if int64(hl.Cur) != syscall.RLIM_INFINITY { + if hl.Cur != unix.RLIM_INFINITY { newSize := hl.Cur / 2 if newSize < gofer.DefaultDirentCacheSize { log.Infof("Setting gofer dirent cache size to %d", newSize) @@ -844,10 +844,10 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Confi // than simply printed to the logs for the 'runsc boot' command. // // We check the error message string rather than type because the - // actual error types (syscall.EIO, syscall.EPIPE) are lost by file system + // actual error types (unix.EIO, unix.EPIPE) are lost by file system // implementation (e.g. p9). // TODO(gvisor.dev/issue/1765): Remove message when bug is resolved. 
- if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) { + if strings.Contains(err.Error(), unix.EIO.Error()) || strings.Contains(err.Error(), unix.EPIPE.Error()) { return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug")) } return err diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go index ce62236e5..3d2b3506d 100644 --- a/runsc/boot/limits.go +++ b/runsc/boot/limits.go @@ -16,9 +16,9 @@ package boot import ( "fmt" - "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sync" @@ -104,9 +104,9 @@ func (d *defs) initDefaults() error { // Read host limits that directly affect the sandbox and adjust the defaults // based on them. - for _, res := range []int{syscall.RLIMIT_FSIZE, syscall.RLIMIT_NOFILE} { - var hl syscall.Rlimit - if err := syscall.Getrlimit(res, &hl); err != nil { + for _, res := range []int{unix.RLIMIT_FSIZE, unix.RLIMIT_NOFILE} { + var hl unix.Rlimit + if err := unix.Getrlimit(res, &hl); err != nil { return err } diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index b77b4762e..3121ca6eb 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -19,7 +19,6 @@ import ( "math/rand" "os" "reflect" - "syscall" "testing" "time" @@ -78,7 +77,7 @@ func testSpec() *specs.Spec { // sandbox side of the connection, and a function that when called will stop the // gofer. 
func startGofer(root string) (int, func(), error) { - fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) + fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) if err != nil { return 0, nil, err } @@ -86,8 +85,8 @@ func startGofer(root string) (int, func(), error) { socket, err := unet.NewSocket(goferEnd) if err != nil { - syscall.Close(sandboxEnd) - syscall.Close(goferEnd) + unix.Close(sandboxEnd) + unix.Close(goferEnd) return 0, nil, fmt.Errorf("error creating server on FD %d: %v", goferEnd, err) } at, err := fsgofer.NewAttachPoint(root, fsgofer.Config{ROMount: true}) diff --git a/runsc/boot/network.go b/runsc/boot/network.go index 3d3a813df..7e627e4c6 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -19,8 +19,8 @@ import ( "net" "runtime" "strings" - "syscall" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" @@ -195,7 +195,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct for j := 0; j < link.NumChannels; j++ { // Copy the underlying FD. 
oldFD := args.FilePayload.Files[fdOffset].Fd() - newFD, err := syscall.Dup(int(oldFD)) + newFD, err := unix.Dup(int(oldFD)) if err != nil { return fmt.Errorf("failed to dup FD %v: %v", oldFD, err) } diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index ac9e4e3a8..438b7ef3e 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -27,7 +27,6 @@ import ( "path/filepath" "strconv" "strings" - "syscall" "time" "github.com/cenkalti/backoff" @@ -111,7 +110,7 @@ func setValue(path, name, data string) error { err := ioutil.WriteFile(fullpath, []byte(data), 0700) if err == nil { return nil - } else if !errors.Is(err, syscall.EINTR) { + } else if !errors.Is(err, unix.EINTR) { return err } } @@ -161,7 +160,7 @@ func fillFromAncestor(path string) (string, error) { err := ioutil.WriteFile(path, []byte(val), 0700) if err == nil { break - } else if !errors.Is(err, syscall.EINTR) { + } else if !errors.Is(err, unix.EINTR) { return "", err } } @@ -337,7 +336,7 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { c.Own[key] = true if err := os.MkdirAll(path, 0755); err != nil { - if cfg.optional && errors.Is(err, syscall.EROFS) { + if cfg.optional && errors.Is(err, unix.EROFS) { log.Infof("Skipping cgroup %q", key) continue } @@ -370,7 +369,7 @@ func (c *Cgroup) Uninstall() error { defer cancel() b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) fn := func() error { - err := syscall.Rmdir(path) + err := unix.Rmdir(path) if os.IsNotExist(err) { return nil } diff --git a/runsc/cli/BUILD b/runsc/cli/BUILD index 32cce2a18..f1e3cce68 100644 --- a/runsc/cli/BUILD +++ b/runsc/cli/BUILD @@ -18,5 +18,6 @@ go_library( "//runsc/flag", "//runsc/specutils", "@com_github_google_subcommands//:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/cli/main.go b/runsc/cli/main.go index bf6928941..a3c515f4b 100644 --- a/runsc/cli/main.go +++ b/runsc/cli/main.go @@ -23,10 +23,10 @@ import ( "os" 
"os/signal" "runtime" - "syscall" "time" "github.com/google/subcommands" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/platform" @@ -198,7 +198,7 @@ func Main(version string) { // want with them. Since Docker and Containerd both eat boot's stderr, we // dup our stderr to the provided log FD so that panics will appear in the // logs, rather than just disappear. - if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { + if err := unix.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err) } } else if conf.AlsoLogToStderr { @@ -227,11 +227,11 @@ func Main(version string) { // SIGTERM is sent to all processes if a test exceeds its // timeout and this case is handled by syscall_test_runner. log.Warningf("Block the TERM signal. This is only safe in tests!") - signal.Ignore(syscall.SIGTERM) + signal.Ignore(unix.SIGTERM) } // Call the subcommand and pass in the configuration. 
- var ws syscall.WaitStatus + var ws unix.WaitStatus subcmdCode := subcommands.Execute(context.Background(), conf, &ws) if subcmdCode == subcommands.ExitSuccess { log.Infof("Exiting with status: %v", ws) diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index e3e289da3..2c3b4058b 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -77,6 +77,7 @@ go_test( "delete_test.go", "exec_test.go", "gofer_test.go", + "mitigate_test.go", ], data = [ "//runsc", @@ -91,6 +92,8 @@ go_test( "//pkg/urpc", "//runsc/config", "//runsc/container", + "//runsc/mitigate", + "//runsc/mitigate/mock", "//runsc/specutils", "@com_github_google_go_cmp//cmp:go_default_library", "@com_github_google_go_cmp//cmp/cmpopts:go_default_library", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 2c92e3067..a14249641 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -19,7 +19,6 @@ import ( "os" "runtime/debug" "strings" - "syscall" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -259,8 +258,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) ws := l.WaitExit() log.Infof("application exiting with %+v", ws) - waitStatus := args[1].(*syscall.WaitStatus) - *waitStatus = syscall.WaitStatus(ws.Status()) + waitStatus := args[1].(*unix.WaitStatus) + *waitStatus = unix.WaitStatus(ws.Status()) l.Destroy() return subcommands.ExitSuccess } diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go index 124198239..a9dbe86de 100644 --- a/runsc/cmd/checkpoint.go +++ b/runsc/cmd/checkpoint.go @@ -18,9 +18,9 @@ import ( "context" "os" "path/filepath" - "syscall" "github.com/google/subcommands" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/container" @@ -73,7 +73,7 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa id := f.Arg(0) conf := args[0].(*config.Config) - waitStatus := args[1].(*syscall.WaitStatus) + waitStatus := 
args[1].(*unix.WaitStatus) cont, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go index 189244765..e988247da 100644 --- a/runsc/cmd/chroot.go +++ b/runsc/cmd/chroot.go @@ -18,8 +18,8 @@ import ( "fmt" "os" "path/filepath" - "syscall" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/specutils" ) @@ -49,11 +49,11 @@ func pivotRoot(root string) error { // will be moved to "/" too. The parent mount of the old_root will be // new_root, so after umounting the old_root, we will see only // the new_root in "/". - if err := syscall.PivotRoot(".", "."); err != nil { + if err := unix.PivotRoot(".", "."); err != nil { return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err) } - if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil { + if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { return fmt.Errorf("error umounting the old root file system: %v", err) } return nil @@ -70,26 +70,26 @@ func setUpChroot(pidns bool) error { // Convert all shared mounts into slave to be sure that nothing will be // propagated outside of our namespace. 
- if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + if err := unix.Mount("", "/", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { return fmt.Errorf("error converting mounts: %v", err) } - if err := syscall.Mount("runsc-root", chroot, "tmpfs", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC, ""); err != nil { + if err := unix.Mount("runsc-root", chroot, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, ""); err != nil { return fmt.Errorf("error mounting tmpfs in choot: %v", err) } if pidns { - flags := uint32(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC | syscall.MS_RDONLY) + flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY) if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil { return fmt.Errorf("error mounting proc in chroot: %v", err) } } else { - if err := mountInChroot(chroot, "/proc", "/proc", "bind", syscall.MS_BIND|syscall.MS_RDONLY|syscall.MS_REC); err != nil { + if err := mountInChroot(chroot, "/proc", "/proc", "bind", unix.MS_BIND|unix.MS_RDONLY|unix.MS_REC); err != nil { return fmt.Errorf("error mounting proc in chroot: %v", err) } } - if err := syscall.Mount("", chroot, "", syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_BIND, ""); err != nil { + if err := unix.Mount("", chroot, "", unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_BIND, ""); err != nil { return fmt.Errorf("error remounting chroot in read-only: %v", err) } diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go index f1a4887ef..4dd55cc33 100644 --- a/runsc/cmd/cmd.go +++ b/runsc/cmd/cmd.go @@ -19,9 +19,9 @@ import ( "fmt" "runtime" "strconv" - "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/specutils" ) @@ -71,7 +71,7 @@ func setCapsAndCallSelf(args []string, caps *specs.LinuxCapabilities) error { binPath := specutils.ExePath log.Infof("Execve %q again, bye!", binPath) - err := 
syscall.Exec(binPath, args, []string{}) + err := unix.Exec(binPath, args, []string{}) return fmt.Errorf("error executing %s: %v", binPath, err) } @@ -83,16 +83,16 @@ func callSelfAsNobody(args []string) error { const nobody = 65534 - if _, _, err := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(nobody), 0, 0); err != 0 { + if _, _, err := unix.RawSyscall(unix.SYS_SETGID, uintptr(nobody), 0, 0); err != 0 { return fmt.Errorf("error setting uid: %v", err) } - if _, _, err := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(nobody), 0, 0); err != 0 { + if _, _, err := unix.RawSyscall(unix.SYS_SETUID, uintptr(nobody), 0, 0); err != 0 { return fmt.Errorf("error setting gid: %v", err) } binPath := specutils.ExePath log.Infof("Execve %q again, bye!", binPath) - err := syscall.Exec(binPath, args, []string{}) + err := unix.Exec(binPath, args, []string{}) return fmt.Errorf("error executing %s: %v", binPath, err) } diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index b84142b0d..6212ffb2e 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -21,10 +21,10 @@ import ( "strconv" "strings" "sync" - "syscall" "time" "github.com/google/subcommands" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/runsc/config" @@ -135,7 +135,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Perform synchronous actions. 
if d.signal > 0 { log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid) - if err := syscall.Kill(c.Sandbox.Pid, syscall.Signal(d.signal)); err != nil { + if err := unix.Kill(c.Sandbox.Pid, unix.Signal(d.signal)); err != nil { return Errorf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid) } } @@ -317,7 +317,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) wg.Wait() }() signals := make(chan os.Signal, 1) - signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT) + signal.Notify(signals, unix.SIGTERM, unix.SIGINT) select { case <-readyChan: break // Safe to proceed. diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index 8a8d9f752..22c1dfeb8 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -26,10 +26,10 @@ import ( "path/filepath" "strconv" "strings" - "syscall" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/container" @@ -86,7 +86,7 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su } conf := args[0].(*config.Config) - waitStatus := args[1].(*syscall.WaitStatus) + waitStatus := args[1].(*unix.WaitStatus) if conf.Rootless { if err := specutils.MaybeRunAsRoot(); err != nil { @@ -225,7 +225,7 @@ func resolvePath(path string) (string, error) { return "", fmt.Errorf("resolving %q: %v", path, err) } path = filepath.Clean(path) - if err := syscall.Access(path, 0); err != nil { + if err := unix.Access(path, 0); err != nil { return "", fmt.Errorf("unable to access %q: %v", path, err) } return path, nil diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index e9726401a..242d474b8 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -24,11 +24,11 @@ import ( "path/filepath" "strconv" "strings" - "syscall" "time" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" + 
"golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -110,7 +110,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) if err != nil { Fatalf("parsing process spec: %v", err) } - waitStatus := args[1].(*syscall.WaitStatus) + waitStatus := args[1].(*unix.WaitStatus) c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { @@ -149,7 +149,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) return ex.exec(c, e, waitStatus) } -func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *syscall.WaitStatus) subcommands.ExitStatus { +func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus { // Start the new process and get its pid. pid, err := c.Execute(e) if err != nil { @@ -189,7 +189,7 @@ func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *sy return subcommands.ExitSuccess } -func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStatus { +func (ex *Exec) execChildAndWait(waitStatus *unix.WaitStatus) subcommands.ExitStatus { var args []string for _, a := range os.Args[1:] { if !strings.Contains(a, "detach") { @@ -233,7 +233,7 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi cmd.Stdin = tty cmd.Stdout = tty cmd.Stderr = tty - cmd.SysProcAttr = &syscall.SysProcAttr{ + cmd.SysProcAttr = &unix.SysProcAttr{ Setsid: true, Setctty: true, // The Ctty FD must be the FD in the child process's FD @@ -263,7 +263,7 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi } return pid == cmd.Process.Pid, nil } - if pe, ok := err.(*os.PathError); !ok || pe.Err != syscall.ENOENT { + if pe, ok := err.(*os.PathError); !ok || pe.Err != unix.ENOENT { return false, err } // No file yet, continue to 
wait... diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 371fcc0ae..639b2219c 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -21,7 +21,6 @@ import ( "os" "path/filepath" "strings" - "syscall" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -149,16 +148,16 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // fsgofer should run with a umask of 0, because we want to preserve file // modes exactly as sent by the sandbox, which will have applied its own umask. - syscall.Umask(0) + unix.Umask(0) if err := fsgofer.OpenProcSelfFD(); err != nil { Fatalf("failed to open /proc/self/fd: %v", err) } - if err := syscall.Chroot(root); err != nil { + if err := unix.Chroot(root); err != nil { Fatalf("failed to chroot to %q: %v", root, err) } - if err := syscall.Chdir("/"); err != nil { + if err := unix.Chdir("/"); err != nil { Fatalf("changing working dir: %v", err) } log.Infof("Process chroot'd to %q", root) @@ -166,7 +165,8 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Start with root mount, then add any other additional mount as needed. 
ats := make([]p9.Attacher, 0, len(spec.Mounts)+1) ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{ - ROMount: spec.Root.Readonly || conf.Overlay, + ROMount: spec.Root.Readonly || conf.Overlay, + EnableXattr: conf.Verity, }) if err != nil { Fatalf("creating attach point: %v", err) @@ -178,8 +178,9 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) for _, m := range spec.Mounts { if specutils.Is9PMount(m) { cfg := fsgofer.Config{ - ROMount: isReadonlyMount(m.Options) || conf.Overlay, - HostUDS: conf.FSGoferHostUDS, + ROMount: isReadonlyMount(m.Options) || conf.Overlay, + HostUDS: conf.FSGoferHostUDS, + EnableXattr: conf.Verity, } ap, err := fsgofer.NewAttachPoint(m.Destination, cfg) if err != nil { @@ -262,7 +263,7 @@ func isReadonlyMount(opts []string) bool { func setupRootFS(spec *specs.Spec, conf *config.Config) error { // Convert all shared mounts into slaves to be sure that nothing will be // propagated outside of our namespace. - if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + if err := unix.Mount("", "/", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { Fatalf("error converting mounts: %v", err) } @@ -274,30 +275,30 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { // // We need a directory to construct a new root and we know that // runsc can't start without /proc, so we can use it for this. - flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC) - if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil { + flags := uintptr(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC) + if err := unix.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil { Fatalf("error mounting tmpfs: %v", err) } // Prepare tree structure for pivot_root(2). 
os.Mkdir("/proc/proc", 0755) os.Mkdir("/proc/root", 0755) - if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil { + if err := unix.Mount("runsc-proc", "/proc/proc", "proc", flags|unix.MS_RDONLY, ""); err != nil { Fatalf("error mounting proc: %v", err) } root = "/proc/root" } // Mount root path followed by submounts. - if err := syscall.Mount(spec.Root.Path, root, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { + if err := unix.Mount(spec.Root.Path, root, "bind", unix.MS_BIND|unix.MS_REC, ""); err != nil { return fmt.Errorf("mounting root on root (%q) err: %v", root, err) } - flags := uint32(syscall.MS_SLAVE | syscall.MS_REC) + flags := uint32(unix.MS_SLAVE | unix.MS_REC) if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation}) } - if err := syscall.Mount("", root, "", uintptr(flags), ""); err != nil { + if err := unix.Mount("", root, "", uintptr(flags), ""); err != nil { return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err) } @@ -323,8 +324,8 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { // If root is a mount point but not read-only, we can change mount options // to make it read-only for extra safety. 
log.Infof("Remounting root as readonly: %q", root) - flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) - if err := syscall.Mount(root, root, "bind", flags, ""); err != nil { + flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY | unix.MS_REC) + if err := unix.Mount(root, root, "bind", flags, ""); err != nil { return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err) } } @@ -354,10 +355,10 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error { return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) } - flags := specutils.OptionsToFlags(m.Options) | syscall.MS_BIND + flags := specutils.OptionsToFlags(m.Options) | unix.MS_BIND if conf.Overlay { // Force mount read-only if writes are not going to be sent to it. - flags |= syscall.MS_RDONLY + flags |= unix.MS_RDONLY } log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) @@ -368,7 +369,7 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error { // Set propagation options that cannot be set together with other options. 
flags = specutils.PropOptionsToFlags(m.Options) if flags != 0 { - if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil { + if err := unix.Mount("", dst, "", uintptr(flags), ""); err != nil { return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err) } } @@ -469,8 +470,8 @@ func adjustMountOptions(conf *config.Config, path string, opts []string) ([]stri copy(rv, opts) if conf.OverlayfsStaleRead { - statfs := syscall.Statfs_t{} - if err := syscall.Statfs(path, &statfs); err != nil { + statfs := unix.Statfs_t{} + if err := unix.Statfs(path, &statfs); err != nil { return nil, err } if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC { diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go index e0df39266..239fc7ac2 100644 --- a/runsc/cmd/kill.go +++ b/runsc/cmd/kill.go @@ -19,7 +19,6 @@ import ( "fmt" "strconv" "strings" - "syscall" "github.com/google/subcommands" "golang.org/x/sys/unix" @@ -99,10 +98,10 @@ func (k *Kill) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) return subcommands.ExitSuccess } -func parseSignal(s string) (syscall.Signal, error) { +func parseSignal(s string) (unix.Signal, error) { n, err := strconv.Atoi(s) if err == nil { - sig := syscall.Signal(n) + sig := unix.Signal(n) for _, msig := range signalMap { if sig == msig { return sig, nil @@ -116,7 +115,7 @@ func parseSignal(s string) (syscall.Signal, error) { return -1, fmt.Errorf("unknown signal %q", s) } -var signalMap = map[string]syscall.Signal{ +var signalMap = map[string]unix.Signal{ "ABRT": unix.SIGABRT, "ALRM": unix.SIGALRM, "BUS": unix.SIGBUS, diff --git a/runsc/cmd/mitigate.go b/runsc/cmd/mitigate.go index 822af1917..fddf0e0dd 100644 --- a/runsc/cmd/mitigate.go +++ b/runsc/cmd/mitigate.go @@ -16,6 +16,8 @@ package cmd import ( "context" + "fmt" + "io/ioutil" "github.com/google/subcommands" "gvisor.dev/gvisor/pkg/log" @@ -23,9 +25,23 @@ import ( "gvisor.dev/gvisor/runsc/mitigate" ) +const ( + // cpuInfo is the path used to parse CPU info. 
+ cpuInfo = "/proc/cpuinfo" + // allPossibleCPUs is the path used to enable CPUs. + allPossibleCPUs = "/sys/devices/system/cpu/possible" +) + // Mitigate implements subcommands.Command for the "mitigate" command. type Mitigate struct { - mitigate mitigate.Mitigate + // Run the command without changing the underlying system. + dryRun bool + // Reverse mitigate by turning on all CPU cores. + reverse bool + // Path to file to read to create CPUSet. + path string + // Callback to check if a given thread is vulnerable. + vulnerable func(other mitigate.Thread) bool } // Name implements subcommands.command.name. @@ -38,14 +54,19 @@ func (*Mitigate) Synopsis() string { return "mitigate mitigates the underlying system against side channel attacks" } -// Usage implements subcommands.Command.Usage. -func (m *Mitigate) Usage() string { - return m.mitigate.Usage() +// Usage implements Usage for cmd.Mitigate. +func (m Mitigate) Usage() string { + return `mitigate [flags] + +mitigate mitigates a system to the "MDS" vulnerability by implementing a manual shutdown of SMT. The command checks /proc/cpuinfo for cpus having the MDS vulnerability, and if found, shutdown all but one CPU per hyperthread pair via /sys/devices/system/cpu/cpu{N}/online. CPUs can be restored by writing "2" to each file in /sys/devices/system/cpu/cpu{N}/online or performing a system reboot. + +The command can be reversed with --reverse, which reads the total CPUs from /sys/devices/system/cpu/possible and enables all with /sys/devices/system/cpu/cpu{N}/online.` } -// SetFlags implements subcommands.Command.SetFlags. +// SetFlags sets flags for the command Mitigate. func (m *Mitigate) SetFlags(f *flag.FlagSet) { - m.mitigate.SetFlags(f) + f.BoolVar(&m.dryRun, "dryrun", false, "run the command without changing system") + f.BoolVar(&m.reverse, "reverse", false, "reverse mitigate by enabling all CPUs") } // Execute implements subcommands.Command.Execute. 
@@ -55,10 +76,97 @@ func (m *Mitigate) Execute(_ context.Context, f *flag.FlagSet, args ...interface return subcommands.ExitUsageError } - if err := m.mitigate.Execute(); err != nil { + m.path = cpuInfo + if m.reverse { + m.path = allPossibleCPUs + } + + m.vulnerable = func(other mitigate.Thread) bool { + return other.IsVulnerable() + } + + if _, err := m.doExecute(); err != nil { log.Warningf("Execute failed: %v", err) return subcommands.ExitFailure } return subcommands.ExitSuccess } + +// doExecute executes the Mitigate command. +func (m *Mitigate) doExecute() (mitigate.CPUSet, error) { + if m.dryRun { + log.Infof("Running with DryRun. No cpu settings will be changed.") + } + if m.reverse { + data, err := ioutil.ReadFile(m.path) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %v", m.path, err) + } + + set, err := m.doReverse(data) + if err != nil { + return nil, fmt.Errorf("reverse operation failed: %v", err) + } + return set, nil + } + + data, err := ioutil.ReadFile(m.path) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %v", m.path, err) + } + set, err := m.doMitigate(data) + if err != nil { + return nil, fmt.Errorf("mitigate operation failed: %v", err) + } + return set, nil +} + +func (m *Mitigate) doMitigate(data []byte) (mitigate.CPUSet, error) { + set, err := mitigate.NewCPUSet(data, m.vulnerable) + if err != nil { + return nil, err + } + + log.Infof("Mitigate found the following CPUs...") + log.Infof("%s", set) + + disableList := set.GetShutdownList() + log.Infof("Disabling threads on thread pairs.") + for _, t := range disableList { + log.Infof("Disable thread: %s", t) + if m.dryRun { + continue + } + if err := t.Disable(); err != nil { + return nil, fmt.Errorf("error disabling thread: %s err: %v", t, err) + } + } + log.Infof("Shutdown successful.") + return set, nil +} + +func (m *Mitigate) doReverse(data []byte) (mitigate.CPUSet, error) { + set, err := mitigate.NewCPUSetFromPossible(data) + if err != nil { + return nil, err + 
} + + log.Infof("Reverse mitigate found the following CPUs...") + log.Infof("%s", set) + + enableList := set.GetRemainingList() + + log.Infof("Enabling all CPUs...") + for _, t := range enableList { + log.Infof("Enabling thread: %s", t) + if m.dryRun { + continue + } + if err := t.Enable(); err != nil { + return nil, fmt.Errorf("error enabling thread: %s err: %v", t, err) + } + } + log.Infof("Enable successful.") + return set, nil +} diff --git a/runsc/cmd/mitigate_test.go b/runsc/cmd/mitigate_test.go new file mode 100644 index 000000000..163fece42 --- /dev/null +++ b/runsc/cmd/mitigate_test.go @@ -0,0 +1,169 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "fmt" + "io/ioutil" + "os" + "strings" + "testing" + + "gvisor.dev/gvisor/runsc/mitigate" + "gvisor.dev/gvisor/runsc/mitigate/mock" +) + +type executeTestCase struct { + name string + mitigateData string + mitigateError error + mitigateCPU int + reverseData string + reverseError error + reverseCPU int +} + +func TestExecute(t *testing.T) { + + partial := `processor : 1 +vendor_id : AuthenticAMD +cpu family : 23 +model : 49 +model name : AMD EPYC 7B12 +physical id : 0 +bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass +power management: +` + + for _, tc := range []executeTestCase{ + { + name: "CascadeLake4", + mitigateData: mock.CascadeLake4.MakeCPUString(), + mitigateCPU: 2, + reverseData: mock.CascadeLake4.MakeSysPossibleString(), + reverseCPU: 4, + }, + { + name: "Empty", + mitigateData: "", + mitigateError: fmt.Errorf(`mitigate operation failed: no cpus found for: ""`), + reverseData: "", + reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from possible: ""`), + }, + { + name: "Partial", + mitigateData: `processor : 0 +vendor_id : AuthenticAMD +cpu family : 23 +model : 49 +model name : AMD EPYC 7B12 +physical id : 0 +core id : 0 +cpu cores : 1 +bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass +power management::84 + +` + partial, + mitigateError: fmt.Errorf(`mitigate operation failed: failed to match key "core id": %q`, partial), + reverseData: "1-", + reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from possible: %q`, "1-"), + }, + } { + t.Run(tc.name, func(t *testing.T) { + m := &Mitigate{ + dryRun: true, + vulnerable: func(other mitigate.Thread) bool { + return other.IsVulnerable() + }, + } + m.doExecuteTest(t, "Mitigate", tc.mitigateData, tc.mitigateCPU, tc.mitigateError) + + m.reverse = true + m.doExecuteTest(t, "Reverse", tc.reverseData, tc.reverseCPU, tc.reverseError) + }) + } +} + +func TestExecuteSmoke(t *testing.T) { + smokeMitigate, err := 
ioutil.ReadFile(cpuInfo) + if err != nil { + t.Fatalf("Failed to read %s: %v", cpuInfo, err) + } + + m := &Mitigate{ + dryRun: true, + vulnerable: func(other mitigate.Thread) bool { + return other.IsVulnerable() + }, + } + + m.doExecuteTest(t, "Mitigate", string(smokeMitigate), 0, nil) + + smokeReverse, err := ioutil.ReadFile(allPossibleCPUs) + if err != nil { + t.Fatalf("Failed to read %s: %v", allPossibleCPUs, err) + } + + m.reverse = true + m.doExecuteTest(t, "Reverse", string(smokeReverse), 0, nil) +} + +// doExecuteTest runs Execute with the mitigate operation and reverse operation. +func (m *Mitigate) doExecuteTest(t *testing.T, name, data string, want int, wantErr error) { + t.Run(name, func(t *testing.T) { + file, err := ioutil.TempFile("", "outfile.txt") + if err != nil { + t.Fatalf("Failed to create tmpfile: %v", err) + } + defer os.Remove(file.Name()) + + if _, err := file.WriteString(data); err != nil { + t.Fatalf("Failed to write to file: %v", err) + } + + // Set fields for mitigate and dryrun to keep test hermetic. + m.path = file.Name() + + set, err := m.doExecute() + if err = checkErr(wantErr, err); err != nil { + t.Fatalf("Mitigate error mismatch: %v", err) + } + + // case where test should end in error or we don't care + // about how many cpus are returned. + if wantErr != nil || want < 1 { + return + } + got := len(set.GetRemainingList()) + if want != got { + t.Fatalf("Failed wrong number of remaining CPUs: want %d, got %d", want, got) + } + + }) +} + +// checkErr checks error for equality. 
+func checkErr(want, got error) error { + switch { + case want == nil && got == nil: + case want != nil && got == nil: + fallthrough + case want == nil && got != nil: + fallthrough + case want.Error() != strings.Trim(got.Error(), " "): + return fmt.Errorf("got: %v want: %v", got, want) + } + return nil +} diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go index 096ec814c..b21f05921 100644 --- a/runsc/cmd/restore.go +++ b/runsc/cmd/restore.go @@ -17,9 +17,9 @@ package cmd import ( "context" "path/filepath" - "syscall" "github.com/google/subcommands" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/flag" @@ -78,7 +78,7 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{ id := f.Arg(0) conf := args[0].(*config.Config) - waitStatus := args[1].(*syscall.WaitStatus) + waitStatus := args[1].(*unix.WaitStatus) if conf.Rootless { return Errorf("Rootless mode not supported with %q", r.Name()) diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go index c48cbe4cd..722181aff 100644 --- a/runsc/cmd/run.go +++ b/runsc/cmd/run.go @@ -16,9 +16,9 @@ package cmd import ( "context" - "syscall" "github.com/google/subcommands" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/flag" @@ -65,7 +65,7 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s id := f.Arg(0) conf := args[0].(*config.Config) - waitStatus := args[1].(*syscall.WaitStatus) + waitStatus := args[1].(*unix.WaitStatus) if conf.Rootless { return Errorf("Rootless mode not supported with %q", r.Name()) diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go index 5d55422c7..d7a783b88 100644 --- a/runsc/cmd/wait.go +++ b/runsc/cmd/wait.go @@ -18,9 +18,9 @@ import ( "context" "encoding/json" "os" - "syscall" "github.com/google/subcommands" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/runsc/config" 
"gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/flag" @@ -77,7 +77,7 @@ func (wt *Wait) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("loading container: %v", err) } - var waitStatus syscall.WaitStatus + var waitStatus unix.WaitStatus switch { // Wait on the whole container. case wt.rootPID == unsetPID && wt.pid == unsetPID: @@ -119,7 +119,7 @@ type waitResult struct { // exitStatus returns the correct exit status for a process based on if it // was signaled or exited cleanly. -func exitStatus(status syscall.WaitStatus) int { +func exitStatus(status unix.WaitStatus) int { if status.Signaled() { return 128 + int(status.Signal()) } diff --git a/runsc/config/config.go b/runsc/config/config.go index e9fd7708f..34ef48825 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -64,6 +64,9 @@ type Config struct { // Overlay is whether to wrap the root filesystem in an overlay. Overlay bool `flag:"overlay"` + // Verity is whether there's one or more verity file system to mount. + Verity bool `flag:"verity"` + // FSGoferHostUDS enables the gofer to mount a host UDS. FSGoferHostUDS bool `flag:"fsgofer-host-uds"` diff --git a/runsc/config/flags.go b/runsc/config/flags.go index 7e738dfdf..adbee506c 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -69,6 +69,7 @@ func RegisterFlags() { // Flags that control sandbox runtime behavior: FS related. flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. 
All modifications are stored in memory inside the sandbox.") + flag.Bool("verity", false, "specifies whether a verity file system will be mounted.") flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.") diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 8793c8916..3620dc8c3 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -30,6 +30,7 @@ go_library( "@com_github_cenkalti_backoff//:go_default_library", "@com_github_gofrs_flock//:go_default_library", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 7a3d5a523..79b056fce 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -21,7 +21,6 @@ import ( "math/rand" "os" "path/filepath" - "syscall" "testing" "time" @@ -320,7 +319,7 @@ func TestJobControlSignalExec(t *testing.T) { // Send a SIGTERM to the foreground process for the exec PID. Note that // although we pass in the PID of "bash", it should actually terminate // "sleep", since that is the foreground process. - if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.SIGTERM, true /* fgProcess */); err != nil { + if err := c.Sandbox.SignalProcess(c.ID, pid, unix.SIGTERM, true /* fgProcess */); err != nil { t.Fatalf("error signaling container: %v", err) } @@ -340,7 +339,7 @@ func TestJobControlSignalExec(t *testing.T) { // Send a SIGKILL to the foreground process again. This time "bash" // should be killed. We use SIGKILL instead of SIGTERM or SIGINT // because bash ignores those. 
- if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.SIGKILL, true /* fgProcess */); err != nil { + if err := c.Sandbox.SignalProcess(c.ID, pid, unix.SIGKILL, true /* fgProcess */); err != nil { t.Fatalf("error signaling container: %v", err) } expectedPL = expectedPL[:1] @@ -356,7 +355,7 @@ func TestJobControlSignalExec(t *testing.T) { if !ws.Signaled() { t.Error("ws.Signaled() got false, want true") } - if got, want := ws.Signal(), syscall.SIGKILL; got != want { + if got, want := ws.Signal(), unix.SIGKILL; got != want { t.Errorf("ws.Signal() got %v, want %v", got, want) } } @@ -423,7 +422,7 @@ func TestJobControlSignalRootContainer(t *testing.T) { // very early, otherwise it might exit before we have a chance to call // Wait. var ( - ws syscall.WaitStatus + ws unix.WaitStatus wg sync.WaitGroup ) wg.Add(1) @@ -459,7 +458,7 @@ func TestJobControlSignalRootContainer(t *testing.T) { // Send a SIGTERM to the foreground process. We pass PID=0, indicating // that the root process should be killed. However, by setting // fgProcess=true, the signal should actually be sent to sleep. - if err := c.Sandbox.SignalProcess(c.ID, 0 /* PID */, syscall.SIGTERM, true /* fgProcess */); err != nil { + if err := c.Sandbox.SignalProcess(c.ID, 0 /* PID */, unix.SIGTERM, true /* fgProcess */); err != nil { t.Fatalf("error signaling container: %v", err) } @@ -479,7 +478,7 @@ func TestJobControlSignalRootContainer(t *testing.T) { // Send a SIGKILL to the foreground process again. This time "bash" // should be killed. We use SIGKILL instead of SIGTERM or SIGINT // because bash ignores those. 
- if err := c.Sandbox.SignalProcess(c.ID, 0 /* PID */, syscall.SIGKILL, true /* fgProcess */); err != nil { + if err := c.Sandbox.SignalProcess(c.ID, 0 /* PID */, unix.SIGKILL, true /* fgProcess */); err != nil { t.Fatalf("error signaling container: %v", err) } @@ -488,7 +487,7 @@ func TestJobControlSignalRootContainer(t *testing.T) { if !ws.Signaled() { t.Error("ws.Signaled() got false, want true") } - if got, want := ws.Signal(), syscall.SIGKILL; got != want { + if got, want := ws.Signal(), unix.SIGKILL; got != want { t.Errorf("ws.Signal() got %v, want %v", got, want) } } diff --git a/runsc/container/container.go b/runsc/container/container.go index 40812efb8..f9d83c118 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -30,6 +30,7 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/log" @@ -244,7 +245,7 @@ func New(conf *config.Config, args Args) (*Container, error) { // If there is cgroup config, install it before creating sandbox process. if err := cg.Install(args.Spec.Linux.Resources); err != nil { switch { - case errors.Is(err, syscall.EACCES) && conf.Rootless: + case errors.Is(err, unix.EACCES) && conf.Rootless: log.Warningf("Skipping cgroup configuration in rootless mode: %v", err) cg = nil default: @@ -447,7 +448,7 @@ func (c *Container) Restore(spec *specs.Spec, conf *config.Config, restoreFile s } // Run is a helper that calls Create + Start + Wait. -func Run(conf *config.Config, args Args) (syscall.WaitStatus, error) { +func Run(conf *config.Config, args Args) (unix.WaitStatus, error) { log.Debugf("Run container, cid: %s, rootDir: %q", args.ID, conf.RootDir) c, err := New(conf, args) if err != nil { @@ -517,7 +518,7 @@ func (c *Container) SandboxPid() int { // Wait waits for the container to exit, and returns its WaitStatus. 
// Call to wait on a stopped container is needed to retrieve the exit status // and wait returns immediately. -func (c *Container) Wait() (syscall.WaitStatus, error) { +func (c *Container) Wait() (unix.WaitStatus, error) { log.Debugf("Wait on container, cid: %s", c.ID) ws, err := c.Sandbox.Wait(c.ID) if err == nil { @@ -529,7 +530,7 @@ func (c *Container) Wait() (syscall.WaitStatus, error) { // WaitRootPID waits for process 'pid' in the sandbox's PID namespace and // returns its WaitStatus. -func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) { +func (c *Container) WaitRootPID(pid int32) (unix.WaitStatus, error) { log.Debugf("Wait on process %d in sandbox, cid: %s", pid, c.Sandbox.ID) if !c.IsSandboxRunning() { return 0, fmt.Errorf("sandbox is not running") @@ -539,7 +540,7 @@ func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) { // WaitPID waits for process 'pid' in the container's PID namespace and returns // its WaitStatus. -func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) { +func (c *Container) WaitPID(pid int32) (unix.WaitStatus, error) { log.Debugf("Wait on process %d in container, cid: %s", pid, c.ID) if !c.IsSandboxRunning() { return 0, fmt.Errorf("sandbox is not running") @@ -551,7 +552,7 @@ func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) { // is SIGKILL, then waits for all processes to exit before returning. // SignalContainer returns an error if the container is already stopped. // TODO(b/113680494): Distinguish different error types. -func (c *Container) SignalContainer(sig syscall.Signal, all bool) error { +func (c *Container) SignalContainer(sig unix.Signal, all bool) error { log.Debugf("Signal container, cid: %s, signal: %v (%d)", c.ID, sig, sig) // Signaling container in Stopped state is allowed. 
When all=false, // an error will be returned anyway; when all=true, this allows @@ -568,7 +569,7 @@ func (c *Container) SignalContainer(sig syscall.Signal, all bool) error { } // SignalProcess sends sig to a specific process in the container. -func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error { +func (c *Container) SignalProcess(sig unix.Signal, pid int32) error { log.Debugf("Signal process %d in container, cid: %s, signal: %v (%d)", pid, c.ID, sig, sig) if err := c.requireStatus("signal a process inside", Running); err != nil { return err @@ -586,7 +587,7 @@ func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() { log.Debugf("Forwarding all signals to container, cid: %s, PIDPID: %d, fgProcess: %t", c.ID, pid, fgProcess) stop := sighandling.StartSignalForwarding(func(sig linux.Signal) { log.Debugf("Forwarding signal %d to container, cid: %s, PID: %d, fgProcess: %t", sig, c.ID, pid, fgProcess) - if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.Signal(sig), fgProcess); err != nil { + if err := c.Sandbox.SignalProcess(c.ID, pid, unix.Signal(sig), fgProcess); err != nil { log.Warningf("error forwarding signal %d to container %q: %v", sig, c.ID, err) } }) @@ -768,9 +769,9 @@ func (c *Container) stop() error { // Try killing gofer if it does not exit with container. if c.GoferPid != 0 { log.Debugf("Killing gofer for container, cid: %s, PID: %d", c.ID, c.GoferPid) - if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil { + if err := unix.Kill(c.GoferPid, unix.SIGKILL); err != nil { // The gofer may already be stopped, log the error. 
- log.Warningf("Error sending signal %d to gofer %d: %v", syscall.SIGKILL, c.GoferPid, err) + log.Warningf("Error sending signal %d to gofer %d: %v", unix.SIGKILL, c.GoferPid, err) } } @@ -793,7 +794,7 @@ func (c *Container) waitForStopped() error { b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) op := func() error { if c.IsSandboxRunning() { - if err := c.SignalContainer(syscall.Signal(0), false); err == nil { + if err := c.SignalContainer(unix.Signal(0), false); err == nil { return fmt.Errorf("container is still running") } } @@ -803,7 +804,7 @@ func (c *Container) waitForStopped() error { if c.goferIsChild { // The gofer process is a child of the current process, // so we can wait it and collect its zombie. - wpid, err := syscall.Wait4(int(c.GoferPid), nil, syscall.WNOHANG, nil) + wpid, err := unix.Wait4(int(c.GoferPid), nil, unix.WNOHANG, nil) if err != nil { return fmt.Errorf("error waiting the gofer process: %v", err) } @@ -811,7 +812,7 @@ func (c *Container) waitForStopped() error { return fmt.Errorf("gofer is still running") } - } else if err := syscall.Kill(c.GoferPid, 0); err == nil { + } else if err := unix.Kill(c.GoferPid, 0); err == nil { return fmt.Errorf("gofer is still running") } c.GoferPid = 0 @@ -892,7 +893,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu sandEnds := make([]*os.File, 0, mountCount) for i := 0; i < mountCount; i++ { - fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) + fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) if err != nil { return nil, nil, err } @@ -914,8 +915,8 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu if attached { // The gofer is attached to the lifetime of this process, so it // should synchronously die when this process dies. 
- cmd.SysProcAttr = &syscall.SysProcAttr{ - Pdeathsig: syscall.SIGKILL, + cmd.SysProcAttr = &unix.SysProcAttr{ + Pdeathsig: unix.SIGKILL, } } @@ -1113,7 +1114,7 @@ func setOOMScoreAdj(pid int, scoreAdj int) error { } defer f.Close() if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil { - if errors.Is(err, syscall.ESRCH) { + if errors.Is(err, unix.ESRCH) { log.Warningf("Process (%d) exited while setting oom_score_adj", pid) return nil } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 862d9444d..5a0c468a4 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -27,12 +27,12 @@ import ( "reflect" "strconv" "strings" - "syscall" "testing" "time" "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bits" "gvisor.dev/gvisor/pkg/log" @@ -103,7 +103,7 @@ func waitForProcessCount(cont *Container, want int) error { func blockUntilWaitable(pid int) error { _, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) { var err error - _, _, err1 := syscall.Syscall6(syscall.SYS_WAITID, 1, uintptr(pid), 0, syscall.WEXITED|syscall.WNOWAIT, 0, 0) + _, _, err1 := unix.Syscall6(unix.SYS_WAITID, 1, uintptr(pid), 0, unix.WEXITED|unix.WNOWAIT, 0, 0) if err1 != 0 { err = err1 } @@ -468,7 +468,7 @@ func TestLifecycle(t *testing.T) { if err != nil { ch <- err } - if got, want := ws.Signal(), syscall.SIGTERM; got != want { + if got, want := ws.Signal(), unix.SIGTERM; got != want { ch <- fmt.Errorf("got signal %v, want %v", got, want) } ch <- nil @@ -479,8 +479,8 @@ func TestLifecycle(t *testing.T) { time.Sleep(time.Second) // Send the container a SIGTERM which will cause it to stop. 
- if err := c.SignalContainer(syscall.SIGTERM, false); err != nil { - t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err) + if err := c.SignalContainer(unix.SIGTERM, false); err != nil { + t.Fatalf("error sending signal %v to container: %v", unix.SIGTERM, err) } // Wait for it to die. @@ -815,11 +815,11 @@ func TestExec(t *testing.T) { t.Run("nonexist", func(t *testing.T) { // b/179114837 found by Syzkaller that causes nil pointer panic when // trying to dec-ref an unix socket FD. - fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) + fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0) if err != nil { t.Fatal(err) } - defer syscall.Close(fds[0]) + defer unix.Close(fds[0]) _, err = cont.executeSync(&control.ExecArgs{ Argv: []string{"/nonexist"}, @@ -956,7 +956,7 @@ func TestKillPid(t *testing.T) { pid = int32(p.PID) } } - if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil { + if err := cont.SignalProcess(unix.SIGKILL, pid); err != nil { t.Fatalf("failed to signal process %d: %v", pid, err) } @@ -1601,12 +1601,12 @@ func TestReadonlyRoot(t *testing.T) { } // Read mounts to check that root is readonly. 
- out, err := executeCombinedOutput(c, "/bin/sh", "-c", "mount | grep ' / '") + out, err := executeCombinedOutput(c, "/bin/sh", "-c", "mount | grep ' / ' | grep -o -e '(.*)'") if err != nil { t.Fatalf("exec failed: %v", err) } - t.Logf("root mount: %q", out) - if !strings.Contains(string(out), "(ro)") { + t.Logf("root mount options: %q", out) + if !strings.Contains(string(out), "ro") { t.Errorf("root not mounted readonly: %q", out) } @@ -1615,7 +1615,7 @@ func TestReadonlyRoot(t *testing.T) { if err != nil { t.Fatalf("touch file in ro mount: %v", err) } - if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { + if !ws.Exited() || unix.Errno(ws.ExitStatus()) != unix.EPERM { t.Fatalf("wrong waitStatus: %v", ws) } }) @@ -1659,13 +1659,13 @@ func TestReadonlyMount(t *testing.T) { } // Read mounts to check that volume is readonly. - cmd := fmt.Sprintf("mount | grep ' %s '", dir) + cmd := fmt.Sprintf("mount | grep ' %s ' | grep -o -e '(.*)'", dir) out, err := executeCombinedOutput(c, "/bin/sh", "-c", cmd) if err != nil { t.Fatalf("exec failed, err: %v", err) } - t.Logf("mount: %q", out) - if !strings.Contains(string(out), "(ro)") { + t.Logf("mount options: %q", out) + if !strings.Contains(string(out), "ro") { t.Errorf("volume not mounted readonly: %q", out) } @@ -1674,7 +1674,7 @@ func TestReadonlyMount(t *testing.T) { if err != nil { t.Fatalf("touch file in ro mount: %v", err) } - if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { + if !ws.Exited() || unix.Errno(ws.ExitStatus()) != unix.EPERM { t.Fatalf("wrong WaitStatus: %v", ws) } }) @@ -1750,8 +1750,8 @@ func TestUIDMap(t *testing.T) { if !ws.Exited() || ws.ExitStatus() != 0 { t.Fatalf("container failed, waitStatus: %v", ws) } - st := syscall.Stat_t{} - if err := syscall.Stat(testFile, &st); err != nil { + st := unix.Stat_t{} + if err := unix.Stat(testFile, &st); err != nil { t.Fatalf("error stat /testfile: %v", err) } @@ -1880,7 +1880,7 @@ func doGoferExitTest(t *testing.T, vfs2 bool) 
{ } err = blockUntilWaitable(c.GoferPid) - if err != nil && err != syscall.ECHILD { + if err != nil && err != unix.ECHILD { t.Errorf("error waiting for gofer to exit: %v", err) } } @@ -1929,7 +1929,7 @@ func TestUserLog(t *testing.T) { } // sched_rr_get_interval - not implemented in gvisor. - num := strconv.Itoa(syscall.SYS_SCHED_RR_GET_INTERVAL) + num := strconv.Itoa(unix.SYS_SCHED_RR_GET_INTERVAL) spec := testutil.NewSpecWithArgs(app, "syscall", "--syscall="+num) conf := testutil.TestConfig(t) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) @@ -2159,10 +2159,10 @@ func TestMountPropagation(t *testing.T) { f.Close() // Setup src as a shared mount. - if err := syscall.Mount(src, src, "bind", syscall.MS_BIND, ""); err != nil { + if err := unix.Mount(src, src, "bind", unix.MS_BIND, ""); err != nil { t.Fatalf("mount(%q, %q, MS_BIND): %v", dir, srcMnt, err) } - if err := syscall.Mount("", src, "", syscall.MS_SHARED, ""); err != nil { + if err := unix.Mount("", src, "", unix.MS_SHARED, ""); err != nil { t.Fatalf("mount(%q, MS_SHARED): %v", srcMnt, err) } @@ -2209,7 +2209,7 @@ func TestMountPropagation(t *testing.T) { // After the container is started, mount dir inside source and check what // happens to both destinations. - if err := syscall.Mount(dir, srcMnt, "bind", syscall.MS_BIND, ""); err != nil { + if err := unix.Mount(dir, srcMnt, "bind", unix.MS_BIND, ""); err != nil { t.Fatalf("mount(%q, %q, MS_BIND): %v", dir, srcMnt, err) } @@ -2449,7 +2449,7 @@ func TestCreateWithCorruptedStateFile(t *testing.T) { } } -func execute(cont *Container, name string, arg ...string) (syscall.WaitStatus, error) { +func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { args := &control.ExecArgs{ Filename: name, Argv: append([]string{name}, arg...), @@ -2483,7 +2483,7 @@ func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, } // executeSync synchronously executes a new process. 
-func (c *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) { +func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) { pid, err := c.Execute(args) if err != nil { return 0, fmt.Errorf("error executing: %v", err) diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index b434cdb23..0f0a223ce 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -22,11 +22,11 @@ import ( "path" "path/filepath" "strings" - "syscall" "testing" "time" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -403,7 +403,7 @@ func TestMultiPIDNSKill(t *testing.T) { t.Logf("Container %q procs: %s", c.ID, procListToString(procs)) pidToKill := procs[processes-1].PID t.Logf("PID to kill: %d", pidToKill) - if err := c.SignalProcess(syscall.SIGKILL, int32(pidToKill)); err != nil { + if err := c.SignalProcess(unix.SIGKILL, int32(pidToKill)); err != nil { t.Errorf("container.SignalProcess: %v", err) } // Wait for the process to get killed. @@ -432,7 +432,7 @@ func TestMultiPIDNSKill(t *testing.T) { pidToKill = procs[len(procs)-1].PID t.Logf("PID that should not be killed: %d", pidToKill) - err = c.SignalProcess(syscall.SIGKILL, int32(pidToKill)) + err = c.SignalProcess(unix.SIGKILL, int32(pidToKill)) if err == nil { t.Fatalf("killing another container's process should fail") } @@ -640,7 +640,7 @@ func TestMultiContainerSignal(t *testing.T) { } // Kill process 2. 
- if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil { + if err := containers[1].SignalContainer(unix.SIGKILL, false); err != nil { t.Errorf("failed to kill process 2: %v", err) } @@ -660,10 +660,10 @@ func TestMultiContainerSignal(t *testing.T) { t.Errorf("failed to destroy container: %v", err) } _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) { - cpid, err := syscall.Wait4(goferPid, nil, 0, nil) + cpid, err := unix.Wait4(goferPid, nil, 0, nil) return uintptr(cpid), 0, err }) - if err != syscall.ECHILD { + if err != unix.ECHILD { t.Errorf("error waiting for gofer to exit: %v", err) } // Make sure process 1 is still running. @@ -673,28 +673,28 @@ func TestMultiContainerSignal(t *testing.T) { // Now that process 2 is gone, ensure we get an error trying to // signal it again. - if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil { + if err := containers[1].SignalContainer(unix.SIGKILL, false); err == nil { t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID) } // Kill process 1. - if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil { + if err := containers[0].SignalContainer(unix.SIGKILL, false); err != nil { t.Errorf("failed to kill process 1: %v", err) } // Ensure that container's gofer and sandbox process are no more. err = blockUntilWaitable(containers[0].GoferPid) - if err != nil && err != syscall.ECHILD { + if err != nil && err != unix.ECHILD { t.Errorf("error waiting for gofer to exit: %v", err) } err = blockUntilWaitable(containers[0].Sandbox.Pid) - if err != nil && err != syscall.ECHILD { + if err != nil && err != unix.ECHILD { t.Errorf("error waiting for sandbox to exit: %v", err) } // The sentry should be gone, so signaling should yield an error. 
- if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil { + if err := containers[0].SignalContainer(unix.SIGKILL, false); err == nil { t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID) } @@ -893,7 +893,7 @@ func TestMultiContainerKillAll(t *testing.T) { if tc.killContainer { // First kill the init process to make the container be stopped with // processes still running inside. - containers[1].SignalContainer(syscall.SIGKILL, false) + containers[1].SignalContainer(unix.SIGKILL, false) op := func() error { c, err := Load(conf.RootDir, FullID{ContainerID: ids[1]}, LoadOpts{}) if err != nil { @@ -914,7 +914,7 @@ func TestMultiContainerKillAll(t *testing.T) { t.Fatalf("failed to load child container %q: %v", c.ID, err) } // Kill'Em All - if err := c.SignalContainer(syscall.SIGKILL, true); err != nil { + if err := c.SignalContainer(unix.SIGKILL, true); err != nil { t.Fatalf("failed to send SIGKILL to container %q: %v", c.ID, err) } @@ -1640,8 +1640,8 @@ func TestMultiContainerGoferKilled(t *testing.T) { } // Kill container's gofer. - if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil { - t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err) + if err := unix.Kill(c.GoferPid, unix.SIGKILL); err != nil { + t.Fatalf("unix.Kill(%d, SIGKILL)=%v", c.GoferPid, err) } // Wait until container stops. @@ -1672,8 +1672,8 @@ func TestMultiContainerGoferKilled(t *testing.T) { // Kill root container's gofer to bring entire sandbox down. c = containers[0] - if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil { - t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err) + if err := unix.Kill(c.GoferPid, unix.SIGKILL); err != nil { + t.Fatalf("unix.Kill(%d, SIGKILL)=%v", c.GoferPid, err) } // Wait until sandbox stops. 
waitForProcessList will loop until sandbox exits diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go index c46322ba4..0399903a0 100644 --- a/runsc/container/state_file.go +++ b/runsc/container/state_file.go @@ -22,9 +22,9 @@ import ( "path/filepath" "regexp" "strings" - "syscall" "github.com/gofrs/flock" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sync" ) @@ -89,7 +89,7 @@ func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) { c.changeStatus(Stopped) } case Running: - if err := c.SignalContainer(syscall.Signal(0), false); err != nil { + if err := c.SignalContainer(unix.Signal(0), false); err != nil { c.changeStatus(Stopped) } } @@ -245,7 +245,7 @@ type StateFile struct { // lock globally locks all locking operations for the container. func (s *StateFile) lock() error { s.once.Do(func() { - s.flock = flock.NewFlock(s.lockPath()) + s.flock = flock.New(s.lockPath()) }) if err := s.flock.Lock(); err != nil { diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index d1af539cb..fd72414ce 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -16,7 +16,6 @@ package filter import ( "os" - "syscall" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" @@ -25,12 +24,12 @@ import ( // allowedSyscalls is the set of syscalls executed by the gofer. 
var allowedSyscalls = seccomp.SyscallRules{ - syscall.SYS_ACCEPT: {}, - syscall.SYS_CLOCK_GETTIME: {}, - syscall.SYS_CLOSE: {}, - syscall.SYS_DUP: {}, - syscall.SYS_EPOLL_CTL: {}, - syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{ + unix.SYS_ACCEPT: {}, + unix.SYS_CLOCK_GETTIME: {}, + unix.SYS_CLOSE: {}, + unix.SYS_DUP: {}, + unix.SYS_EPOLL_CTL: {}, + unix.SYS_EPOLL_PWAIT: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, @@ -39,34 +38,34 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.EqualTo(0), }, }, - syscall.SYS_EVENTFD2: []seccomp.Rule{ + unix.SYS_EVENTFD2: []seccomp.Rule{ { seccomp.EqualTo(0), seccomp.EqualTo(0), }, }, - syscall.SYS_EXIT: {}, - syscall.SYS_EXIT_GROUP: {}, - syscall.SYS_FALLOCATE: []seccomp.Rule{ + unix.SYS_EXIT: {}, + unix.SYS_EXIT_GROUP: {}, + unix.SYS_FALLOCATE: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.EqualTo(0), }, }, - syscall.SYS_FCHMOD: {}, - syscall.SYS_FCHOWNAT: {}, - syscall.SYS_FCNTL: []seccomp.Rule{ + unix.SYS_FCHMOD: {}, + unix.SYS_FCHOWNAT: {}, + unix.SYS_FCNTL: []seccomp.Rule{ { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.F_GETFL), + seccomp.EqualTo(unix.F_GETFL), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.F_SETFL), + seccomp.EqualTo(unix.F_SETFL), }, { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.F_GETFD), + seccomp.EqualTo(unix.F_GETFD), }, // Used by flipcall.PacketWindowAllocator.Init(). 
{ @@ -74,11 +73,11 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.EqualTo(unix.F_ADD_SEALS), }, }, - syscall.SYS_FSTAT: {}, - syscall.SYS_FSTATFS: {}, - syscall.SYS_FSYNC: {}, - syscall.SYS_FTRUNCATE: {}, - syscall.SYS_FUTEX: { + unix.SYS_FSTAT: {}, + unix.SYS_FSTATFS: {}, + unix.SYS_FSYNC: {}, + unix.SYS_FTRUNCATE: {}, + unix.SYS_FUTEX: { seccomp.Rule{ seccomp.MatchAny{}, seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), @@ -116,78 +115,78 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.EqualTo(0), }, }, - syscall.SYS_GETDENTS64: {}, - syscall.SYS_GETPID: {}, - unix.SYS_GETRANDOM: {}, - syscall.SYS_GETTID: {}, - syscall.SYS_GETTIMEOFDAY: {}, - syscall.SYS_LINKAT: {}, - syscall.SYS_LSEEK: {}, - syscall.SYS_MADVISE: {}, - unix.SYS_MEMFD_CREATE: {}, /// Used by flipcall.PacketWindowAllocator.Init(). - syscall.SYS_MKDIRAT: {}, - syscall.SYS_MKNODAT: {}, + unix.SYS_GETDENTS64: {}, + unix.SYS_GETPID: {}, + unix.SYS_GETRANDOM: {}, + unix.SYS_GETTID: {}, + unix.SYS_GETTIMEOFDAY: {}, + unix.SYS_LINKAT: {}, + unix.SYS_LSEEK: {}, + unix.SYS_MADVISE: {}, + unix.SYS_MEMFD_CREATE: {}, /// Used by flipcall.PacketWindowAllocator.Init(). + unix.SYS_MKDIRAT: {}, + unix.SYS_MKNODAT: {}, // Used by the Go runtime as a temporarily workaround for a Linux // 5.2-5.4 bug. // // See src/runtime/os_linux_x86.go. // // TODO(b/148688965): Remove once this is gone from Go. 
- syscall.SYS_MLOCK: []seccomp.Rule{ + unix.SYS_MLOCK: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.EqualTo(4096), }, }, - syscall.SYS_MMAP: []seccomp.Rule{ + unix.SYS_MMAP: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MAP_SHARED), + seccomp.EqualTo(unix.MAP_SHARED), }, { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), + seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS), }, { seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), + seccomp.EqualTo(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_FIXED), }, }, - syscall.SYS_MPROTECT: {}, - syscall.SYS_MUNMAP: {}, - syscall.SYS_NANOSLEEP: {}, - syscall.SYS_OPENAT: {}, - syscall.SYS_PPOLL: {}, - syscall.SYS_PREAD64: {}, - syscall.SYS_PWRITE64: {}, - syscall.SYS_READ: {}, - syscall.SYS_READLINKAT: {}, - syscall.SYS_RECVMSG: []seccomp.Rule{ + unix.SYS_MPROTECT: {}, + unix.SYS_MUNMAP: {}, + unix.SYS_NANOSLEEP: {}, + unix.SYS_OPENAT: {}, + unix.SYS_PPOLL: {}, + unix.SYS_PREAD64: {}, + unix.SYS_PWRITE64: {}, + unix.SYS_READ: {}, + unix.SYS_READLINKAT: {}, + unix.SYS_RECVMSG: []seccomp.Rule{ { seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), + seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_TRUNC), }, { seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), + seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_TRUNC | unix.MSG_PEEK), }, }, - syscall.SYS_RENAMEAT: {}, - syscall.SYS_RESTART_SYSCALL: {}, + unix.SYS_RENAMEAT: {}, + unix.SYS_RESTART_SYSCALL: {}, // May be used by the runtime during panic(). 
- syscall.SYS_RT_SIGACTION: {}, - syscall.SYS_RT_SIGPROCMASK: {}, - syscall.SYS_RT_SIGRETURN: {}, - syscall.SYS_SCHED_YIELD: {}, - syscall.SYS_SENDMSG: []seccomp.Rule{ + unix.SYS_RT_SIGACTION: {}, + unix.SYS_RT_SIGPROCMASK: {}, + unix.SYS_RT_SIGRETURN: {}, + unix.SYS_SCHED_YIELD: {}, + unix.SYS_SENDMSG: []seccomp.Rule{ // Used by fdchannel.Endpoint.SendFD(). { seccomp.MatchAny{}, @@ -198,51 +197,51 @@ var allowedSyscalls = seccomp.SyscallRules{ { seccomp.MatchAny{}, seccomp.MatchAny{}, - seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), + seccomp.EqualTo(unix.MSG_DONTWAIT | unix.MSG_NOSIGNAL), }, }, - syscall.SYS_SHUTDOWN: []seccomp.Rule{ - {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)}, + unix.SYS_SHUTDOWN: []seccomp.Rule{ + {seccomp.MatchAny{}, seccomp.EqualTo(unix.SHUT_RDWR)}, }, - syscall.SYS_SIGALTSTACK: {}, + unix.SYS_SIGALTSTACK: {}, // Used by fdchannel.NewConnectedSockets(). - syscall.SYS_SOCKETPAIR: { + unix.SYS_SOCKETPAIR: { { - seccomp.EqualTo(syscall.AF_UNIX), - seccomp.EqualTo(syscall.SOCK_SEQPACKET | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(unix.AF_UNIX), + seccomp.EqualTo(unix.SOCK_SEQPACKET | unix.SOCK_CLOEXEC), seccomp.EqualTo(0), }, }, - syscall.SYS_SYMLINKAT: {}, - syscall.SYS_TGKILL: []seccomp.Rule{ + unix.SYS_SYMLINKAT: {}, + unix.SYS_TGKILL: []seccomp.Rule{ { seccomp.EqualTo(uint64(os.Getpid())), }, }, - syscall.SYS_UNLINKAT: {}, - syscall.SYS_UTIMENSAT: {}, - syscall.SYS_WRITE: {}, + unix.SYS_UNLINKAT: {}, + unix.SYS_UTIMENSAT: {}, + unix.SYS_WRITE: {}, } var udsSyscalls = seccomp.SyscallRules{ - syscall.SYS_SOCKET: []seccomp.Rule{ + unix.SYS_SOCKET: []seccomp.Rule{ { - seccomp.EqualTo(syscall.AF_UNIX), - seccomp.EqualTo(syscall.SOCK_STREAM), + seccomp.EqualTo(unix.AF_UNIX), + seccomp.EqualTo(unix.SOCK_STREAM), seccomp.EqualTo(0), }, { - seccomp.EqualTo(syscall.AF_UNIX), - seccomp.EqualTo(syscall.SOCK_DGRAM), + seccomp.EqualTo(unix.AF_UNIX), + seccomp.EqualTo(unix.SOCK_DGRAM), seccomp.EqualTo(0), }, { - 
seccomp.EqualTo(syscall.AF_UNIX), - seccomp.EqualTo(syscall.SOCK_SEQPACKET), + seccomp.EqualTo(unix.AF_UNIX), + seccomp.EqualTo(unix.SOCK_SEQPACKET), seccomp.EqualTo(0), }, }, - syscall.SYS_CONNECT: []seccomp.Rule{ + unix.SYS_CONNECT: []seccomp.Rule{ { seccomp.MatchAny{}, }, diff --git a/runsc/fsgofer/filter/config_amd64.go b/runsc/fsgofer/filter/config_amd64.go index 686753d96..2d0151dcc 100644 --- a/runsc/fsgofer/filter/config_amd64.go +++ b/runsc/fsgofer/filter/config_amd64.go @@ -17,30 +17,29 @@ package filter import ( - "syscall" - + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/seccomp" ) func init() { - allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{ + allowedSyscalls[unix.SYS_ARCH_PRCTL] = []seccomp.Rule{ // TODO(b/168828518): No longer used in Go 1.16+. {seccomp.EqualTo(linux.ARCH_SET_FS)}, } - allowedSyscalls[syscall.SYS_CLONE] = []seccomp.Rule{ + allowedSyscalls[unix.SYS_CLONE] = []seccomp.Rule{ // parent_tidptr and child_tidptr are always 0 because neither // CLONE_PARENT_SETTID nor CLONE_CHILD_SETTID are used. { seccomp.EqualTo( - syscall.CLONE_VM | - syscall.CLONE_FS | - syscall.CLONE_FILES | - syscall.CLONE_SETTLS | - syscall.CLONE_SIGHAND | - syscall.CLONE_SYSVSEM | - syscall.CLONE_THREAD), + unix.CLONE_VM | + unix.CLONE_FS | + unix.CLONE_FILES | + unix.CLONE_SETTLS | + unix.CLONE_SIGHAND | + unix.CLONE_SYSVSEM | + unix.CLONE_THREAD), seccomp.MatchAny{}, // newsp seccomp.EqualTo(0), // parent_tidptr seccomp.EqualTo(0), // child_tidptr @@ -49,12 +48,12 @@ func init() { { // TODO(b/168828518): No longer used in Go 1.16+ (on amd64). 
seccomp.EqualTo( - syscall.CLONE_VM | - syscall.CLONE_FS | - syscall.CLONE_FILES | - syscall.CLONE_SIGHAND | - syscall.CLONE_SYSVSEM | - syscall.CLONE_THREAD), + unix.CLONE_VM | + unix.CLONE_FS | + unix.CLONE_FILES | + unix.CLONE_SIGHAND | + unix.CLONE_SYSVSEM | + unix.CLONE_THREAD), seccomp.MatchAny{}, // newsp seccomp.EqualTo(0), // parent_tidptr seccomp.EqualTo(0), // child_tidptr @@ -62,5 +61,5 @@ func init() { }, } - allowedSyscalls[syscall.SYS_NEWFSTATAT] = []seccomp.Rule{} + allowedSyscalls[unix.SYS_NEWFSTATAT] = []seccomp.Rule{} } diff --git a/runsc/fsgofer/filter/config_arm64.go b/runsc/fsgofer/filter/config_arm64.go index ff0cf77a0..7d458c02d 100644 --- a/runsc/fsgofer/filter/config_arm64.go +++ b/runsc/fsgofer/filter/config_arm64.go @@ -17,23 +17,22 @@ package filter import ( - "syscall" - + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/seccomp" ) func init() { - allowedSyscalls[syscall.SYS_CLONE] = []seccomp.Rule{ + allowedSyscalls[unix.SYS_CLONE] = []seccomp.Rule{ // parent_tidptr and child_tidptr are always 0 because neither // CLONE_PARENT_SETTID nor CLONE_CHILD_SETTID are used. 
{ seccomp.EqualTo( - syscall.CLONE_VM | - syscall.CLONE_FS | - syscall.CLONE_FILES | - syscall.CLONE_SIGHAND | - syscall.CLONE_SYSVSEM | - syscall.CLONE_THREAD), + unix.CLONE_VM | + unix.CLONE_FS | + unix.CLONE_FILES | + unix.CLONE_SIGHAND | + unix.CLONE_SYSVSEM | + unix.CLONE_THREAD), seccomp.MatchAny{}, // newsp // These arguments are left uninitialized by the Go // runtime, so they may be anything (and are unused by @@ -44,5 +43,5 @@ func init() { }, } - allowedSyscalls[syscall.SYS_FSTATAT] = []seccomp.Rule{} + allowedSyscalls[unix.SYS_FSTATAT] = []seccomp.Rule{} } diff --git a/runsc/fsgofer/filter/extra_filters_msan.go b/runsc/fsgofer/filter/extra_filters_msan.go index 8c6179c8f..d768ed0bb 100644 --- a/runsc/fsgofer/filter/extra_filters_msan.go +++ b/runsc/fsgofer/filter/extra_filters_msan.go @@ -17,8 +17,7 @@ package filter import ( - "syscall" - + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/seccomp" ) @@ -27,7 +26,7 @@ import ( func instrumentationFilters() seccomp.SyscallRules { log.Warningf("*** SECCOMP WARNING: MSAN is enabled: syscall filters less restrictive!") return seccomp.SyscallRules{ - syscall.SYS_SCHED_GETAFFINITY: {}, - syscall.SYS_SET_ROBUST_LIST: {}, + unix.SYS_SCHED_GETAFFINITY: {}, + unix.SYS_SET_ROBUST_LIST: {}, } } diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go index cbd5c487e..9e75c025d 100644 --- a/runsc/fsgofer/filter/extra_filters_race.go +++ b/runsc/fsgofer/filter/extra_filters_race.go @@ -17,8 +17,7 @@ package filter import ( - "syscall" - + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/seccomp" ) @@ -27,18 +26,18 @@ import ( func instrumentationFilters() seccomp.SyscallRules { log.Warningf("*** SECCOMP WARNING: TSAN is enabled: syscall filters less restrictive!") return seccomp.SyscallRules{ - syscall.SYS_BRK: {}, - syscall.SYS_CLOCK_NANOSLEEP: {}, - syscall.SYS_CLONE: {}, - syscall.SYS_FUTEX: {}, - syscall.SYS_MADVISE: 
{}, - syscall.SYS_MMAP: {}, - syscall.SYS_MUNLOCK: {}, - syscall.SYS_NANOSLEEP: {}, - syscall.SYS_OPEN: {}, - syscall.SYS_OPENAT: {}, - syscall.SYS_SET_ROBUST_LIST: {}, + unix.SYS_BRK: {}, + unix.SYS_CLOCK_NANOSLEEP: {}, + unix.SYS_CLONE: {}, + unix.SYS_FUTEX: {}, + unix.SYS_MADVISE: {}, + unix.SYS_MMAP: {}, + unix.SYS_MUNLOCK: {}, + unix.SYS_NANOSLEEP: {}, + unix.SYS_OPEN: {}, + unix.SYS_OPENAT: {}, + unix.SYS_SET_ROBUST_LIST: {}, // Used within glibc's malloc. - syscall.SYS_TIME: {}, + unix.SYS_TIME: {}, } } diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index cfa3796b1..1e80a634d 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -66,6 +66,9 @@ type Config struct { // HostUDS signals whether the gofer can mount a host's UDS. HostUDS bool + + // enableXattr allows Get/SetXattr for the mounted file systems. + EnableXattr bool } type attachPoint struct { @@ -795,12 +798,22 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { return err } -func (*localFile) GetXattr(string, uint64) (string, error) { - return "", unix.EOPNOTSUPP +func (l *localFile) GetXattr(name string, size uint64) (string, error) { + if !l.attachPoint.conf.EnableXattr { + return "", unix.EOPNOTSUPP + } + buffer := make([]byte, size) + if _, err := unix.Fgetxattr(l.file.FD(), name, buffer); err != nil { + return "", err + } + return string(buffer), nil } -func (*localFile) SetXattr(string, string, uint32) error { - return unix.EOPNOTSUPP +func (l *localFile) SetXattr(name string, value string, flags uint32) error { + if !l.attachPoint.conf.EnableXattr { + return unix.EOPNOTSUPP + } + return unix.Fsetxattr(l.file.FD(), name, []byte(value), int(flags)) } func (*localFile) ListXattr(uint64) (map[string]struct{}, error) { diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go index 99ea9bd32..a5f09f88f 100644 --- a/runsc/fsgofer/fsgofer_test.go +++ b/runsc/fsgofer/fsgofer_test.go @@ -565,6 +565,38 @@ func 
TestSetAttrOwner(t *testing.T) { }) } +func SetGetXattr(l *localFile, name string, value string) error { + if err := l.SetXattr(name, value, 0 /* flags */); err != nil { + return err + } + ret, err := l.GetXattr(name, uint64(len(value))) + if err != nil { + return err + } + if ret != value { + return fmt.Errorf("Got value %s, want %s", ret, value) + } + return nil +} + +func TestSetGetXattr(t *testing.T) { + xattrConfs := []Config{{ROMount: false, EnableXattr: false}, {ROMount: false, EnableXattr: true}} + runCustom(t, []uint32{unix.S_IFREG}, xattrConfs, func(t *testing.T, s state) { + name := "user.test" + value := "tmp" + err := SetGetXattr(s.file, name, value) + if s.conf.EnableXattr { + if err != nil { + t.Fatalf("%v: SetGetXattr failed, err: %v", s, err) + } + } else { + if err == nil { + t.Fatalf("%v: SetGetXattr should have failed", s) + } + } + }) +} + func TestLink(t *testing.T) { if !specutils.HasCapabilities(capability.CAP_DAC_READ_SEARCH) { t.Skipf("Link test requires CAP_DAC_READ_SEARCH, running as %d", os.Getuid()) diff --git a/runsc/mitigate/BUILD b/runsc/mitigate/BUILD index 561854e66..1238890fc 100644 --- a/runsc/mitigate/BUILD +++ b/runsc/mitigate/BUILD @@ -4,28 +4,20 @@ package(licenses = ["notice"]) go_library( name = "mitigate", - srcs = [ - "cpu.go", - "mitigate.go", - "mitigate_conf.go", - ], + srcs = ["mitigate.go"], visibility = [ "//runsc:__subpackages__", ], - deps = [ - "//pkg/log", - "//runsc/flag", - "@in_gopkg_yaml_v2//:go_default_library", - ], + deps = ["@in_gopkg_yaml_v2//:go_default_library"], ) go_test( name = "mitigate_test", size = "small", - srcs = [ - "cpu_test.go", - "mitigate_test.go", - ], + srcs = ["mitigate_test.go"], library = ":mitigate", - deps = ["@com_github_google_go_cmp//cmp:go_default_library"], + deps = [ + "//runsc/mitigate/mock", + "@com_github_google_go_cmp//cmp:go_default_library", + ], ) diff --git a/runsc/mitigate/cpu.go b/runsc/mitigate/cpu.go deleted file mode 100644 index 4b2aa351f..000000000 --- 
a/runsc/mitigate/cpu.go +++ /dev/null @@ -1,423 +0,0 @@ -// Copyright 2021 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mitigate - -import ( - "fmt" - "io/ioutil" - "regexp" - "strconv" - "strings" -) - -const ( - // mds is the only bug we care about. - mds = "mds" - - // Constants for parsing /proc/cpuinfo. - processorKey = "processor" - vendorIDKey = "vendor_id" - cpuFamilyKey = "cpu family" - modelKey = "model" - physicalIDKey = "physical id" - coreIDKey = "core id" - bugsKey = "bugs" - - // Path to shutdown a CPU. - cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online" -) - -// cpuSet contains a map of all CPUs on the system, mapped -// by Physical ID and CoreIDs. threads with the same -// Core and Physical ID are Hyperthread pairs. -type cpuSet map[cpuID]*threadGroup - -// newCPUSet creates a CPUSet from data read from /proc/cpuinfo. -func newCPUSet(data []byte, vulnerable func(thread) bool) (cpuSet, error) { - processors, err := getThreads(string(data)) - if err != nil { - return nil, err - } - - set := make(cpuSet) - for _, p := range processors { - // Each ID is of the form physicalID:coreID. Hyperthread pairs - // have identical physical and core IDs. We need to match - // Hyperthread pairs so that we can shutdown all but one per - // pair. 
- core, ok := set[p.id] - if !ok { - core = &threadGroup{} - set[p.id] = core - } - core.isVulnerable = core.isVulnerable || vulnerable(p) - core.threads = append(core.threads, p) - } - return set, nil -} - -// newCPUSetFromPossible makes a cpuSet data read from -// /sys/devices/system/cpu/possible. This is used in enable operations -// where the caller simply wants to enable all CPUS. -func newCPUSetFromPossible(data []byte) (cpuSet, error) { - threads, err := getThreadsFromPossible(data) - if err != nil { - return nil, err - } - - // We don't care if a CPU is vulnerable or not, we just - // want to return a list of all CPUs on the host. - set := cpuSet{ - threads[0].id: &threadGroup{ - threads: threads, - isVulnerable: false, - }, - } - return set, nil -} - -// String implements the String method for CPUSet. -func (c cpuSet) String() string { - ret := "" - for _, tg := range c { - ret += fmt.Sprintf("%s\n", tg) - } - return ret -} - -// getRemainingList returns the list of threads that will remain active -// after mitigation. -func (c cpuSet) getRemainingList() []thread { - threads := make([]thread, 0, len(c)) - for _, core := range c { - // If we're vulnerable, take only one thread from the pair. - if core.isVulnerable { - threads = append(threads, core.threads[0]) - continue - } - // Otherwise don't shutdown anything. - threads = append(threads, core.threads...) - } - return threads -} - -// getShutdownList returns the list of threads that will be shutdown on -// mitigation. -func (c cpuSet) getShutdownList() []thread { - threads := make([]thread, 0) - for _, core := range c { - // Only if we're vulnerable do shutdown anything. In this case, - // shutdown all but the first entry. - if core.isVulnerable && len(core.threads) > 1 { - threads = append(threads, core.threads[1:]...) - } - } - return threads -} - -// threadGroup represents Hyperthread pairs on the same physical/core ID. 
-type threadGroup struct { - threads []thread - isVulnerable bool -} - -// String implements the String method for threadGroup. -func (c threadGroup) String() string { - ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable) - for _, processor := range c.threads { - ret += fmt.Sprintf("%s\n", processor) - } - return ret -} - -// getThreads returns threads structs from reading /proc/cpuinfo. -func getThreads(data string) ([]thread, error) { - // Each processor entry should start with the - // processor key. Find the beginings of each. - r := buildRegex(processorKey, `\d+`) - indices := r.FindAllStringIndex(data, -1) - if len(indices) < 1 { - return nil, fmt.Errorf("no cpus found for: %q", data) - } - - // Add the ending index for last entry. - indices = append(indices, []int{len(data), -1}) - - // Valid cpus are now defined by strings in between - // indexes (e.g. data[index[i], index[i+1]]). - // There should be len(indicies) - 1 CPUs - // since the last index is the end of the string. - cpus := make([]thread, 0, len(indices)) - // Find each string that represents a CPU. These begin "processor". - for i := 1; i < len(indices); i++ { - start := indices[i-1][0] - end := indices[i][0] - // Parse the CPU entry, which should be between start/end. - c, err := newThread(data[start:end]) - if err != nil { - return nil, err - } - cpus = append(cpus, c) - } - return cpus, nil -} - -// getThreadsFromPossible makes threads from data read from /sys/devices/system/cpu/possible. -func getThreadsFromPossible(data []byte) ([]thread, error) { - possibleRegex := regexp.MustCompile(`(?m)^(\d+)(-(\d+))?$`) - matches := possibleRegex.FindStringSubmatch(string(data)) - if len(matches) != 4 { - return nil, fmt.Errorf("mismatch regex from %s: %q", allPossibleCPUs, string(data)) - } - - // If matches[3] is empty, we only have one cpu entry. 
- if matches[3] == "" { - matches[3] = matches[1] - } - - begin, err := strconv.ParseInt(matches[1], 10, 64) - if err != nil { - return nil, fmt.Errorf("failed to parse begin: %v", err) - } - end, err := strconv.ParseInt(matches[3], 10, 64) - if err != nil { - return nil, fmt.Errorf("failed to parse end: %v", err) - } - if begin > end || begin < 0 || end < 0 { - return nil, fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", begin, end) - } - - ret := make([]thread, 0, end-begin) - for i := begin; i <= end; i++ { - ret = append(ret, thread{ - processorNumber: i, - id: cpuID{ - physicalID: 0, // we don't care about id for enable ops. - coreID: 0, - }, - }) - } - - return ret, nil -} - -// cpuID for each thread is defined by the physical and -// core IDs. If equal, two threads are Hyperthread pairs. -type cpuID struct { - physicalID int64 - coreID int64 -} - -// type cpu represents pertinent info about a cpu. -type thread struct { - processorNumber int64 // the processor number of this CPU. - vendorID string // the vendorID of CPU (e.g. AuthenticAMD). - cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake). - model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake). - id cpuID // id for this thread - bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field. -} - -// newThread parses a CPU from a single cpu entry from /proc/cpuinfo. 
-func newThread(data string) (thread, error) { - empty := thread{} - processor, err := parseProcessor(data) - if err != nil { - return empty, err - } - - vendorID, err := parseVendorID(data) - if err != nil { - return empty, err - } - - cpuFamily, err := parseCPUFamily(data) - if err != nil { - return empty, err - } - - model, err := parseModel(data) - if err != nil { - return empty, err - } - - physicalID, err := parsePhysicalID(data) - if err != nil { - return empty, err - } - - coreID, err := parseCoreID(data) - if err != nil { - return empty, err - } - - bugs, err := parseBugs(data) - if err != nil { - return empty, err - } - - return thread{ - processorNumber: processor, - vendorID: vendorID, - cpuFamily: cpuFamily, - model: model, - id: cpuID{ - physicalID: physicalID, - coreID: coreID, - }, - bugs: bugs, - }, nil -} - -// String implements the String method for thread. -func (t thread) String() string { - template := `CPU: %d -CPU ID: %+v -Vendor: %s -Family/Model: %d/%d -Bugs: %s -` - bugs := make([]string, 0) - for bug := range t.bugs { - bugs = append(bugs, bug) - } - - return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ",")) -} - -// enable turns on the CPU by writing 1 to /sys/devices/cpu/cpu{N}/online. -func (t thread) enable() error { - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - return ioutil.WriteFile(cpuPath, []byte{'1'}, 0644) -} - -// disable turns off the CPU by writing 0 to /sys/devices/cpu/cpu{N}/online. -func (t thread) disable() error { - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644) -} - -// isVulnerable checks if a CPU is vulnerable to mds. -func (t thread) isVulnerable() bool { - _, ok := t.bugs[mds] - return ok -} - -// isActive checks if a CPU is active from /sys/devices/system/cpu/cpu{N}/online -// If the file does not exist (ioutil returns in error), we assume the CPU is on. 
-func (t thread) isActive() bool { - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - data, err := ioutil.ReadFile(cpuPath) - if err != nil { - return true - } - return len(data) > 0 && data[0] != '0' -} - -// similarTo checks family/model/bugs fields for equality of two -// processors. -func (t thread) similarTo(other thread) bool { - if t.vendorID != other.vendorID { - return false - } - - if other.cpuFamily != t.cpuFamily { - return false - } - - if other.model != t.model { - return false - } - - if len(other.bugs) != len(t.bugs) { - return false - } - - for bug := range t.bugs { - if _, ok := other.bugs[bug]; !ok { - return false - } - } - return true -} - -// parseProcessor grabs the processor field from /proc/cpuinfo output. -func parseProcessor(data string) (int64, error) { - return parseIntegerResult(data, processorKey) -} - -// parseVendorID grabs the vendor_id field from /proc/cpuinfo output. -func parseVendorID(data string) (string, error) { - return parseRegex(data, vendorIDKey, `[\w\d]+`) -} - -// parseCPUFamily grabs the cpu family field from /proc/cpuinfo output. -func parseCPUFamily(data string) (int64, error) { - return parseIntegerResult(data, cpuFamilyKey) -} - -// parseModel grabs the model field from /proc/cpuinfo output. -func parseModel(data string) (int64, error) { - return parseIntegerResult(data, modelKey) -} - -// parsePhysicalID parses the physical id field. -func parsePhysicalID(data string) (int64, error) { - return parseIntegerResult(data, physicalIDKey) -} - -// parseCoreID parses the core id field. -func parseCoreID(data string) (int64, error) { - return parseIntegerResult(data, coreIDKey) -} - -// parseBugs grabs the bugs field from /proc/cpuinfo output. 
-func parseBugs(data string) (map[string]struct{}, error) { - result, err := parseRegex(data, bugsKey, `[\d\w\s]*`) - if err != nil { - return nil, err - } - bugs := strings.Split(result, " ") - ret := make(map[string]struct{}, len(bugs)) - for _, bug := range bugs { - ret[bug] = struct{}{} - } - return ret, nil -} - -// parseIntegerResult parses fields expecting an integer. -func parseIntegerResult(data, key string) (int64, error) { - result, err := parseRegex(data, key, `\d+`) - if err != nil { - return 0, err - } - return strconv.ParseInt(result, 0, 64) -} - -// buildRegex builds a regex for parsing each CPU field. -func buildRegex(key, match string) *regexp.Regexp { - reg := fmt.Sprintf(`(?m)^%s\s*:\s*(.*)$`, key) - return regexp.MustCompile(reg) -} - -// parseRegex parses data with key inserted into a standard regex template. -func parseRegex(data, key, match string) (string, error) { - r := buildRegex(key, match) - matches := r.FindStringSubmatch(data) - if len(matches) < 2 { - return "", fmt.Errorf("failed to match key %q: %q", key, data) - } - return matches[1], nil -} diff --git a/runsc/mitigate/cpu_test.go b/runsc/mitigate/cpu_test.go deleted file mode 100644 index 374333465..000000000 --- a/runsc/mitigate/cpu_test.go +++ /dev/null @@ -1,605 +0,0 @@ -// Copyright 2021 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mitigate - -import ( - "fmt" - "io/ioutil" - "strings" - "testing" -) - -// mockCPU represents data from CPUs that will be mitigated. -type mockCPU struct { - name string - vendorID string - family int - model int - modelName string - bugs string - physicalCores int - cores int - threadsPerCore int -} - -var cascadeLake4 = mockCPU{ - name: "CascadeLake", - vendorID: "GenuineIntel", - family: 6, - model: 85, - modelName: "Intel(R) Xeon(R) CPU", - bugs: "spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa", - physicalCores: 1, - cores: 2, - threadsPerCore: 2, -} - -var haswell2 = mockCPU{ - name: "Haswell", - vendorID: "GenuineIntel", - family: 6, - model: 63, - modelName: "Intel(R) Xeon(R) CPU", - bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs", - physicalCores: 1, - cores: 1, - threadsPerCore: 2, -} - -var haswell2core = mockCPU{ - name: "Haswell2Physical", - vendorID: "GenuineIntel", - family: 6, - model: 63, - modelName: "Intel(R) Xeon(R) CPU", - bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs", - physicalCores: 2, - cores: 1, - threadsPerCore: 1, -} - -var amd8 = mockCPU{ - name: "AMD", - vendorID: "AuthenticAMD", - family: 23, - model: 49, - modelName: "AMD EPYC 7B12", - bugs: "sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass", - physicalCores: 4, - cores: 1, - threadsPerCore: 2, -} - -// makeCPUString makes a string formated like /proc/cpuinfo for each cpuTestCase -func (tc mockCPU) makeCPUString() string { - template := `processor : %d -vendor_id : %s -cpu family : %d -model : %d -model name : %s -physical id : %d -core id : %d -cpu cores : %d -bugs : %s -` - ret := `` - for i := 0; i < tc.physicalCores; i++ { - for j := 0; j < tc.cores; j++ { - for k := 0; k < tc.threadsPerCore; k++ { - processorNum := (i*tc.cores+j)*tc.threadsPerCore + k - ret += fmt.Sprintf(template, - processorNum, /*processor*/ - tc.vendorID, /*vendor_id*/ - tc.family, /*cpu family*/ - tc.model, /*model*/ - 
tc.modelName, /*model name*/ - i, /*physical id*/ - j, /*core id*/ - tc.cores*tc.physicalCores, /*cpu cores*/ - tc.bugs /*bugs*/) - } - } - } - return ret -} - -func (tc mockCPU) makeSysPossibleString() string { - max := tc.physicalCores * tc.cores * tc.threadsPerCore - if max == 1 { - return "0" - } - return fmt.Sprintf("0-%d", max-1) -} - -// TestMockCPUSet tests mock cpu test cases against the cpuSet functions. -func TestMockCPUSet(t *testing.T) { - for _, tc := range []struct { - testCase mockCPU - isVulnerable bool - }{ - { - testCase: amd8, - isVulnerable: false, - }, - { - testCase: haswell2, - isVulnerable: true, - }, - { - testCase: haswell2core, - isVulnerable: true, - }, - - { - testCase: cascadeLake4, - isVulnerable: true, - }, - } { - t.Run(tc.testCase.name, func(t *testing.T) { - data := tc.testCase.makeCPUString() - vulnerable := func(t thread) bool { - return t.isVulnerable() - } - set, err := newCPUSet([]byte(data), vulnerable) - if err != nil { - t.Fatalf("Failed to ") - } - remaining := set.getRemainingList() - // In the non-vulnerable case, no cores should be shutdown so all should remain. - want := tc.testCase.physicalCores * tc.testCase.cores * tc.testCase.threadsPerCore - if tc.isVulnerable { - want = tc.testCase.physicalCores * tc.testCase.cores - } - - if want != len(remaining) { - t.Fatalf("Failed to shutdown the correct number of cores: want: %d got: %d", want, len(remaining)) - } - - if !tc.isVulnerable { - return - } - - // If the set is vulnerable, we expect only 1 thread per hyperthread pair. 
- for _, r := range remaining { - if _, ok := set[r.id]; !ok { - t.Fatalf("Entry %+v not in map, there must be two entries in the same thread group.", r) - } - delete(set, r.id) - } - - possible := tc.testCase.makeSysPossibleString() - set, err = newCPUSetFromPossible([]byte(possible)) - if err != nil { - t.Fatalf("Failed to make cpuSet: %v", err) - } - - want = tc.testCase.physicalCores * tc.testCase.cores * tc.testCase.threadsPerCore - got := len(set.getRemainingList()) - if got != want { - t.Fatalf("Returned the wrong number of CPUs want: %d got: %d", want, got) - } - }) - } -} - -// TestGetCPU tests basic parsing of single CPU strings from reading -// /proc/cpuinfo. -func TestGetCPU(t *testing.T) { - data := `processor : 0 -vendor_id : GenuineIntel -cpu family : 6 -model : 85 -physical id: 0 -core id : 0 -bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa itlb_multihit -` - want := thread{ - processorNumber: 0, - vendorID: "GenuineIntel", - cpuFamily: 6, - model: 85, - id: cpuID{ - physicalID: 0, - coreID: 0, - }, - bugs: map[string]struct{}{ - "cpu_meltdown": struct{}{}, - "spectre_v1": struct{}{}, - "spectre_v2": struct{}{}, - "spec_store_bypass": struct{}{}, - "l1tf": struct{}{}, - "mds": struct{}{}, - "swapgs": struct{}{}, - "taa": struct{}{}, - "itlb_multihit": struct{}{}, - }, - } - - got, err := newThread(data) - if err != nil { - t.Fatalf("getCpu failed with error: %v", err) - } - - if !want.similarTo(got) { - t.Fatalf("Failed cpus not similar: got: %+v, want: %+v", got, want) - } - - if !got.isVulnerable() { - t.Fatalf("Failed: cpu should be vulnerable.") - } -} - -func TestInvalid(t *testing.T) { - result, err := getThreads(`something not a processor`) - if err == nil { - t.Fatalf("getCPU set didn't return an error: %+v", result) - } - - if !strings.Contains(err.Error(), "no cpus") { - t.Fatalf("Incorrect error returned: %v", err) - } -} - -// TestCPUSet tests getting the right number of CPUs from -// parsing full output 
of /proc/cpuinfo. -func TestCPUSet(t *testing.T) { - data := `processor : 0 -vendor_id : GenuineIntel -cpu family : 6 -model : 63 -model name : Intel(R) Xeon(R) CPU @ 2.30GHz -stepping : 0 -microcode : 0x1 -cpu MHz : 2299.998 -cache size : 46080 KB -physical id : 0 -siblings : 2 -core id : 0 -cpu cores : 1 -apicid : 0 -initial apicid : 0 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities -bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs -bogomips : 4599.99 -clflush size : 64 -cache_alignment : 64 -address sizes : 46 bits physical, 48 bits virtual -power management: - -processor : 1 -vendor_id : GenuineIntel -cpu family : 6 -model : 63 -model name : Intel(R) Xeon(R) CPU @ 2.30GHz -stepping : 0 -microcode : 0x1 -cpu MHz : 2299.998 -cache size : 46080 KB -physical id : 0 -siblings : 2 -core id : 0 -cpu cores : 1 -apicid : 1 -initial apicid : 1 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities -bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs -bogomips : 4599.99 -clflush size : 64 
-cache_alignment : 64 -address sizes : 46 bits physical, 48 bits virtual -power management: -` - cpuSet, err := getThreads(data) - if err != nil { - t.Fatalf("getCPUSet failed: %v", err) - } - - wantCPULen := 2 - if len(cpuSet) != wantCPULen { - t.Fatalf("Num CPU mismatch: want: %d, got: %d", wantCPULen, len(cpuSet)) - } - - wantCPU := thread{ - vendorID: "GenuineIntel", - cpuFamily: 6, - model: 63, - bugs: map[string]struct{}{ - "cpu_meltdown": struct{}{}, - "spectre_v1": struct{}{}, - "spectre_v2": struct{}{}, - "spec_store_bypass": struct{}{}, - "l1tf": struct{}{}, - "mds": struct{}{}, - "swapgs": struct{}{}, - }, - } - - for _, c := range cpuSet { - if !wantCPU.similarTo(c) { - t.Fatalf("Failed cpus not equal: got: %+v, want: %+v", c, wantCPU) - } - } -} - -// TestReadFile is a smoke test for parsing methods. -func TestReadFile(t *testing.T) { - data, err := ioutil.ReadFile("/proc/cpuinfo") - if err != nil { - t.Fatalf("Failed to read cpuinfo: %v", err) - } - - vulnerable := func(t thread) bool { - return t.isVulnerable() - } - - set, err := newCPUSet(data, vulnerable) - if err != nil { - t.Fatalf("Failed to parse CPU data %v\n%s", err, data) - } - - if len(set) < 1 { - t.Fatalf("Failed to parse any CPUs: %d", len(set)) - } - - t.Log(set) -} - -// TestVulnerable tests if the isVulnerable method is correct -// among known CPUs in GCP. 
-func TestVulnerable(t *testing.T) { - const haswell = `processor : 0 -vendor_id : GenuineIntel -cpu family : 6 -model : 63 -model name : Intel(R) Xeon(R) CPU @ 2.30GHz -stepping : 0 -microcode : 0x1 -cpu MHz : 2299.998 -cache size : 46080 KB -physical id : 0 -siblings : 4 -core id : 0 -cpu cores : 2 -apicid : 0 -initial apicid : 0 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities -bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs -bogomips : 4599.99 -clflush size : 64 -cache_alignment : 64 -address sizes : 46 bits physical, 48 bits virtual -power management:` - - const skylake = `processor : 0 -vendor_id : GenuineIntel -cpu family : 6 -model : 85 -model name : Intel(R) Xeon(R) CPU @ 2.00GHz -stepping : 3 -microcode : 0x1 -cpu MHz : 2000.180 -cache size : 39424 KB -physical id : 0 -siblings : 2 -core id : 0 -cpu cores : 1 -apicid : 0 -initial apicid : 0 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat md_clear 
arch_capabilities -bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa -bogomips : 4000.36 -clflush size : 64 -cache_alignment : 64 -address sizes : 46 bits physical, 48 bits virtual -power management:` - - const cascade = `processor : 0 -vendor_id : GenuineIntel -cpu family : 6 -model : 85 -model name : Intel(R) Xeon(R) CPU -stepping : 7 -microcode : 0x1 -cpu MHz : 2800.198 -cache size : 33792 KB -physical id : 0 -siblings : 2 -core id : 0 -cpu cores : 1 -apicid : 0 -initial apicid : 0 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 - ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmu -lqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowpr -efetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid r -tm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves a -rat avx512_vnni md_clear arch_capabilities -bugs : spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa -bogomips : 5600.39 -clflush size : 64 -cache_alignment : 64 -address sizes : 46 bits physical, 48 bits virtual -power management:` - - const amd = `processor : 0 -vendor_id : AuthenticAMD -cpu family : 23 -model : 49 -model name : AMD EPYC 7B12 -stepping : 0 -microcode : 0x1000065 -cpu MHz : 2250.000 -cache size : 512 KB -physical id : 0 -siblings : 2 -core id : 0 -cpu cores : 1 -apicid : 0 -initial apicid : 0 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid extd_apicid tsc_known_freq pni pclmulqdq ssse3 fma 
cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm cmp_legacy cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw topoext ssbd ibrs ibpb stibp vmmcall fsgsbase tsc_adjust bmi1 avx2 smep bmi2 rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 clzero xsaveerptr arat npt nrip_save umip rdpid -bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass -bogomips : 4500.00 -TLB size : 3072 4K pages -clflush size : 64 -cache_alignment : 64 -address sizes : 48 bits physical, 48 bits virtual -power management:` - - for _, tc := range []struct { - name string - cpuString string - vulnerable bool - }{ - { - name: "haswell", - cpuString: haswell, - vulnerable: true, - }, { - name: "skylake", - cpuString: skylake, - vulnerable: true, - }, { - name: "amd", - cpuString: amd, - vulnerable: false, - }, - } { - t.Run(tc.name, func(t *testing.T) { - set, err := getThreads(tc.cpuString) - if err != nil { - t.Fatalf("Failed to getCPUSet:%v\n %s", err, tc.cpuString) - } - - if len(set) < 1 { - t.Fatalf("Returned empty cpu set: %v", set) - } - - for _, c := range set { - got := func() bool { - return c.isVulnerable() - }() - - if got != tc.vulnerable { - t.Fatalf("Mismatch vulnerable for cpu %+s: got %t want: %t", tc.name, tc.vulnerable, got) - } - } - }) - } -} - -func TestReverse(t *testing.T) { - const noParse = "-1-" - for _, tc := range []struct { - name string - output string - wantErr error - wantCount int - }{ - { - name: "base", - output: "0-7", - wantErr: nil, - wantCount: 8, - }, - { - name: "huge", - output: "0-111", - wantErr: nil, - wantCount: 112, - }, - { - name: "not zero", - output: "50-53", - wantErr: nil, - wantCount: 4, - }, - { - name: "small", - output: "0", - wantErr: nil, - wantCount: 1, - }, - { - name: "invalid order", - output: "10-6", - wantErr: fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", 10, 6), - }, - { - name: "no parse", - output: noParse, - wantErr: fmt.Errorf(`mismatch regex from 
/sys/devices/system/cpu/possible: %q`, noParse), - }, - } { - t.Run(tc.name, func(t *testing.T) { - threads, err := getThreadsFromPossible([]byte(tc.output)) - - switch { - case tc.wantErr == nil: - if err != nil { - t.Fatalf("Wanted nil err, got: %v", err) - } - case err == nil: - t.Fatalf("Want error: %v got: %v", tc.wantErr, err) - default: - if tc.wantErr.Error() != err.Error() { - t.Fatalf("Want error: %v got error: %v", tc.wantErr, err) - } - } - - if len(threads) != tc.wantCount { - t.Fatalf("Want count: %d got: %d", tc.wantCount, len(threads)) - } - }) - } -} - -func TestReverseSmoke(t *testing.T) { - data, err := ioutil.ReadFile(allPossibleCPUs) - if err != nil { - t.Fatalf("Failed to read from possible: %v", err) - } - threads, err := getThreadsFromPossible(data) - if err != nil { - t.Fatalf("Could not parse possible output: %v", err) - } - - if len(threads) <= 0 { - t.Fatalf("Didn't get any CPU cores: %d", len(threads)) - } -} diff --git a/runsc/mitigate/mitigate.go b/runsc/mitigate/mitigate.go index 91de623e3..24f67414c 100644 --- a/runsc/mitigate/mitigate.go +++ b/runsc/mitigate/mitigate.go @@ -14,121 +14,440 @@ // Package mitigate provides libraries for the mitigate command. The // mitigate command mitigates side channel attacks such as MDS. Mitigate -// shuts down CPUs via /sys/devices/system/cpu/cpu{N}/online. In addition, -// the mitigate also handles computing available CPU in kubernetes kube_config -// files. +// shuts down CPUs via /sys/devices/system/cpu/cpu{N}/online. package mitigate import ( "fmt" "io/ioutil" - - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/runsc/flag" + "os" + "regexp" + "sort" + "strconv" + "strings" ) const ( - cpuInfo = "/proc/cpuinfo" - allPossibleCPUs = "/sys/devices/system/cpu/possible" + // mds is the only bug we care about. + mds = "mds" + + // Constants for parsing /proc/cpuinfo. 
+ processorKey = "processor" + vendorIDKey = "vendor_id" + cpuFamilyKey = "cpu family" + modelKey = "model" + physicalIDKey = "physical id" + coreIDKey = "core id" + bugsKey = "bugs" + + // Path to shutdown a CPU. + cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online" ) -// Mitigate handles high level mitigate operations provided to runsc. -type Mitigate struct { - dryRun bool // Run the command without changing the underlying system. - reverse bool // Reverse mitigate by turning on all CPU cores. - other mitigate // Struct holds extra mitigate logic. - path string // path to read for each operation (e.g. /proc/cpuinfo). +// CPUSet contains a map of all CPUs on the system, mapped +// by Physical ID and CoreIDs. threads with the same +// Core and Physical ID are Hyperthread pairs. +type CPUSet map[threadID]*ThreadGroup + +// NewCPUSet creates a CPUSet from data read from /proc/cpuinfo. +func NewCPUSet(data []byte, vulnerable func(Thread) bool) (CPUSet, error) { + processors, err := getThreads(string(data)) + if err != nil { + return nil, err + } + + set := make(CPUSet) + for _, p := range processors { + // Each ID is of the form physicalID:coreID. Hyperthread pairs + // have identical physical and core IDs. We need to match + // Hyperthread pairs so that we can shutdown all but one per + // pair. + core, ok := set[p.id] + if !ok { + core = &ThreadGroup{} + set[p.id] = core + } + core.isVulnerable = core.isVulnerable || vulnerable(p) + core.threads = append(core.threads, p) + } + + // We need to make sure we shutdown the lowest number processor per + // thread group. + for _, tg := range set { + sort.Slice(tg.threads, func(i, j int) bool { + return tg.threads[i].processorNumber < tg.threads[j].processorNumber + }) + } + return set, nil } -// Usage implments Usage for cmd.Mitigate. -func (m Mitigate) Usage() string { - usageString := `mitigate [flags] +// NewCPUSetFromPossible makes a cpuSet data read from +// /sys/devices/system/cpu/possible. 
This is used in enable operations +// where the caller simply wants to enable all CPUS. +func NewCPUSetFromPossible(data []byte) (CPUSet, error) { + threads, err := GetThreadsFromPossible(data) + if err != nil { + return nil, err + } + + // We don't care if a CPU is vulnerable or not, we just + // want to return a list of all CPUs on the host. + set := CPUSet{ + threads[0].id: &ThreadGroup{ + threads: threads, + isVulnerable: false, + }, + } + return set, nil +} -Mitigate mitigates a system to the "MDS" vulnerability by implementing a manual shutdown of SMT. The command checks /proc/cpuinfo for cpus having the MDS vulnerability, and if found, shutdown all but one CPU per hyperthread pair via /sys/devices/system/cpu/cpu{N}/online. CPUs can be restored by writing "2" to each file in /sys/devices/system/cpu/cpu{N}/online or performing a system reboot. +// String implements the String method for CPUSet. +func (c CPUSet) String() string { + ret := "" + for _, tg := range c { + ret += fmt.Sprintf("%s\n", tg) + } + return ret +} -The command can be reversed with --reverse, which reads the total CPUs from /sys/devices/system/cpu/possible and enables all with /sys/devices/system/cpu/cpu{N}/online. -` - return usageString + m.other.usage() +// GetRemainingList returns the list of threads that will remain active +// after mitigation. +func (c CPUSet) GetRemainingList() []Thread { + threads := make([]Thread, 0, len(c)) + for _, core := range c { + // If we're vulnerable, take only one thread from the pair. + if core.isVulnerable { + threads = append(threads, core.threads[0]) + continue + } + // Otherwise don't shutdown anything. + threads = append(threads, core.threads...) + } + return threads } -// SetFlags sets flags for the command Mitigate. 
-func (m Mitigate) SetFlags(f *flag.FlagSet) { - f.BoolVar(&m.dryRun, "dryrun", false, "run the command without changing system") - f.BoolVar(&m.reverse, "reverse", false, "reverse mitigate by enabling all CPUs") - m.other.setFlags(f) - m.path = cpuInfo - if m.reverse { - m.path = allPossibleCPUs +// GetShutdownList returns the list of threads that will be shutdown on +// mitigation. +func (c CPUSet) GetShutdownList() []Thread { + threads := make([]Thread, 0) + for _, core := range c { + // Only if we're vulnerable do shutdown anything. In this case, + // shutdown all but the first entry. + if core.isVulnerable && len(core.threads) > 1 { + threads = append(threads, core.threads[1:]...) + } } + return threads } -// Execute executes the Mitigate command. -func (m Mitigate) Execute() error { - data, err := ioutil.ReadFile(m.path) - if err != nil { - return fmt.Errorf("failed to read %s: %v", m.path, err) +// ThreadGroup represents Hyperthread pairs on the same physical/core ID. +type ThreadGroup struct { + threads []Thread + isVulnerable bool +} + +// String implements the String method for threadGroup. +func (c ThreadGroup) String() string { + ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable) + for _, processor := range c.threads { + ret += fmt.Sprintf("%s\n", processor) } + return ret +} - if m.reverse { - err := m.doReverse(data) +// getThreads returns threads structs from reading /proc/cpuinfo. +func getThreads(data string) ([]Thread, error) { + // Each processor entry should start with the + // processor key. Find the beginings of each. + r := buildRegex(processorKey, `\d+`) + indices := r.FindAllStringIndex(data, -1) + if len(indices) < 1 { + return nil, fmt.Errorf("no cpus found for: %q", data) + } + + // Add the ending index for last entry. + indices = append(indices, []int{len(data), -1}) + + // Valid cpus are now defined by strings in between + // indexes (e.g. data[index[i], index[i+1]]). 
+ // There should be len(indicies) - 1 CPUs + // since the last index is the end of the string. + cpus := make([]Thread, 0, len(indices)) + // Find each string that represents a CPU. These begin "processor". + for i := 1; i < len(indices); i++ { + start := indices[i-1][0] + end := indices[i][0] + // Parse the CPU entry, which should be between start/end. + c, err := newThread(data[start:end]) if err != nil { - return fmt.Errorf("reverse operation failed: %v", err) + return nil, err } - return nil + cpus = append(cpus, c) + } + return cpus, nil +} + +// GetThreadsFromPossible makes threads from data read from /sys/devices/system/cpu/possible. +func GetThreadsFromPossible(data []byte) ([]Thread, error) { + possibleRegex := regexp.MustCompile(`(?m)^(\d+)(-(\d+))?$`) + matches := possibleRegex.FindStringSubmatch(string(data)) + if len(matches) != 4 { + return nil, fmt.Errorf("mismatch regex from possible: %q", string(data)) + } + + // If matches[3] is empty, we only have one cpu entry. + if matches[3] == "" { + matches[3] = matches[1] } - set, err := m.doMitigate(data) + begin, err := strconv.ParseInt(matches[1], 10, 64) if err != nil { - return fmt.Errorf("mitigate operation failed: %v", err) + return nil, fmt.Errorf("failed to parse begin: %v", err) } - return m.other.execute(set, m.dryRun) + end, err := strconv.ParseInt(matches[3], 10, 64) + if err != nil { + return nil, fmt.Errorf("failed to parse end: %v", err) + } + if begin > end || begin < 0 || end < 0 { + return nil, fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", begin, end) + } + + ret := make([]Thread, 0, end-begin) + for i := begin; i <= end; i++ { + ret = append(ret, Thread{ + processorNumber: i, + id: threadID{ + physicalID: 0, // we don't care about id for enable ops. + coreID: 0, + }, + }) + } + + return ret, nil +} + +// threadID for each thread is defined by the physical and +// core IDs. If equal, two threads are Hyperthread pairs. 
+type threadID struct { + physicalID int64 + coreID int64 } -func (m Mitigate) doMitigate(data []byte) (cpuSet, error) { - set, err := newCPUSet(data, m.other.vulnerable) +// Thread represents pertinent info about a single hyperthread in a pair. +type Thread struct { + processorNumber int64 // the processor number of this CPU. + vendorID string // the vendorID of CPU (e.g. AuthenticAMD). + cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake). + model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake). + id threadID // id for this thread + bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field. +} + +// newThread parses a CPU from a single cpu entry from /proc/cpuinfo. +func newThread(data string) (Thread, error) { + empty := Thread{} + processor, err := parseProcessor(data) if err != nil { - return nil, err + return empty, err } - log.Infof("Mitigate found the following CPUs...") - log.Infof("%s", set) + vendorID, err := parseVendorID(data) + if err != nil { + return empty, err + } - disableList := set.getShutdownList() - log.Infof("Disabling threads on thread pairs.") - for _, t := range disableList { - log.Infof("Disable thread: %s", t) - if m.dryRun { - continue - } - if err := t.disable(); err != nil { - return nil, fmt.Errorf("error disabling thread: %s err: %v", t, err) - } + cpuFamily, err := parseCPUFamily(data) + if err != nil { + return empty, err } - log.Infof("Shutdown successful.") - return set, nil + + model, err := parseModel(data) + if err != nil { + return empty, err + } + + physicalID, err := parsePhysicalID(data) + if err != nil { + return empty, err + } + + coreID, err := parseCoreID(data) + if err != nil { + return empty, err + } + + bugs, err := parseBugs(data) + if err != nil { + return empty, err + } + + return Thread{ + processorNumber: processor, + vendorID: vendorID, + cpuFamily: cpuFamily, + model: model, + id: threadID{ + physicalID: physicalID, + coreID: coreID, + }, + bugs: bugs, + }, 
nil +} + +// String implements the String method for thread. +func (t Thread) String() string { + template := `CPU: %d +CPU ID: %+v +Vendor: %s +Family/Model: %d/%d +Bugs: %s +` + bugs := make([]string, 0) + for bug := range t.bugs { + bugs = append(bugs, bug) + } + + return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ",")) +} + +// Enable turns on the CPU by writing 1 to /sys/devices/cpu/cpu{N}/online. +func (t Thread) Enable() error { + // Linux ensures that "cpu0" is always online. + if t.processorNumber == 0 { + return nil + } + cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) + f, err := os.OpenFile(cpuPath, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return fmt.Errorf("failed to open file %s: %v", cpuPath, err) + } + if _, err = f.Write([]byte{'1'}); err != nil { + return fmt.Errorf("failed to write '1' to %s: %v", cpuPath, err) + } + return nil +} + +// Disable turns off the CPU by writing 0 to /sys/devices/cpu/cpu{N}/online. +func (t Thread) Disable() error { + // The core labeled "cpu0" can never be taken offline via this method. + // Linux will return EPERM if the user even creates a file at the /sys + // path above. + if t.processorNumber == 0 { + return fmt.Errorf("invalid shutdown operation: cpu0 cannot be disabled") + } + cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) + return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644) } -func (m Mitigate) doReverse(data []byte) error { - set, err := newCPUSetFromPossible(data) +// IsVulnerable checks if a CPU is vulnerable to mds. +func (t Thread) IsVulnerable() bool { + _, ok := t.bugs[mds] + return ok +} + +// isActive checks if a CPU is active from /sys/devices/system/cpu/cpu{N}/online +// If the file does not exist (ioutil returns in error), we assume the CPU is on. 
+func (t Thread) isActive() bool { + cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) + data, err := ioutil.ReadFile(cpuPath) if err != nil { - return err + return true } + return len(data) > 0 && data[0] != '0' +} - log.Infof("Reverse mitigate found the following CPUs...") - log.Infof("%s", set) +// SimilarTo checks family/model/bugs fields for equality of two +// processors. +func (t Thread) SimilarTo(other Thread) bool { + if t.vendorID != other.vendorID { + return false + } - enableList := set.getRemainingList() + if other.cpuFamily != t.cpuFamily { + return false + } - log.Infof("Enabling all CPUs...") - for _, t := range enableList { - log.Infof("Enabling thread: %s", t) - if m.dryRun { - continue - } - if err := t.enable(); err != nil { - return fmt.Errorf("error enabling thread: %s err: %v", t, err) + if other.model != t.model { + return false + } + + if len(other.bugs) != len(t.bugs) { + return false + } + + for bug := range t.bugs { + if _, ok := other.bugs[bug]; !ok { + return false } } - log.Infof("Enable successful.") - return nil + return true +} + +// parseProcessor grabs the processor field from /proc/cpuinfo output. +func parseProcessor(data string) (int64, error) { + return parseIntegerResult(data, processorKey) +} + +// parseVendorID grabs the vendor_id field from /proc/cpuinfo output. +func parseVendorID(data string) (string, error) { + return parseRegex(data, vendorIDKey, `[\w\d]+`) +} + +// parseCPUFamily grabs the cpu family field from /proc/cpuinfo output. +func parseCPUFamily(data string) (int64, error) { + return parseIntegerResult(data, cpuFamilyKey) +} + +// parseModel grabs the model field from /proc/cpuinfo output. +func parseModel(data string) (int64, error) { + return parseIntegerResult(data, modelKey) +} + +// parsePhysicalID parses the physical id field. +func parsePhysicalID(data string) (int64, error) { + return parseIntegerResult(data, physicalIDKey) +} + +// parseCoreID parses the core id field. 
+func parseCoreID(data string) (int64, error) { + return parseIntegerResult(data, coreIDKey) +} + +// parseBugs grabs the bugs field from /proc/cpuinfo output. +func parseBugs(data string) (map[string]struct{}, error) { + result, err := parseRegex(data, bugsKey, `[\d\w\s]*`) + if err != nil { + return nil, err + } + bugs := strings.Split(result, " ") + ret := make(map[string]struct{}, len(bugs)) + for _, bug := range bugs { + ret[bug] = struct{}{} + } + return ret, nil +} + +// parseIntegerResult parses fields expecting an integer. +func parseIntegerResult(data, key string) (int64, error) { + result, err := parseRegex(data, key, `\d+`) + if err != nil { + return 0, err + } + return strconv.ParseInt(result, 0, 64) +} + +// buildRegex builds a regex for parsing each CPU field. +func buildRegex(key, match string) *regexp.Regexp { + reg := fmt.Sprintf(`(?m)^%s\s*:\s*(.*)$`, key) + return regexp.MustCompile(reg) +} + +// parseRegex parses data with key inserted into a standard regex template. +func parseRegex(data, key, match string) (string, error) { + r := buildRegex(key, match) + matches := r.FindStringSubmatch(data) + if len(matches) < 2 { + return "", fmt.Errorf("failed to match key %q: %q", key, data) + } + return matches[1], nil } diff --git a/runsc/mitigate/mitigate_conf.go b/runsc/mitigate/mitigate_conf.go deleted file mode 100644 index ee326324b..000000000 --- a/runsc/mitigate/mitigate_conf.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2021 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package mitigate - -import ( - "gvisor.dev/gvisor/runsc/flag" -) - -type mitigate struct { -} - -// usage returns the usage string portion for the mitigate. -func (m mitigate) usage() string { return "" } - -// setFlags sets additional flags for the Mitigate command. -func (m mitigate) setFlags(f *flag.FlagSet) {} - -// execute performs additional parts of Execute for Mitigate. -func (m mitigate) execute(set cpuSet, dryrun bool) error { - return nil -} - -func (m mitigate) vulnerable(other thread) bool { - return other.isVulnerable() -} diff --git a/runsc/mitigate/mitigate_test.go b/runsc/mitigate/mitigate_test.go index b3a9a9b18..fbd8eb886 100644 --- a/runsc/mitigate/mitigate_test.go +++ b/runsc/mitigate/mitigate_test.go @@ -17,138 +17,519 @@ package mitigate import ( "fmt" "io/ioutil" - "os" "strings" "testing" + + "gvisor.dev/gvisor/runsc/mitigate/mock" ) -type executeTestCase struct { - name string - mitigateData string - mitigateError error - reverseData string - reverseError error +// TestMockCPUSet tests mock cpu test cases against the cpuSet functions. 
+func TestMockCPUSet(t *testing.T) { + for _, tc := range []struct { + testCase mock.CPU + isVulnerable bool + }{ + { + testCase: mock.AMD8, + isVulnerable: false, + }, + { + testCase: mock.Haswell2, + isVulnerable: true, + }, + { + testCase: mock.Haswell2core, + isVulnerable: true, + }, + { + testCase: mock.CascadeLake2, + isVulnerable: true, + }, + { + testCase: mock.CascadeLake4, + isVulnerable: true, + }, + } { + t.Run(tc.testCase.Name, func(t *testing.T) { + data := tc.testCase.MakeCPUString() + vulnerable := func(t Thread) bool { + return t.IsVulnerable() + } + set, err := NewCPUSet([]byte(data), vulnerable) + if err != nil { + t.Fatalf("Failed to create cpuSet: %v", err) + } + + for _, tg := range set { + if err := checkSorted(tg.threads); err != nil { + t.Fatalf("Failed to sort cpuSet: %v", err) + } + } + + remaining := set.GetRemainingList() + // In the non-vulnerable case, no cores should be shutdown so all should remain. + want := tc.testCase.PhysicalCores * tc.testCase.Cores * tc.testCase.ThreadsPerCore + if tc.isVulnerable { + want = tc.testCase.PhysicalCores * tc.testCase.Cores + } + + if want != len(remaining) { + t.Fatalf("Failed to shutdown the correct number of cores: want: %d got: %d", want, len(remaining)) + } + + if !tc.isVulnerable { + return + } + + // If the set is vulnerable, we expect only 1 thread per hyperthread pair. 
+ for _, r := range remaining { + if _, ok := set[r.id]; !ok { + t.Fatalf("Entry %+v not in map, there must be two entries in the same thread group.", r) + } + delete(set, r.id) + } + + possible := tc.testCase.MakeSysPossibleString() + set, err = NewCPUSetFromPossible([]byte(possible)) + if err != nil { + t.Fatalf("Failed to make cpuSet: %v", err) + } + + want = tc.testCase.PhysicalCores * tc.testCase.Cores * tc.testCase.ThreadsPerCore + got := len(set.GetRemainingList()) + if got != want { + t.Fatalf("Returned the wrong number of CPUs want: %d got: %d", want, got) + } + }) + } } -func TestExecute(t *testing.T) { +// TestGetCPU tests basic parsing of single CPU strings from reading +// /proc/cpuinfo. +func TestGetCPU(t *testing.T) { + data := `processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 85 +physical id: 0 +core id : 0 +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa itlb_multihit +` + want := Thread{ + processorNumber: 0, + vendorID: "GenuineIntel", + cpuFamily: 6, + model: 85, + id: threadID{ + physicalID: 0, + coreID: 0, + }, + bugs: map[string]struct{}{ + "cpu_meltdown": struct{}{}, + "spectre_v1": struct{}{}, + "spectre_v2": struct{}{}, + "spec_store_bypass": struct{}{}, + "l1tf": struct{}{}, + "mds": struct{}{}, + "swapgs": struct{}{}, + "taa": struct{}{}, + "itlb_multihit": struct{}{}, + }, + } - partial := `processor : 1 -vendor_id : AuthenticAMD -cpu family : 23 -model : 49 -model name : AMD EPYC 7B12 -physical id : 0 -bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass + got, err := newThread(data) + if err != nil { + t.Fatalf("getCpu failed with error: %v", err) + } + + if !want.SimilarTo(got) { + t.Fatalf("Failed cpus not similar: got: %+v, want: %+v", got, want) + } + + if !got.IsVulnerable() { + t.Fatalf("Failed: cpu should be vulnerable.") + } +} + +func TestInvalid(t *testing.T) { + result, err := getThreads(`something not a processor`) + if err == nil { + t.Fatalf("getCPU set didn't 
return an error: %+v", result) + } + + if !strings.Contains(err.Error(), "no cpus") { + t.Fatalf("Incorrect error returned: %v", err) + } +} + +// TestCPUSet tests getting the right number of CPUs from +// parsing full output of /proc/cpuinfo. +func TestCPUSet(t *testing.T) { + data := `processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 63 +model name : Intel(R) Xeon(R) CPU @ 2.30GHz +stepping : 0 +microcode : 0x1 +cpu MHz : 2299.998 +cache size : 46080 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 1 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs +bogomips : 4599.99 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 1 +vendor_id : GenuineIntel +cpu family : 6 +model : 63 +model name : Intel(R) Xeon(R) CPU @ 2.30GHz +stepping : 0 +microcode : 0x1 +cpu MHz : 2299.998 +cache size : 46080 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 1 +apicid : 1 +initial apicid : 1 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs 
ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs +bogomips : 4599.99 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual power management: ` + cpuSet, err := getThreads(data) + if err != nil { + t.Fatalf("getCPUSet failed: %v", err) + } - for _, tc := range []executeTestCase{ - { - name: "CascadeLake4", - mitigateData: cascadeLake4.makeCPUString(), - reverseData: cascadeLake4.makeSysPossibleString(), - }, - { - name: "Empty", - mitigateData: "", - mitigateError: fmt.Errorf(`mitigate operation failed: no cpus found for: ""`), - reverseData: "", - reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from %s: ""`, allPossibleCPUs), + wantCPULen := 2 + if len(cpuSet) != wantCPULen { + t.Fatalf("Num CPU mismatch: want: %d, got: %d", wantCPULen, len(cpuSet)) + } + + wantCPU := Thread{ + vendorID: "GenuineIntel", + cpuFamily: 6, + model: 63, + bugs: map[string]struct{}{ + "cpu_meltdown": struct{}{}, + "spectre_v1": struct{}{}, + "spectre_v2": struct{}{}, + "spec_store_bypass": struct{}{}, + "l1tf": struct{}{}, + "mds": struct{}{}, + "swapgs": struct{}{}, }, - { - name: "Partial", - mitigateData: `processor : 0 + } + + for _, c := range cpuSet { + if !wantCPU.SimilarTo(c) { + t.Fatalf("Failed cpus not equal: got: %+v, want: %+v", c, wantCPU) + } + } +} + +// TestReadFile is a smoke test for parsing methods. 
+func TestReadFile(t *testing.T) { + data, err := ioutil.ReadFile("/proc/cpuinfo") + if err != nil { + t.Fatalf("Failed to read cpuinfo: %v", err) + } + + vulnerable := func(t Thread) bool { + return t.IsVulnerable() + } + + set, err := NewCPUSet(data, vulnerable) + if err != nil { + t.Fatalf("Failed to parse CPU data %v\n%s", err, data) + } + + for _, tg := range set { + if err := checkSorted(tg.threads); err != nil { + t.Fatalf("Failed to sort cpuSet: %v", err) + } + } + + if len(set) < 1 { + t.Fatalf("Failed to parse any CPUs: %d", len(set)) + } + + t.Log(set) +} + +// TestVulnerable tests if the isVulnerable method is correct +// among known CPUs in GCP. +func TestVulnerable(t *testing.T) { + const haswell = `processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 63 +model name : Intel(R) Xeon(R) CPU @ 2.30GHz +stepping : 0 +microcode : 0x1 +cpu MHz : 2299.998 +cache size : 46080 KB +physical id : 0 +siblings : 4 +core id : 0 +cpu cores : 2 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs +bogomips : 4599.99 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management:` + + const skylake = `processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 85 +model name : Intel(R) Xeon(R) CPU @ 2.00GHz +stepping : 3 +microcode : 0x1 +cpu MHz : 2000.180 +cache size : 39424 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 1 
+apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat md_clear arch_capabilities +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa +bogomips : 4000.36 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management:` + + const cascade = `processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 85 +model name : Intel(R) Xeon(R) CPU +stepping : 7 +microcode : 0x1 +cpu MHz : 2800.198 +cache size : 33792 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 1 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 + ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmu +lqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowpr +efetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid r +tm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves a +rat avx512_vnni md_clear arch_capabilities +bugs : spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa +bogomips : 5600.39 +clflush size : 64 +cache_alignment : 64 +address sizes 
: 46 bits physical, 48 bits virtual +power management:` + + const amd = `processor : 0 vendor_id : AuthenticAMD cpu family : 23 model : 49 model name : AMD EPYC 7B12 +stepping : 0 +microcode : 0x1000065 +cpu MHz : 2250.000 +cache size : 512 KB physical id : 0 +siblings : 2 core id : 0 cpu cores : 1 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid extd_apicid tsc_known_freq pni pclmulqdq ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm cmp_legacy cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw topoext ssbd ibrs ibpb stibp vmmcall fsgsbase tsc_adjust bmi1 avx2 smep bmi2 rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 clzero xsaveerptr arat npt nrip_save umip rdpid bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass -power management: +bogomips : 4500.00 +TLB size : 3072 4K pages +clflush size : 64 +cache_alignment : 64 +address sizes : 48 bits physical, 48 bits virtual +power management:` -` + partial, - mitigateError: fmt.Errorf(`mitigate operation failed: failed to match key "core id": %q`, partial), - reverseData: "1-", - reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from %s: %q`, allPossibleCPUs, "1-"), + for _, tc := range []struct { + name string + cpuString string + vulnerable bool + }{ + { + name: "haswell", + cpuString: haswell, + vulnerable: true, + }, { + name: "skylake", + cpuString: skylake, + vulnerable: true, + }, { + name: "amd", + cpuString: amd, + vulnerable: false, }, } { - doExecuteTest(t, Mitigate{}, tc) + t.Run(tc.name, func(t *testing.T) { + set, err := getThreads(tc.cpuString) + if err != nil { + t.Fatalf("Failed to getCPUSet:%v\n %s", err, tc.cpuString) + } + + if len(set) < 1 { + t.Fatalf("Returned empty 
cpu set: %v", set) + } + + for _, c := range set { + got := func() bool { + return c.IsVulnerable() + }() + + if got != tc.vulnerable { + t.Fatalf("Mismatch vulnerable for cpu %+s: got %t want: %t", tc.name, tc.vulnerable, got) + } + } + }) } } -func TestExecuteSmoke(t *testing.T) { - smokeMitigate, err := ioutil.ReadFile(cpuInfo) +func TestReverse(t *testing.T) { + const noParse = "-1-" + for _, tc := range []struct { + name string + output string + wantErr error + wantCount int + }{ + { + name: "base", + output: "0-7", + wantErr: nil, + wantCount: 8, + }, + { + name: "huge", + output: "0-111", + wantErr: nil, + wantCount: 112, + }, + { + name: "not zero", + output: "50-53", + wantErr: nil, + wantCount: 4, + }, + { + name: "small", + output: "0", + wantErr: nil, + wantCount: 1, + }, + { + name: "invalid order", + output: "10-6", + wantErr: fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", 10, 6), + }, + { + name: "no parse", + output: noParse, + wantErr: fmt.Errorf(`mismatch regex from possible: %q`, noParse), + }, + } { + t.Run(tc.name, func(t *testing.T) { + threads, err := GetThreadsFromPossible([]byte(tc.output)) + + switch { + case tc.wantErr == nil: + if err != nil { + t.Fatalf("Wanted nil err, got: %v", err) + } + case err == nil: + t.Fatalf("Want error: %v got: %v", tc.wantErr, err) + default: + if tc.wantErr.Error() != err.Error() { + t.Fatalf("Want error: %v got error: %v", tc.wantErr, err) + } + } + + if len(threads) != tc.wantCount { + t.Fatalf("Want count: %d got: %d", tc.wantCount, len(threads)) + } + }) + } +} + +func TestReverseSmoke(t *testing.T) { + data, err := ioutil.ReadFile("/sys/devices/system/cpu/possible") if err != nil { - t.Fatalf("Failed to read %s: %v", cpuInfo, err) + t.Fatalf("Failed to read from possible: %v", err) } - smokeReverse, err := ioutil.ReadFile(allPossibleCPUs) + threads, err := GetThreadsFromPossible(data) if err != nil { - t.Fatalf("Failed to read %s: %v", allPossibleCPUs, err) + t.Fatalf("Could not 
parse possible output: %v", err) } - doExecuteTest(t, Mitigate{}, executeTestCase{ - name: "SmokeTest", - mitigateData: string(smokeMitigate), - reverseData: string(smokeReverse), - }) + if len(threads) <= 0 { + t.Fatalf("Didn't get any CPU cores: %d", len(threads)) + } } -// doExecuteTest runs Execute with the mitigate operation and reverse operation. -func doExecuteTest(t *testing.T, m Mitigate, tc executeTestCase) { - t.Run("Mitigate"+tc.name, func(t *testing.T) { - m.dryRun = true - file, err := ioutil.TempFile("", "outfile.txt") - if err != nil { - t.Fatalf("Failed to create tmpfile: %v", err) - } - defer os.Remove(file.Name()) - - if _, err := file.WriteString(tc.mitigateData); err != nil { - t.Fatalf("Failed to write to file: %v", err) - } - - m.path = file.Name() - - got := m.Execute() - if err = checkErr(tc.mitigateError, got); err != nil { - t.Fatalf("Mitigate error mismatch: %v", err) - } - }) - t.Run("Reverse"+tc.name, func(t *testing.T) { - m.dryRun = true - m.reverse = true - - file, err := ioutil.TempFile("", "outfile.txt") - if err != nil { - t.Fatalf("Failed to create tmpfile: %v", err) - } - defer os.Remove(file.Name()) - - if _, err := file.WriteString(tc.reverseData); err != nil { - t.Fatalf("Failed to write to file: %v", err) - } - - m.path = file.Name() - got := m.Execute() - if err = checkErr(tc.reverseError, got); err != nil { - t.Fatalf("Mitigate error mismatch: %v", err) +func checkSorted(threads []Thread) error { + if len(threads) < 2 { + return nil + } + last := threads[0].processorNumber + for _, t := range threads[1:] { + if last >= t.processorNumber { + return fmt.Errorf("threads out of order: thread %d before %d", t.processorNumber, last) } - }) - -} - -// checkErr checks error for equality. 
-func checkErr(want, got error) error { - switch { - case want == nil && got == nil: - case want != nil && got == nil: - fallthrough - case want == nil && got != nil: - fallthrough - case want.Error() != strings.Trim(got.Error(), " "): - return fmt.Errorf("got: %v want: %v", got, want) + last = t.processorNumber } return nil } diff --git a/runsc/mitigate/mock/BUILD b/runsc/mitigate/mock/BUILD new file mode 100644 index 000000000..5019ff9ee --- /dev/null +++ b/runsc/mitigate/mock/BUILD @@ -0,0 +1,11 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "mock", + srcs = ["mock.go"], + visibility = [ + "//runsc:__subpackages__", + ], +) diff --git a/runsc/mitigate/mock/mock.go b/runsc/mitigate/mock/mock.go new file mode 100644 index 000000000..2db718cb9 --- /dev/null +++ b/runsc/mitigate/mock/mock.go @@ -0,0 +1,141 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mock contains mock CPUs for mitigate tests. +package mock + +import "fmt" + +// CPU represents data from CPUs that will be mitigated. +type CPU struct { + Name string + VendorID string + Family int + Model int + ModelName string + Bugs string + PhysicalCores int + Cores int + ThreadsPerCore int +} + +// CascadeLake2 is a two core Intel CascadeLake machine. 
+var CascadeLake2 = CPU{ + Name: "CascadeLake", + VendorID: "GenuineIntel", + Family: 6, + Model: 85, + ModelName: "Intel(R) Xeon(R) CPU", + Bugs: "spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa", + PhysicalCores: 1, + Cores: 1, + ThreadsPerCore: 2, +} + +// CascadeLake4 is a four core Intel CascadeLake machine. +var CascadeLake4 = CPU{ + Name: "CascadeLake", + VendorID: "GenuineIntel", + Family: 6, + Model: 85, + ModelName: "Intel(R) Xeon(R) CPU", + Bugs: "spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa", + PhysicalCores: 1, + Cores: 2, + ThreadsPerCore: 2, +} + +// Haswell2 is a two core Intel Haswell machine. +var Haswell2 = CPU{ + Name: "Haswell", + VendorID: "GenuineIntel", + Family: 6, + Model: 63, + ModelName: "Intel(R) Xeon(R) CPU", + Bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs", + PhysicalCores: 1, + Cores: 1, + ThreadsPerCore: 2, +} + +// Haswell2core is a 2 core Intel Haswell machine with no hyperthread pairs. +var Haswell2core = CPU{ + Name: "Haswell2Physical", + VendorID: "GenuineIntel", + Family: 6, + Model: 63, + ModelName: "Intel(R) Xeon(R) CPU", + Bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs", + PhysicalCores: 2, + Cores: 1, + ThreadsPerCore: 1, +} + +// AMD8 is an eight core AMD machine. 
+var AMD8 = CPU{ + Name: "AMD", + VendorID: "AuthenticAMD", + Family: 23, + Model: 49, + ModelName: "AMD EPYC 7B12", + Bugs: "sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass", + PhysicalCores: 4, + Cores: 1, + ThreadsPerCore: 2, +} + +// MakeCPUString makes a string formated like /proc/cpuinfo for each cpuTestCase +func (tc CPU) MakeCPUString() string { + template := `processor : %d +vendor_id : %s +cpu family : %d +model : %d +model name : %s +physical id : %d +core id : %d +cpu cores : %d +bugs : %s + +` + + ret := `` + for i := 0; i < tc.PhysicalCores; i++ { + for j := 0; j < tc.Cores; j++ { + for k := 0; k < tc.ThreadsPerCore; k++ { + processorNum := (i*tc.Cores+j)*tc.ThreadsPerCore + k + ret += fmt.Sprintf(template, + processorNum, /*processor*/ + tc.VendorID, /*vendor_id*/ + tc.Family, /*cpu family*/ + tc.Model, /*model*/ + tc.ModelName, /*model name*/ + i, /*physical id*/ + j, /*core id*/ + tc.Cores*tc.PhysicalCores, /*cpu cores*/ + tc.Bugs, /*bugs*/ + ) + } + } + } + return ret +} + +// MakeSysPossibleString makes a string representing a the contents of /sys/devices/system/cpu/possible. +func (tc CPU) MakeSysPossibleString() string { + max := tc.PhysicalCores * tc.Cores * tc.ThreadsPerCore + if max == 1 { + return "0" + } + return fmt.Sprintf("0-%d", max-1) +} diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index 9e429f7d5..f69558021 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -21,7 +21,6 @@ import ( "path/filepath" "runtime" "strconv" - "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/vishvananda/netlink" @@ -102,11 +101,11 @@ func joinNetNS(nsPath string) (func(), error) { // isRootNS determines whether we are running in the root net namespace. // /proc/sys/net/core/rmem_default only exists in root network namespace. 
func isRootNS() (bool, error) { - err := syscall.Access("/proc/sys/net/core/rmem_default", syscall.F_OK) + err := unix.Access("/proc/sys/net/core/rmem_default", unix.F_OK) switch err { case nil: return true, nil - case syscall.ENOENT: + case unix.ENOENT: return false, nil default: return false, fmt.Errorf("failed to access /proc/sys/net/core/rmem_default: %v", err) @@ -270,17 +269,17 @@ type socketEntry struct { func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) (*socketEntry, error) { // Create the socket. const protocol = 0x0300 // htons(ETH_P_ALL) - fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol) + fd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, protocol) if err != nil { return nil, fmt.Errorf("unable to create raw socket: %v", err) } deviceFile := os.NewFile(uintptr(fd), "raw-device-fd") // Bind to the appropriate device. - ll := syscall.SockaddrLinklayer{ + ll := unix.SockaddrLinklayer{ Protocol: protocol, Ifindex: iface.Index, } - if err := syscall.Bind(fd, &ll); err != nil { + if err := unix.Bind(fd, &ll); err != nil { return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err) } @@ -291,7 +290,7 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) ( return nil, fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err) } if gso { - if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil { + if err := unix.SetsockoptInt(fd, unix.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil { return nil, fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err) } gsoMaxSize = ifaceLink.Attrs().GSOMaxSize @@ -307,18 +306,18 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) ( // incurring packet drops. const bufSize = 4 << 20 // 4MB. 
- if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, bufSize); err != nil { - syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF, bufSize) - sz, _ := syscall.GetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF) + if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, bufSize); err != nil { + unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, bufSize) + sz, _ := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF) if sz < bufSize { log.Warningf("Failed to increase rcv buffer to %d on SOCK_RAW on %s. Current buffer %d: %v", bufSize, iface.Name, sz, err) } } - if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUFFORCE, bufSize); err != nil { - syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF, bufSize) - sz, _ := syscall.GetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF) + if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, bufSize); err != nil { + unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF, bufSize) + sz, _ := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF) if sz < bufSize { log.Warningf("Failed to increase snd buffer to %d on SOCK_RAW on %s. 
Curent buffer %d: %v", bufSize, iface.Name, sz, err) } diff --git a/runsc/sandbox/network_unsafe.go b/runsc/sandbox/network_unsafe.go index 2a2a0fb7e..1b808a8a0 100644 --- a/runsc/sandbox/network_unsafe.go +++ b/runsc/sandbox/network_unsafe.go @@ -15,7 +15,6 @@ package sandbox import ( - "syscall" "unsafe" "golang.org/x/sys/unix" @@ -48,7 +47,7 @@ func isGSOEnabled(fd int, intf string) (bool, error) { ifrData: &val, } - if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), unix.SIOCETHTOOL, uintptr(unsafe.Pointer(&ifr))); err != 0 { + if _, _, err := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), unix.SIOCETHTOOL, uintptr(unsafe.Pointer(&ifr))); err != 0 { return false, err } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 7fe65c7ba..450f92645 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -30,6 +30,7 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/control/client" "gvisor.dev/gvisor/pkg/control/server" @@ -83,7 +84,7 @@ type Sandbox struct { // child==true and the sandbox was waited on. This field allows for multiple // threads to wait on sandbox and get the exit code, since Linux will return // WaitStatus to one of the waiters only. - status syscall.WaitStatus + status unix.WaitStatus } // Args is used to configure a new sandbox. @@ -383,7 +384,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn binPath := specutils.ExePath cmd := exec.Command(binPath, conf.ToFlags()...) - cmd.SysProcAttr = &syscall.SysProcAttr{} + cmd.SysProcAttr = &unix.SysProcAttr{} // Open the log files to pass to the sandbox as FDs. // @@ -739,7 +740,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn if args.Attached { // Kill sandbox if parent process exits in attached mode. 
- cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + cmd.SysProcAttr.Pdeathsig = unix.SIGKILL // Tells boot that any process it creates must have pdeathsig set. cmd.Args = append(cmd.Args, "--attached") } @@ -762,7 +763,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn // // NOTE: The error message is checked because error types are lost over // rpc calls. - if strings.Contains(err.Error(), syscall.EACCES.Error()) { + if strings.Contains(err.Error(), unix.EACCES.Error()) { if permsErr := checkBinaryPermissions(conf); permsErr != nil { return fmt.Errorf("%v: %v", err, permsErr) } @@ -782,7 +783,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn } // Wait waits for the containerized process to exit, and returns its WaitStatus. -func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { +func (s *Sandbox) Wait(cid string) (unix.WaitStatus, error) { log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID) if conn, err := s.sandboxConnect(); err != nil { @@ -790,14 +791,14 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { // There is nothing we can do for subcontainers. For the init container, we // can try to get the sandbox exit code. if !s.IsRootContainer(cid) { - return syscall.WaitStatus(0), err + return unix.WaitStatus(0), err } log.Warningf("Wait on container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err) } else { defer conn.Close() // Try the Wait RPC to the sandbox. - var ws syscall.WaitStatus + var ws unix.WaitStatus err = conn.Call(boot.ContainerWait, &cid, &ws) if err == nil { // It worked! @@ -805,7 +806,7 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { } // See comment above. 
if !s.IsRootContainer(cid) { - return syscall.WaitStatus(0), err + return unix.WaitStatus(0), err } // The sandbox may have exited after we connected, but before @@ -817,10 +818,10 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { // The best we can do is ask Linux what the sandbox exit status was, since in // most cases that will be the same as the container exit status. if err := s.waitForStopped(); err != nil { - return syscall.WaitStatus(0), err + return unix.WaitStatus(0), err } if !s.child { - return syscall.WaitStatus(0), fmt.Errorf("sandbox no longer running and its exit status is unavailable") + return unix.WaitStatus(0), fmt.Errorf("sandbox no longer running and its exit status is unavailable") } s.statusMu.Lock() @@ -830,9 +831,9 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { // WaitPID waits for process 'pid' in the container's sandbox and returns its // WaitStatus. -func (s *Sandbox) WaitPID(cid string, pid int32) (syscall.WaitStatus, error) { +func (s *Sandbox) WaitPID(cid string, pid int32) (unix.WaitStatus, error) { log.Debugf("Waiting for PID %d in sandbox %q", pid, s.ID) - var ws syscall.WaitStatus + var ws unix.WaitStatus conn, err := s.sandboxConnect() if err != nil { return ws, err @@ -861,7 +862,7 @@ func (s *Sandbox) destroy() error { log.Debugf("Destroy sandbox %q", s.ID) if s.Pid != 0 { log.Debugf("Killing sandbox %q", s.ID) - if err := syscall.Kill(s.Pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH { + if err := unix.Kill(s.Pid, unix.SIGKILL); err != nil && err != unix.ESRCH { return fmt.Errorf("killing sandbox %q PID %q: %v", s.ID, s.Pid, err) } if err := s.waitForStopped(); err != nil { @@ -875,7 +876,7 @@ func (s *Sandbox) destroy() error { // SignalContainer sends the signal to a container in the sandbox. If all is // true and signal is SIGKILL, then waits for all processes to exit before // returning. 
-func (s *Sandbox) SignalContainer(cid string, sig syscall.Signal, all bool) error { +func (s *Sandbox) SignalContainer(cid string, sig unix.Signal, all bool) error { log.Debugf("Signal sandbox %q", s.ID) conn, err := s.sandboxConnect() if err != nil { @@ -903,7 +904,7 @@ func (s *Sandbox) SignalContainer(cid string, sig syscall.Signal, all bool) erro // fgProcess is true, then the signal is sent to the foreground process group // in the same session that PID belongs to. This is only valid if the process // is attached to a host TTY. -func (s *Sandbox) SignalProcess(cid string, pid int32, sig syscall.Signal, fgProcess bool) error { +func (s *Sandbox) SignalProcess(cid string, pid int32, sig unix.Signal, fgProcess bool) error { log.Debugf("Signal sandbox %q", s.ID) conn, err := s.sandboxConnect() if err != nil { @@ -984,7 +985,7 @@ func (s *Sandbox) Resume(cid string) error { func (s *Sandbox) IsRunning() bool { if s.Pid != 0 { // Send a signal 0 to the sandbox process. - if err := syscall.Kill(s.Pid, 0); err == nil { + if err := unix.Kill(s.Pid, 0); err == nil { // Succeeded, process is running. return true } @@ -1147,7 +1148,7 @@ func (s *Sandbox) waitForStopped() error { } // The sandbox process is a child of the current process, // so we can wait it and collect its zombie. - wpid, err := syscall.Wait4(int(s.Pid), &s.status, syscall.WNOHANG, nil) + wpid, err := unix.Wait4(int(s.Pid), &s.status, unix.WNOHANG, nil) if err != nil { return fmt.Errorf("error waiting the sandbox process: %v", err) } diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go index 138aa4dd1..b62504a8c 100644 --- a/runsc/specutils/fs.go +++ b/runsc/specutils/fs.go @@ -18,9 +18,9 @@ import ( "fmt" "math/bits" "path" - "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" ) type mapping struct { @@ -31,48 +31,48 @@ type mapping struct { // optionsMap maps mount propagation-related OCI filesystem options to mount(2) // syscall flags. 
var optionsMap = map[string]mapping{ - "acl": {set: true, val: syscall.MS_POSIXACL}, - "async": {set: false, val: syscall.MS_SYNCHRONOUS}, - "atime": {set: false, val: syscall.MS_NOATIME}, - "bind": {set: true, val: syscall.MS_BIND}, + "acl": {set: true, val: unix.MS_POSIXACL}, + "async": {set: false, val: unix.MS_SYNCHRONOUS}, + "atime": {set: false, val: unix.MS_NOATIME}, + "bind": {set: true, val: unix.MS_BIND}, "defaults": {set: true, val: 0}, - "dev": {set: false, val: syscall.MS_NODEV}, - "diratime": {set: false, val: syscall.MS_NODIRATIME}, - "dirsync": {set: true, val: syscall.MS_DIRSYNC}, - "exec": {set: false, val: syscall.MS_NOEXEC}, - "noexec": {set: true, val: syscall.MS_NOEXEC}, - "iversion": {set: true, val: syscall.MS_I_VERSION}, - "loud": {set: false, val: syscall.MS_SILENT}, - "mand": {set: true, val: syscall.MS_MANDLOCK}, - "noacl": {set: false, val: syscall.MS_POSIXACL}, - "noatime": {set: true, val: syscall.MS_NOATIME}, - "nodev": {set: true, val: syscall.MS_NODEV}, - "nodiratime": {set: true, val: syscall.MS_NODIRATIME}, - "noiversion": {set: false, val: syscall.MS_I_VERSION}, - "nomand": {set: false, val: syscall.MS_MANDLOCK}, - "norelatime": {set: false, val: syscall.MS_RELATIME}, - "nostrictatime": {set: false, val: syscall.MS_STRICTATIME}, - "nosuid": {set: true, val: syscall.MS_NOSUID}, - "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC}, - "relatime": {set: true, val: syscall.MS_RELATIME}, - "remount": {set: true, val: syscall.MS_REMOUNT}, - "ro": {set: true, val: syscall.MS_RDONLY}, - "rw": {set: false, val: syscall.MS_RDONLY}, - "silent": {set: true, val: syscall.MS_SILENT}, - "strictatime": {set: true, val: syscall.MS_STRICTATIME}, - "suid": {set: false, val: syscall.MS_NOSUID}, - "sync": {set: true, val: syscall.MS_SYNCHRONOUS}, + "dev": {set: false, val: unix.MS_NODEV}, + "diratime": {set: false, val: unix.MS_NODIRATIME}, + "dirsync": {set: true, val: unix.MS_DIRSYNC}, + "exec": {set: false, val: unix.MS_NOEXEC}, + 
"noexec": {set: true, val: unix.MS_NOEXEC}, + "iversion": {set: true, val: unix.MS_I_VERSION}, + "loud": {set: false, val: unix.MS_SILENT}, + "mand": {set: true, val: unix.MS_MANDLOCK}, + "noacl": {set: false, val: unix.MS_POSIXACL}, + "noatime": {set: true, val: unix.MS_NOATIME}, + "nodev": {set: true, val: unix.MS_NODEV}, + "nodiratime": {set: true, val: unix.MS_NODIRATIME}, + "noiversion": {set: false, val: unix.MS_I_VERSION}, + "nomand": {set: false, val: unix.MS_MANDLOCK}, + "norelatime": {set: false, val: unix.MS_RELATIME}, + "nostrictatime": {set: false, val: unix.MS_STRICTATIME}, + "nosuid": {set: true, val: unix.MS_NOSUID}, + "rbind": {set: true, val: unix.MS_BIND | unix.MS_REC}, + "relatime": {set: true, val: unix.MS_RELATIME}, + "remount": {set: true, val: unix.MS_REMOUNT}, + "ro": {set: true, val: unix.MS_RDONLY}, + "rw": {set: false, val: unix.MS_RDONLY}, + "silent": {set: true, val: unix.MS_SILENT}, + "strictatime": {set: true, val: unix.MS_STRICTATIME}, + "suid": {set: false, val: unix.MS_NOSUID}, + "sync": {set: true, val: unix.MS_SYNCHRONOUS}, } // propOptionsMap is similar to optionsMap, but it lists propagation options // that cannot be used together with other flags. var propOptionsMap = map[string]mapping{ - "private": {set: true, val: syscall.MS_PRIVATE}, - "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC}, - "slave": {set: true, val: syscall.MS_SLAVE}, - "rslave": {set: true, val: syscall.MS_SLAVE | syscall.MS_REC}, - "unbindable": {set: true, val: syscall.MS_UNBINDABLE}, - "runbindable": {set: true, val: syscall.MS_UNBINDABLE | syscall.MS_REC}, + "private": {set: true, val: unix.MS_PRIVATE}, + "rprivate": {set: true, val: unix.MS_PRIVATE | unix.MS_REC}, + "slave": {set: true, val: unix.MS_SLAVE}, + "rslave": {set: true, val: unix.MS_SLAVE | unix.MS_REC}, + "unbindable": {set: true, val: unix.MS_UNBINDABLE}, + "runbindable": {set: true, val: unix.MS_UNBINDABLE | unix.MS_REC}, } // invalidOptions list options not allowed. 
@@ -139,7 +139,7 @@ func ValidateMountOptions(opts []string) error { // correct. func validateRootfsPropagation(opt string) error { flags := PropOptionsToFlags([]string{opt}) - if flags&(syscall.MS_SLAVE|syscall.MS_PRIVATE) == 0 { + if flags&(unix.MS_SLAVE|unix.MS_PRIVATE) == 0 { return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt) } return validatePropagation(opt) @@ -147,7 +147,7 @@ func validateRootfsPropagation(opt string) error { func validatePropagation(opt string) error { flags := PropOptionsToFlags([]string{opt}) - exclusive := flags & (syscall.MS_SLAVE | syscall.MS_PRIVATE | syscall.MS_SHARED | syscall.MS_UNBINDABLE) + exclusive := flags & (unix.MS_SLAVE | unix.MS_PRIVATE | unix.MS_SHARED | unix.MS_UNBINDABLE) if bits.OnesCount32(exclusive) > 1 { return fmt.Errorf("mount propagation options are mutually exclusive: %q", opt) } diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 23001d67c..69d7ba5c4 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -109,7 +109,7 @@ func FilterNS(filter []specs.LinuxNamespaceType, s *specs.Spec) []specs.LinuxNam // setNS sets the namespace of the given type. It must be called with // OSThreadLocked. 
func setNS(fd, nsType uintptr) error { - if _, _, err := syscall.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 { + if _, _, err := unix.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 { return err } return nil @@ -158,7 +158,7 @@ func StartInNS(cmd *exec.Cmd, nss []specs.LinuxNamespace) error { defer runtime.UnlockOSThread() if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &syscall.SysProcAttr{} + cmd.SysProcAttr = &unix.SysProcAttr{} } for _, ns := range nss { @@ -185,7 +185,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) { return } if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &syscall.SysProcAttr{} + cmd.SysProcAttr = &unix.SysProcAttr{} } for _, idMap := range s.Linux.UIDMappings { log.Infof("Mapping host uid %d to container uid %d (size=%d)", idMap.HostID, idMap.ContainerID, idMap.Size) @@ -241,8 +241,8 @@ func MaybeRunAsRoot() error { cmd := exec.Command("/proc/self/exe", os.Args[1:]...) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, + cmd.SysProcAttr = &unix.SysProcAttr{ + Cloneflags: unix.CLONE_NEWUSER | unix.CLONE_NEWNS, // Set current user/group as root inside the namespace. Since we may not // have CAP_SETUID/CAP_SETGID, just map root to the current user/group. UidMappings: []syscall.SysProcIDMap{ @@ -255,7 +255,7 @@ func MaybeRunAsRoot() error { GidMappingsEnableSetgroups: false, // Make sure child is killed when the parent terminates. 
- Pdeathsig: syscall.SIGKILL, + Pdeathsig: unix.SIGKILL, } cmd.Env = os.Environ() diff --git a/runsc/specutils/seccomp/BUILD b/runsc/specutils/seccomp/BUILD index 3520f2d6d..e9e647d82 100644 --- a/runsc/specutils/seccomp/BUILD +++ b/runsc/specutils/seccomp/BUILD @@ -18,6 +18,7 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/syscalls/linux", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) @@ -30,5 +31,6 @@ go_test( "//pkg/binary", "//pkg/bpf", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/specutils/seccomp/seccomp.go b/runsc/specutils/seccomp/seccomp.go index 5932f7a41..0ef7a4d54 100644 --- a/runsc/specutils/seccomp/seccomp.go +++ b/runsc/specutils/seccomp/seccomp.go @@ -18,9 +18,9 @@ package seccomp import ( "fmt" - "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/log" @@ -33,9 +33,9 @@ var ( killThreadAction = linux.SECCOMP_RET_KILL_THREAD trapAction = linux.SECCOMP_RET_TRAP // runc always returns EPERM as the errorcode for SECCOMP_RET_ERRNO - errnoAction = linux.SECCOMP_RET_ERRNO.WithReturnCode(uint16(syscall.EPERM)) + errnoAction = linux.SECCOMP_RET_ERRNO.WithReturnCode(uint16(unix.EPERM)) // runc always returns EPERM as the errorcode for SECCOMP_RET_TRACE - traceAction = linux.SECCOMP_RET_TRACE.WithReturnCode(uint16(syscall.EPERM)) + traceAction = linux.SECCOMP_RET_TRACE.WithReturnCode(uint16(unix.EPERM)) allowAction = linux.SECCOMP_RET_ALLOW ) diff --git a/runsc/specutils/seccomp/seccomp_test.go b/runsc/specutils/seccomp/seccomp_test.go index 850c237ba..11a6c8daa 100644 --- a/runsc/specutils/seccomp/seccomp_test.go +++ b/runsc/specutils/seccomp/seccomp_test.go @@ -16,10 +16,10 @@ package seccomp import ( "fmt" - "syscall" "testing" specs 
"github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/bpf" ) @@ -184,7 +184,7 @@ var ( Args: []specs.LinuxSeccompArg{ { Index: 0, - Value: syscall.CLONE_FS, + Value: unix.CLONE_FS, Op: specs.OpEqualTo, }, }, @@ -192,7 +192,7 @@ var ( }, }, }, - input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}), + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_FS}), expected: uint32(errnoAction), }, { @@ -207,12 +207,12 @@ var ( Args: []specs.LinuxSeccompArg{ { Index: 0, - Value: syscall.CLONE_FS, + Value: unix.CLONE_FS, Op: specs.OpEqualTo, }, { Index: 0, - Value: syscall.CLONE_VM, + Value: unix.CLONE_VM, Op: specs.OpEqualTo, }, }, @@ -220,7 +220,7 @@ var ( }, }, }, - input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}), + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_FS}), expected: uint32(errnoAction), }, { @@ -235,12 +235,12 @@ var ( Args: []specs.LinuxSeccompArg{ { Index: 1, - Value: syscall.SOL_SOCKET, + Value: unix.SOL_SOCKET, Op: specs.OpEqualTo, }, { Index: 2, - Value: syscall.SO_PEERCRED, + Value: unix.SO_PEERCRED, Op: specs.OpEqualTo, }, }, @@ -248,7 +248,7 @@ var ( }, }, }, - input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET, syscall.SO_PEERCRED}), + input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, unix.SOL_SOCKET, unix.SO_PEERCRED}), expected: uint32(errnoAction), }, { @@ -263,12 +263,12 @@ var ( Args: []specs.LinuxSeccompArg{ { Index: 1, - Value: syscall.SOL_SOCKET, + Value: unix.SOL_SOCKET, Op: specs.OpEqualTo, }, { Index: 2, - Value: syscall.SO_PEERCRED, + Value: unix.SO_PEERCRED, Op: specs.OpEqualTo, }, }, @@ -276,7 +276,7 @@ var ( }, }, }, - input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET}), + input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, unix.SOL_SOCKET}), expected: uint32(allowAction), }, { @@ -291,7 
+291,7 @@ var ( Args: []specs.LinuxSeccompArg{ { Index: 0, - Value: syscall.CLONE_FS, + Value: unix.CLONE_FS, Op: specs.OpEqualTo, }, }, @@ -299,7 +299,7 @@ var ( }, }, }, - input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_VM}), + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_VM}), expected: uint32(allowAction), }, { @@ -314,8 +314,8 @@ var ( Args: []specs.LinuxSeccompArg{ { Index: 0, - Value: syscall.CLONE_FS, - ValueTwo: syscall.CLONE_FS, + Value: unix.CLONE_FS, + ValueTwo: unix.CLONE_FS, Op: specs.OpMaskedEqual, }, }, @@ -323,7 +323,7 @@ var ( }, }, }, - input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS | syscall.CLONE_VM}), + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_FS | unix.CLONE_VM}), expected: uint32(errnoAction), }, { @@ -338,8 +338,8 @@ var ( Args: []specs.LinuxSeccompArg{ { Index: 0, - Value: syscall.CLONE_FS | syscall.CLONE_VM, - ValueTwo: syscall.CLONE_FS | syscall.CLONE_VM, + Value: unix.CLONE_FS | unix.CLONE_VM, + ValueTwo: unix.CLONE_FS | unix.CLONE_VM, Op: specs.OpMaskedEqual, }, }, @@ -347,7 +347,7 @@ var ( }, }, }, - input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}), + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{unix.CLONE_FS}), expected: uint32(allowAction), }, { diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index ea55bbc7d..5ba38bfe4 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -26,12 +26,12 @@ import ( "path/filepath" "strconv" "strings" - "syscall" "time" "github.com/cenkalti/backoff" "github.com/mohae/deepcopy" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bits" "gvisor.dev/gvisor/pkg/log" @@ -375,9 +375,9 @@ func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) er // Check if the process is still running. 
// If the process is alive, child is 0 because of the NOHANG option. // If the process has terminated, child equals the process id. - var ws syscall.WaitStatus - var ru syscall.Rusage - child, err := syscall.Wait4(pid, &ws, syscall.WNOHANG, &ru) + var ws unix.WaitStatus + var ru unix.Rusage + child, err := unix.Wait4(pid, &ws, unix.WNOHANG, &ru) if err != nil { return backoff.Permanent(fmt.Errorf("error waiting for process: %v", err)) } else if child == pid { @@ -437,7 +437,7 @@ func Mount(src, dst, typ string, flags uint32) error { return fmt.Errorf("mkdir(%q) failed: %v", parent, err) } // Create the destination file if it does not exist. - f, err := os.OpenFile(dst, syscall.O_CREAT, 0777) + f, err := os.OpenFile(dst, unix.O_CREAT, 0777) if err != nil { return fmt.Errorf("open(%q) failed: %v", dst, err) } @@ -445,7 +445,7 @@ func Mount(src, dst, typ string, flags uint32) error { } // Do the mount. - if err := syscall.Mount(src, dst, typ, uintptr(flags), ""); err != nil { + if err := unix.Mount(src, dst, typ, uintptr(flags), ""); err != nil { return fmt.Errorf("mount(%q, %q, %d) failed: %v", src, dst, flags, err) } return nil @@ -466,7 +466,7 @@ func ContainsStr(strs []string, str string) bool { func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { for { r1, r2, err := f() - if err != syscall.EINTR { + if err != unix.EINTR { return r1, r2, err } } |