diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/BUILD | 2 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 478 | ||||
-rw-r--r-- | runsc/boot/filter/config_amd64.go | 4 | ||||
-rw-r--r-- | runsc/boot/filter/config_profile.go | 6 | ||||
-rw-r--r-- | runsc/boot/loader.go | 29 | ||||
-rw-r--r-- | runsc/config/config.go | 4 | ||||
-rw-r--r-- | runsc/config/flags.go | 1 | ||||
-rw-r--r-- | runsc/fsgofer/filter/config.go | 154 | ||||
-rw-r--r-- | runsc/fsgofer/filter/config_amd64.go | 4 | ||||
-rw-r--r-- | runsc/specutils/seccomp/BUILD | 34 | ||||
-rw-r--r-- | runsc/specutils/seccomp/audit_amd64.go | 25 | ||||
-rw-r--r-- | runsc/specutils/seccomp/audit_arm64.go | 25 | ||||
-rw-r--r-- | runsc/specutils/seccomp/seccomp.go | 229 | ||||
-rw-r--r-- | runsc/specutils/seccomp/seccomp_test.go | 414 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 5 |
15 files changed, 1086 insertions, 328 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 704c66742..01f62d50a 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -26,6 +26,7 @@ go_library( deps = [ "//pkg/abi", "//pkg/abi/linux", + "//pkg/bpf", "//pkg/context", "//pkg/control/server", "//pkg/cpuid", @@ -107,6 +108,7 @@ go_library( "//runsc/boot/pprof", "//runsc/config", "//runsc/specutils", + "//runsc/specutils/seccomp", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", "@org_golang_x_sys//unix:go_default_library", diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 149eb0b1b..4ed28b5cd 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -29,7 +29,7 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_CLOCK_GETTIME: {}, syscall.SYS_CLONE: []seccomp.Rule{ { - seccomp.AllowValue( + seccomp.EqualTo( syscall.CLONE_VM | syscall.CLONE_FS | syscall.CLONE_FILES | @@ -42,26 +42,26 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_DUP: {}, syscall.SYS_DUP3: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.O_CLOEXEC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.O_CLOEXEC), }, }, syscall.SYS_EPOLL_CREATE1: {}, syscall.SYS_EPOLL_CTL: {}, syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, }, syscall.SYS_EVENTFD2: []seccomp.Rule{ { - seccomp.AllowValue(0), - seccomp.AllowValue(0), + seccomp.EqualTo(0), + seccomp.EqualTo(0), }, }, syscall.SYS_EXIT: {}, @@ -70,16 +70,16 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_FCHMOD: {}, syscall.SYS_FCNTL: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_GETFL), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_GETFL), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_SETFL), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_SETFL), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_GETFD), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_GETFD), }, }, syscall.SYS_FSTAT: {}, @@ -87,52 +87,52 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_FTRUNCATE: {}, syscall.SYS_FUTEX: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), + seccomp.MatchAny{}, + seccomp.MatchAny{}, }, { - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), + seccomp.MatchAny{}, }, // Non-private variants are included for flipcall support. They are otherwise // unncessary, as the sentry will use only private futexes internally. { - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAIT), - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAIT), + seccomp.MatchAny{}, + seccomp.MatchAny{}, }, { - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAKE), - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAKE), + seccomp.MatchAny{}, }, }, syscall.SYS_GETPID: {}, unix.SYS_GETRANDOM: {}, syscall.SYS_GETSOCKOPT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_DOMAIN), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_DOMAIN), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_TYPE), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_TYPE), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_ERROR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_ERROR), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_SNDBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_SNDBUF), }, }, syscall.SYS_GETTID: {}, @@ -141,34 +141,34 @@ var allowedSyscalls = seccomp.SyscallRules{ // setting/getting termios and winsize. syscall.SYS_IOCTL: []seccomp.Rule{ { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TCGETS), - seccomp.AllowAny{}, /* termios struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TCGETS), + seccomp.MatchAny{}, /* termios struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TCSETS), - seccomp.AllowAny{}, /* termios struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TCSETS), + seccomp.MatchAny{}, /* termios struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TCSETSF), - seccomp.AllowAny{}, /* termios struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TCSETSF), + seccomp.MatchAny{}, /* termios struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TCSETSW), - seccomp.AllowAny{}, /* termios struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TCSETSW), + seccomp.MatchAny{}, /* termios struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TIOCSWINSZ), - seccomp.AllowAny{}, /* winsize struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TIOCSWINSZ), + seccomp.MatchAny{}, /* winsize struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TIOCGWINSZ), - seccomp.AllowAny{}, /* winsize struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TIOCGWINSZ), + seccomp.MatchAny{}, /* winsize struct */ }, }, syscall.SYS_LSEEK: {}, @@ -182,46 +182,46 @@ var allowedSyscalls = seccomp.SyscallRules{ // TODO(b/148688965): Remove once this is gone from Go. syscall.SYS_MLOCK: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(4096), + seccomp.MatchAny{}, + seccomp.EqualTo(4096), }, }, syscall.SYS_MMAP: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_SHARED), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_SHARED), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.PROT_WRITE | syscall.PROT_READ), - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.PROT_WRITE | syscall.PROT_READ), + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), }, }, syscall.SYS_MPROTECT: {}, @@ -237,32 +237,32 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_READ: {}, syscall.SYS_RECVMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), }, }, syscall.SYS_RECVMMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(fdbased.MaxMsgsPerRecv), - seccomp.AllowValue(syscall.MSG_DONTWAIT), - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(fdbased.MaxMsgsPerRecv), + seccomp.EqualTo(syscall.MSG_DONTWAIT), + seccomp.EqualTo(0), }, }, unix.SYS_SENDMMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT), - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT), + seccomp.EqualTo(0), }, }, syscall.SYS_RESTART_SYSCALL: {}, @@ -272,49 +272,49 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_SCHED_YIELD: {}, syscall.SYS_SENDMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), }, }, syscall.SYS_SETITIMER: {}, syscall.SYS_SHUTDOWN: []seccomp.Rule{ // Used by fs/host to shutdown host sockets. - {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RD)}, - {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_WR)}, + {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RD)}, + {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_WR)}, // Used by unet to shutdown connections. - {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)}, + {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)}, }, syscall.SYS_SIGALTSTACK: {}, unix.SYS_STATX: {}, syscall.SYS_SYNC_FILE_RANGE: {}, syscall.SYS_TEE: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(1), /* len */ - seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */ + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(1), /* len */ + seccomp.EqualTo(unix.SPLICE_F_NONBLOCK), /* flags */ }, }, syscall.SYS_TGKILL: []seccomp.Rule{ { - seccomp.AllowValue(uint64(os.Getpid())), + seccomp.EqualTo(uint64(os.Getpid())), }, }, syscall.SYS_UTIMENSAT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(0), /* null pathname */ - seccomp.AllowAny{}, - seccomp.AllowValue(0), /* flags */ + seccomp.MatchAny{}, + seccomp.EqualTo(0), /* null pathname */ + seccomp.MatchAny{}, + seccomp.EqualTo(0), /* flags */ }, }, syscall.SYS_WRITE: {}, // For rawfile.NonBlockingWriteIovec. syscall.SYS_WRITEV: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, seccomp.GreaterThan(0), }, }, @@ -325,10 +325,10 @@ func hostInetFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_ACCEPT4: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), }, }, syscall.SYS_BIND: {}, @@ -337,84 +337,84 @@ func hostInetFilters() seccomp.SyscallRules { syscall.SYS_GETSOCKNAME: {}, syscall.SYS_GETSOCKOPT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IP), - seccomp.AllowValue(syscall.IP_TOS), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_TOS), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IP), - seccomp.AllowValue(syscall.IP_RECVTOS), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_RECVTOS), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_TCLASS), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_TCLASS), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_RECVTCLASS), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_RECVTCLASS), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_V6ONLY), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_V6ONLY), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_ERROR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_ERROR), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_KEEPALIVE), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_KEEPALIVE), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_SNDBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_SNDBUF), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_RCVBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_RCVBUF), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_REUSEADDR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_REUSEADDR), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_TYPE), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_TYPE), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_LINGER), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_LINGER), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_TCP), - seccomp.AllowValue(syscall.TCP_NODELAY), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(syscall.TCP_NODELAY), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_TCP), - seccomp.AllowValue(syscall.TCP_INFO), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(syscall.TCP_INFO), }, }, syscall.SYS_IOCTL: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.TIOCOUTQ), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.TIOCOUTQ), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.TIOCINQ), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.TIOCINQ), }, }, syscall.SYS_LISTEN: {}, @@ -425,103 +425,103 @@ func hostInetFilters() seccomp.SyscallRules { syscall.SYS_SENDTO: {}, syscall.SYS_SETSOCKOPT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_V6ONLY), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_V6ONLY), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_SNDBUF), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_SNDBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_RCVBUF), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_RCVBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_REUSEADDR), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_REUSEADDR), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_TCP), - seccomp.AllowValue(syscall.TCP_NODELAY), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(syscall.TCP_NODELAY), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IP), - seccomp.AllowValue(syscall.IP_TOS), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_TOS), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IP), - seccomp.AllowValue(syscall.IP_RECVTOS), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_RECVTOS), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_TCLASS), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_TCLASS), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_RECVTCLASS), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_RECVTCLASS), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, }, syscall.SYS_SHUTDOWN: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SHUT_RD), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SHUT_RD), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SHUT_WR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SHUT_WR), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SHUT_RDWR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SHUT_RDWR), }, }, syscall.SYS_SOCKET: []seccomp.Rule{ { - seccomp.AllowValue(syscall.AF_INET), - seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_INET), + seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_INET), - seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_INET), + seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_INET6), - seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_INET6), + seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_INET6), - seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_INET6), + seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, }, syscall.SYS_WRITEV: {}, @@ -532,20 +532,20 @@ func controlServerFilters(fd int) seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_ACCEPT: []seccomp.Rule{ { - seccomp.AllowValue(fd), + seccomp.EqualTo(fd), }, }, syscall.SYS_LISTEN: []seccomp.Rule{ { - seccomp.AllowValue(fd), - seccomp.AllowValue(16 /* unet.backlog */), + seccomp.EqualTo(fd), + seccomp.EqualTo(16 /* unet.backlog */), }, }, syscall.SYS_GETSOCKOPT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_PEERCRED), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_PEERCRED), }, }, } diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go index 5335ff82c..9b1799416 100644 --- a/runsc/boot/filter/config_amd64.go +++ b/runsc/boot/filter/config_amd64.go @@ -25,7 +25,7 @@ import ( func init() { allowedSyscalls[syscall.SYS_ARCH_PRCTL] = append(allowedSyscalls[syscall.SYS_ARCH_PRCTL], - seccomp.Rule{seccomp.AllowValue(linux.ARCH_GET_FS)}, - seccomp.Rule{seccomp.AllowValue(linux.ARCH_SET_FS)}, + seccomp.Rule{seccomp.EqualTo(linux.ARCH_GET_FS)}, + seccomp.Rule{seccomp.EqualTo(linux.ARCH_SET_FS)}, ) } diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go index 194952a7b..7b8669595 100644 --- a/runsc/boot/filter/config_profile.go +++ b/runsc/boot/filter/config_profile.go @@ -25,9 +25,9 @@ func profileFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_OPENAT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), }, }, } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index a136da21a..4940ea96a 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -27,6 +27,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/fd" @@ -70,6 +71,7 @@ import ( "gvisor.dev/gvisor/runsc/boot/pprof" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/specutils/seccomp" // Include supported socket providers. "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" @@ -507,6 +509,7 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { return mf, nil } +// installSeccompFilters installs sandbox seccomp filters with the host. func (l *Loader) installSeccompFilters() error { if l.root.conf.DisableSeccomp { filter.Report("syscall filter is DISABLED. Running in less secure mode.") @@ -577,6 +580,7 @@ func (l *Loader) run() error { if _, err := l.createContainerProcess(true, l.sandboxID, &l.root, ep); err != nil { return err } + } ep.tg = l.k.GlobalInit() @@ -764,6 +768,31 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn } } + // Install seccomp filters with the new task if there are any. + if info.conf.OCISeccomp { + if info.spec.Linux != nil && info.spec.Linux.Seccomp != nil { + program, err := seccomp.BuildProgram(info.spec.Linux.Seccomp) + if err != nil { + return nil, fmt.Errorf("building seccomp program: %v", err) + } + + if log.IsLogging(log.Debug) { + out, _ := bpf.DecodeProgram(program) + log.Debugf("Installing OCI seccomp filters\nProgram:\n%s", out) + } + + task := tg.Leader() + // NOTE: It seems Flags are ignored by runc so we ignore them too. + if err := task.AppendSyscallFilter(program, true); err != nil { + return nil, fmt.Errorf("appending seccomp filters: %v", err) + } + } + } else { + if info.spec.Linux != nil && info.spec.Linux.Seccomp != nil { + log.Warningf("Seccomp spec is being ignored") + } + } + return tg, nil } diff --git a/runsc/config/config.go b/runsc/config/config.go index df134bb2f..f30f79f68 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -157,8 +157,12 @@ type Config struct { // Enables FUSE usage. FUSE bool `flag:"fuse"` + // Allows overriding of flags in OCI annotations. AllowFlagOverride bool `flag:"allow-flag-override"` + // Enables seccomp inside the sandbox. + OCISeccomp bool `flag:"oci-seccomp"` + // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in // tests. It allows runsc to start the sandbox process as the current // user, and without chrooting the sandbox process. This can be diff --git a/runsc/config/flags.go b/runsc/config/flags.go index eff46e938..a5f25cfa2 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -63,6 +63,7 @@ func RegisterFlags() { flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.") flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.") flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)") + flag.Bool("oci-seccomp", false, "Enables loading OCI seccomp filters inside the sandbox.") // Flags that control sandbox runtime behavior: FS related. flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index 88814b83c..0cb9b1cae 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -29,7 +29,7 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_CLOCK_GETTIME: {}, syscall.SYS_CLONE: []seccomp.Rule{ { - seccomp.AllowValue( + seccomp.EqualTo( syscall.CLONE_VM | syscall.CLONE_FS | syscall.CLONE_FILES | @@ -43,46 +43,46 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_EPOLL_CTL: {}, syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, }, syscall.SYS_EVENTFD2: []seccomp.Rule{ { - seccomp.AllowValue(0), - seccomp.AllowValue(0), + seccomp.EqualTo(0), + seccomp.EqualTo(0), }, }, syscall.SYS_EXIT: {}, syscall.SYS_EXIT_GROUP: {}, syscall.SYS_FALLOCATE: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, }, syscall.SYS_FCHMOD: {}, syscall.SYS_FCHOWNAT: {}, syscall.SYS_FCNTL: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_GETFL), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_GETFL), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_SETFL), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_SETFL), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_GETFD), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_GETFD), }, // Used by flipcall.PacketWindowAllocator.Init(). { - seccomp.AllowAny{}, - seccomp.AllowValue(unix.F_ADD_SEALS), + seccomp.MatchAny{}, + seccomp.EqualTo(unix.F_ADD_SEALS), }, }, syscall.SYS_FSTAT: {}, @@ -91,31 +91,31 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_FTRUNCATE: {}, syscall.SYS_FUTEX: { seccomp.Rule{ - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, seccomp.Rule{ - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, // Non-private futex used for flipcall. seccomp.Rule{ - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAIT), - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAIT), + seccomp.MatchAny{}, + seccomp.MatchAny{}, }, seccomp.Rule{ - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAKE), - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAKE), + seccomp.MatchAny{}, + seccomp.MatchAny{}, }, }, syscall.SYS_GETDENTS64: {}, @@ -137,28 +137,28 @@ var allowedSyscalls = seccomp.SyscallRules{ // TODO(b/148688965): Remove once this is gone from Go. syscall.SYS_MLOCK: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(4096), + seccomp.MatchAny{}, + seccomp.EqualTo(4096), }, }, syscall.SYS_MMAP: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_SHARED), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_SHARED), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), }, }, syscall.SYS_MPROTECT: {}, @@ -172,14 +172,14 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_READLINKAT: {}, syscall.SYS_RECVMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), }, }, syscall.SYS_RENAMEAT: {}, @@ -190,33 +190,33 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_SENDMSG: []seccomp.Rule{ // Used by fdchannel.Endpoint.SendFD(). { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, // Used by unet.SocketWriter.WriteVec(). { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), }, }, syscall.SYS_SHUTDOWN: []seccomp.Rule{ - {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)}, + {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)}, }, syscall.SYS_SIGALTSTACK: {}, // Used by fdchannel.NewConnectedSockets(). syscall.SYS_SOCKETPAIR: { { - seccomp.AllowValue(syscall.AF_UNIX), - seccomp.AllowValue(syscall.SOCK_SEQPACKET | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_UNIX), + seccomp.EqualTo(syscall.SOCK_SEQPACKET | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, }, syscall.SYS_SYMLINKAT: {}, syscall.SYS_TGKILL: []seccomp.Rule{ { - seccomp.AllowValue(uint64(os.Getpid())), + seccomp.EqualTo(uint64(os.Getpid())), }, }, syscall.SYS_UNLINKAT: {}, @@ -227,24 +227,24 @@ var allowedSyscalls = seccomp.SyscallRules{ var udsSyscalls = seccomp.SyscallRules{ syscall.SYS_SOCKET: []seccomp.Rule{ { - seccomp.AllowValue(syscall.AF_UNIX), - seccomp.AllowValue(syscall.SOCK_STREAM), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_UNIX), + seccomp.EqualTo(syscall.SOCK_STREAM), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_UNIX), - seccomp.AllowValue(syscall.SOCK_DGRAM), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_UNIX), + seccomp.EqualTo(syscall.SOCK_DGRAM), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_UNIX), - seccomp.AllowValue(syscall.SOCK_SEQPACKET), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_UNIX), + seccomp.EqualTo(syscall.SOCK_SEQPACKET), + seccomp.EqualTo(0), }, }, syscall.SYS_CONNECT: []seccomp.Rule{ { - seccomp.AllowAny{}, + seccomp.MatchAny{}, }, }, } diff --git a/runsc/fsgofer/filter/config_amd64.go b/runsc/fsgofer/filter/config_amd64.go index a4b28cb8b..53506b5e1 100644 --- a/runsc/fsgofer/filter/config_amd64.go +++ b/runsc/fsgofer/filter/config_amd64.go @@ -25,8 +25,8 @@ import ( func init() { allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{ - {seccomp.AllowValue(linux.ARCH_GET_FS)}, - {seccomp.AllowValue(linux.ARCH_SET_FS)}, + {seccomp.EqualTo(linux.ARCH_GET_FS)}, + {seccomp.EqualTo(linux.ARCH_SET_FS)}, } allowedSyscalls[syscall.SYS_NEWFSTATAT] = []seccomp.Rule{} diff --git a/runsc/specutils/seccomp/BUILD b/runsc/specutils/seccomp/BUILD new file mode 100644 index 000000000..3520f2d6d --- /dev/null +++ b/runsc/specutils/seccomp/BUILD @@ -0,0 +1,34 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "seccomp", + srcs = [ + "audit_amd64.go", + "audit_arm64.go", + "seccomp.go", + ], + visibility = ["//:sandbox"], + deps = [ + "//pkg/abi/linux", + "//pkg/bpf", + "//pkg/log", + "//pkg/seccomp", + "//pkg/sentry/kernel", + "//pkg/sentry/syscalls/linux", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + ], +) + +go_test( + name = "seccomp_test", + size = "small", + srcs = ["seccomp_test.go"], + library = ":seccomp", + deps = [ + "//pkg/binary", + "//pkg/bpf", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + ], +) diff --git a/runsc/specutils/seccomp/audit_amd64.go b/runsc/specutils/seccomp/audit_amd64.go new file mode 100644 index 000000000..417cf4a7a --- /dev/null +++ b/runsc/specutils/seccomp/audit_amd64.go @@ -0,0 +1,25 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package seccomp + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" +) + +const ( + nativeArchAuditNo = linux.AUDIT_ARCH_X86_64 +) diff --git a/runsc/specutils/seccomp/audit_arm64.go b/runsc/specutils/seccomp/audit_arm64.go new file mode 100644 index 000000000..b727ceff2 --- /dev/null +++ b/runsc/specutils/seccomp/audit_arm64.go @@ -0,0 +1,25 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package seccomp + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" +) + +const ( + nativeArchAuditNo = linux.AUDIT_ARCH_AARCH64 +) diff --git a/runsc/specutils/seccomp/seccomp.go b/runsc/specutils/seccomp/seccomp.go new file mode 100644 index 000000000..5932f7a41 --- /dev/null +++ b/runsc/specutils/seccomp/seccomp.go @@ -0,0 +1,229 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package seccomp implements some features of libseccomp in order to support +// OCI. +package seccomp + +import ( + "fmt" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/sentry/kernel" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" +) + +var ( + killThreadAction = linux.SECCOMP_RET_KILL_THREAD + trapAction = linux.SECCOMP_RET_TRAP + // runc always returns EPERM as the errorcode for SECCOMP_RET_ERRNO + errnoAction = linux.SECCOMP_RET_ERRNO.WithReturnCode(uint16(syscall.EPERM)) + // runc always returns EPERM as the errorcode for SECCOMP_RET_TRACE + traceAction = linux.SECCOMP_RET_TRACE.WithReturnCode(uint16(syscall.EPERM)) + allowAction = linux.SECCOMP_RET_ALLOW +) + +// BuildProgram generates a bpf program based on the given OCI seccomp +// config. +func BuildProgram(s *specs.LinuxSeccomp) (bpf.Program, error) { + defaultAction, err := convertAction(s.DefaultAction) + if err != nil { + return bpf.Program{}, fmt.Errorf("secomp default action: %w", err) + } + ruleset, err := convertRules(s) + if err != nil { + return bpf.Program{}, fmt.Errorf("invalid seccomp rules: %w", err) + } + + instrs, err := seccomp.BuildProgram(ruleset, defaultAction, killThreadAction) + if err != nil { + return bpf.Program{}, fmt.Errorf("building seccomp program: %w", err) + } + + program, err := bpf.Compile(instrs) + if err != nil { + return bpf.Program{}, fmt.Errorf("compiling seccomp program: %w", err) + } + + return program, nil +} + +// lookupSyscallNo gets the syscall number for the syscall with the given name +// for the given architecture. +func lookupSyscallNo(arch uint32, name string) (uint32, error) { + var table *kernel.SyscallTable + switch arch { + case linux.AUDIT_ARCH_X86_64: + table = slinux.AMD64 + case linux.AUDIT_ARCH_AARCH64: + table = slinux.ARM64 + } + if table == nil { + return 0, fmt.Errorf("unsupported architecture: %d", arch) + } + n, err := table.LookupNo(name) + if err != nil { + return 0, err + } + return uint32(n), nil +} + +// convertAction converts a LinuxSeccompAction to BPFAction +func convertAction(act specs.LinuxSeccompAction) (linux.BPFAction, error) { + // TODO(gvisor.dev/issue/3124): Update specs package to include ActLog and ActKillProcess. + switch act { + case specs.ActKill: + return killThreadAction, nil + case specs.ActTrap: + return trapAction, nil + case specs.ActErrno: + return errnoAction, nil + case specs.ActTrace: + return traceAction, nil + case specs.ActAllow: + return allowAction, nil + default: + return 0, fmt.Errorf("invalid action: %v", act) + } +} + +// convertRules converts OCI linux seccomp rules into RuleSets that can be used by +// the seccomp package to build a seccomp program. +func convertRules(s *specs.LinuxSeccomp) ([]seccomp.RuleSet, error) { + // NOTE: Architectures are only really relevant when calling 32bit syscalls + // on a 64bit system. Since we don't support that in gVisor anyway, we + // ignore Architectures and only test against the native architecture. + + ruleset := []seccomp.RuleSet{} + + for _, syscall := range s.Syscalls { + sysRules := seccomp.NewSyscallRules() + + action, err := convertAction(syscall.Action) + if err != nil { + return nil, err + } + + // Args + rules, err := convertArgs(syscall.Args) + if err != nil { + return nil, err + } + + for _, name := range syscall.Names { + syscallNo, err := lookupSyscallNo(nativeArchAuditNo, name) + if err != nil { + // If there is an error looking up the syscall number, assume it is + // not supported on this architecture and ignore it. This is, for + // better or worse, what runc does. + log.Warningf("OCI seccomp: ignoring syscall %q", name) + continue + } + + for _, rule := range rules { + sysRules.AddRule(uintptr(syscallNo), rule) + } + } + + ruleset = append(ruleset, seccomp.RuleSet{ + Rules: sysRules, + Action: action, + }) + } + + return ruleset, nil +} + +// convertArgs converts an OCI seccomp argument rule to a list of seccomp.Rule. +func convertArgs(args []specs.LinuxSeccompArg) ([]seccomp.Rule, error) { + argCounts := make([]uint, 6) + + for _, arg := range args { + if arg.Index > 6 { + return nil, fmt.Errorf("invalid index: %d", arg.Index) + } + + argCounts[arg.Index]++ + } + + // NOTE: If multiple rules apply to the same argument (same index) the + // action is triggered if any one of the rules matches (OR). If not, then + // all rules much match in order to trigger the action (AND). This appears to + // be some kind of legacy behavior of runc that nevertheless needs to be + // supported to maintain compatibility. + + hasMultipleArgs := false + for _, count := range argCounts { + if count > 1 { + hasMultipleArgs = true + break + } + } + + if hasMultipleArgs { + rules := []seccomp.Rule{} + + // Old runc behavior - do this for compatibility. + // Add rules as ORs by adding separate Rules. + for _, arg := range args { + rule := seccomp.Rule{nil, nil, nil, nil, nil, nil} + + if err := convertRule(arg, &rule); err != nil { + return nil, err + } + + rules = append(rules, rule) + } + + return rules, nil + } + + // Add rules as ANDs by adding to the same Rule. + rule := seccomp.Rule{nil, nil, nil, nil, nil, nil} + for _, arg := range args { + if err := convertRule(arg, &rule); err != nil { + return nil, err + } + } + + return []seccomp.Rule{rule}, nil +} + +// convertRule converts and adds the arg to a rule. +func convertRule(arg specs.LinuxSeccompArg, rule *seccomp.Rule) error { + switch arg.Op { + case specs.OpEqualTo: + rule[arg.Index] = seccomp.EqualTo(arg.Value) + case specs.OpNotEqual: + rule[arg.Index] = seccomp.NotEqual(arg.Value) + case specs.OpGreaterThan: + rule[arg.Index] = seccomp.GreaterThan(arg.Value) + case specs.OpGreaterEqual: + rule[arg.Index] = seccomp.GreaterThanOrEqual(arg.Value) + case specs.OpLessThan: + rule[arg.Index] = seccomp.LessThan(arg.Value) + case specs.OpLessEqual: + rule[arg.Index] = seccomp.LessThanOrEqual(arg.Value) + case specs.OpMaskedEqual: + rule[arg.Index] = seccomp.MaskedEqual(uintptr(arg.Value), uintptr(arg.ValueTwo)) + default: + return fmt.Errorf("unsupported operand: %q", arg.Op) + } + return nil +} diff --git a/runsc/specutils/seccomp/seccomp_test.go b/runsc/specutils/seccomp/seccomp_test.go new file mode 100644 index 000000000..2079cd2e9 --- /dev/null +++ b/runsc/specutils/seccomp/seccomp_test.go @@ -0,0 +1,414 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package seccomp + +import ( + "fmt" + "syscall" + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/bpf" +) + +type seccompData struct { + nr uint32 + arch uint32 + instructionPointer uint64 + args [6]uint64 +} + +// asInput converts a seccompData to a bpf.Input. +func asInput(d seccompData) bpf.Input { + return bpf.InputBytes{binary.Marshal(nil, binary.LittleEndian, d), binary.LittleEndian} +} + +// testInput creates an Input struct with given seccomp input values. +func testInput(arch uint32, syscallName string, args *[6]uint64) bpf.Input { + syscallNo, err := lookupSyscallNo(arch, syscallName) + if err != nil { + // Assume tests set valid syscall names. + panic(err) + } + + if args == nil { + argArray := [6]uint64{0, 0, 0, 0, 0, 0} + args = &argArray + } + + data := seccompData{ + nr: syscallNo, + arch: arch, + args: *args, + } + + return asInput(data) +} + +// testCase holds a seccomp test case. +type testCase struct { + name string + config specs.LinuxSeccomp + input bpf.Input + expected uint32 +} + +var ( + // seccompTests is a list of speccomp test cases. + seccompTests = []testCase{ + { + name: "default_allow", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + }, + input: testInput(nativeArchAuditNo, "read", nil), + expected: uint32(allowAction), + }, + { + name: "default_deny", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActErrno, + }, + input: testInput(nativeArchAuditNo, "read", nil), + expected: uint32(errnoAction), + }, + { + name: "deny_arch", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getcwd", + }, + Action: specs.ActErrno, + }, + }, + }, + // Syscall matches but the arch is AUDIT_ARCH_X86 so the return + // value is the bad arch action. + input: asInput(seccompData{nr: 183, arch: 0x40000003}), // + expected: uint32(killThreadAction), + }, + { + name: "match_name_errno", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getcwd", + "chmod", + }, + Action: specs.ActErrno, + }, + { + Names: []string{ + "write", + }, + Action: specs.ActTrace, + }, + }, + }, + input: testInput(nativeArchAuditNo, "getcwd", nil), + expected: uint32(errnoAction), + }, + { + name: "match_name_trace", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getcwd", + "chmod", + }, + Action: specs.ActErrno, + }, + { + Names: []string{ + "write", + }, + Action: specs.ActTrace, + }, + }, + }, + input: testInput(nativeArchAuditNo, "write", nil), + expected: uint32(traceAction), + }, + { + name: "no_match_name_allow", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getcwd", + "chmod", + }, + Action: specs.ActErrno, + }, + { + Names: []string{ + "write", + }, + Action: specs.ActTrace, + }, + }, + }, + input: testInput(nativeArchAuditNo, "open", nil), + expected: uint32(allowAction), + }, + { + name: "simple_match_args", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}), + expected: uint32(errnoAction), + }, + { + name: "match_args_or", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS, + Op: specs.OpEqualTo, + }, + { + Index: 0, + Value: syscall.CLONE_VM, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}), + expected: uint32(errnoAction), + }, + { + name: "match_args_and", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getsockopt", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 1, + Value: syscall.SOL_SOCKET, + Op: specs.OpEqualTo, + }, + { + Index: 2, + Value: syscall.SO_PEERCRED, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET, syscall.SO_PEERCRED}), + expected: uint32(errnoAction), + }, + { + name: "no_match_args_and", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getsockopt", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 1, + Value: syscall.SOL_SOCKET, + Op: specs.OpEqualTo, + }, + { + Index: 2, + Value: syscall.SO_PEERCRED, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET}), + expected: uint32(allowAction), + }, + { + name: "Simple args (no match)", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_VM}), + expected: uint32(allowAction), + }, + { + name: "OpMaskedEqual (match)", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS, + ValueTwo: syscall.CLONE_FS, + Op: specs.OpMaskedEqual, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS | syscall.CLONE_VM}), + expected: uint32(errnoAction), + }, + { + name: "OpMaskedEqual (no match)", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS | syscall.CLONE_VM, + ValueTwo: syscall.CLONE_FS | syscall.CLONE_VM, + Op: specs.OpMaskedEqual, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}), + expected: uint32(allowAction), + }, + { + name: "OpMaskedEqual (clone)", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActErrno, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + // This comes from the Docker default seccomp + // profile for clone. + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: 0x7e020000, + ValueTwo: 0x0, + Op: specs.OpMaskedEqual, + }, + }, + Action: specs.ActAllow, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{0x50f00}), + expected: uint32(allowAction), + }, + } +) + +// TestRunscSeccomp generates seccomp programs from OCI config and executes +// them using runsc's library, comparing against expected results. +func TestRunscSeccomp(t *testing.T) { + for _, tc := range seccompTests { + t.Run(tc.name, func(t *testing.T) { + runscProgram, err := BuildProgram(&tc.config) + if err != nil { + t.Fatalf("generating runsc BPF: %v", err) + } + + if err := checkProgram(runscProgram, tc.input, tc.expected); err != nil { + t.Fatalf("running runsc BPF: %v", err) + } + }) + } +} + +// checkProgram runs the given program over the given input and checks the +// result against the expected output. +func checkProgram(p bpf.Program, in bpf.Input, expected uint32) error { + result, err := bpf.Exec(p, in) + if err != nil { + return err + } + + if result != expected { + // Include a decoded version of the program in output for debugging purposes. + decoded, _ := bpf.DecodeProgram(p) + return fmt.Errorf("Unexpected result: got: %d, expected: %d\nBPF Program\n%s", result, expected, decoded) + } + + return nil +} diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index a2275398a..0392e3e83 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -111,11 +111,6 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.") } - // TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox. - if spec.Linux != nil && spec.Linux.Seccomp != nil { - log.Warningf("Seccomp spec is being ignored") - } - if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { if err := validateRootfsPropagation(spec.Linux.RootfsPropagation); err != nil { return err |