summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/filter/BUILD1
-rw-r--r--runsc/boot/filter/config.go447
-rw-r--r--runsc/boot/filter/filter.go17
-rw-r--r--runsc/boot/loader.go10
4 files changed, 400 insertions, 75 deletions
diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD
index d20605a91..48f2c8024 100644
--- a/runsc/boot/filter/BUILD
+++ b/runsc/boot/filter/BUILD
@@ -22,6 +22,7 @@ go_library(
"//pkg/sentry/platform",
"//pkg/sentry/platform/kvm",
"//pkg/sentry/platform/ptrace",
+ "//pkg/tcpip/link/fdbased",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 1bec89900..7227ea2b7 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -15,72 +15,127 @@
package filter
import (
+ "os"
"syscall"
"golang.org/x/sys/unix"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
)
-// allowedSyscalls is the set of syscalls executed by the Sentry
-// to the host OS.
+// allowedSyscalls is the set of syscalls executed by the Sentry to the host OS.
var allowedSyscalls = seccomp.SyscallRules{
- syscall.SYS_ACCEPT: {},
- syscall.SYS_ARCH_PRCTL: {},
+ syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
+ {seccomp.AllowValue(linux.ARCH_GET_FS)},
+ {seccomp.AllowValue(linux.ARCH_SET_FS)},
+ },
syscall.SYS_CLOCK_GETTIME: {},
- syscall.SYS_CLONE: {},
+ syscall.SYS_CLONE: []seccomp.Rule{
+ {
+ seccomp.AllowValue(
+ syscall.CLONE_VM |
+ syscall.CLONE_FS |
+ syscall.CLONE_FILES |
+ syscall.CLONE_SIGHAND |
+ syscall.CLONE_SYSVSEM |
+ syscall.CLONE_THREAD),
+ },
+ },
syscall.SYS_CLOSE: {},
syscall.SYS_DUP: {},
syscall.SYS_EPOLL_CREATE1: {},
syscall.SYS_EPOLL_CTL: {},
- syscall.SYS_EPOLL_PWAIT: {},
- syscall.SYS_EPOLL_WAIT: {},
- syscall.SYS_EVENTFD2: {},
- syscall.SYS_EXIT: {},
- syscall.SYS_EXIT_GROUP: {},
- syscall.SYS_FALLOCATE: {},
- syscall.SYS_FCNTL: {},
- syscall.SYS_FSTAT: {},
- syscall.SYS_FSYNC: {},
- syscall.SYS_FTRUNCATE: {},
- syscall.SYS_FUTEX: {},
- syscall.SYS_GETDENTS64: {},
- syscall.SYS_GETPID: {},
- unix.SYS_GETRANDOM: {},
- syscall.SYS_GETSOCKOPT: {},
- syscall.SYS_GETTID: {},
- syscall.SYS_GETTIMEOFDAY: {},
- syscall.SYS_LISTEN: {},
- syscall.SYS_LSEEK: {},
- // TODO: Remove SYS_LSTAT when executable lookup moves
- // into the gofer.
- syscall.SYS_LSTAT: {},
- syscall.SYS_MADVISE: {},
- syscall.SYS_MINCORE: {},
- syscall.SYS_MMAP: {},
- syscall.SYS_MPROTECT: {},
- syscall.SYS_MUNMAP: {},
- syscall.SYS_NANOSLEEP: {},
- syscall.SYS_POLL: {},
- syscall.SYS_PREAD64: {},
- syscall.SYS_PWRITE64: {},
- syscall.SYS_READ: {},
- syscall.SYS_READV: {},
- syscall.SYS_RECVMSG: {},
- syscall.SYS_RESTART_SYSCALL: {},
- syscall.SYS_RT_SIGACTION: {},
- syscall.SYS_RT_SIGPROCMASK: {},
- syscall.SYS_RT_SIGRETURN: {},
- syscall.SYS_SCHED_YIELD: {},
- syscall.SYS_SENDMSG: {},
- syscall.SYS_SETITIMER: {},
- syscall.SYS_SHUTDOWN: {},
- syscall.SYS_SIGALTSTACK: {},
- syscall.SYS_SYNC_FILE_RANGE: {},
- syscall.SYS_TGKILL: {},
- syscall.SYS_WRITE: {},
- syscall.SYS_WRITEV: {},
-
+ syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(0),
+ },
+ },
+ syscall.SYS_EPOLL_WAIT: {},
+ syscall.SYS_EVENTFD2: []seccomp.Rule{
+ {
+ seccomp.AllowValue(0),
+ seccomp.AllowValue(0),
+ },
+ },
+ syscall.SYS_EXIT: {},
+ syscall.SYS_EXIT_GROUP: {},
+ syscall.SYS_FALLOCATE: {},
+ syscall.SYS_FCHMOD: {},
+ syscall.SYS_FCNTL: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.F_GETFL),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.F_SETFL),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.F_GETFD),
+ },
+ },
+ syscall.SYS_FSTAT: {},
+ syscall.SYS_FSYNC: {},
+ syscall.SYS_FTRUNCATE: {},
+ syscall.SYS_FUTEX: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(0),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(0),
+ },
+ },
+ syscall.SYS_GETDENTS64: {},
+ syscall.SYS_GETPID: {},
+ unix.SYS_GETRANDOM: {},
+ syscall.SYS_GETSOCKOPT: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_DOMAIN),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_TYPE),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_ERROR),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_SNDBUF),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_RCVBUF),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_REUSEADDR),
+ },
+ },
+ syscall.SYS_GETTID: {},
+ syscall.SYS_GETTIMEOFDAY: {},
// SYS_IOCTL is needed for terminal support, but we only allow
// setting/getting termios and winsize.
syscall.SYS_IOCTL: []seccomp.Rule{
@@ -110,6 +165,107 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.AllowAny{}, /* winsize struct */
},
},
+ syscall.SYS_LSEEK: {},
+ // TODO: Remove SYS_LSTAT when executable lookup moves
+ // into the gofer.
+ syscall.SYS_LSTAT: {},
+ syscall.SYS_MADVISE: {},
+ syscall.SYS_MINCORE: {},
+ syscall.SYS_MMAP: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MAP_SHARED),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MAP_PRIVATE),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.PROT_WRITE | syscall.PROT_READ),
+ seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED),
+ },
+ },
+ syscall.SYS_MPROTECT: {},
+ syscall.SYS_MUNMAP: {},
+ syscall.SYS_NANOSLEEP: {},
+ syscall.SYS_POLL: {},
+ syscall.SYS_PREAD64: {},
+ syscall.SYS_PWRITE64: {},
+ syscall.SYS_READ: {},
+ syscall.SYS_READV: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(len(fdbased.BufConfig)),
+ },
+ },
+ syscall.SYS_RECVMSG: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK),
+ },
+ },
+ syscall.SYS_RESTART_SYSCALL: {},
+ syscall.SYS_RT_SIGACTION: {},
+ syscall.SYS_RT_SIGPROCMASK: {},
+ syscall.SYS_RT_SIGRETURN: {},
+ syscall.SYS_SCHED_YIELD: {},
+ syscall.SYS_SENDMSG: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL),
+ },
+ },
+ syscall.SYS_SETITIMER: {},
+ syscall.SYS_SHUTDOWN: []seccomp.Rule{
+ {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
+ },
+ syscall.SYS_SIGALTSTACK: {},
+ syscall.SYS_SYNC_FILE_RANGE: {},
+ syscall.SYS_TGKILL: []seccomp.Rule{
+ {
+ seccomp.AllowValue(uint64(os.Getpid())),
+ },
+ },
+ syscall.SYS_WRITE: {},
+ syscall.SYS_WRITEV: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(2),
+ },
+ },
}
// whitelistFSFilters returns syscalls made by whitelistFS. Using WhitelistFS
@@ -154,42 +310,197 @@ func whitelistFSFilters() seccomp.SyscallRules {
// hostInetFilters contains syscalls that are needed by sentry/socket/hostinet.
func hostInetFilters() seccomp.SyscallRules {
return seccomp.SyscallRules{
- syscall.SYS_ACCEPT4: {},
+ syscall.SYS_ACCEPT4: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ },
+ },
syscall.SYS_BIND: {},
syscall.SYS_CONNECT: {},
syscall.SYS_GETPEERNAME: {},
syscall.SYS_GETSOCKNAME: {},
- syscall.SYS_GETSOCKOPT: {},
- syscall.SYS_IOCTL: {},
- syscall.SYS_LISTEN: {},
- syscall.SYS_READV: {},
- syscall.SYS_RECVFROM: {},
- syscall.SYS_RECVMSG: {},
- syscall.SYS_SENDMSG: {},
- syscall.SYS_SENDTO: {},
- syscall.SYS_SETSOCKOPT: {},
- syscall.SYS_SHUTDOWN: {},
- syscall.SYS_SOCKET: {},
- syscall.SYS_WRITEV: {},
+ syscall.SYS_GETSOCKOPT: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_V6ONLY),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_ERROR),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_KEEPALIVE),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_SNDBUF),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_REUSEADDR),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_TYPE),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_LINGER),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_TCP),
+ seccomp.AllowValue(syscall.TCP_NODELAY),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_TCP),
+ seccomp.AllowValue(syscall.TCP_INFO),
+ },
+ },
+ syscall.SYS_IOCTL: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.TIOCOUTQ),
+ },
+ },
+ syscall.SYS_LISTEN: {},
+ syscall.SYS_READV: {},
+ syscall.SYS_RECVFROM: {},
+ syscall.SYS_RECVMSG: {},
+ syscall.SYS_SENDMSG: {},
+ syscall.SYS_SENDTO: {},
+ syscall.SYS_SETSOCKOPT: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_V6ONLY),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_SNDBUF),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_RCVBUF),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_REUSEADDR),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_TCP),
+ seccomp.AllowValue(syscall.TCP_NODELAY),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ },
+ syscall.SYS_SHUTDOWN: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SHUT_RD),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SHUT_WR),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SHUT_RDWR),
+ },
+ },
+ syscall.SYS_SOCKET: []seccomp.Rule{
+ {
+ seccomp.AllowValue(syscall.AF_INET),
+ seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.AllowValue(0),
+ },
+ {
+ seccomp.AllowValue(syscall.AF_INET),
+ seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.AllowValue(0),
+ },
+ {
+ seccomp.AllowValue(syscall.AF_INET6),
+ seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.AllowValue(0),
+ },
+ {
+ seccomp.AllowValue(syscall.AF_INET6),
+ seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.AllowValue(0),
+ },
+ },
+ syscall.SYS_WRITEV: {},
}
}
// ptraceFilters returns syscalls made exclusively by the ptrace platform.
func ptraceFilters() seccomp.SyscallRules {
return seccomp.SyscallRules{
- syscall.SYS_PTRACE: {},
- syscall.SYS_WAIT4: {},
unix.SYS_GETCPU: {},
unix.SYS_SCHED_SETAFFINITY: {},
+ syscall.SYS_PTRACE: {},
+ syscall.SYS_TGKILL: {},
+ syscall.SYS_WAIT4: {},
}
}
// kvmFilters returns syscalls made exclusively by the KVM platform.
func kvmFilters() seccomp.SyscallRules {
return seccomp.SyscallRules{
+ syscall.SYS_ARCH_PRCTL: {},
+ syscall.SYS_FUTEX: {},
syscall.SYS_IOCTL: {},
+ syscall.SYS_MMAP: {},
syscall.SYS_RT_SIGSUSPEND: {},
syscall.SYS_RT_SIGTIMEDWAIT: {},
0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host.
}
}
+
+func controlServerFilters(fd int) seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ syscall.SYS_ACCEPT: []seccomp.Rule{
+ {
+ seccomp.AllowValue(fd),
+ },
+ },
+ syscall.SYS_LISTEN: []seccomp.Rule{
+ {
+ seccomp.AllowValue(fd),
+ seccomp.AllowValue(16 /* unet.backlog */),
+ },
+ },
+ syscall.SYS_GETSOCKOPT: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_SOCKET),
+ seccomp.AllowValue(syscall.SO_PEERCRED),
+ },
+ },
+ }
+}
diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go
index c57bbd2e5..56d30f2a0 100644
--- a/runsc/boot/filter/filter.go
+++ b/runsc/boot/filter/filter.go
@@ -27,24 +27,33 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
)
+// Options are seccomp filter related options.
+type Options struct {
+ Platform platform.Platform
+ WhitelistFS bool
+ HostNetwork bool
+ ControllerFD int
+}
+
// Install installs seccomp filters for based on the given platform.
-func Install(p platform.Platform, whitelistFS, hostNetwork bool) error {
+func Install(opt Options) error {
s := allowedSyscalls
+ s.Merge(controlServerFilters(opt.ControllerFD))
// Set of additional filters used by -race and -msan. Returns empty
// when not enabled.
s.Merge(instrumentationFilters())
- if whitelistFS {
+ if opt.WhitelistFS {
Report("direct file access allows unrestricted file access!")
s.Merge(whitelistFSFilters())
}
- if hostNetwork {
+ if opt.HostNetwork {
Report("host networking enabled: syscall filters less restrictive!")
s.Merge(hostInetFilters())
}
- switch p := p.(type) {
+ switch p := opt.Platform.(type) {
case *ptrace.PTrace:
s.Merge(ptraceFilters())
case *kvm.KVM:
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 0ad830a6b..74d0c2534 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -351,9 +351,13 @@ func (l *Loader) run() error {
if l.conf.DisableSeccomp {
filter.Report("syscall filter is DISABLED. Running in less secure mode.")
} else {
- whitelistFS := l.conf.FileAccess == FileAccessDirect
- hostNet := l.conf.Network == NetworkHost
- if err := filter.Install(l.k.Platform, whitelistFS, hostNet); err != nil {
+ opts := filter.Options{
+ Platform: l.k.Platform,
+ WhitelistFS: l.conf.FileAccess == FileAccessDirect,
+ HostNetwork: l.conf.Network == NetworkHost,
+ ControllerFD: l.ctrl.srv.FD(),
+ }
+ if err := filter.Install(opts); err != nil {
return fmt.Errorf("Failed to install seccomp filters: %v", err)
}
}