summaryrefslogtreecommitdiffhomepage
path: root/pkg/seccomp
diff options
context:
space:
mode:
author Jamie Liu <jamieliu@google.com> 2021-07-08 18:55:56 -0700
committer gVisor bot <gvisor-bot@google.com> 2021-07-08 18:59:01 -0700
commit de29d8d415ab539195840aeba57a17cd6c89218f (patch)
tree 60c9f3ae0e0a16383aefa258a4f9ae840ff275f8 /pkg/seccomp
parent f8207a823351055a2aaad633b428fe7c1f0585f0 (diff)
Fix some //pkg/seccomp bugs.
- LockOSThread() around prctl(PR_SET_NO_NEW_PRIVS) => seccomp(). go:nosplit
  "mostly" prevents async preemption, but IIUC preemption is still permitted
  during function prologues:

    funcpctab "".seccomp [valfunc=pctopcdata]
         0     -1 00000 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) TEXT "".seccomp(SB), NOSPLIT|ABIInternal, $72-32
         0        00000 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) TEXT "".seccomp(SB), NOSPLIT|ABIInternal, $72-32
         0     -1 00000 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) SUBQ $72, SP
         4        00004 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) MOVQ BP, 64(SP)
         9        00009 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) LEAQ 64(SP), BP
         e        00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) FUNCDATA $0, gclocals·ba30782f8935b28ed1adaec603e72627(SB)
         e        00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) FUNCDATA $1, gclocals·663f8c6bfa83aa777198789ce63d9ab4(SB)
         e        00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) FUNCDATA $2, "".seccomp.stkobj(SB)
         e        00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:111) PCDATA $0, $-2
         e     -2 00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:111) MOVQ "".ptr+88(SP), AX

  (-1 is objabi.PCDATA_UnsafePointSafe and -2 is objabi.PCDATA_UnsafePointUnsafe,
  from Go's cmd/internal/objabi.)

- Handle non-errno failures from seccomp() with SECCOMP_FILTER_FLAG_TSYNC.

PiperOrigin-RevId: 383757580
Diffstat (limited to 'pkg/seccomp')
-rw-r--r-- pkg/seccomp/seccomp.go 4
-rw-r--r-- pkg/seccomp/seccomp_unsafe.go 70
2 files changed, 62 insertions, 12 deletions
diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go
index 8ffa1db37..062250d69 100644
--- a/pkg/seccomp/seccomp.go
+++ b/pkg/seccomp/seccomp.go
@@ -74,8 +74,8 @@ func Install(rules SyscallRules) error {
}
// Perform the actual installation.
- if errno := SetFilter(instrs); errno != 0 {
- return fmt.Errorf("failed to set filter: %v", errno)
+ if err := SetFilter(instrs); err != nil {
+ return fmt.Errorf("failed to set filter: %v", err)
}
log.Infof("Seccomp filters installed.")
diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go
index 7202591df..061cd26ab 100644
--- a/pkg/seccomp/seccomp_unsafe.go
+++ b/pkg/seccomp/seccomp_unsafe.go
@@ -15,6 +15,8 @@
package seccomp
import (
+ "fmt"
+ "runtime"
"unsafe"
"golang.org/x/sys/unix"
@@ -22,12 +24,52 @@ import (
)
// SetFilter installs the given BPF program.
+func SetFilter(instrs []linux.BPFInstruction) error {
+ // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
+ // seccomp(2) for details.
+ //
+ // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole
+ // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must
+ // remain on the same thread. no_new_privs will be propagated to other
+ // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in
+ // kernel/seccomp.c:seccomp_sync_threads().
+ runtime.LockOSThread()
+ defer runtime.UnlockOSThread()
+ if _, _, errno := unix.RawSyscall6(unix.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); errno != 0 {
+ return errno
+ }
+
+ sockProg := linux.SockFprog{
+ Len: uint16(len(instrs)),
+ Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])),
+ }
+ tid, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg))
+ if errno != 0 {
+ return errno
+ }
+ // "On error, if SECCOMP_FILTER_FLAG_TSYNC was used, the return value is
+ // the ID of the thread that caused the synchronization failure. (This ID
+ // is a kernel thread ID of the type returned by clone(2) and gettid(2).)"
+ // - seccomp(2)
+ if tid != 0 {
+ return fmt.Errorf("couldn't synchronize filter to TID %d", tid)
+ }
+ return nil
+}
+
+// SetFilterInChild is equivalent to SetFilter, but:
+//
+// - It is safe to call after runtime.syscall_runtime_AfterForkInChild.
//
-// This is safe to call from an afterFork context.
+// - It requires that the calling goroutine cannot be moved to another thread,
+// which either requires that runtime.LockOSThread() is in effect or that the
+// caller is in fact in a fork()ed child process.
+//
+// - Since fork()ed child processes cannot perform heap allocation, it returns
+// a unix.Errno rather than an error.
//
//go:nosplit
-func SetFilter(instrs []linux.BPFInstruction) unix.Errno {
- // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See seccomp(2) for details.
+func SetFilterInChild(instrs []linux.BPFInstruction) unix.Errno {
if _, _, errno := unix.RawSyscall6(unix.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); errno != 0 {
return errno
}
@@ -36,12 +78,22 @@ func SetFilter(instrs []linux.BPFInstruction) unix.Errno {
Len: uint16(len(instrs)),
Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])),
}
- return seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg))
+ tid, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg))
+ if errno != 0 {
+ return errno
+ }
+ if tid != 0 {
+ // Return an errno that seccomp(2) doesn't to uniquely identify this
+ // case. Since this case occurs if another thread has a conflicting
+ // filter set, "name not unique on network" is at least suggestive?
+ return unix.ENOTUNIQ
+ }
+ return 0
}
func isKillProcessAvailable() (bool, error) {
action := uint32(linux.SECCOMP_RET_KILL_PROCESS)
- if errno := seccomp(linux.SECCOMP_GET_ACTION_AVAIL, 0, unsafe.Pointer(&action)); errno != 0 {
+ if _, errno := seccomp(linux.SECCOMP_GET_ACTION_AVAIL, 0, unsafe.Pointer(&action)); errno != 0 {
// EINVAL: SECCOMP_GET_ACTION_AVAIL not in this kernel yet.
// EOPNOTSUPP: SECCOMP_RET_KILL_PROCESS not supported.
if errno == unix.EINVAL || errno == unix.EOPNOTSUPP {
@@ -55,9 +107,7 @@ func isKillProcessAvailable() (bool, error) {
// seccomp calls seccomp(2). This is safe to call from an afterFork context.
//
//go:nosplit
-func seccomp(op, flags uint32, ptr unsafe.Pointer) unix.Errno {
- if _, _, errno := unix.RawSyscall(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr)); errno != 0 {
- return errno
- }
- return 0
+func seccomp(op, flags uint32, ptr unsafe.Pointer) (uintptr, unix.Errno) {
+ n, _, errno := unix.RawSyscall(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr))
+ return n, errno
}