diff options
author | Jamie Liu <jamieliu@google.com> | 2021-07-08 18:55:56 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-07-08 18:59:01 -0700 |
commit | de29d8d415ab539195840aeba57a17cd6c89218f (patch) | |
tree | 60c9f3ae0e0a16383aefa258a4f9ae840ff275f8 /pkg/seccomp/seccomp_unsafe.go | |
parent | f8207a823351055a2aaad633b428fe7c1f0585f0 (diff) |
Fix some //pkg/seccomp bugs.
- LockOSThread() around prctl(PR_SET_NO_NEW_PRIVS) => seccomp(). go:nosplit
"mostly" prevents async preemption, but IIUC preemption is still permitted
during function prologues:
funcpctab "".seccomp [valfunc=pctopcdata]
0 -1 00000 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) TEXT "".seccomp(SB), NOSPLIT|ABIInternal, $72-32
0 00000 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) TEXT "".seccomp(SB), NOSPLIT|ABIInternal, $72-32
0 -1 00000 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) SUBQ $72, SP
4 00004 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) MOVQ BP, 64(SP)
9 00009 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) LEAQ 64(SP), BP
e 00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) FUNCDATA $0, gclocals·ba30782f8935b28ed1adaec603e72627(SB)
e 00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) FUNCDATA $1, gclocals·663f8c6bfa83aa777198789ce63d9ab4(SB)
e 00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:110) FUNCDATA $2, "".seccomp.stkobj(SB)
e 00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:111) PCDATA $0, $-2
e -2 00014 (gvisor/pkg/seccomp/seccomp_unsafe.go:111) MOVQ "".ptr+88(SP), AX
(-1 is objabi.PCDATA_UnsafePointSafe and -2 is objabi.PCDATA_UnsafePointUnsafe,
from Go's cmd/internal/objabi.)
- Handle non-errno failures from seccomp() with SECCOMP_FILTER_FLAG_TSYNC.
PiperOrigin-RevId: 383757580
Diffstat (limited to 'pkg/seccomp/seccomp_unsafe.go')
-rw-r--r-- | pkg/seccomp/seccomp_unsafe.go | 70 |
1 files changed, 60 insertions, 10 deletions
diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go index 7202591df..061cd26ab 100644 --- a/pkg/seccomp/seccomp_unsafe.go +++ b/pkg/seccomp/seccomp_unsafe.go @@ -15,6 +15,8 @@ package seccomp import ( + "fmt" + "runtime" "unsafe" "golang.org/x/sys/unix" @@ -22,12 +24,52 @@ import ( ) // SetFilter installs the given BPF program. +func SetFilter(instrs []linux.BPFInstruction) error { + // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See + // seccomp(2) for details. + // + // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole + // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must + // remain on the same thread. no_new_privs will be propagated to other + // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in + // kernel/seccomp.c:seccomp_sync_threads(). + runtime.LockOSThread() + defer runtime.UnlockOSThread() + if _, _, errno := unix.RawSyscall6(unix.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); errno != 0 { + return errno + } + + sockProg := linux.SockFprog{ + Len: uint16(len(instrs)), + Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])), + } + tid, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg)) + if errno != 0 { + return errno + } + // "On error, if SECCOMP_FILTER_FLAG_TSYNC was used, the return value is + // the ID of the thread that caused the synchronization failure. (This ID + // is a kernel thread ID of the type returned by clone(2) and gettid(2).)" + // - seccomp(2) + if tid != 0 { + return fmt.Errorf("couldn't synchronize filter to TID %d", tid) + } + return nil +} + +// SetFilterInChild is equivalent to SetFilter, but: +// +// - It is safe to call after runtime.syscall_runtime_AfterForkInChild. // -// This is safe to call from an afterFork context. +// - It requires that the calling goroutine cannot be moved to another thread, +// which either requires that runtime.LockOSThread() is in effect or that the +// caller is in fact in a fork()ed child process. +// +// - Since fork()ed child processes cannot perform heap allocation, it returns +// a unix.Errno rather than an error. // //go:nosplit -func SetFilter(instrs []linux.BPFInstruction) unix.Errno { - // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See seccomp(2) for details. +func SetFilterInChild(instrs []linux.BPFInstruction) unix.Errno { if _, _, errno := unix.RawSyscall6(unix.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); errno != 0 { return errno } @@ -36,12 +78,22 @@ func SetFilter(instrs []linux.BPFInstruction) unix.Errno { Len: uint16(len(instrs)), Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])), } - return seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg)) + tid, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg)) + if errno != 0 { + return errno + } + if tid != 0 { + // Return an errno that seccomp(2) doesn't to uniquely identify this + // case. Since this case occurs if another thread has a conflicting + // filter set, "name not unique on network" is at least suggestive? + return unix.ENOTUNIQ + } + return 0 } func isKillProcessAvailable() (bool, error) { action := uint32(linux.SECCOMP_RET_KILL_PROCESS) - if errno := seccomp(linux.SECCOMP_GET_ACTION_AVAIL, 0, unsafe.Pointer(&action)); errno != 0 { + if _, errno := seccomp(linux.SECCOMP_GET_ACTION_AVAIL, 0, unsafe.Pointer(&action)); errno != 0 { // EINVAL: SECCOMP_GET_ACTION_AVAIL not in this kernel yet. // EOPNOTSUPP: SECCOMP_RET_KILL_PROCESS not supported. if errno == unix.EINVAL || errno == unix.EOPNOTSUPP { @@ -55,9 +107,7 @@ func isKillProcessAvailable() (bool, error) { // seccomp calls seccomp(2). This is safe to call from an afterFork context. // //go:nosplit -func seccomp(op, flags uint32, ptr unsafe.Pointer) unix.Errno { - if _, _, errno := unix.RawSyscall(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr)); errno != 0 { - return errno - } - return 0 +func seccomp(op, flags uint32, ptr unsafe.Pointer) (uintptr, unix.Errno) { + n, _, errno := unix.RawSyscall(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr)) + return n, errno } |