path: root/pkg/seccomp
author    gVisor bot <gvisor-bot@google.com>  2019-06-02 06:44:55 +0000
committer gVisor bot <gvisor-bot@google.com>  2019-06-02 06:44:55 +0000
commit    ceb0d792f328d1fc0692197d8856a43c3936a571 (patch)
tree      83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/seccomp
parent    deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff)
parent    216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff)
Merge 216da0b7 (automated)
Diffstat (limited to 'pkg/seccomp')
-rw-r--r--  pkg/seccomp/seccomp.go                375
-rw-r--r--  pkg/seccomp/seccomp_amd64.go           26
-rw-r--r--  pkg/seccomp/seccomp_arm64.go           26
-rw-r--r--  pkg/seccomp/seccomp_rules.go          132
-rwxr-xr-x  pkg/seccomp/seccomp_state_autogen.go    4
-rw-r--r--  pkg/seccomp/seccomp_unsafe.go          70
6 files changed, 633 insertions, 0 deletions
diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go
new file mode 100644
index 000000000..cc142a497
--- /dev/null
+++ b/pkg/seccomp/seccomp.go
@@ -0,0 +1,375 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package seccomp provides basic seccomp filters for x86_64 (little endian).
+package seccomp
+
+import (
+ "fmt"
+ "reflect"
+ "sort"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/bpf"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+const (
+ // skipOneInst is the offset to take for skipping one instruction.
+ skipOneInst = 1
+
+ // defaultLabel is the label for the default action.
+ defaultLabel = "default_action"
+)
+
+// Install generates BPF code based on the set of syscalls provided. It only
+// allows syscalls that conform to the specification. Syscalls that violate the
+// specification will trigger RET_KILL_PROCESS, except for the cases below.
+//
+// RET_TRAP is used in violations, instead of RET_KILL_PROCESS, in the
+// following cases:
+// 1. Kernel doesn't support RET_KILL_PROCESS: RET_KILL_THREAD only kills the
+// offending thread and often keeps the sentry hanging.
+// 2. Debug: RET_TRAP generates a panic followed by a stack trace which is
+// much easier to debug than RET_KILL_PROCESS, which can't be caught.
+//
+// Be aware that RET_TRAP sends SIGSYS to the process and it may be ignored,
+// making it possible for the process to continue running after a violation.
+// However, it will leave a SECCOMP audit event trail behind. In any case, the
+// syscall is still blocked from executing.
+func Install(rules SyscallRules) error {
+ defaultAction, err := defaultAction()
+ if err != nil {
+ return err
+ }
+
+ // Uncomment to get stack trace when there is a violation.
+ // defaultAction = linux.BPFAction(linux.SECCOMP_RET_TRAP)
+
+ log.Infof("Installing seccomp filters for %d syscalls (action=%v)", len(rules), defaultAction)
+
+ instrs, err := BuildProgram([]RuleSet{
+ RuleSet{
+ Rules: rules,
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ }, defaultAction)
+ if log.IsLogging(log.Debug) {
+ programStr, errDecode := bpf.DecodeProgram(instrs)
+ if errDecode != nil {
+ programStr = fmt.Sprintf("Error: %v\n%s", errDecode, programStr)
+ }
+ log.Debugf("Seccomp program dump:\n%s", programStr)
+ }
+ if err != nil {
+ return err
+ }
+
+ // Perform the actual installation.
+ if errno := SetFilter(instrs); errno != 0 {
+ return fmt.Errorf("Failed to set filter: %v", errno)
+ }
+
+ log.Infof("Seccomp filters installed.")
+ return nil
+}
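For illustration, a minimal sketch of a caller outside this package installing a filter; the rules shown are hypothetical and mirror the examples in seccomp_rules.go rather than anything in this change:

    rules := seccomp.SyscallRules{
        syscall.SYS_GETPID: {}, // no argument constraints: always allowed
        syscall.SYS_FUTEX: {
            {
                seccomp.AllowAny{}, // arg0: any futex address
                seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), // arg1
            },
        },
    }
    if err := seccomp.Install(rules); err != nil {
        // No filter was installed; the caller decides whether that is fatal.
        return err
    }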
+
+func defaultAction() (linux.BPFAction, error) {
+ available, err := isKillProcessAvailable()
+ if err != nil {
+ return 0, err
+ }
+ if available {
+ return linux.SECCOMP_RET_KILL_PROCESS, nil
+ }
+ return linux.SECCOMP_RET_TRAP, nil
+}
+
+// RuleSet is a set of rules and associated action.
+type RuleSet struct {
+ Rules SyscallRules
+ Action linux.BPFAction
+
+ // Vsyscall indicates that a check is made for a function being called
+ // from kernel mappings. This is where the vsyscall page is located
+ // (and typically emulated), so this RuleSet will not match any
+ // functions not dispatched from the vsyscall page.
+ Vsyscall bool
+}
+
+// SyscallName gives names to system calls. It is used purely for debugging purposes.
+//
+// An alternate namer can be provided to the package at initialization time.
+var SyscallName = func(sysno uintptr) string {
+ return fmt.Sprintf("syscall_%d", sysno)
+}
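Because SyscallName is a plain package-level variable, an embedder can replace it at init time. A hypothetical sketch (mySyscallTable is not part of this package):

    func init() {
        seccomp.SyscallName = func(sysno uintptr) string {
            if name, ok := mySyscallTable[sysno]; ok { // hypothetical lookup table
                return name
            }
            return fmt.Sprintf("syscall_%d", sysno)
        }
    }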
+
+// BuildProgram builds a BPF program from the given list of RuleSets, each of
+// which pairs SyscallRules with an action. The single generated program covers
+// all provided RuleSets.
+func BuildProgram(rules []RuleSet, defaultAction linux.BPFAction) ([]linux.BPFInstruction, error) {
+ program := bpf.NewProgramBuilder()
+
+ // Be paranoid and check that syscall is done in the expected architecture.
+ //
+ // A = seccomp_data.arch
+ // if (A != AUDIT_ARCH) goto defaultAction.
+ program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArch)
+ // defaultLabel is at the bottom of the program. The size of the program
+ // may exceed 255 instructions, which is the limit of a conditional jump.
+ program.AddJump(bpf.Jmp|bpf.Jeq|bpf.K, LINUX_AUDIT_ARCH, skipOneInst, 0)
+ program.AddDirectJumpLabel(defaultLabel)
+ if err := buildIndex(rules, program); err != nil {
+ return nil, err
+ }
+
+ // Exhausted: return defaultAction.
+ if err := program.AddLabel(defaultLabel); err != nil {
+ return nil, err
+ }
+ program.AddStmt(bpf.Ret|bpf.K, uint32(defaultAction))
+
+ return program.Instructions()
+}
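A sketch of calling BuildProgram directly with more than one RuleSet, for example trapping vsyscall-dispatched calls while allowing ordinary ones; the syscall choices and actions below are illustrative only:

    instrs, err := seccomp.BuildProgram([]seccomp.RuleSet{
        {
            // Matches only when the instruction pointer is in kernel mappings
            // (where the vsyscall page lives).
            Rules:    seccomp.SyscallRules{syscall.SYS_GETTIMEOFDAY: {}},
            Action:   linux.SECCOMP_RET_TRAP,
            Vsyscall: true,
        },
        {
            Rules:  seccomp.SyscallRules{syscall.SYS_GETPID: {}},
            Action: linux.SECCOMP_RET_ALLOW,
        },
    }, linux.SECCOMP_RET_KILL_PROCESS)
    if err != nil {
        return err // e.g. an unreachable (shadowed) rule set was detected
    }
    _ = instrs // hand to SetFilter, or to bpf.DecodeProgram for a readable dump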
+
+// buildIndex builds a BST to quickly search through all syscalls.
+func buildIndex(rules []RuleSet, program *bpf.ProgramBuilder) error {
+ // Build a list of all application system calls, across all given rule
+ // sets. We have a simple BST, but may dispatch individual matchers
+ // with different actions. The matchers are evaluated linearly.
+ requiredSyscalls := make(map[uintptr]struct{})
+ for _, rs := range rules {
+ for sysno := range rs.Rules {
+ requiredSyscalls[sysno] = struct{}{}
+ }
+ }
+ syscalls := make([]uintptr, 0, len(requiredSyscalls))
+ for sysno := range requiredSyscalls {
+ syscalls = append(syscalls, sysno)
+ }
+ sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] })
+ for _, sysno := range syscalls {
+ for _, rs := range rules {
+ // Print only if there is a corresponding set of rules.
+ if _, ok := rs.Rules[sysno]; ok {
+ log.Debugf("syscall filter %v: %s => 0x%x", SyscallName(sysno), rs.Rules[sysno], rs.Action)
+ }
+ }
+ }
+
+ root := createBST(syscalls)
+ root.root = true
+
+ // Load syscall number into A and run through BST.
+ //
+ // A = seccomp_data.nr
+ program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetNR)
+ return root.traverse(buildBSTProgram, rules, program)
+}
+
+// createBST converts sorted syscall slice into a balanced BST.
+// Panics if syscalls is empty.
+func createBST(syscalls []uintptr) *node {
+ i := len(syscalls) / 2
+ parent := node{value: syscalls[i]}
+ if i > 0 {
+ parent.left = createBST(syscalls[:i])
+ }
+ if i+1 < len(syscalls) {
+ parent.right = createBST(syscalls[i+1:])
+ }
+ return &parent
+}
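A worked example of the split (an in-package sketch; the Example function is hypothetical): for four sorted syscall numbers, the upper median at index len/2 becomes the root.

    func ExampleCreateBST() {
        root := createBST([]uintptr{9, 22, 35, 50})
        //        35
        //       /  \
        //     22    50
        //    /
        //   9
        fmt.Println(root.value, root.left.value, root.left.left.value, root.right.value)
        // Output: 35 22 9 50
    }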
+
+func vsyscallViolationLabel(ruleSetIdx int, sysno uintptr) string {
+ return fmt.Sprintf("vsyscallViolation_%v_%v", ruleSetIdx, sysno)
+}
+
+func ruleViolationLabel(ruleSetIdx int, sysno uintptr, idx int) string {
+ return fmt.Sprintf("ruleViolation_%v_%v_%v", ruleSetIdx, sysno, idx)
+}
+
+func checkArgsLabel(sysno uintptr) string {
+ return fmt.Sprintf("checkArgs_%v", sysno)
+}
+
+// addSyscallArgsCheck adds argument checks for a single system call. It does
+// not insert a jump to the default action at the end and it is the
+// responsibility of the caller to insert an appropriate jump after calling
+// this function.
+func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action linux.BPFAction, ruleSetIdx int, sysno uintptr) error {
+ for ruleidx, rule := range rules {
+ labelled := false
+ for i, arg := range rule {
+ if arg != nil {
+ switch a := arg.(type) {
+ case AllowAny:
+ case AllowValue:
+ high, low := uint32(a>>32), uint32(a)
+ // assert arg_low == low
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i))
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
+ // assert arg_high == high
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i))
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
+ labelled = true
+ default:
+ return fmt.Errorf("unknown syscall rule type: %v", reflect.TypeOf(a))
+ }
+ }
+ }
+
+ // Matched, emit the given action.
+ p.AddStmt(bpf.Ret|bpf.K, uint32(action))
+
+ // Label the end of the rule if necessary. This is added for
+ // the jumps above when the argument check fails.
+ if labelled {
+ if err := p.AddLabel(ruleViolationLabel(ruleSetIdx, sysno, ruleidx)); err != nil {
+ return err
+ }
+ }
+ }
+
+ return nil
+}
+
+// buildBSTProgram converts a binary tree started in 'root' into BPF code. The outline of the code
+// is as follows (for the sorted syscalls {SYS_MMAP(9), SYS_PIPE(22), SYS_NANOSLEEP(35), SYS_LISTEN(50)}):
+//
+// // SYS_NANOSLEEP(35), root
+// (A == 35) ? goto argument check : continue
+// (A > 35) ? goto index_50 : goto index_22
+//
+// index_22: // SYS_PIPE(22), single child
+// (A == 22) ? goto argument check : continue
+// (A > 22) ? goto defaultLabel : goto index_9
+//
+// index_9: // SYS_MMAP(9), leaf
+// (A == 9) ? goto argument check : goto defaultLabel
+//
+// index_50: // SYS_LISTEN(50), leaf
+// (A == 50) ? goto argument check : goto defaultLabel
+//
+func buildBSTProgram(n *node, rules []RuleSet, program *bpf.ProgramBuilder) error {
+ // Root node is never referenced by label, skip it.
+ if !n.root {
+ if err := program.AddLabel(n.label()); err != nil {
+ return err
+ }
+ }
+
+ sysno := n.value
+ program.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, uint32(sysno), checkArgsLabel(sysno), 0)
+ if n.left == nil && n.right == nil {
+ // Leaf nodes don't require an extra check.
+ program.AddDirectJumpLabel(defaultLabel)
+ } else {
+ // Non-leaf node. Check which branch to take next. Use direct jumps in
+ // case the offset exceeds the limit of a conditional jump (255).
+ program.AddJump(bpf.Jmp|bpf.Jgt|bpf.K, uint32(sysno), 0, skipOneInst)
+ program.AddDirectJumpLabel(n.right.label())
+ program.AddDirectJumpLabel(n.left.label())
+ }
+
+ if err := program.AddLabel(checkArgsLabel(sysno)); err != nil {
+ return err
+ }
+
+ emitted := false
+ for ruleSetIdx, rs := range rules {
+ if _, ok := rs.Rules[sysno]; ok {
+ // If there are no rules, then this will always match.
+ // Remember we've done this so that we can emit a
+ // sensible error. We can't catch all overlaps, but we
+ // can catch this one at least.
+ if emitted {
+ return fmt.Errorf("unreachable action for %v: 0x%x (rule set %d)", SyscallName(sysno), rs.Action, ruleSetIdx)
+ }
+
+ // Emit a vsyscall check if this rule requires a
+ // Vsyscall match. This rule ensures that the top bit
+ // is set in the instruction pointer, which is where
+ // the vsyscall page will be mapped.
+ if rs.Vsyscall {
+ program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetIPHigh)
+ program.AddJumpFalseLabel(bpf.Jmp|bpf.Jset|bpf.K, 0x80000000, 0, vsyscallViolationLabel(ruleSetIdx, sysno))
+ }
+
+ // Emit matchers.
+ if len(rs.Rules[sysno]) == 0 {
+ // This is a blanket action.
+ program.AddStmt(bpf.Ret|bpf.K, uint32(rs.Action))
+ emitted = true
+ } else {
+ // Add an argument check for these particular
+ // arguments. This will continue execution and
+ // check the next rule set. We need to ensure
+ // that at the very end, we insert a direct
+ // jump label for the unmatched case.
+ if err := addSyscallArgsCheck(program, rs.Rules[sysno], rs.Action, ruleSetIdx, sysno); err != nil {
+ return err
+ }
+ }
+
+ // If there was a Vsyscall check for this rule, then we
+ // need to add an appropriate label for the jump above.
+ if rs.Vsyscall {
+ if err := program.AddLabel(vsyscallViolationLabel(ruleSetIdx, sysno)); err != nil {
+ return err
+ }
+ }
+ }
+ }
+
+ // Not matched? We only need to insert a jump to the default label if
+ // no blanket action has been emitted for this call.
+ if !emitted {
+ program.AddDirectJumpLabel(defaultLabel)
+ }
+
+ return nil
+}
+
+// node represents a tree node.
+type node struct {
+ value uintptr
+ left *node
+ right *node
+ root bool
+}
+
+// label returns the label corresponding to this node.
+//
+// If n is nil, then the defaultLabel is returned.
+func (n *node) label() string {
+ if n == nil {
+ return defaultLabel
+ }
+ return fmt.Sprintf("index_%v", n.value)
+}
+
+type traverseFunc func(*node, []RuleSet, *bpf.ProgramBuilder) error
+
+func (n *node) traverse(fn traverseFunc, rules []RuleSet, p *bpf.ProgramBuilder) error {
+ if n == nil {
+ return nil
+ }
+ if err := fn(n, rules, p); err != nil {
+ return err
+ }
+ if err := n.left.traverse(fn, rules, p); err != nil {
+ return err
+ }
+ return n.right.traverse(fn, rules, p)
+}
diff --git a/pkg/seccomp/seccomp_amd64.go b/pkg/seccomp/seccomp_amd64.go
new file mode 100644
index 000000000..02dfb8d9f
--- /dev/null
+++ b/pkg/seccomp/seccomp_amd64.go
@@ -0,0 +1,26 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package seccomp
+
+import (
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+)
+
+const (
+ LINUX_AUDIT_ARCH = linux.AUDIT_ARCH_X86_64
+ SYS_SECCOMP = 317
+)
diff --git a/pkg/seccomp/seccomp_arm64.go b/pkg/seccomp/seccomp_arm64.go
new file mode 100644
index 000000000..b575bcdbf
--- /dev/null
+++ b/pkg/seccomp/seccomp_arm64.go
@@ -0,0 +1,26 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package seccomp
+
+import (
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+)
+
+const (
+ LINUX_AUDIT_ARCH = linux.AUDIT_ARCH_AARCH64
+ SYS_SECCOMP = 277
+)
diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go
new file mode 100644
index 000000000..29eec8db1
--- /dev/null
+++ b/pkg/seccomp/seccomp_rules.go
@@ -0,0 +1,132 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seccomp
+
+import "fmt"
+
+// The offsets are based on the following struct in include/linux/seccomp.h.
+// struct seccomp_data {
+// int nr;
+// __u32 arch;
+// __u64 instruction_pointer;
+// __u64 args[6];
+// };
+const (
+ seccompDataOffsetNR = 0
+ seccompDataOffsetArch = 4
+ seccompDataOffsetIPLow = 8
+ seccompDataOffsetIPHigh = 12
+ seccompDataOffsetArgs = 16
+)
+
+func seccompDataOffsetArgLow(i int) uint32 {
+ return uint32(seccompDataOffsetArgs + i*8)
+}
+
+func seccompDataOffsetArgHigh(i int) uint32 {
+ return seccompDataOffsetArgLow(i) + 4
+}
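As a worked example (in-package sketch), the two 32-bit halves of the second syscall argument, args[1], sit at these offsets into seccomp_data:

    low := seccompDataOffsetArgLow(1)   // 16 + 1*8 == 24: low 32 bits of args[1]
    high := seccompDataOffsetArgHigh(1) // 24 + 4   == 28: high 32 bits of args[1]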
+
+// AllowAny is a marker indicating that any argument value will be accepted.
+type AllowAny struct{}
+
+func (a AllowAny) String() (s string) {
+ return "*"
+}
+
+// AllowValue specifies a value that needs to be strictly matched.
+type AllowValue uintptr
+
+func (a AllowValue) String() (s string) {
+ return fmt.Sprintf("%#x ", uintptr(a))
+}
+
+// Rule stores the whitelist of syscall arguments.
+//
+// For example:
+// rule := Rule {
+// AllowValue(linux.ARCH_GET_FS | linux.ARCH_SET_FS), // arg0
+// }
+type Rule [6]interface{}
+
+func (r Rule) String() (s string) {
+ if len(r) == 0 {
+ return
+ }
+ s += "( "
+ for _, arg := range r {
+ if arg != nil {
+ s += fmt.Sprintf("%v ", arg)
+ }
+ }
+ s += ")"
+ return
+}
+
+// SyscallRules stores a map of OR'ed whitelist rules indexed by the syscall number.
+// If the rule list for a syscall is empty, any arguments are allowed.
+//
+// For example:
+// rules := SyscallRules{
+// syscall.SYS_FUTEX: []Rule{
+// {
+// AllowAny{},
+// AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
+// }, // OR
+// {
+// AllowAny{},
+// AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
+// },
+// },
+// syscall.SYS_GETPID: []Rule{},
+// }
+type SyscallRules map[uintptr][]Rule
+
+// NewSyscallRules returns a new SyscallRules.
+func NewSyscallRules() SyscallRules {
+ return make(map[uintptr][]Rule)
+}
+
+// AddRule adds the given rule. It will create a new entry for a new syscall, otherwise
+// it will append to the existing rules.
+func (sr SyscallRules) AddRule(sysno uintptr, r Rule) {
+ if cur, ok := sr[sysno]; ok {
+ // An empty rule list means allow all. Honor that when more rules are added.
+ if len(cur) == 0 {
+ sr[sysno] = append(sr[sysno], Rule{})
+ }
+ sr[sysno] = append(sr[sysno], r)
+ } else {
+ sr[sysno] = []Rule{r}
+ }
+}
+
+// Merge merges the given SyscallRules.
+func (sr SyscallRules) Merge(rules SyscallRules) {
+ for sysno, rs := range rules {
+ if cur, ok := sr[sysno]; ok {
+ // An empty rule list means allow all. Honor that when more rules are added.
+ if len(cur) == 0 {
+ sr[sysno] = append(sr[sysno], Rule{})
+ }
+ if len(rs) == 0 {
+ rs = []Rule{{}}
+ }
+ sr[sysno] = append(sr[sysno], rs...)
+ } else {
+ sr[sysno] = rs
+ }
+ }
+}
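A short sketch of how AddRule and Merge interact with the empty-list ("allow all") convention; the syscall and values are illustrative:

    rules := seccomp.NewSyscallRules()
    rules.AddRule(syscall.SYS_READ, seccomp.Rule{seccomp.AllowValue(0)}) // read only from fd 0
    rules.Merge(seccomp.SyscallRules{syscall.SYS_READ: {}})              // incoming: any read allowed
    // rules[syscall.SYS_READ] now holds {AllowValue(0)} OR the match-anything Rule{},
    // so after the merge every read call is permitted.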
diff --git a/pkg/seccomp/seccomp_state_autogen.go b/pkg/seccomp/seccomp_state_autogen.go
new file mode 100755
index 000000000..0fc23d1a8
--- /dev/null
+++ b/pkg/seccomp/seccomp_state_autogen.go
@@ -0,0 +1,4 @@
+// automatically generated by stateify.
+
+package seccomp
+
diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go
new file mode 100644
index 000000000..ebb6397e8
--- /dev/null
+++ b/pkg/seccomp/seccomp_unsafe.go
@@ -0,0 +1,70 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seccomp
+
+import (
+ "syscall"
+ "unsafe"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+)
+
+// sockFprog is sock_fprog taken from <linux/filter.h>.
+type sockFprog struct {
+ Len uint16
+ pad [6]byte
+ Filter *linux.BPFInstruction
+}
+
+// SetFilter installs the given BPF program.
+//
+// This is safe to call from an afterFork context.
+//
+//go:nosplit
+func SetFilter(instrs []linux.BPFInstruction) syscall.Errno {
+ // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See seccomp(2) for details.
+ if _, _, errno := syscall.RawSyscall(syscall.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0); errno != 0 {
+ return errno
+ }
+
+ sockProg := sockFprog{
+ Len: uint16(len(instrs)),
+ Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])),
+ }
+ return seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg))
+}
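SetFilter returns a raw syscall.Errno rather than an error, so a direct caller checks it against zero; a hypothetical sketch where instrs comes from BuildProgram above:

    if errno := seccomp.SetFilter(instrs); errno != 0 {
        // Nothing was installed; EINVAL typically indicates a malformed program.
        return fmt.Errorf("SECCOMP_SET_MODE_FILTER: %v", errno)
    }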
+
+func isKillProcessAvailable() (bool, error) {
+ action := uint32(linux.SECCOMP_RET_KILL_PROCESS)
+ if errno := seccomp(linux.SECCOMP_GET_ACTION_AVAIL, 0, unsafe.Pointer(&action)); errno != 0 {
+ // EINVAL: SECCOMP_GET_ACTION_AVAIL not in this kernel yet.
+ // EOPNOTSUPP: SECCOMP_RET_KILL_PROCESS not supported.
+ if errno == syscall.EINVAL || errno == syscall.EOPNOTSUPP {
+ return false, nil
+ }
+ return false, errno
+ }
+ return true, nil
+}
+
+// seccomp calls seccomp(2). This is safe to call from an afterFork context.
+//
+//go:nosplit
+func seccomp(op, flags uint32, ptr unsafe.Pointer) syscall.Errno {
+ if _, _, errno := syscall.RawSyscall(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr)); errno != 0 {
+ return errno
+ }
+ return 0
+}