summaryrefslogtreecommitdiffhomepage
path: root/pkg/seccomp
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/seccomp')
-rw-r--r--pkg/seccomp/BUILD3
-rw-r--r--pkg/seccomp/seccomp.go224
-rw-r--r--pkg/seccomp/seccomp_rules.go8
-rw-r--r--pkg/seccomp/seccomp_test.go172
-rw-r--r--pkg/seccomp/seccomp_unsafe.go24
5 files changed, 306 insertions, 125 deletions
diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD
index b3e2f0b38..1975d17a6 100644
--- a/pkg/seccomp/BUILD
+++ b/pkg/seccomp/BUILD
@@ -28,12 +28,9 @@ go_library(
importpath = "gvisor.googlesource.com/gvisor/pkg/seccomp",
visibility = ["//visibility:public"],
deps = [
- "//pkg/abi",
"//pkg/abi/linux",
"//pkg/bpf",
"//pkg/log",
- "//pkg/sentry/arch",
- "//pkg/sentry/strace",
],
)
diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go
index 49da3c775..a746dc9b3 100644
--- a/pkg/seccomp/seccomp.go
+++ b/pkg/seccomp/seccomp.go
@@ -20,31 +20,36 @@ import (
"reflect"
"sort"
- "gvisor.googlesource.com/gvisor/pkg/abi"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/bpf"
"gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/strace"
)
const (
- // violationLabel is added to the program to take action on a violation.
- violationLabel = "violation"
-
// skipOneInst is the offset to take for skipping one instruction.
skipOneInst = 1
+
+ // defaultLabel is the label for the default action.
+ defaultLabel = "default_action"
)
// Install generates BPF code based on the set of syscalls provided. It only
-// allows syscalls that conform to the specification (*) and generates SIGSYS
+// allows syscalls that conform to the specification and generates SIGSYS
// trap unless kill is set.
//
-// (*) The current implementation only checks the syscall number. It does NOT
-// validate any of the arguments.
+// This is a convenience wrapper around BuildProgram and SetFilter.
func Install(rules SyscallRules, kill bool) error {
log.Infof("Installing seccomp filters for %d syscalls (kill=%t)", len(rules), kill)
- instrs, err := buildProgram(rules, kill)
+ defaultAction := uint32(linux.SECCOMP_RET_TRAP)
+ if kill {
+ defaultAction = uint32(linux.SECCOMP_RET_KILL)
+ }
+ instrs, err := BuildProgram([]RuleSet{
+ RuleSet{
+ Rules: rules,
+ Action: uint32(linux.SECCOMP_RET_ALLOW),
+ },
+ }, defaultAction)
if log.IsLogging(log.Debug) {
programStr, errDecode := bpf.DecodeProgram(instrs)
if errDecode != nil {
@@ -56,60 +61,84 @@ func Install(rules SyscallRules, kill bool) error {
return err
}
- if err := seccomp(instrs); err != nil {
- return err
+ // Perform the actual installation.
+ if errno := SetFilter(instrs); errno != 0 {
+ return fmt.Errorf("Failed to set filter: %v", errno)
}
log.Infof("Seccomp filters installed.")
return nil
}
-// buildProgram builds a BPF program that whitelists all given syscall rules.
-func buildProgram(rules SyscallRules, kill bool) ([]linux.BPFInstruction, error) {
+// RuleSet is a set of rules and associated action.
+type RuleSet struct {
+ Rules SyscallRules
+ Action uint32
+
+ // Vsyscall indicates that a check is made for a function being called
+ // from kernel mappings. This is where the vsyscall page is located
+ // (and typically emulated), so this RuleSet will not match any
+ // functions not dispatched from the vsyscall page.
+ Vsyscall bool
+}
+
+// SyscallName gives names to system calls. It is used purely for debugging purposes.
+//
+// An alternate namer can be provided to the package at initialization time.
+var SyscallName = func(sysno uintptr) string {
+ return fmt.Sprintf("syscall_%d", sysno)
+}
+
+// BuildProgram builds a BPF program from the given list of RuleSets and a
+// default action. The single generated program covers all provided RuleSets.
+func BuildProgram(rules []RuleSet, defaultAction uint32) ([]linux.BPFInstruction, error) {
program := bpf.NewProgramBuilder()
- violationAction := uint32(linux.SECCOMP_RET_KILL)
- if !kill {
- violationAction = linux.SECCOMP_RET_TRAP
- }
// Be paranoid and check that syscall is done in the expected architecture.
//
// A = seccomp_data.arch
- // if (A != AUDIT_ARCH_X86_64) goto violation
+ // if (A != AUDIT_ARCH_X86_64) goto defaultAction.
program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArch)
- // violationLabel is at the bottom of the program. The size of program
+ // defaultLabel is at the bottom of the program. The size of program
// may exceeds 255 lines, which is the limit of a condition jump.
program.AddJump(bpf.Jmp|bpf.Jeq|bpf.K, linux.AUDIT_ARCH_X86_64, skipOneInst, 0)
- program.AddDirectJumpLabel(violationLabel)
-
+ program.AddDirectJumpLabel(defaultLabel)
if err := buildIndex(rules, program); err != nil {
return nil, err
}
- // violation: return violationAction
- if err := program.AddLabel(violationLabel); err != nil {
+ // Exhausted: return defaultAction.
+ if err := program.AddLabel(defaultLabel); err != nil {
return nil, err
}
- program.AddStmt(bpf.Ret|bpf.K, violationAction)
+ program.AddStmt(bpf.Ret|bpf.K, defaultAction)
return program.Instructions()
}
-// buildIndex builds a BST to quickly search through all syscalls that are whitelisted.
-func buildIndex(rules SyscallRules, program *bpf.ProgramBuilder) error {
- syscalls := []uintptr{}
- for sysno := range rules {
- syscalls = append(syscalls, sysno)
+// buildIndex builds a BST to quickly search through all syscalls.
+func buildIndex(rules []RuleSet, program *bpf.ProgramBuilder) error {
+ // Build a list of all application system calls, across all given rule
+ // sets. We have a simple BST, but may dispatch individual matchers
+ // with different actions. The matchers are evaluated linearly.
+ requiredSyscalls := make(map[uintptr]struct{})
+ for _, rs := range rules {
+ for sysno := range rs.Rules {
+ requiredSyscalls[sysno] = struct{}{}
+ }
}
-
- t, ok := strace.Lookup(abi.Linux, arch.AMD64)
- if !ok {
- panic("Can't find amd64 Linux syscall table")
+ syscalls := make([]uintptr, 0, len(requiredSyscalls))
+ for sysno, _ := range requiredSyscalls {
+ syscalls = append(syscalls, sysno)
}
-
sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] })
- for _, s := range syscalls {
- log.Infof("syscall filter: %v (%v): %s", s, t.Name(s), rules[s])
+ for _, sysno := range syscalls {
+ for _, rs := range rules {
+ // Print only if there is a corresponding set of rules.
+ if _, ok := rs.Rules[sysno]; ok {
+ log.Debugf("syscall filter %v: %s => 0x%x", SyscallName(sysno), rs.Rules[sysno], rs.Action)
+ }
+ }
}
root := createBST(syscalls)
@@ -119,7 +148,7 @@ func buildIndex(rules SyscallRules, program *bpf.ProgramBuilder) error {
//
// A = seccomp_data.nr
program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetNR)
- return root.traverse(buildBSTProgram, program, rules)
+ return root.traverse(buildBSTProgram, rules, program)
}
// createBST converts sorted syscall slice into a balanced BST.
@@ -136,15 +165,23 @@ func createBST(syscalls []uintptr) *node {
return &parent
}
-func ruleViolationLabel(sysno uintptr, idx int) string {
- return fmt.Sprintf("ruleViolation_%v_%v", sysno, idx)
+func vsyscallViolationLabel(ruleSetIdx int, sysno uintptr) string {
+ return fmt.Sprintf("vsyscallViolation_%v_%v", ruleSetIdx, sysno)
+}
+
+func ruleViolationLabel(ruleSetIdx int, sysno uintptr, idx int) string {
+ return fmt.Sprintf("ruleViolation_%v_%v_%v", ruleSetIdx, sysno, idx)
}
func checkArgsLabel(sysno uintptr) string {
return fmt.Sprintf("checkArgs_%v", sysno)
}
-func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) error {
+// addSyscallArgsCheck adds argument checks for a single system call. It does
+// not insert a jump to the default action at the end and it is the
+// responsibility of the caller to insert an appropriate jump after calling
+// this function.
+func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action uint32, ruleSetIdx int, sysno uintptr) error {
for ruleidx, rule := range rules {
labelled := false
for i, arg := range rule {
@@ -155,28 +192,29 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) err
high, low := uint32(a>>32), uint32(a)
// assert arg_low == low
p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i))
- p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(sysno, ruleidx))
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
// assert arg_high == high
p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i))
- p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(sysno, ruleidx))
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
labelled = true
-
default:
return fmt.Errorf("unknown syscall rule type: %v", reflect.TypeOf(a))
}
}
}
- // Matched, allow the syscall.
- p.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW)
- // Label the end of the rule if necessary.
+
+ // Matched, emit the given action.
+ p.AddStmt(bpf.Ret|bpf.K, action)
+
+ // Label the end of the rule if necessary. This is added for
+ // the jumps above when the argument check fails.
if labelled {
- if err := p.AddLabel(ruleViolationLabel(sysno, ruleidx)); err != nil {
+ if err := p.AddLabel(ruleViolationLabel(ruleSetIdx, sysno, ruleidx)); err != nil {
return err
}
}
}
- // Not matched?
- p.AddDirectJumpLabel(violationLabel)
+
return nil
}
@@ -188,16 +226,16 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) err
// (A > 22) ? goto index_35 : goto index_9
//
// index_9: // SYS_MMAP(9), leaf
-// A == 9) ? goto argument check : violation
+// (A == 9) ? goto argument check : goto defaultLabel
//
// index_35: // SYS_NANOSLEEP(35), single child
// (A == 35) ? goto argument check : continue
-// (A > 35) ? goto index_50 : goto violation
+// (A > 35) ? goto index_50 : goto defaultLabel
//
// index_50: // SYS_LISTEN(50), leaf
-// (A == 50) ? goto argument check : goto violation
+// (A == 50) ? goto argument check : goto defaultLabel
//
-func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) error {
+func buildBSTProgram(n *node, rules []RuleSet, program *bpf.ProgramBuilder) error {
// Root node is never referenced by label, skip it.
if !n.root {
if err := program.AddLabel(n.label()); err != nil {
@@ -209,11 +247,10 @@ func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) e
program.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, uint32(sysno), checkArgsLabel(sysno), 0)
if n.left == nil && n.right == nil {
// Leaf nodes don't require extra check.
- program.AddDirectJumpLabel(violationLabel)
+ program.AddDirectJumpLabel(defaultLabel)
} else {
// Non-leaf node. Check which turn to take otherwise. Using direct jumps
// in case that the offset may exceed the limit of a conditional jump (255)
- // Note that 'violationLabel' is returned for nil children.
program.AddJump(bpf.Jmp|bpf.Jgt|bpf.K, uint32(sysno), 0, skipOneInst)
program.AddDirectJumpLabel(n.right.label())
program.AddDirectJumpLabel(n.left.label())
@@ -222,12 +259,60 @@ func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) e
if err := program.AddLabel(checkArgsLabel(sysno)); err != nil {
return err
}
- // No rules, just allow it and save one jmp.
- if len(rules[sysno]) == 0 {
- program.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW)
- return nil
+
+ emitted := false
+ for ruleSetIdx, rs := range rules {
+ if _, ok := rs.Rules[sysno]; ok {
+ // If there are no rules, then this will always match.
+ // Remember we've done this so that we can emit a
+ // sensible error. We can't catch all overlaps, but we
+ // can catch this one at least.
+ if emitted {
+ return fmt.Errorf("unreachable action for %v: 0x%x (rule set %d)", SyscallName(sysno), rs.Action, ruleSetIdx)
+ }
+
+ // Emit a vsyscall check if this rule requires a
+ // Vsyscall match. This rule ensures that the top bit
+ // is set in the instruction pointer, which is where
+ // the vsyscall page will be mapped.
+ if rs.Vsyscall {
+ program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetIPHigh)
+ program.AddJumpFalseLabel(bpf.Jmp|bpf.Jset|bpf.K, 0x80000000, 0, vsyscallViolationLabel(ruleSetIdx, sysno))
+ }
+
+ // Emit matchers.
+ if len(rs.Rules[sysno]) == 0 {
+ // This is a blanket action.
+ program.AddStmt(bpf.Ret|bpf.K, rs.Action)
+ emitted = true
+ } else {
+ // Add an argument check for these particular
+ // arguments. This will continue execution and
+ // check the next rule set. We need to ensure
+ // that at the very end, we insert a direct
+ // jump label for the unmatched case.
+ if err := addSyscallArgsCheck(program, rs.Rules[sysno], rs.Action, ruleSetIdx, sysno); err != nil {
+ return err
+ }
+ }
+
+ // If there was a Vsyscall check for this rule, then we
+ // need to add an appropriate label for the jump above.
+ if rs.Vsyscall {
+ if err := program.AddLabel(vsyscallViolationLabel(ruleSetIdx, sysno)); err != nil {
+ return err
+ }
+ }
+ }
}
- return addSyscallArgsCheck(program, rules[sysno], sysno)
+
+ // Not matched? We only need to insert a jump to the default label if
+ // no default action has been emitted for this call.
+ if !emitted {
+ program.AddDirectJumpLabel(defaultLabel)
+ }
+
+ return nil
}
// node represents a tree node.
@@ -238,26 +323,27 @@ type node struct {
root bool
}
-// label returns the label corresponding to this node. If node is nil (syscall not present),
-// violationLabel is returned for convenience.
+// label returns the label corresponding to this node.
+//
+// If n is nil, then the defaultLabel is returned.
func (n *node) label() string {
if n == nil {
- return violationLabel
+ return defaultLabel
}
return fmt.Sprintf("index_%v", n.value)
}
-type traverseFunc func(*bpf.ProgramBuilder, SyscallRules, *node) error
+type traverseFunc func(*node, []RuleSet, *bpf.ProgramBuilder) error
-func (n *node) traverse(fn traverseFunc, p *bpf.ProgramBuilder, rules SyscallRules) error {
+func (n *node) traverse(fn traverseFunc, rules []RuleSet, p *bpf.ProgramBuilder) error {
if n == nil {
return nil
}
- if err := fn(p, rules, n); err != nil {
+ if err := fn(n, rules, p); err != nil {
return err
}
- if err := n.left.traverse(fn, p, rules); err != nil {
+ if err := n.left.traverse(fn, rules, p); err != nil {
return err
}
- return n.right.traverse(fn, p, rules)
+ return n.right.traverse(fn, rules, p)
}
diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go
index 9215e5c90..6b707f195 100644
--- a/pkg/seccomp/seccomp_rules.go
+++ b/pkg/seccomp/seccomp_rules.go
@@ -24,9 +24,11 @@ import "fmt"
// __u64 args[6];
// };
const (
- seccompDataOffsetNR = 0
- seccompDataOffsetArch = 4
- seccompDataOffsetArgs = 16
+ seccompDataOffsetNR = 0
+ seccompDataOffsetArch = 4
+ seccompDataOffsetIPLow = 8
+ seccompDataOffsetIPHigh = 12
+ seccompDataOffsetArgs = 16
)
func seccompDataOffsetArgLow(i int) uint32 {
diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go
index 42cf85c03..0188ad4f3 100644
--- a/pkg/seccomp/seccomp_test.go
+++ b/pkg/seccomp/seccomp_test.go
@@ -76,14 +76,18 @@ func TestBasic(t *testing.T) {
}
for _, test := range []struct {
- // filters are the set of syscall that are allowed.
- filters SyscallRules
- kill bool
- specs []spec
+ ruleSets []RuleSet
+ defaultAction uint32
+ specs []spec
}{
{
- filters: SyscallRules{1: {}},
- kill: false,
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{1: {}},
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
specs: []spec{
{
desc: "Single syscall allowed",
@@ -98,12 +102,61 @@ func TestBasic(t *testing.T) {
},
},
{
- filters: SyscallRules{
- 1: {},
- 3: {},
- 5: {},
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ AllowValue(0x1),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ Rules: SyscallRules{
+ 1: {},
+ 2: {},
+ },
+ Action: linux.SECCOMP_RET_TRAP,
+ },
},
- kill: false,
+ defaultAction: linux.SECCOMP_RET_KILL,
+ specs: []spec{
+ {
+ desc: "Multiple rulesets allowed (1a)",
+ data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0x1}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "Multiple rulesets allowed (1b)",
+ data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "Multiple rulesets allowed (2)",
+ data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "Multiple rulesets allowed (2)",
+ data: seccompData{nr: 0, arch: linux.AUDIT_ARCH_X86_64},
+ want: linux.SECCOMP_RET_KILL,
+ },
+ },
+ },
+ {
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: {},
+ 3: {},
+ 5: {},
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
specs: []spec{
{
desc: "Multiple syscalls allowed (1)",
@@ -148,8 +201,15 @@ func TestBasic(t *testing.T) {
},
},
{
- filters: SyscallRules{1: {}},
- kill: false,
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: {},
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
specs: []spec{
{
desc: "Wrong architecture",
@@ -159,26 +219,38 @@ func TestBasic(t *testing.T) {
},
},
{
- filters: SyscallRules{1: {}},
- kill: true,
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: {},
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
specs: []spec{
{
- desc: "Syscall disallowed, action kill",
+ desc: "Syscall disallowed, action trap",
data: seccompData{nr: 2, arch: linux.AUDIT_ARCH_X86_64},
- want: linux.SECCOMP_RET_KILL,
+ want: linux.SECCOMP_RET_TRAP,
},
},
},
{
- filters: SyscallRules{
- 1: []Rule{
- {
- AllowAny{},
- AllowValue(0xf),
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ AllowAny{},
+ AllowValue(0xf),
+ },
+ },
},
+ Action: linux.SECCOMP_RET_ALLOW,
},
},
- kill: false,
+ defaultAction: linux.SECCOMP_RET_TRAP,
specs: []spec{
{
desc: "Syscall argument allowed",
@@ -193,17 +265,22 @@ func TestBasic(t *testing.T) {
},
},
{
- filters: SyscallRules{
- 1: []Rule{
- {
- AllowValue(0xf),
- },
- {
- AllowValue(0xe),
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ AllowValue(0xf),
+ },
+ {
+ AllowValue(0xe),
+ },
+ },
},
+ Action: linux.SECCOMP_RET_ALLOW,
},
},
- kill: false,
+ defaultAction: linux.SECCOMP_RET_TRAP,
specs: []spec{
{
desc: "Syscall argument allowed, two rules",
@@ -218,16 +295,21 @@ func TestBasic(t *testing.T) {
},
},
{
- filters: SyscallRules{
- 1: []Rule{
- {
- AllowValue(0),
- AllowValue(math.MaxUint64 - 1),
- AllowValue(math.MaxUint32),
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ AllowValue(0),
+ AllowValue(math.MaxUint64 - 1),
+ AllowValue(math.MaxUint32),
+ },
+ },
},
+ Action: linux.SECCOMP_RET_ALLOW,
},
},
- kill: false,
+ defaultAction: linux.SECCOMP_RET_TRAP,
specs: []spec{
{
desc: "64bit syscall argument allowed",
@@ -259,7 +341,7 @@ func TestBasic(t *testing.T) {
},
},
} {
- instrs, err := buildProgram(test.filters, test.kill)
+ instrs, err := BuildProgram(test.ruleSets, test.defaultAction)
if err != nil {
t.Errorf("%s: buildProgram() got error: %v", test.specs[0].desc, err)
continue
@@ -282,6 +364,7 @@ func TestBasic(t *testing.T) {
}
}
+// TestRandom tests that randomly generated rules are encoded correctly.
func TestRandom(t *testing.T) {
rand.Seed(time.Now().UnixNano())
size := rand.Intn(50) + 1
@@ -294,7 +377,12 @@ func TestRandom(t *testing.T) {
}
fmt.Printf("Testing filters: %v", syscallRules)
- instrs, err := buildProgram(syscallRules, false)
+ instrs, err := BuildProgram([]RuleSet{
+ RuleSet{
+ Rules: syscallRules,
+ Action: uint32(linux.SECCOMP_RET_ALLOW),
+ },
+ }, uint32(linux.SECCOMP_RET_TRAP))
if err != nil {
t.Fatalf("buildProgram() got error: %v", err)
}
@@ -319,8 +407,8 @@ func TestRandom(t *testing.T) {
}
}
-// TestReadDeal checks that a process dies when it trips over the filter and that it
-// doesn't die when the filter is not triggered.
+// TestRealDeal checks that a process dies when it trips over the filter and
+// that it doesn't die when the filter is not triggered.
func TestRealDeal(t *testing.T) {
for _, test := range []struct {
die bool
diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go
index 6682f8d9b..ae18534bf 100644
--- a/pkg/seccomp/seccomp_unsafe.go
+++ b/pkg/seccomp/seccomp_unsafe.go
@@ -17,7 +17,6 @@
package seccomp
import (
- "fmt"
"syscall"
"unsafe"
@@ -31,19 +30,28 @@ type sockFprog struct {
Filter *linux.BPFInstruction
}
-func seccomp(instrs []linux.BPFInstruction) error {
+// SetFilter installs the given BPF program.
+//
+// This is safe to call from an afterFork context.
+//
+//go:nosplit
+func SetFilter(instrs []linux.BPFInstruction) syscall.Errno {
// SYS_SECCOMP is not available in syscall package.
const SYS_SECCOMP = 317
// PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See seccomp(2) for details.
- if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0); err != 0 {
- return fmt.Errorf("failed to set PR_SET_NO_NEW_PRIVS: %v", err)
+ if _, _, errno := syscall.RawSyscall(syscall.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0); errno != 0 {
+ return errno
}
- sockProg := sockFprog{Len: uint16(len(instrs)), Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0]))}
// TODO: Use SECCOMP_FILTER_FLAG_KILL_PROCESS when available.
- if _, _, err := syscall.RawSyscall(SYS_SECCOMP, linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, uintptr(unsafe.Pointer(&sockProg))); err != 0 {
- return fmt.Errorf("failed to set seccomp filter: %v", err)
+ sockProg := sockFprog{
+ Len: uint16(len(instrs)),
+ Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])),
}
- return nil
+ if _, _, errno := syscall.RawSyscall(SYS_SECCOMP, linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, uintptr(unsafe.Pointer(&sockProg))); errno != 0 {
+ return errno
+ }
+
+ return 0
}