diff options
Diffstat (limited to 'pkg/seccomp')
-rw-r--r-- | pkg/seccomp/BUILD | 3 | ||||
-rw-r--r-- | pkg/seccomp/seccomp.go | 224 | ||||
-rw-r--r-- | pkg/seccomp/seccomp_rules.go | 8 | ||||
-rw-r--r-- | pkg/seccomp/seccomp_test.go | 172 | ||||
-rw-r--r-- | pkg/seccomp/seccomp_unsafe.go | 24 |
5 files changed, 306 insertions, 125 deletions
diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index b3e2f0b38..1975d17a6 100644 --- a/pkg/seccomp/BUILD +++ b/pkg/seccomp/BUILD @@ -28,12 +28,9 @@ go_library( importpath = "gvisor.googlesource.com/gvisor/pkg/seccomp", visibility = ["//visibility:public"], deps = [ - "//pkg/abi", "//pkg/abi/linux", "//pkg/bpf", "//pkg/log", - "//pkg/sentry/arch", - "//pkg/sentry/strace", ], ) diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index 49da3c775..a746dc9b3 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -20,31 +20,36 @@ import ( "reflect" "sort" - "gvisor.googlesource.com/gvisor/pkg/abi" "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/bpf" "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/strace" ) const ( - // violationLabel is added to the program to take action on a violation. - violationLabel = "violation" - // skipOneInst is the offset to take for skipping one instruction. skipOneInst = 1 + + // defaultLabel is the label for the default action. + defaultLabel = "default_action" ) // Install generates BPF code based on the set of syscalls provided. It only -// allows syscalls that conform to the specification (*) and generates SIGSYS +// allows syscalls that conform to the specification and generates SIGSYS // trap unless kill is set. // -// (*) The current implementation only checks the syscall number. It does NOT -// validate any of the arguments. +// This is a convenience wrapper around BuildProgram and SetFilter. func Install(rules SyscallRules, kill bool) error { log.Infof("Installing seccomp filters for %d syscalls (kill=%t)", len(rules), kill) - instrs, err := buildProgram(rules, kill) + defaultAction := uint32(linux.SECCOMP_RET_TRAP) + if kill { + defaultAction = uint32(linux.SECCOMP_RET_KILL) + } + instrs, err := BuildProgram([]RuleSet{ + RuleSet{ + Rules: rules, + Action: uint32(linux.SECCOMP_RET_ALLOW), + }, + }, defaultAction) if log.IsLogging(log.Debug) { programStr, errDecode := bpf.DecodeProgram(instrs) if errDecode != nil { @@ -56,60 +61,84 @@ func Install(rules SyscallRules, kill bool) error { return err } - if err := seccomp(instrs); err != nil { - return err + // Perform the actual installation. + if errno := SetFilter(instrs); errno != 0 { + return fmt.Errorf("Failed to set filter: %v", errno) } log.Infof("Seccomp filters installed.") return nil } -// buildProgram builds a BPF program that whitelists all given syscall rules. -func buildProgram(rules SyscallRules, kill bool) ([]linux.BPFInstruction, error) { +// RuleSet is a set of rules and associated action. +type RuleSet struct { + Rules SyscallRules + Action uint32 + + // Vsyscall indicates that a check is made for a function being called + // from kernel mappings. This is where the vsyscall page is located + // (and typically) emulated, so this RuleSet will not match any + // functions not dispatched from the vsyscall page. + Vsyscall bool +} + +// SyscallName gives names to system calls. It is used purely for debugging purposes. +// +// An alternate namer can be provided to the package at initialization time. +var SyscallName = func(sysno uintptr) string { + return fmt.Sprintf("syscall_%d", sysno) +} + +// BuildProgram builds a BPF program from the given map of actions to matching +// SyscallRules. The single generated program covers all provided RuleSets. +func BuildProgram(rules []RuleSet, defaultAction uint32) ([]linux.BPFInstruction, error) { program := bpf.NewProgramBuilder() - violationAction := uint32(linux.SECCOMP_RET_KILL) - if !kill { - violationAction = linux.SECCOMP_RET_TRAP - } // Be paranoid and check that syscall is done in the expected architecture. // // A = seccomp_data.arch - // if (A != AUDIT_ARCH_X86_64) goto violation + // if (A != AUDIT_ARCH_X86_64) goto defaultAction. program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArch) - // violationLabel is at the bottom of the program. The size of program + // defaultLabel is at the bottom of the program. The size of program // may exceeds 255 lines, which is the limit of a condition jump. program.AddJump(bpf.Jmp|bpf.Jeq|bpf.K, linux.AUDIT_ARCH_X86_64, skipOneInst, 0) - program.AddDirectJumpLabel(violationLabel) - + program.AddDirectJumpLabel(defaultLabel) if err := buildIndex(rules, program); err != nil { return nil, err } - // violation: return violationAction - if err := program.AddLabel(violationLabel); err != nil { + // Exhausted: return defaultAction. + if err := program.AddLabel(defaultLabel); err != nil { return nil, err } - program.AddStmt(bpf.Ret|bpf.K, violationAction) + program.AddStmt(bpf.Ret|bpf.K, defaultAction) return program.Instructions() } -// buildIndex builds a BST to quickly search through all syscalls that are whitelisted. -func buildIndex(rules SyscallRules, program *bpf.ProgramBuilder) error { - syscalls := []uintptr{} - for sysno := range rules { - syscalls = append(syscalls, sysno) +// buildIndex builds a BST to quickly search through all syscalls. +func buildIndex(rules []RuleSet, program *bpf.ProgramBuilder) error { + // Build a list of all application system calls, across all given rule + // sets. We have a simple BST, but may dispatch individual matchers + // with different actions. The matchers are evaluated linearly. + requiredSyscalls := make(map[uintptr]struct{}) + for _, rs := range rules { + for sysno := range rs.Rules { + requiredSyscalls[sysno] = struct{}{} + } } - - t, ok := strace.Lookup(abi.Linux, arch.AMD64) - if !ok { - panic("Can't find amd64 Linux syscall table") + syscalls := make([]uintptr, 0, len(requiredSyscalls)) + for sysno, _ := range requiredSyscalls { + syscalls = append(syscalls, sysno) } - sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] }) - for _, s := range syscalls { - log.Infof("syscall filter: %v (%v): %s", s, t.Name(s), rules[s]) + for _, sysno := range syscalls { + for _, rs := range rules { + // Print only if there is a corresponding set of rules. + if _, ok := rs.Rules[sysno]; ok { + log.Debugf("syscall filter %v: %s => 0x%x", SyscallName(sysno), rs.Rules[sysno], rs.Action) + } + } } root := createBST(syscalls) @@ -119,7 +148,7 @@ func buildIndex(rules SyscallRules, program *bpf.ProgramBuilder) error { // // A = seccomp_data.nr program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetNR) - return root.traverse(buildBSTProgram, program, rules) + return root.traverse(buildBSTProgram, rules, program) } // createBST converts sorted syscall slice into a balanced BST. @@ -136,15 +165,23 @@ func createBST(syscalls []uintptr) *node { return &parent } -func ruleViolationLabel(sysno uintptr, idx int) string { - return fmt.Sprintf("ruleViolation_%v_%v", sysno, idx) +func vsyscallViolationLabel(ruleSetIdx int, sysno uintptr) string { + return fmt.Sprintf("vsyscallViolation_%v_%v", ruleSetIdx, sysno) +} + +func ruleViolationLabel(ruleSetIdx int, sysno uintptr, idx int) string { + return fmt.Sprintf("ruleViolation_%v_%v_%v", ruleSetIdx, sysno, idx) } func checkArgsLabel(sysno uintptr) string { return fmt.Sprintf("checkArgs_%v", sysno) } -func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) error { +// addSyscallArgsCheck adds argument checks for a single system call. It does +// not insert a jump to the default action at the end and it is the +// responsibility of the caller to insert an appropriate jump after calling +// this function. +func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action uint32, ruleSetIdx int, sysno uintptr) error { for ruleidx, rule := range rules { labelled := false for i, arg := range rule { @@ -155,28 +192,29 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) err high, low := uint32(a>>32), uint32(a) // assert arg_low == low p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i)) - p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(sysno, ruleidx)) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) // assert arg_high == high p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i)) - p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(sysno, ruleidx)) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) labelled = true - default: return fmt.Errorf("unknown syscall rule type: %v", reflect.TypeOf(a)) } } } - // Matched, allow the syscall. - p.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW) - // Label the end of the rule if necessary. + + // Matched, emit the given action. + p.AddStmt(bpf.Ret|bpf.K, action) + + // Label the end of the rule if necessary. This is added for + // the jumps above when the argument check fails. if labelled { - if err := p.AddLabel(ruleViolationLabel(sysno, ruleidx)); err != nil { + if err := p.AddLabel(ruleViolationLabel(ruleSetIdx, sysno, ruleidx)); err != nil { return err } } } - // Not matched? - p.AddDirectJumpLabel(violationLabel) + return nil } @@ -188,16 +226,16 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) err // (A > 22) ? goto index_35 : goto index_9 // // index_9: // SYS_MMAP(9), leaf -// A == 9) ? goto argument check : violation +// A == 9) ? goto argument check : defaultLabel // // index_35: // SYS_NANOSLEEP(35), single child // (A == 35) ? goto argument check : continue -// (A > 35) ? goto index_50 : goto violation +// (A > 35) ? goto index_50 : goto defaultLabel // // index_50: // SYS_LISTEN(50), leaf -// (A == 50) ? goto argument check : goto violation +// (A == 50) ? goto argument check : goto defaultLabel // -func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) error { +func buildBSTProgram(n *node, rules []RuleSet, program *bpf.ProgramBuilder) error { // Root node is never referenced by label, skip it. if !n.root { if err := program.AddLabel(n.label()); err != nil { @@ -209,11 +247,10 @@ func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) e program.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, uint32(sysno), checkArgsLabel(sysno), 0) if n.left == nil && n.right == nil { // Leaf nodes don't require extra check. - program.AddDirectJumpLabel(violationLabel) + program.AddDirectJumpLabel(defaultLabel) } else { // Non-leaf node. Check which turn to take otherwise. Using direct jumps // in case that the offset may exceed the limit of a conditional jump (255) - // Note that 'violationLabel' is returned for nil children. program.AddJump(bpf.Jmp|bpf.Jgt|bpf.K, uint32(sysno), 0, skipOneInst) program.AddDirectJumpLabel(n.right.label()) program.AddDirectJumpLabel(n.left.label()) @@ -222,12 +259,60 @@ func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) e if err := program.AddLabel(checkArgsLabel(sysno)); err != nil { return err } - // No rules, just allow it and save one jmp. - if len(rules[sysno]) == 0 { - program.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW) - return nil + + emitted := false + for ruleSetIdx, rs := range rules { + if _, ok := rs.Rules[sysno]; ok { + // If there are no rules, then this will always match. + // Remember we've done this so that we can emit a + // sensible error. We can't catch all overlaps, but we + // can catch this one at least. + if emitted { + return fmt.Errorf("unreachable action for %v: 0x%x (rule set %d)", SyscallName(sysno), rs.Action, ruleSetIdx) + } + + // Emit a vsyscall check if this rule requires a + // Vsyscall match. This rule ensures that the top bit + // is set in the instruction pointer, which is where + // the vsyscall page will be mapped. + if rs.Vsyscall { + program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetIPHigh) + program.AddJumpFalseLabel(bpf.Jmp|bpf.Jset|bpf.K, 0x80000000, 0, vsyscallViolationLabel(ruleSetIdx, sysno)) + } + + // Emit matchers. + if len(rs.Rules[sysno]) == 0 { + // This is a blanket action. + program.AddStmt(bpf.Ret|bpf.K, rs.Action) + emitted = true + } else { + // Add an argument check for these particular + // arguments. This will continue execution and + // check the next rule set. We need to ensure + // that at the very end, we insert a direct + // jump label for the unmatched case. + if err := addSyscallArgsCheck(program, rs.Rules[sysno], rs.Action, ruleSetIdx, sysno); err != nil { + return err + } + } + + // If there was a Vsyscall check for this rule, then we + // need to add an appropriate label for the jump above. + if rs.Vsyscall { + if err := program.AddLabel(vsyscallViolationLabel(ruleSetIdx, sysno)); err != nil { + return err + } + } + } } - return addSyscallArgsCheck(program, rules[sysno], sysno) + + // Not matched? We only need to insert a jump to the default label if + // not default action has been emitted for this call. + if !emitted { + program.AddDirectJumpLabel(defaultLabel) + } + + return nil } // node represents a tree node. @@ -238,26 +323,27 @@ type node struct { root bool } -// label returns the label corresponding to this node. If node is nil (syscall not present), -// violationLabel is returned for convenience. +// label returns the label corresponding to this node. +// +// If n is nil, then the defaultLabel is returned. func (n *node) label() string { if n == nil { - return violationLabel + return defaultLabel } return fmt.Sprintf("index_%v", n.value) } -type traverseFunc func(*bpf.ProgramBuilder, SyscallRules, *node) error +type traverseFunc func(*node, []RuleSet, *bpf.ProgramBuilder) error -func (n *node) traverse(fn traverseFunc, p *bpf.ProgramBuilder, rules SyscallRules) error { +func (n *node) traverse(fn traverseFunc, rules []RuleSet, p *bpf.ProgramBuilder) error { if n == nil { return nil } - if err := fn(p, rules, n); err != nil { + if err := fn(n, rules, p); err != nil { return err } - if err := n.left.traverse(fn, p, rules); err != nil { + if err := n.left.traverse(fn, rules, p); err != nil { return err } - return n.right.traverse(fn, p, rules) + return n.right.traverse(fn, rules, p) } diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go index 9215e5c90..6b707f195 100644 --- a/pkg/seccomp/seccomp_rules.go +++ b/pkg/seccomp/seccomp_rules.go @@ -24,9 +24,11 @@ import "fmt" // __u64 args[6]; // }; const ( - seccompDataOffsetNR = 0 - seccompDataOffsetArch = 4 - seccompDataOffsetArgs = 16 + seccompDataOffsetNR = 0 + seccompDataOffsetArch = 4 + seccompDataOffsetIPLow = 8 + seccompDataOffsetIPHigh = 12 + seccompDataOffsetArgs = 16 ) func seccompDataOffsetArgLow(i int) uint32 { diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go index 42cf85c03..0188ad4f3 100644 --- a/pkg/seccomp/seccomp_test.go +++ b/pkg/seccomp/seccomp_test.go @@ -76,14 +76,18 @@ func TestBasic(t *testing.T) { } for _, test := range []struct { - // filters are the set of syscall that are allowed. - filters SyscallRules - kill bool - specs []spec + ruleSets []RuleSet + defaultAction uint32 + specs []spec }{ { - filters: SyscallRules{1: {}}, - kill: false, + ruleSets: []RuleSet{ + { + Rules: SyscallRules{1: {}}, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, specs: []spec{ { desc: "Single syscall allowed", @@ -98,12 +102,61 @@ func TestBasic(t *testing.T) { }, }, { - filters: SyscallRules{ - 1: {}, - 3: {}, - 5: {}, + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + AllowValue(0x1), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + { + Rules: SyscallRules{ + 1: {}, + 2: {}, + }, + Action: linux.SECCOMP_RET_TRAP, + }, }, - kill: false, + defaultAction: linux.SECCOMP_RET_KILL, + specs: []spec{ + { + desc: "Multiple rulesets allowed (1a)", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0x1}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "Multiple rulesets allowed (1b)", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "Multiple rulesets allowed (2)", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "Multiple rulesets allowed (2)", + data: seccompData{nr: 0, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_KILL, + }, + }, + }, + { + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: {}, + 3: {}, + 5: {}, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, specs: []spec{ { desc: "Multiple syscalls allowed (1)", @@ -148,8 +201,15 @@ func TestBasic(t *testing.T) { }, }, { - filters: SyscallRules{1: {}}, - kill: false, + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: {}, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, specs: []spec{ { desc: "Wrong architecture", @@ -159,26 +219,38 @@ func TestBasic(t *testing.T) { }, }, { - filters: SyscallRules{1: {}}, - kill: true, + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: {}, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, specs: []spec{ { - desc: "Syscall disallowed, action kill", + desc: "Syscall disallowed, action trap", data: seccompData{nr: 2, arch: linux.AUDIT_ARCH_X86_64}, - want: linux.SECCOMP_RET_KILL, + want: linux.SECCOMP_RET_TRAP, }, }, }, { - filters: SyscallRules{ - 1: []Rule{ - { - AllowAny{}, - AllowValue(0xf), + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + AllowAny{}, + AllowValue(0xf), + }, + }, }, + Action: linux.SECCOMP_RET_ALLOW, }, }, - kill: false, + defaultAction: linux.SECCOMP_RET_TRAP, specs: []spec{ { desc: "Syscall argument allowed", @@ -193,17 +265,22 @@ func TestBasic(t *testing.T) { }, }, { - filters: SyscallRules{ - 1: []Rule{ - { - AllowValue(0xf), - }, - { - AllowValue(0xe), + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + AllowValue(0xf), + }, + { + AllowValue(0xe), + }, + }, }, + Action: linux.SECCOMP_RET_ALLOW, }, }, - kill: false, + defaultAction: linux.SECCOMP_RET_TRAP, specs: []spec{ { desc: "Syscall argument allowed, two rules", @@ -218,16 +295,21 @@ func TestBasic(t *testing.T) { }, }, { - filters: SyscallRules{ - 1: []Rule{ - { - AllowValue(0), - AllowValue(math.MaxUint64 - 1), - AllowValue(math.MaxUint32), + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + AllowValue(0), + AllowValue(math.MaxUint64 - 1), + AllowValue(math.MaxUint32), + }, + }, }, + Action: linux.SECCOMP_RET_ALLOW, }, }, - kill: false, + defaultAction: linux.SECCOMP_RET_TRAP, specs: []spec{ { desc: "64bit syscall argument allowed", @@ -259,7 +341,7 @@ func TestBasic(t *testing.T) { }, }, } { - instrs, err := buildProgram(test.filters, test.kill) + instrs, err := BuildProgram(test.ruleSets, test.defaultAction) if err != nil { t.Errorf("%s: buildProgram() got error: %v", test.specs[0].desc, err) continue @@ -282,6 +364,7 @@ func TestBasic(t *testing.T) { } } +// TestRandom tests that randomly generated rules are encoded correctly. func TestRandom(t *testing.T) { rand.Seed(time.Now().UnixNano()) size := rand.Intn(50) + 1 @@ -294,7 +377,12 @@ func TestRandom(t *testing.T) { } fmt.Printf("Testing filters: %v", syscallRules) - instrs, err := buildProgram(syscallRules, false) + instrs, err := BuildProgram([]RuleSet{ + RuleSet{ + Rules: syscallRules, + Action: uint32(linux.SECCOMP_RET_ALLOW), + }, + }, uint32(linux.SECCOMP_RET_TRAP)) if err != nil { t.Fatalf("buildProgram() got error: %v", err) } @@ -319,8 +407,8 @@ func TestRandom(t *testing.T) { } } -// TestReadDeal checks that a process dies when it trips over the filter and that it -// doesn't die when the filter is not triggered. +// TestReadDeal checks that a process dies when it trips over the filter and +// that it doesn't die when the filter is not triggered. func TestRealDeal(t *testing.T) { for _, test := range []struct { die bool diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go index 6682f8d9b..ae18534bf 100644 --- a/pkg/seccomp/seccomp_unsafe.go +++ b/pkg/seccomp/seccomp_unsafe.go @@ -17,7 +17,6 @@ package seccomp import ( - "fmt" "syscall" "unsafe" @@ -31,19 +30,28 @@ type sockFprog struct { Filter *linux.BPFInstruction } -func seccomp(instrs []linux.BPFInstruction) error { +// SetFilter installs the given BPF program. +// +// This is safe to call from an afterFork context. +// +//go:nosplit +func SetFilter(instrs []linux.BPFInstruction) syscall.Errno { // SYS_SECCOMP is not available in syscall package. const SYS_SECCOMP = 317 // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See seccomp(2) for details. - if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0); err != 0 { - return fmt.Errorf("failed to set PR_SET_NO_NEW_PRIVS: %v", err) + if _, _, errno := syscall.RawSyscall(syscall.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0); errno != 0 { + return errno } - sockProg := sockFprog{Len: uint16(len(instrs)), Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0]))} // TODO: Use SECCOMP_FILTER_FLAG_KILL_PROCESS when available. - if _, _, err := syscall.RawSyscall(SYS_SECCOMP, linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, uintptr(unsafe.Pointer(&sockProg))); err != 0 { - return fmt.Errorf("failed to set seccomp filter: %v", err) + sockProg := sockFprog{ + Len: uint16(len(instrs)), + Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])), } - return nil + if _, _, errno := syscall.RawSyscall(SYS_SECCOMP, linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, uintptr(unsafe.Pointer(&sockProg))); errno != 0 { + return errno + } + + return 0 } |