diff options
Diffstat (limited to 'pkg/seccomp/seccomp.go')
-rw-r--r-- | pkg/seccomp/seccomp.go | 224 |
1 files changed, 155 insertions, 69 deletions
diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index 49da3c775..a746dc9b3 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -20,31 +20,36 @@ import ( "reflect" "sort" - "gvisor.googlesource.com/gvisor/pkg/abi" "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/bpf" "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/strace" ) const ( - // violationLabel is added to the program to take action on a violation. - violationLabel = "violation" - // skipOneInst is the offset to take for skipping one instruction. skipOneInst = 1 + + // defaultLabel is the label for the default action. + defaultLabel = "default_action" ) // Install generates BPF code based on the set of syscalls provided. It only -// allows syscalls that conform to the specification (*) and generates SIGSYS +// allows syscalls that conform to the specification and generates SIGSYS // trap unless kill is set. // -// (*) The current implementation only checks the syscall number. It does NOT -// validate any of the arguments. +// This is a convenience wrapper around BuildProgram and SetFilter. func Install(rules SyscallRules, kill bool) error { log.Infof("Installing seccomp filters for %d syscalls (kill=%t)", len(rules), kill) - instrs, err := buildProgram(rules, kill) + defaultAction := uint32(linux.SECCOMP_RET_TRAP) + if kill { + defaultAction = uint32(linux.SECCOMP_RET_KILL) + } + instrs, err := BuildProgram([]RuleSet{ + RuleSet{ + Rules: rules, + Action: uint32(linux.SECCOMP_RET_ALLOW), + }, + }, defaultAction) if log.IsLogging(log.Debug) { programStr, errDecode := bpf.DecodeProgram(instrs) if errDecode != nil { @@ -56,60 +61,84 @@ func Install(rules SyscallRules, kill bool) error { return err } - if err := seccomp(instrs); err != nil { - return err + // Perform the actual installation. + if errno := SetFilter(instrs); errno != 0 { + return fmt.Errorf("Failed to set filter: %v", errno) } log.Infof("Seccomp filters installed.") return nil } -// buildProgram builds a BPF program that whitelists all given syscall rules. -func buildProgram(rules SyscallRules, kill bool) ([]linux.BPFInstruction, error) { +// RuleSet is a set of rules and associated action. +type RuleSet struct { + Rules SyscallRules + Action uint32 + + // Vsyscall indicates that a check is made for a function being called + // from kernel mappings. This is where the vsyscall page is located + // (and typically) emulated, so this RuleSet will not match any + // functions not dispatched from the vsyscall page. + Vsyscall bool +} + +// SyscallName gives names to system calls. It is used purely for debugging purposes. +// +// An alternate namer can be provided to the package at initialization time. +var SyscallName = func(sysno uintptr) string { + return fmt.Sprintf("syscall_%d", sysno) +} + +// BuildProgram builds a BPF program from the given map of actions to matching +// SyscallRules. The single generated program covers all provided RuleSets. +func BuildProgram(rules []RuleSet, defaultAction uint32) ([]linux.BPFInstruction, error) { program := bpf.NewProgramBuilder() - violationAction := uint32(linux.SECCOMP_RET_KILL) - if !kill { - violationAction = linux.SECCOMP_RET_TRAP - } // Be paranoid and check that syscall is done in the expected architecture. // // A = seccomp_data.arch - // if (A != AUDIT_ARCH_X86_64) goto violation + // if (A != AUDIT_ARCH_X86_64) goto defaultAction. program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArch) - // violationLabel is at the bottom of the program. The size of program + // defaultLabel is at the bottom of the program. The size of program // may exceeds 255 lines, which is the limit of a condition jump. program.AddJump(bpf.Jmp|bpf.Jeq|bpf.K, linux.AUDIT_ARCH_X86_64, skipOneInst, 0) - program.AddDirectJumpLabel(violationLabel) - + program.AddDirectJumpLabel(defaultLabel) if err := buildIndex(rules, program); err != nil { return nil, err } - // violation: return violationAction - if err := program.AddLabel(violationLabel); err != nil { + // Exhausted: return defaultAction. + if err := program.AddLabel(defaultLabel); err != nil { return nil, err } - program.AddStmt(bpf.Ret|bpf.K, violationAction) + program.AddStmt(bpf.Ret|bpf.K, defaultAction) return program.Instructions() } -// buildIndex builds a BST to quickly search through all syscalls that are whitelisted. -func buildIndex(rules SyscallRules, program *bpf.ProgramBuilder) error { - syscalls := []uintptr{} - for sysno := range rules { - syscalls = append(syscalls, sysno) +// buildIndex builds a BST to quickly search through all syscalls. +func buildIndex(rules []RuleSet, program *bpf.ProgramBuilder) error { + // Build a list of all application system calls, across all given rule + // sets. We have a simple BST, but may dispatch individual matchers + // with different actions. The matchers are evaluated linearly. + requiredSyscalls := make(map[uintptr]struct{}) + for _, rs := range rules { + for sysno := range rs.Rules { + requiredSyscalls[sysno] = struct{}{} + } } - - t, ok := strace.Lookup(abi.Linux, arch.AMD64) - if !ok { - panic("Can't find amd64 Linux syscall table") + syscalls := make([]uintptr, 0, len(requiredSyscalls)) + for sysno, _ := range requiredSyscalls { + syscalls = append(syscalls, sysno) } - sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] }) - for _, s := range syscalls { - log.Infof("syscall filter: %v (%v): %s", s, t.Name(s), rules[s]) + for _, sysno := range syscalls { + for _, rs := range rules { + // Print only if there is a corresponding set of rules. + if _, ok := rs.Rules[sysno]; ok { + log.Debugf("syscall filter %v: %s => 0x%x", SyscallName(sysno), rs.Rules[sysno], rs.Action) + } + } } root := createBST(syscalls) @@ -119,7 +148,7 @@ func buildIndex(rules SyscallRules, program *bpf.ProgramBuilder) error { // // A = seccomp_data.nr program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetNR) - return root.traverse(buildBSTProgram, program, rules) + return root.traverse(buildBSTProgram, rules, program) } // createBST converts sorted syscall slice into a balanced BST. @@ -136,15 +165,23 @@ func createBST(syscalls []uintptr) *node { return &parent } -func ruleViolationLabel(sysno uintptr, idx int) string { - return fmt.Sprintf("ruleViolation_%v_%v", sysno, idx) +func vsyscallViolationLabel(ruleSetIdx int, sysno uintptr) string { + return fmt.Sprintf("vsyscallViolation_%v_%v", ruleSetIdx, sysno) +} + +func ruleViolationLabel(ruleSetIdx int, sysno uintptr, idx int) string { + return fmt.Sprintf("ruleViolation_%v_%v_%v", ruleSetIdx, sysno, idx) } func checkArgsLabel(sysno uintptr) string { return fmt.Sprintf("checkArgs_%v", sysno) } -func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) error { +// addSyscallArgsCheck adds argument checks for a single system call. It does +// not insert a jump to the default action at the end and it is the +// responsibility of the caller to insert an appropriate jump after calling +// this function. +func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action uint32, ruleSetIdx int, sysno uintptr) error { for ruleidx, rule := range rules { labelled := false for i, arg := range rule { @@ -155,28 +192,29 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) err high, low := uint32(a>>32), uint32(a) // assert arg_low == low p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i)) - p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(sysno, ruleidx)) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) // assert arg_high == high p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i)) - p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(sysno, ruleidx)) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) labelled = true - default: return fmt.Errorf("unknown syscall rule type: %v", reflect.TypeOf(a)) } } } - // Matched, allow the syscall. - p.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW) - // Label the end of the rule if necessary. + + // Matched, emit the given action. + p.AddStmt(bpf.Ret|bpf.K, action) + + // Label the end of the rule if necessary. This is added for + // the jumps above when the argument check fails. if labelled { - if err := p.AddLabel(ruleViolationLabel(sysno, ruleidx)); err != nil { + if err := p.AddLabel(ruleViolationLabel(ruleSetIdx, sysno, ruleidx)); err != nil { return err } } } - // Not matched? - p.AddDirectJumpLabel(violationLabel) + return nil } @@ -188,16 +226,16 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) err // (A > 22) ? goto index_35 : goto index_9 // // index_9: // SYS_MMAP(9), leaf -// A == 9) ? goto argument check : violation +// A == 9) ? goto argument check : defaultLabel // // index_35: // SYS_NANOSLEEP(35), single child // (A == 35) ? goto argument check : continue -// (A > 35) ? goto index_50 : goto violation +// (A > 35) ? goto index_50 : goto defaultLabel // // index_50: // SYS_LISTEN(50), leaf -// (A == 50) ? goto argument check : goto violation +// (A == 50) ? goto argument check : goto defaultLabel // -func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) error { +func buildBSTProgram(n *node, rules []RuleSet, program *bpf.ProgramBuilder) error { // Root node is never referenced by label, skip it. if !n.root { if err := program.AddLabel(n.label()); err != nil { @@ -209,11 +247,10 @@ func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) e program.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, uint32(sysno), checkArgsLabel(sysno), 0) if n.left == nil && n.right == nil { // Leaf nodes don't require extra check. - program.AddDirectJumpLabel(violationLabel) + program.AddDirectJumpLabel(defaultLabel) } else { // Non-leaf node. Check which turn to take otherwise. Using direct jumps // in case that the offset may exceed the limit of a conditional jump (255) - // Note that 'violationLabel' is returned for nil children. program.AddJump(bpf.Jmp|bpf.Jgt|bpf.K, uint32(sysno), 0, skipOneInst) program.AddDirectJumpLabel(n.right.label()) program.AddDirectJumpLabel(n.left.label()) @@ -222,12 +259,60 @@ func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) e if err := program.AddLabel(checkArgsLabel(sysno)); err != nil { return err } - // No rules, just allow it and save one jmp. - if len(rules[sysno]) == 0 { - program.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW) - return nil + + emitted := false + for ruleSetIdx, rs := range rules { + if _, ok := rs.Rules[sysno]; ok { + // If there are no rules, then this will always match. + // Remember we've done this so that we can emit a + // sensible error. We can't catch all overlaps, but we + // can catch this one at least. + if emitted { + return fmt.Errorf("unreachable action for %v: 0x%x (rule set %d)", SyscallName(sysno), rs.Action, ruleSetIdx) + } + + // Emit a vsyscall check if this rule requires a + // Vsyscall match. This rule ensures that the top bit + // is set in the instruction pointer, which is where + // the vsyscall page will be mapped. + if rs.Vsyscall { + program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetIPHigh) + program.AddJumpFalseLabel(bpf.Jmp|bpf.Jset|bpf.K, 0x80000000, 0, vsyscallViolationLabel(ruleSetIdx, sysno)) + } + + // Emit matchers. + if len(rs.Rules[sysno]) == 0 { + // This is a blanket action. + program.AddStmt(bpf.Ret|bpf.K, rs.Action) + emitted = true + } else { + // Add an argument check for these particular + // arguments. This will continue execution and + // check the next rule set. We need to ensure + // that at the very end, we insert a direct + // jump label for the unmatched case. + if err := addSyscallArgsCheck(program, rs.Rules[sysno], rs.Action, ruleSetIdx, sysno); err != nil { + return err + } + } + + // If there was a Vsyscall check for this rule, then we + // need to add an appropriate label for the jump above. + if rs.Vsyscall { + if err := program.AddLabel(vsyscallViolationLabel(ruleSetIdx, sysno)); err != nil { + return err + } + } + } } - return addSyscallArgsCheck(program, rules[sysno], sysno) + + // Not matched? We only need to insert a jump to the default label if + // not default action has been emitted for this call. + if !emitted { + program.AddDirectJumpLabel(defaultLabel) + } + + return nil } // node represents a tree node. @@ -238,26 +323,27 @@ type node struct { root bool } -// label returns the label corresponding to this node. If node is nil (syscall not present), -// violationLabel is returned for convenience. +// label returns the label corresponding to this node. +// +// If n is nil, then the defaultLabel is returned. func (n *node) label() string { if n == nil { - return violationLabel + return defaultLabel } return fmt.Sprintf("index_%v", n.value) } -type traverseFunc func(*bpf.ProgramBuilder, SyscallRules, *node) error +type traverseFunc func(*node, []RuleSet, *bpf.ProgramBuilder) error -func (n *node) traverse(fn traverseFunc, p *bpf.ProgramBuilder, rules SyscallRules) error { +func (n *node) traverse(fn traverseFunc, rules []RuleSet, p *bpf.ProgramBuilder) error { if n == nil { return nil } - if err := fn(p, rules, n); err != nil { + if err := fn(n, rules, p); err != nil { return err } - if err := n.left.traverse(fn, p, rules); err != nil { + if err := n.left.traverse(fn, rules, p); err != nil { return err } - return n.right.traverse(fn, p, rules) + return n.right.traverse(fn, rules, p) } |