diff options
author | Zhengyu He <hzy@google.com> | 2018-06-01 13:39:53 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-06-01 13:40:52 -0700 |
commit | d1ca50d49e52338feb1d46b69725b9ac21cc3ccc (patch) | |
tree | 11e0bcef11cffe780a407775f02f57ee2d142c5f | |
parent | 65dadc00297d946e86b2e95b0279fb6dc94542dd (diff) |
Add SyscallRules that supports argument filtering
PiperOrigin-RevId: 198919043
Change-Id: I7f1f0a3b3430cd0936a4ee4fc6859aab71820bdf
-rw-r--r-- | pkg/bpf/program_builder.go | 64 | ||||
-rw-r--r-- | pkg/seccomp/BUILD | 4 | ||||
-rw-r--r-- | pkg/seccomp/seccomp.go | 217 | ||||
-rw-r--r-- | pkg/seccomp/seccomp_rules.go | 119 | ||||
-rw-r--r-- | pkg/seccomp/seccomp_test.go | 125 | ||||
-rw-r--r-- | pkg/seccomp/seccomp_test_victim.go | 141 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 249 | ||||
-rw-r--r-- | runsc/boot/filter/extra_filters.go | 6 | ||||
-rw-r--r-- | runsc/boot/filter/extra_filters_msan.go | 10 | ||||
-rw-r--r-- | runsc/boot/filter/extra_filters_race.go | 19 | ||||
-rw-r--r-- | runsc/boot/filter/filter.go | 12 |
11 files changed, 640 insertions, 326 deletions
diff --git a/pkg/bpf/program_builder.go b/pkg/bpf/program_builder.go index 7554d47c1..bad56d7ac 100644 --- a/pkg/bpf/program_builder.go +++ b/pkg/bpf/program_builder.go @@ -21,7 +21,10 @@ import ( "gvisor.googlesource.com/gvisor/pkg/abi/linux" ) -const labelTarget = math.MaxUint8 +const ( + labelTarget = math.MaxUint8 + labelDirectTarget = math.MaxUint32 +) // ProgramBuilder assists with building a BPF program with jump // labels that are resolved to their proper offsets. @@ -47,6 +50,14 @@ type label struct { target int } +type jmpType int + +const ( + jDirect jmpType = iota + jTrue + jFalse +) + // source contains information about a single reference to a label. type source struct { // Program line where the label reference is present. @@ -54,7 +65,7 @@ type source struct { // True if label reference is in the 'jump if true' part of the jump. // False if label reference is in the 'jump if false' part of the jump. - jt bool + jt jmpType } // AddStmt adds a new statement to the program. @@ -67,23 +78,29 @@ func (b *ProgramBuilder) AddJump(code uint16, k uint32, jt, jf uint8) { b.instructions = append(b.instructions, Jump(code, k, jt, jf)) } +// AddDirectJumpLabel adds a new jump to the program where is labelled. +func (b *ProgramBuilder) AddDirectJumpLabel(labelName string) { + b.addLabelSource(labelName, jDirect) + b.AddJump(Jmp|Ja, labelDirectTarget, 0, 0) +} + // AddJumpTrueLabel adds a new jump to the program where 'jump if true' is a label. func (b *ProgramBuilder) AddJumpTrueLabel(code uint16, k uint32, jtLabel string, jf uint8) { - b.addLabelSource(jtLabel, true) + b.addLabelSource(jtLabel, jTrue) b.AddJump(code, k, labelTarget, jf) } // AddJumpFalseLabel adds a new jump to the program where 'jump if false' is a label. func (b *ProgramBuilder) AddJumpFalseLabel(code uint16, k uint32, jt uint8, jfLabel string) { - b.addLabelSource(jfLabel, false) - b.AddJump(code, k, jt, math.MaxUint8) + b.addLabelSource(jfLabel, jFalse) + b.AddJump(code, k, jt, labelTarget) } // AddJumpLabels adds a new jump to the program where both jump targets are labels. func (b *ProgramBuilder) AddJumpLabels(code uint16, k uint32, jtLabel, jfLabel string) { - b.addLabelSource(jtLabel, true) - b.addLabelSource(jfLabel, false) - b.AddJump(code, k, math.MaxUint8, math.MaxUint8) + b.addLabelSource(jtLabel, jTrue) + b.addLabelSource(jfLabel, jFalse) + b.AddJump(code, k, labelTarget, labelTarget) } // AddLabel sets the given label name at the current location. The next instruction is executed @@ -104,20 +121,22 @@ func (b *ProgramBuilder) AddLabel(name string) error { // Instructions returns an array of BPF instructions representing the program with all labels // resolved. Return error in case label resolution failed due to an invalid program. +// +// N.B. Partial results will be returned in the error case, which is useful for debugging. func (b *ProgramBuilder) Instructions() ([]linux.BPFInstruction, error) { if err := b.resolveLabels(); err != nil { - return nil, err + return b.instructions, err } return b.instructions, nil } -func (b *ProgramBuilder) addLabelSource(labelName string, jt bool) { +func (b *ProgramBuilder) addLabelSource(labelName string, t jmpType) { l, ok := b.labels[labelName] if !ok { l = &label{sources: make([]source, 0), target: -1} b.labels[labelName] = l } - l.sources = append(l.sources, source{line: len(b.instructions), jt: jt}) + l.sources = append(l.sources, source{line: len(b.instructions), jt: t}) } func (b *ProgramBuilder) resolveLabels() error { @@ -136,21 +155,34 @@ func (b *ProgramBuilder) resolveLabels() error { } // Calculates the jump offset from current line. offset := v.target - s.line - 1 - if offset > math.MaxUint8 { - return fmt.Errorf("jump offset to label '%v' is too large: %v", key, offset) - } // Sets offset into jump instruction. - if s.jt { + switch s.jt { + case jDirect: + if offset > labelDirectTarget { + return fmt.Errorf("jump offset to label '%v' is too large: %v, inst: %v, lineno: %v", key, offset, inst, s.line) + } + if inst.K != labelDirectTarget { + return fmt.Errorf("jump target is not a label") + } + inst.K = uint32(offset) + case jTrue: + if offset > labelTarget { + return fmt.Errorf("jump offset to label '%v' is too large: %v, inst: %v, lineno: %v", key, offset, inst, s.line) + } if inst.JumpIfTrue != labelTarget { return fmt.Errorf("jump target is not a label") } inst.JumpIfTrue = uint8(offset) - } else { + case jFalse: + if offset > labelTarget { + return fmt.Errorf("jump offset to label '%v' is too large: %v, inst: %v, lineno: %v", key, offset, inst, s.line) + } if inst.JumpIfFalse != labelTarget { return fmt.Errorf("jump target is not a label") } inst.JumpIfFalse = uint8(offset) } + b.instructions[s.line] = inst } } diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index 1e19b1d25..cadd24505 100644 --- a/pkg/seccomp/BUILD +++ b/pkg/seccomp/BUILD @@ -21,14 +21,18 @@ go_library( name = "seccomp", srcs = [ "seccomp.go", + "seccomp_rules.go", "seccomp_unsafe.go", ], importpath = "gvisor.googlesource.com/gvisor/pkg/seccomp", visibility = ["//visibility:public"], deps = [ + "//pkg/abi", "//pkg/abi/linux", "//pkg/bpf", "//pkg/log", + "//pkg/sentry/arch", + "//pkg/sentry/strace", ], ) diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index 7ee63140c..cd6b0b4bc 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -12,24 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package seccomp provides basic seccomp filters. +// Package seccomp provides basic seccomp filters for x86_64 (little endian). package seccomp import ( "fmt" + "reflect" "sort" + "gvisor.googlesource.com/gvisor/pkg/abi" "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/bpf" "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/strace" ) const ( // violationLabel is added to the program to take action on a violation. violationLabel = "violation" - // allowLabel is added to the program to allow the syscall to take place. - allowLabel = "allow" + // skipOneInst is the offset to take for skipping one instruction. + skipOneInst = 1 ) // Install generates BPF code based on the set of syscalls provided. It only @@ -38,27 +42,19 @@ const ( // // (*) The current implementation only checks the syscall number. It does NOT // validate any of the arguments. -func Install(syscalls []uintptr, kill bool) error { - // Sort syscalls and remove duplicates to build the BST. - sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] }) - syscalls = filterUnique(syscalls) - - log.Infof("Installing seccomp filters for %d syscalls (kill=%t)", len(syscalls), kill) - for _, s := range syscalls { - log.Infof("syscall filter: %v", s) - } - - instrs, err := buildProgram(syscalls, kill) - if err != nil { - return err - } +func Install(rules SyscallRules, kill bool) error { + log.Infof("Installing seccomp filters for %d syscalls (kill=%t)", len(rules), kill) + instrs, err := buildProgram(rules, kill) if log.IsLogging(log.Debug) { - programStr, err := bpf.DecodeProgram(instrs) - if err != nil { - programStr = fmt.Sprintf("Error: %v\n%s", err, programStr) + programStr, errDecode := bpf.DecodeProgram(instrs) + if errDecode != nil { + programStr = fmt.Sprintf("Error: %v\n%s", errDecode, programStr) } log.Debugf("Seccomp program dump:\n%s", programStr) } + if err != nil { + return err + } if err := seccomp(instrs); err != nil { return err @@ -68,11 +64,8 @@ func Install(syscalls []uintptr, kill bool) error { return nil } -// buildProgram builds a BPF program that whitelists all given syscalls. -// -// Precondition: syscalls must be sorted and unique. -func buildProgram(syscalls []uintptr, kill bool) ([]linux.BPFInstruction, error) { - const archOffset = 4 // offsetof(seccomp_data, arch) +// buildProgram builds a BPF program that whitelists all given syscall rules. +func buildProgram(rules SyscallRules, kill bool) ([]linux.BPFInstruction, error) { program := bpf.NewProgramBuilder() violationAction := uint32(linux.SECCOMP_RET_KILL) if !kill { @@ -83,10 +76,13 @@ func buildProgram(syscalls []uintptr, kill bool) ([]linux.BPFInstruction, error) // // A = seccomp_data.arch // if (A != AUDIT_ARCH_X86_64) goto violation - program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, archOffset) - program.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, linux.AUDIT_ARCH_X86_64, 0, violationLabel) + program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArch) + // violationLabel is at the bottom of the program. The size of program + // may exceeds 255 lines, which is the limit of a condition jump. + program.AddJump(bpf.Jmp|bpf.Jeq|bpf.K, linux.AUDIT_ARCH_X86_64, skipOneInst, 0) + program.AddDirectJumpLabel(violationLabel) - if err := buildIndex(syscalls, program); err != nil { + if err := buildIndex(rules, program); err != nil { return nil, err } @@ -96,41 +92,34 @@ func buildProgram(syscalls []uintptr, kill bool) ([]linux.BPFInstruction, error) } program.AddStmt(bpf.Ret|bpf.K, violationAction) - // allow: return SECCOMP_RET_ALLOW - if err := program.AddLabel(allowLabel); err != nil { - return nil, err - } - program.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW) - return program.Instructions() } -// filterUnique filters unique system calls. -// -// Precondition: syscalls must be sorted. -func filterUnique(syscalls []uintptr) []uintptr { - filtered := make([]uintptr, 0, len(syscalls)) - for i := 0; i < len(syscalls); i++ { - if len(filtered) > 0 && syscalls[i] == filtered[len(filtered)-1] { - // This call has already been inserted, skip. - continue - } - filtered = append(filtered, syscalls[i]) +// buildIndex builds a BST to quickly search through all syscalls that are whitelisted. +func buildIndex(rules SyscallRules, program *bpf.ProgramBuilder) error { + syscalls := []uintptr{} + for sysno, _ := range rules { + syscalls = append(syscalls, sysno) + } + + t, ok := strace.Lookup(abi.Linux, arch.AMD64) + if !ok { + panic("Can't find amd64 Linux syscall table") + } + + sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] }) + for _, s := range syscalls { + log.Infof("syscall filter: %v (%v): %s", s, t.Name(s), rules[s]) } - return filtered -} -// buildIndex builds a BST to quickly search through all syscalls that are whitelisted. -// -// Precondition: syscalls must be sorted and unique. -func buildIndex(syscalls []uintptr, program *bpf.ProgramBuilder) error { root := createBST(syscalls) + root.root = true // Load syscall number into A and run through BST. // // A = seccomp_data.nr - program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, 0) - return root.buildBSTProgram(program, true) + program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetNR) + return root.traverse(buildBSTProgram, program, rules) } // createBST converts sorted syscall slice into a balanced BST. @@ -147,64 +136,128 @@ func createBST(syscalls []uintptr) *node { return &parent } -// node represents a tree node. -type node struct { - value uintptr - left *node - right *node +func ruleViolationLabel(sysno uintptr, idx int) string { + return fmt.Sprintf("ruleViolation_%v_%v", sysno, idx) } -// label returns the label corresponding to this node. If node is nil (syscall not present), -// violationLabel is returned for convenience. -func (n *node) label() string { - if n == nil { - return violationLabel +func checkArgsLabel(sysno uintptr) string { + return fmt.Sprintf("checkArgs_%v", sysno) +} + +func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) error { + for ruleidx, rule := range rules { + labelled := false + for i, arg := range rule { + if arg != nil { + switch a := arg.(type) { + case AllowAny: + case AllowValue: + high, low := uint32(a>>32), uint32(a) + // assert arg_low == low + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i)) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(sysno, ruleidx)) + // assert arg_high == high + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i)) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(sysno, ruleidx)) + labelled = true + + default: + return fmt.Errorf("unknown syscall rule type: %v", reflect.TypeOf(a)) + } + } + } + // Matched, allow the syscall. + p.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW) + // Label the end of the rule if necessary. + if labelled { + if err := p.AddLabel(ruleViolationLabel(sysno, ruleidx)); err != nil { + return err + } + } } - return fmt.Sprintf("index_%v", n.value) + // Not matched? + p.AddDirectJumpLabel(violationLabel) + return nil } // buildBSTProgram converts a binary tree started in 'root' into BPF code. The ouline of the code // is as follows: // // // SYS_PIPE(22), root -// (A == 22) ? goto allow : continue +// (A == 22) ? goto argument check : continue // (A > 22) ? goto index_35 : goto index_9 // // index_9: // SYS_MMAP(9), leaf -// (A == 9) ? goto allow : goto violation +// A == 9) ? goto argument check : violation // // index_35: // SYS_NANOSLEEP(35), single child -// (A == 35) ? goto allow : continue +// (A == 35) ? goto argument check : continue // (A > 35) ? goto index_50 : goto violation // // index_50: // SYS_LISTEN(50), leaf -// (A == 50) ? goto allow : goto violation +// (A == 50) ? goto argument check : goto violation // -func (n *node) buildBSTProgram(program *bpf.ProgramBuilder, root bool) error { - if n == nil { - return nil - } - +func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) error { // Root node is never referenced by label, skip it. - if !root { + if !n.root { if err := program.AddLabel(n.label()); err != nil { return err } } - // Leaf nodes don't require extra check, they either allow or violate! + sysno := n.value + program.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, uint32(sysno), checkArgsLabel(sysno), 0) if n.left == nil && n.right == nil { - program.AddJumpLabels(bpf.Jmp|bpf.Jeq|bpf.K, uint32(n.value), allowLabel, violationLabel) + // Leaf nodes don't require extra check. + program.AddDirectJumpLabel(violationLabel) + } else { + // Non-leaf node. Check which turn to take otherwise. Using direct jumps + // in case that the offset may exceed the limit of a conditional jump (255) + // Note that 'violationLabel' is returned for nil children. + program.AddJump(bpf.Jmp|bpf.Jgt|bpf.K, uint32(sysno), 0, skipOneInst) + program.AddDirectJumpLabel(n.right.label()) + program.AddDirectJumpLabel(n.left.label()) + } + + if err := program.AddLabel(checkArgsLabel(sysno)); err != nil { + return err + } + // No rules, just allow it and save one jmp. + if len(rules[sysno]) == 0 { + program.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW) return nil } + return addSyscallArgsCheck(program, rules[sysno], sysno) +} - // Non-leaf node. Allows syscall if it matches, check which turn to take otherwise. Note - // that 'violationLabel' is returned for nil children. - program.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, uint32(n.value), allowLabel, 0) - program.AddJumpLabels(bpf.Jmp|bpf.Jgt|bpf.K, uint32(n.value), n.right.label(), n.left.label()) +// node represents a tree node. +type node struct { + value uintptr + left *node + right *node + root bool +} + +// label returns the label corresponding to this node. If node is nil (syscall not present), +// violationLabel is returned for convenience. +func (n *node) label() string { + if n == nil { + return violationLabel + } + return fmt.Sprintf("index_%v", n.value) +} - if err := n.left.buildBSTProgram(program, false); err != nil { +type traverseFunc func(*bpf.ProgramBuilder, SyscallRules, *node) error + +func (n *node) traverse(fn traverseFunc, p *bpf.ProgramBuilder, rules SyscallRules) error { + if n == nil { + return nil + } + if err := fn(p, rules, n); err != nil { + return err + } + if err := n.left.traverse(fn, p, rules); err != nil { return err } - return n.right.buildBSTProgram(program, false) + return n.right.traverse(fn, p, rules) } diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go new file mode 100644 index 000000000..892ccabb4 --- /dev/null +++ b/pkg/seccomp/seccomp_rules.go @@ -0,0 +1,119 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package seccomp + +import "fmt" + +// The offsets are based on the following struct in include/linux/seccomp.h. +// struct seccomp_data { +// int nr; +// __u32 arch; +// __u64 instruction_pointer; +// __u64 args[6]; +// }; +const ( + seccompDataOffsetNR = 0 + seccompDataOffsetArch = 4 + seccompDataOffsetArgs = 16 +) + +func seccompDataOffsetArgLow(i int) uint32 { + return uint32(seccompDataOffsetArgs + i*8) +} + +func seccompDataOffsetArgHigh(i int) uint32 { + return uint32(seccompDataOffsetArgs + i*8 + 4) +} + +// AllowAny is marker to indicate any value will be accepted. +type AllowAny struct{} + +func (a AllowAny) String() (s string) { + return "*" +} + +// AllowValue specifies a value that needs to be strictly matched. +type AllowValue uintptr + +func (a AllowValue) String() (s string) { + return fmt.Sprintf("%#x ", uintptr(a)) +} + +// Rule stores the whitelist of syscall arguments. +// +// For example: +// rule := Rule { +// AllowValue(linux.ARCH_GET_FS | linux.ARCH_SET_FS), // arg0 +// } +type Rule [6]interface{} + +func (r Rule) String() (s string) { + if len(r) == 0 { + return + } + s += "( " + for _, arg := range r { + if arg != nil { + s += fmt.Sprintf("%v ", arg) + } + } + s += ")" + return +} + +// SyscallRules stores a map of OR'ed whitelist rules indexed by the syscall number. +// If the 'Rules' is empty, we treat it as any argument is allowed. +// +// For example: +// rules := SyscallRules{ +// syscall.SYS_FUTEX: []Rule{ +// { +// AllowAny{}, +// AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), +// }, // OR +// { +// AllowAny{}, +// AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), +// }, +// }, +// syscall.SYS_GETPID: []Rule{}, +// } +type SyscallRules map[uintptr][]Rule + +// NewSyscallRules returns a new SyscallRules. +func NewSyscallRules() SyscallRules { + return make(map[uintptr][]Rule) +} + +// AddRule adds the given rule. It will create a new entry for a new syscall, otherwise +// it will append to the existing rules. +func (sr SyscallRules) AddRule(sysno uintptr, r Rule) { + if _, ok := sr[sysno]; ok { + sr[sysno] = append(sr[sysno], r) + } else { + sr[sysno] = []Rule{r} + } +} + +// Merge merges the given SyscallRules. +func (sr SyscallRules) Merge(rules SyscallRules) { + for sysno, rs := range rules { + if _, ok := sr[sysno]; ok { + sr[sysno] = append(sr[sysno], rs...) + } else { + sr[sysno] = rs + } + } +} diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go index c700d88d6..d3aca7ee9 100644 --- a/pkg/seccomp/seccomp_test.go +++ b/pkg/seccomp/seccomp_test.go @@ -19,10 +19,10 @@ import ( "fmt" "io" "io/ioutil" + "math" "math/rand" "os" "os/exec" - "sort" "strings" "testing" "time" @@ -77,12 +77,12 @@ func TestBasic(t *testing.T) { for _, test := range []struct { // filters are the set of syscall that are allowed. - filters []uintptr + filters SyscallRules kill bool specs []spec }{ { - filters: []uintptr{1}, + filters: SyscallRules{1: {}}, kill: false, specs: []spec{ { @@ -98,8 +98,12 @@ func TestBasic(t *testing.T) { }, }, { - filters: []uintptr{1, 3, 5}, - kill: false, + filters: SyscallRules{ + 1: {}, + 3: {}, + 5: {}, + }, + kill: false, specs: []spec{ { desc: "Multiple syscalls allowed (1)", @@ -144,7 +148,7 @@ func TestBasic(t *testing.T) { }, }, { - filters: []uintptr{1}, + filters: SyscallRules{1: {}}, kill: false, specs: []spec{ { @@ -155,7 +159,7 @@ func TestBasic(t *testing.T) { }, }, { - filters: []uintptr{1}, + filters: SyscallRules{1: {}}, kill: true, specs: []spec{ { @@ -165,8 +169,96 @@ func TestBasic(t *testing.T) { }, }, }, + { + filters: SyscallRules{ + 1: []Rule{ + { + AllowAny{}, + AllowValue(0xf), + }, + }, + }, + kill: false, + specs: []spec{ + { + desc: "Syscall argument allowed", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0xf, 0xf}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "Syscall argument disallowed", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0xf, 0xe}}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + filters: SyscallRules{ + 1: []Rule{ + { + AllowValue(0xf), + }, + { + AllowValue(0xe), + }, + }, + }, + kill: false, + specs: []spec{ + { + desc: "Syscall argument allowed, two rules", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0xf}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "Syscall argument allowed, two rules", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0xe}}, + want: linux.SECCOMP_RET_ALLOW, + }, + }, + }, + { + filters: SyscallRules{ + 1: []Rule{ + { + AllowValue(0), + AllowValue(math.MaxUint64 - 1), + AllowValue(math.MaxUint32), + }, + }, + }, + kill: false, + specs: []spec{ + { + desc: "64bit syscall argument allowed", + data: seccompData{ + nr: 1, + arch: linux.AUDIT_ARCH_X86_64, + args: [6]uint64{0, math.MaxUint64 - 1, math.MaxUint32}, + }, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "64bit syscall argument disallowed", + data: seccompData{ + nr: 1, + arch: linux.AUDIT_ARCH_X86_64, + args: [6]uint64{0, math.MaxUint64, math.MaxUint32}, + }, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "64bit syscall argument disallowed", + data: seccompData{ + nr: 1, + arch: linux.AUDIT_ARCH_X86_64, + args: [6]uint64{0, math.MaxUint64, math.MaxUint32 - 1}, + }, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, } { - sort.Slice(test.filters, func(i, j int) bool { return test.filters[i] < test.filters[j] }) instrs, err := buildProgram(test.filters, test.kill) if err != nil { t.Errorf("%s: buildProgram() got error: %v", test.specs[0].desc, err) @@ -193,19 +285,16 @@ func TestBasic(t *testing.T) { func TestRandom(t *testing.T) { rand.Seed(time.Now().UnixNano()) size := rand.Intn(50) + 1 - syscalls := make([]uintptr, 0, size) - syscallMap := make(map[uintptr]struct{}) - for len(syscalls) < size { + syscallRules := make(map[uintptr][]Rule) + for len(syscallRules) < size { n := uintptr(rand.Intn(200)) - if _, ok := syscallMap[n]; !ok { - syscalls = append(syscalls, n) - syscallMap[n] = struct{}{} + if _, ok := syscallRules[n]; !ok { + syscallRules[n] = []Rule{} } } - sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] }) - fmt.Printf("Testing filters: %v", syscalls) - instrs, err := buildProgram(syscalls, false) + fmt.Printf("Testing filters: %v", syscallRules) + instrs, err := buildProgram(syscallRules, false) if err != nil { t.Fatalf("buildProgram() got error: %v", err) } @@ -221,7 +310,7 @@ func TestRandom(t *testing.T) { continue } want := uint32(linux.SECCOMP_RET_TRAP) - if _, ok := syscallMap[uintptr(i)]; ok { + if _, ok := syscallRules[uintptr(i)]; ok { want = linux.SECCOMP_RET_ALLOW } if got != want { diff --git a/pkg/seccomp/seccomp_test_victim.go b/pkg/seccomp/seccomp_test_victim.go index fe3f96901..4f2ae4dac 100644 --- a/pkg/seccomp/seccomp_test_victim.go +++ b/pkg/seccomp/seccomp_test_victim.go @@ -29,76 +29,81 @@ func main() { dieFlag := flag.Bool("die", false, "trips over the filter if true") flag.Parse() - syscalls := []uintptr{ - syscall.SYS_ACCEPT, - syscall.SYS_ARCH_PRCTL, - syscall.SYS_BIND, - syscall.SYS_BRK, - syscall.SYS_CLOCK_GETTIME, - syscall.SYS_CLONE, - syscall.SYS_CLOSE, - syscall.SYS_DUP, - syscall.SYS_DUP2, - syscall.SYS_EPOLL_CREATE1, - syscall.SYS_EPOLL_CTL, - syscall.SYS_EPOLL_WAIT, - syscall.SYS_EXIT, - syscall.SYS_EXIT_GROUP, - syscall.SYS_FALLOCATE, - syscall.SYS_FCHMOD, - syscall.SYS_FCNTL, - syscall.SYS_FSTAT, - syscall.SYS_FSYNC, - syscall.SYS_FTRUNCATE, - syscall.SYS_FUTEX, - syscall.SYS_GETDENTS64, - syscall.SYS_GETPEERNAME, - syscall.SYS_GETPID, - syscall.SYS_GETSOCKNAME, - syscall.SYS_GETSOCKOPT, - syscall.SYS_GETTID, - syscall.SYS_GETTIMEOFDAY, - syscall.SYS_LISTEN, - syscall.SYS_LSEEK, - syscall.SYS_MADVISE, - syscall.SYS_MINCORE, - syscall.SYS_MMAP, - syscall.SYS_MPROTECT, - syscall.SYS_MUNLOCK, - syscall.SYS_MUNMAP, - syscall.SYS_NANOSLEEP, - syscall.SYS_NEWFSTATAT, - syscall.SYS_OPEN, - syscall.SYS_POLL, - syscall.SYS_PREAD64, - syscall.SYS_PSELECT6, - syscall.SYS_PWRITE64, - syscall.SYS_READ, - syscall.SYS_READLINKAT, - syscall.SYS_READV, - syscall.SYS_RECVMSG, - syscall.SYS_RENAMEAT, - syscall.SYS_RESTART_SYSCALL, - syscall.SYS_RT_SIGACTION, - syscall.SYS_RT_SIGPROCMASK, - syscall.SYS_RT_SIGRETURN, - syscall.SYS_SCHED_YIELD, - syscall.SYS_SENDMSG, - syscall.SYS_SETITIMER, - syscall.SYS_SET_ROBUST_LIST, - syscall.SYS_SETSOCKOPT, - syscall.SYS_SHUTDOWN, - syscall.SYS_SIGALTSTACK, - syscall.SYS_SOCKET, - syscall.SYS_SYNC_FILE_RANGE, - syscall.SYS_TGKILL, - syscall.SYS_UTIMENSAT, - syscall.SYS_WRITE, - syscall.SYS_WRITEV, + syscalls := seccomp.SyscallRules{ + syscall.SYS_ACCEPT: {}, + syscall.SYS_ARCH_PRCTL: {}, + syscall.SYS_BIND: {}, + syscall.SYS_BRK: {}, + syscall.SYS_CLOCK_GETTIME: {}, + syscall.SYS_CLONE: {}, + syscall.SYS_CLOSE: {}, + syscall.SYS_DUP: {}, + syscall.SYS_DUP2: {}, + syscall.SYS_EPOLL_CREATE1: {}, + syscall.SYS_EPOLL_CTL: {}, + syscall.SYS_EPOLL_WAIT: {}, + syscall.SYS_EPOLL_PWAIT: {}, + syscall.SYS_EXIT: {}, + syscall.SYS_EXIT_GROUP: {}, + syscall.SYS_FALLOCATE: {}, + syscall.SYS_FCHMOD: {}, + syscall.SYS_FCNTL: {}, + syscall.SYS_FSTAT: {}, + syscall.SYS_FSYNC: {}, + syscall.SYS_FTRUNCATE: {}, + syscall.SYS_FUTEX: {}, + syscall.SYS_GETDENTS64: {}, + syscall.SYS_GETPEERNAME: {}, + syscall.SYS_GETPID: {}, + syscall.SYS_GETSOCKNAME: {}, + syscall.SYS_GETSOCKOPT: {}, + syscall.SYS_GETTID: {}, + syscall.SYS_GETTIMEOFDAY: {}, + syscall.SYS_LISTEN: {}, + syscall.SYS_LSEEK: {}, + syscall.SYS_MADVISE: {}, + syscall.SYS_MINCORE: {}, + syscall.SYS_MMAP: {}, + syscall.SYS_MPROTECT: {}, + syscall.SYS_MUNLOCK: {}, + syscall.SYS_MUNMAP: {}, + syscall.SYS_NANOSLEEP: {}, + syscall.SYS_NEWFSTATAT: {}, + syscall.SYS_OPEN: {}, + syscall.SYS_POLL: {}, + syscall.SYS_PREAD64: {}, + syscall.SYS_PSELECT6: {}, + syscall.SYS_PWRITE64: {}, + syscall.SYS_READ: {}, + syscall.SYS_READLINKAT: {}, + syscall.SYS_READV: {}, + syscall.SYS_RECVMSG: {}, + syscall.SYS_RENAMEAT: {}, + syscall.SYS_RESTART_SYSCALL: {}, + syscall.SYS_RT_SIGACTION: {}, + syscall.SYS_RT_SIGPROCMASK: {}, + syscall.SYS_RT_SIGRETURN: {}, + syscall.SYS_SCHED_YIELD: {}, + syscall.SYS_SENDMSG: {}, + syscall.SYS_SETITIMER: {}, + syscall.SYS_SET_ROBUST_LIST: {}, + syscall.SYS_SETSOCKOPT: {}, + syscall.SYS_SHUTDOWN: {}, + syscall.SYS_SIGALTSTACK: {}, + syscall.SYS_SOCKET: {}, + syscall.SYS_SYNC_FILE_RANGE: {}, + syscall.SYS_TGKILL: {}, + syscall.SYS_UTIMENSAT: {}, + syscall.SYS_WRITE: {}, + syscall.SYS_WRITEV: {}, } die := *dieFlag if !die { - syscalls = append(syscalls, syscall.SYS_OPENAT) + syscalls[syscall.SYS_OPENAT] = []seccomp.Rule{ + { + seccomp.AllowValue(10), + }, + } } if err := seccomp.Install(syscalls, false); err != nil { @@ -107,6 +112,6 @@ func main() { } fmt.Printf("Filters installed\n") - syscall.RawSyscall(syscall.SYS_OPENAT, 0, 0, 0) + syscall.RawSyscall(syscall.SYS_OPENAT, 10, 0, 0) fmt.Printf("Syscall was allowed!!!\n") } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 130e987df..86c256c5b 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -18,77 +18,78 @@ import ( "syscall" "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/pkg/seccomp" ) // allowedSyscalls is the set of syscalls executed by the Sentry // to the host OS. -var allowedSyscalls = []uintptr{ - syscall.SYS_ACCEPT, - syscall.SYS_ARCH_PRCTL, - syscall.SYS_CLOCK_GETTIME, - syscall.SYS_CLONE, - syscall.SYS_CLOSE, - syscall.SYS_DUP, - syscall.SYS_DUP2, - syscall.SYS_EPOLL_CREATE1, - syscall.SYS_EPOLL_CTL, - syscall.SYS_EPOLL_PWAIT, - syscall.SYS_EPOLL_WAIT, - syscall.SYS_EVENTFD2, - syscall.SYS_EXIT, - syscall.SYS_EXIT_GROUP, - syscall.SYS_FALLOCATE, - syscall.SYS_FCHMOD, - syscall.SYS_FCNTL, - syscall.SYS_FSTAT, - syscall.SYS_FSYNC, - syscall.SYS_FTRUNCATE, - syscall.SYS_FUTEX, - syscall.SYS_GETDENTS64, - syscall.SYS_GETPID, - unix.SYS_GETRANDOM, - syscall.SYS_GETSOCKOPT, - syscall.SYS_GETTID, - syscall.SYS_GETTIMEOFDAY, - syscall.SYS_LISTEN, - syscall.SYS_LSEEK, - syscall.SYS_MADVISE, - syscall.SYS_MINCORE, - syscall.SYS_MMAP, - syscall.SYS_MPROTECT, - syscall.SYS_MUNMAP, - syscall.SYS_NEWFSTATAT, - syscall.SYS_POLL, - syscall.SYS_PREAD64, - syscall.SYS_PSELECT6, - syscall.SYS_PWRITE64, - syscall.SYS_READ, - syscall.SYS_READLINKAT, - syscall.SYS_READV, - syscall.SYS_RECVMSG, - syscall.SYS_RENAMEAT, - syscall.SYS_RESTART_SYSCALL, - syscall.SYS_RT_SIGACTION, - syscall.SYS_RT_SIGPROCMASK, - syscall.SYS_RT_SIGRETURN, - syscall.SYS_SCHED_YIELD, - syscall.SYS_SENDMSG, - syscall.SYS_SETITIMER, - syscall.SYS_SHUTDOWN, - syscall.SYS_SIGALTSTACK, - syscall.SYS_SYNC_FILE_RANGE, - syscall.SYS_TGKILL, - syscall.SYS_UTIMENSAT, - syscall.SYS_WRITE, - syscall.SYS_WRITEV, +var allowedSyscalls = seccomp.SyscallRules{ + syscall.SYS_ACCEPT: {}, + syscall.SYS_ARCH_PRCTL: {}, + syscall.SYS_CLOCK_GETTIME: {}, + syscall.SYS_CLONE: {}, + syscall.SYS_CLOSE: {}, + syscall.SYS_DUP: {}, + syscall.SYS_DUP2: {}, + syscall.SYS_EPOLL_CREATE1: {}, + syscall.SYS_EPOLL_CTL: {}, + syscall.SYS_EPOLL_PWAIT: {}, + syscall.SYS_EPOLL_WAIT: {}, + syscall.SYS_EVENTFD2: {}, + syscall.SYS_EXIT: {}, + syscall.SYS_EXIT_GROUP: {}, + syscall.SYS_FALLOCATE: {}, + syscall.SYS_FCHMOD: {}, + syscall.SYS_FCNTL: {}, + syscall.SYS_FSTAT: {}, + syscall.SYS_FSYNC: {}, + syscall.SYS_FTRUNCATE: {}, + syscall.SYS_FUTEX: {}, + syscall.SYS_GETDENTS64: {}, + syscall.SYS_GETPID: {}, + unix.SYS_GETRANDOM: {}, + syscall.SYS_GETSOCKOPT: {}, + syscall.SYS_GETTID: {}, + syscall.SYS_GETTIMEOFDAY: {}, + syscall.SYS_LISTEN: {}, + syscall.SYS_LSEEK: {}, + syscall.SYS_MADVISE: {}, + syscall.SYS_MINCORE: {}, + syscall.SYS_MMAP: {}, + syscall.SYS_MPROTECT: {}, + syscall.SYS_MUNMAP: {}, + syscall.SYS_NEWFSTATAT: {}, + syscall.SYS_POLL: {}, + syscall.SYS_PREAD64: {}, + syscall.SYS_PSELECT6: {}, + syscall.SYS_PWRITE64: {}, + syscall.SYS_READ: {}, + syscall.SYS_READLINKAT: {}, + syscall.SYS_READV: {}, + syscall.SYS_RECVMSG: {}, + syscall.SYS_RENAMEAT: {}, + syscall.SYS_RESTART_SYSCALL: {}, + syscall.SYS_RT_SIGACTION: {}, + syscall.SYS_RT_SIGPROCMASK: {}, + syscall.SYS_RT_SIGRETURN: {}, + syscall.SYS_SCHED_YIELD: {}, + syscall.SYS_SENDMSG: {}, + syscall.SYS_SETITIMER: {}, + syscall.SYS_SHUTDOWN: {}, + syscall.SYS_SIGALTSTACK: {}, + syscall.SYS_SYNC_FILE_RANGE: {}, + syscall.SYS_TGKILL: {}, + syscall.SYS_UTIMENSAT: {}, + syscall.SYS_WRITE: {}, + syscall.SYS_WRITEV: {}, } // TODO: Ioctl is needed in order to support tty consoles. // Once filters support argument-checking, we should only allow ioctl // with tty-related arguments. -func consoleFilters() []uintptr { - return []uintptr{ - syscall.SYS_IOCTL, +func consoleFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_IOCTL: {}, } } @@ -97,79 +98,79 @@ func consoleFilters() []uintptr { // file operations that would otherwise be disabled by seccomp when a Gofer is // used. When whitelistFS is not used, openning new FD in the Sentry is // disallowed. -func whitelistFSFilters() []uintptr { - return []uintptr{ - syscall.SYS_ACCESS, - syscall.SYS_FCHMOD, - syscall.SYS_FSTAT, - syscall.SYS_FSYNC, - syscall.SYS_FTRUNCATE, - syscall.SYS_GETCWD, - syscall.SYS_GETDENTS, - syscall.SYS_GETDENTS64, - syscall.SYS_LSEEK, - syscall.SYS_LSTAT, - syscall.SYS_MKDIR, - syscall.SYS_MKDIRAT, - syscall.SYS_NEWFSTATAT, - syscall.SYS_OPEN, - syscall.SYS_OPENAT, - syscall.SYS_PREAD64, - syscall.SYS_PWRITE64, - syscall.SYS_READ, - syscall.SYS_READLINK, - syscall.SYS_READLINKAT, - syscall.SYS_RENAMEAT, - syscall.SYS_STAT, - syscall.SYS_SYMLINK, - syscall.SYS_SYMLINKAT, - syscall.SYS_SYNC_FILE_RANGE, - syscall.SYS_UNLINK, - syscall.SYS_UNLINKAT, - syscall.SYS_UTIMENSAT, - syscall.SYS_WRITE, +func whitelistFSFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_ACCESS: {}, + syscall.SYS_FCHMOD: {}, + syscall.SYS_FSTAT: {}, + syscall.SYS_FSYNC: {}, + syscall.SYS_FTRUNCATE: {}, + syscall.SYS_GETCWD: {}, + syscall.SYS_GETDENTS: {}, + syscall.SYS_GETDENTS64: {}, + syscall.SYS_LSEEK: {}, + syscall.SYS_LSTAT: {}, + syscall.SYS_MKDIR: {}, + syscall.SYS_MKDIRAT: {}, + syscall.SYS_NEWFSTATAT: {}, + syscall.SYS_OPEN: {}, + syscall.SYS_OPENAT: {}, + syscall.SYS_PREAD64: {}, + syscall.SYS_PWRITE64: {}, + syscall.SYS_READ: {}, + syscall.SYS_READLINK: {}, + syscall.SYS_READLINKAT: {}, + syscall.SYS_RENAMEAT: {}, + syscall.SYS_STAT: {}, + syscall.SYS_SYMLINK: {}, + syscall.SYS_SYMLINKAT: {}, + syscall.SYS_SYNC_FILE_RANGE: {}, + syscall.SYS_UNLINK: {}, + syscall.SYS_UNLINKAT: {}, + syscall.SYS_UTIMENSAT: {}, + syscall.SYS_WRITE: {}, } } // hostInetFilters contains syscalls that are needed by sentry/socket/hostinet. -func hostInetFilters() []uintptr { - return []uintptr{ - syscall.SYS_ACCEPT4, - syscall.SYS_BIND, - syscall.SYS_CONNECT, - syscall.SYS_GETPEERNAME, - syscall.SYS_GETSOCKNAME, - syscall.SYS_GETSOCKOPT, - syscall.SYS_IOCTL, - syscall.SYS_LISTEN, - syscall.SYS_READV, - syscall.SYS_RECVFROM, - syscall.SYS_RECVMSG, - syscall.SYS_SENDMSG, - syscall.SYS_SENDTO, - syscall.SYS_SETSOCKOPT, - syscall.SYS_SHUTDOWN, - syscall.SYS_SOCKET, - syscall.SYS_WRITEV, +func hostInetFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_ACCEPT4: {}, + syscall.SYS_BIND: {}, + syscall.SYS_CONNECT: {}, + syscall.SYS_GETPEERNAME: {}, + syscall.SYS_GETSOCKNAME: {}, + syscall.SYS_GETSOCKOPT: {}, + syscall.SYS_IOCTL: {}, + syscall.SYS_LISTEN: {}, + syscall.SYS_READV: {}, + syscall.SYS_RECVFROM: {}, + syscall.SYS_RECVMSG: {}, + syscall.SYS_SENDMSG: {}, + syscall.SYS_SENDTO: {}, + syscall.SYS_SETSOCKOPT: {}, + syscall.SYS_SHUTDOWN: {}, + syscall.SYS_SOCKET: {}, + syscall.SYS_WRITEV: {}, } } // ptraceFilters returns syscalls made exclusively by the ptrace platform. -func ptraceFilters() []uintptr { - return []uintptr{ - syscall.SYS_PTRACE, - syscall.SYS_WAIT4, - unix.SYS_GETCPU, - unix.SYS_SCHED_SETAFFINITY, +func ptraceFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_PTRACE: {}, + syscall.SYS_WAIT4: {}, + unix.SYS_GETCPU: {}, + unix.SYS_SCHED_SETAFFINITY: {}, } } // kvmFilters returns syscalls made exclusively by the KVM platform. -func kvmFilters() []uintptr { - return []uintptr{ - syscall.SYS_IOCTL, - syscall.SYS_RT_SIGSUSPEND, - syscall.SYS_RT_SIGTIMEDWAIT, - 0xffffffffffffffff, // KVM uses syscall -1 to transition to host. +func kvmFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_IOCTL: {}, + syscall.SYS_RT_SIGSUSPEND: {}, + syscall.SYS_RT_SIGTIMEDWAIT: {}, + 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host. } } diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go index e10d9bf4c..82cf00dfb 100644 --- a/runsc/boot/filter/extra_filters.go +++ b/runsc/boot/filter/extra_filters.go @@ -16,9 +16,13 @@ package filter +import ( + "gvisor.googlesource.com/gvisor/pkg/seccomp" +) + // instrumentationFilters returns additional filters for syscalls used by // Go intrumentation tools, e.g. -race, -msan. // Returns empty when disabled. -func instrumentationFilters() []uintptr { +func instrumentationFilters() seccomp.SyscallRules { return nil } diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go index a862340f6..76f3f6865 100644 --- a/runsc/boot/filter/extra_filters_msan.go +++ b/runsc/boot/filter/extra_filters_msan.go @@ -18,13 +18,15 @@ package filter import ( "syscall" + + "gvisor.googlesource.com/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by MSAN. -func instrumentationFilters() []uintptr { +func instrumentationFilters() seccomp.SyscallRules { Report("MSAN is enabled: syscall filters less restrictive!") - return []uintptr{ - syscall.SYS_SCHED_GETAFFINITY, - syscall.SYS_SET_ROBUST_LIST, + return seccomp.SyscallRules{ + syscall.SYS_SCHED_GETAFFINITY: {}, + syscall.SYS_SET_ROBUST_LIST: {}, } } diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go index b0c74a58a..c810773df 100644 --- a/runsc/boot/filter/extra_filters_race.go +++ b/runsc/boot/filter/extra_filters_race.go @@ -18,16 +18,21 @@ package filter import ( "syscall" + + "gvisor.googlesource.com/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by TSAN. -func instrumentationFilters() []uintptr { +func instrumentationFilters() seccomp.SyscallRules { Report("TSAN is enabled: syscall filters less restrictive!") - return []uintptr{ - syscall.SYS_BRK, - syscall.SYS_MUNLOCK, - syscall.SYS_NANOSLEEP, - syscall.SYS_OPEN, - syscall.SYS_SET_ROBUST_LIST, + return seccomp.SyscallRules{ + syscall.SYS_BRK: {}, + syscall.SYS_CLONE: {}, + syscall.SYS_FUTEX: {}, + syscall.SYS_MMAP: {}, + syscall.SYS_MUNLOCK: {}, + syscall.SYS_NANOSLEEP: {}, + syscall.SYS_OPEN: {}, + syscall.SYS_SET_ROBUST_LIST: {}, } } diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index 3ba56a318..6ea9c464e 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -33,26 +33,26 @@ func Install(p platform.Platform, whitelistFS, console, hostNetwork bool) error // Set of additional filters used by -race and -msan. Returns empty // when not enabled. - s = append(s, instrumentationFilters()...) + s.Merge(instrumentationFilters()) if whitelistFS { Report("direct file access allows unrestricted file access!") - s = append(s, whitelistFSFilters()...) + s.Merge(whitelistFSFilters()) } if console { Report("console is enabled: syscall filters less restrictive!") - s = append(s, consoleFilters()...) + s.Merge(consoleFilters()) } if hostNetwork { Report("host networking enabled: syscall filters less restrictive!") - s = append(s, hostInetFilters()...) + s.Merge(hostInetFilters()) } switch p := p.(type) { case *ptrace.PTrace: - s = append(s, ptraceFilters()...) + s.Merge(ptraceFilters()) case *kvm.KVM: - s = append(s, kvmFilters()...) + s.Merge(kvmFilters()) default: return fmt.Errorf("unknown platform type %T", p) } |