summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r--pkg/bpf/program_builder.go64
-rw-r--r--pkg/seccomp/BUILD4
-rw-r--r--pkg/seccomp/seccomp.go217
-rw-r--r--pkg/seccomp/seccomp_rules.go119
-rw-r--r--pkg/seccomp/seccomp_test.go125
-rw-r--r--pkg/seccomp/seccomp_test_victim.go141
-rw-r--r--runsc/boot/filter/config.go249
-rw-r--r--runsc/boot/filter/extra_filters.go6
-rw-r--r--runsc/boot/filter/extra_filters_msan.go10
-rw-r--r--runsc/boot/filter/extra_filters_race.go19
-rw-r--r--runsc/boot/filter/filter.go12
11 files changed, 640 insertions, 326 deletions
diff --git a/pkg/bpf/program_builder.go b/pkg/bpf/program_builder.go
index 7554d47c1..bad56d7ac 100644
--- a/pkg/bpf/program_builder.go
+++ b/pkg/bpf/program_builder.go
@@ -21,7 +21,10 @@ import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
)
-const labelTarget = math.MaxUint8
+const (
+ labelTarget = math.MaxUint8
+ labelDirectTarget = math.MaxUint32
+)
// ProgramBuilder assists with building a BPF program with jump
// labels that are resolved to their proper offsets.
@@ -47,6 +50,14 @@ type label struct {
target int
}
+type jmpType int
+
+const (
+ jDirect jmpType = iota
+ jTrue
+ jFalse
+)
+
// source contains information about a single reference to a label.
type source struct {
// Program line where the label reference is present.
@@ -54,7 +65,7 @@ type source struct {
// True if label reference is in the 'jump if true' part of the jump.
// False if label reference is in the 'jump if false' part of the jump.
- jt bool
+ jt jmpType
}
// AddStmt adds a new statement to the program.
@@ -67,23 +78,29 @@ func (b *ProgramBuilder) AddJump(code uint16, k uint32, jt, jf uint8) {
b.instructions = append(b.instructions, Jump(code, k, jt, jf))
}
+// AddDirectJumpLabel adds a new unconditional jump to the program whose target is a label.
+func (b *ProgramBuilder) AddDirectJumpLabel(labelName string) {
+ b.addLabelSource(labelName, jDirect)
+ b.AddJump(Jmp|Ja, labelDirectTarget, 0, 0)
+}
+
// AddJumpTrueLabel adds a new jump to the program where 'jump if true' is a label.
func (b *ProgramBuilder) AddJumpTrueLabel(code uint16, k uint32, jtLabel string, jf uint8) {
- b.addLabelSource(jtLabel, true)
+ b.addLabelSource(jtLabel, jTrue)
b.AddJump(code, k, labelTarget, jf)
}
// AddJumpFalseLabel adds a new jump to the program where 'jump if false' is a label.
func (b *ProgramBuilder) AddJumpFalseLabel(code uint16, k uint32, jt uint8, jfLabel string) {
- b.addLabelSource(jfLabel, false)
- b.AddJump(code, k, jt, math.MaxUint8)
+ b.addLabelSource(jfLabel, jFalse)
+ b.AddJump(code, k, jt, labelTarget)
}
// AddJumpLabels adds a new jump to the program where both jump targets are labels.
func (b *ProgramBuilder) AddJumpLabels(code uint16, k uint32, jtLabel, jfLabel string) {
- b.addLabelSource(jtLabel, true)
- b.addLabelSource(jfLabel, false)
- b.AddJump(code, k, math.MaxUint8, math.MaxUint8)
+ b.addLabelSource(jtLabel, jTrue)
+ b.addLabelSource(jfLabel, jFalse)
+ b.AddJump(code, k, labelTarget, labelTarget)
}
// AddLabel sets the given label name at the current location. The next instruction is executed
@@ -104,20 +121,22 @@ func (b *ProgramBuilder) AddLabel(name string) error {
// Instructions returns an array of BPF instructions representing the program with all labels
// resolved. Return error in case label resolution failed due to an invalid program.
+//
+// N.B. Partial results will be returned in the error case, which is useful for debugging.
func (b *ProgramBuilder) Instructions() ([]linux.BPFInstruction, error) {
if err := b.resolveLabels(); err != nil {
- return nil, err
+ return b.instructions, err
}
return b.instructions, nil
}
-func (b *ProgramBuilder) addLabelSource(labelName string, jt bool) {
+func (b *ProgramBuilder) addLabelSource(labelName string, t jmpType) {
l, ok := b.labels[labelName]
if !ok {
l = &label{sources: make([]source, 0), target: -1}
b.labels[labelName] = l
}
- l.sources = append(l.sources, source{line: len(b.instructions), jt: jt})
+ l.sources = append(l.sources, source{line: len(b.instructions), jt: t})
}
func (b *ProgramBuilder) resolveLabels() error {
@@ -136,21 +155,34 @@ func (b *ProgramBuilder) resolveLabels() error {
}
// Calculates the jump offset from current line.
offset := v.target - s.line - 1
- if offset > math.MaxUint8 {
- return fmt.Errorf("jump offset to label '%v' is too large: %v", key, offset)
- }
// Sets offset into jump instruction.
- if s.jt {
+ switch s.jt {
+ case jDirect:
+ if offset > labelDirectTarget {
+ return fmt.Errorf("jump offset to label '%v' is too large: %v, inst: %v, lineno: %v", key, offset, inst, s.line)
+ }
+ if inst.K != labelDirectTarget {
+ return fmt.Errorf("jump target is not a label")
+ }
+ inst.K = uint32(offset)
+ case jTrue:
+ if offset > labelTarget {
+ return fmt.Errorf("jump offset to label '%v' is too large: %v, inst: %v, lineno: %v", key, offset, inst, s.line)
+ }
if inst.JumpIfTrue != labelTarget {
return fmt.Errorf("jump target is not a label")
}
inst.JumpIfTrue = uint8(offset)
- } else {
+ case jFalse:
+ if offset > labelTarget {
+ return fmt.Errorf("jump offset to label '%v' is too large: %v, inst: %v, lineno: %v", key, offset, inst, s.line)
+ }
if inst.JumpIfFalse != labelTarget {
return fmt.Errorf("jump target is not a label")
}
inst.JumpIfFalse = uint8(offset)
}
+
b.instructions[s.line] = inst
}
}
diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD
index 1e19b1d25..cadd24505 100644
--- a/pkg/seccomp/BUILD
+++ b/pkg/seccomp/BUILD
@@ -21,14 +21,18 @@ go_library(
name = "seccomp",
srcs = [
"seccomp.go",
+ "seccomp_rules.go",
"seccomp_unsafe.go",
],
importpath = "gvisor.googlesource.com/gvisor/pkg/seccomp",
visibility = ["//visibility:public"],
deps = [
+ "//pkg/abi",
"//pkg/abi/linux",
"//pkg/bpf",
"//pkg/log",
+ "//pkg/sentry/arch",
+ "//pkg/sentry/strace",
],
)
diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go
index 7ee63140c..cd6b0b4bc 100644
--- a/pkg/seccomp/seccomp.go
+++ b/pkg/seccomp/seccomp.go
@@ -12,24 +12,28 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package seccomp provides basic seccomp filters.
+// Package seccomp provides basic seccomp filters for x86_64 (little endian).
package seccomp
import (
"fmt"
+ "reflect"
"sort"
+ "gvisor.googlesource.com/gvisor/pkg/abi"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/bpf"
"gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/strace"
)
const (
// violationLabel is added to the program to take action on a violation.
violationLabel = "violation"
- // allowLabel is added to the program to allow the syscall to take place.
- allowLabel = "allow"
+ // skipOneInst is the offset to take for skipping one instruction.
+ skipOneInst = 1
)
// Install generates BPF code based on the set of syscalls provided. It only
@@ -38,27 +42,19 @@ const (
//
// (*) The current implementation only checks the syscall number. It does NOT
// validate any of the arguments.
-func Install(syscalls []uintptr, kill bool) error {
- // Sort syscalls and remove duplicates to build the BST.
- sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] })
- syscalls = filterUnique(syscalls)
-
- log.Infof("Installing seccomp filters for %d syscalls (kill=%t)", len(syscalls), kill)
- for _, s := range syscalls {
- log.Infof("syscall filter: %v", s)
- }
-
- instrs, err := buildProgram(syscalls, kill)
- if err != nil {
- return err
- }
+func Install(rules SyscallRules, kill bool) error {
+ log.Infof("Installing seccomp filters for %d syscalls (kill=%t)", len(rules), kill)
+ instrs, err := buildProgram(rules, kill)
if log.IsLogging(log.Debug) {
- programStr, err := bpf.DecodeProgram(instrs)
- if err != nil {
- programStr = fmt.Sprintf("Error: %v\n%s", err, programStr)
+ programStr, errDecode := bpf.DecodeProgram(instrs)
+ if errDecode != nil {
+ programStr = fmt.Sprintf("Error: %v\n%s", errDecode, programStr)
}
log.Debugf("Seccomp program dump:\n%s", programStr)
}
+ if err != nil {
+ return err
+ }
if err := seccomp(instrs); err != nil {
return err
@@ -68,11 +64,8 @@ func Install(syscalls []uintptr, kill bool) error {
return nil
}
-// buildProgram builds a BPF program that whitelists all given syscalls.
-//
-// Precondition: syscalls must be sorted and unique.
-func buildProgram(syscalls []uintptr, kill bool) ([]linux.BPFInstruction, error) {
- const archOffset = 4 // offsetof(seccomp_data, arch)
+// buildProgram builds a BPF program that whitelists all given syscall rules.
+func buildProgram(rules SyscallRules, kill bool) ([]linux.BPFInstruction, error) {
program := bpf.NewProgramBuilder()
violationAction := uint32(linux.SECCOMP_RET_KILL)
if !kill {
@@ -83,10 +76,13 @@ func buildProgram(syscalls []uintptr, kill bool) ([]linux.BPFInstruction, error)
//
// A = seccomp_data.arch
// if (A != AUDIT_ARCH_X86_64) goto violation
- program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, archOffset)
- program.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, linux.AUDIT_ARCH_X86_64, 0, violationLabel)
+ program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArch)
+ // violationLabel is at the bottom of the program. The size of the program
+ // may exceed 255 lines, which is the limit of a conditional jump.
+ program.AddJump(bpf.Jmp|bpf.Jeq|bpf.K, linux.AUDIT_ARCH_X86_64, skipOneInst, 0)
+ program.AddDirectJumpLabel(violationLabel)
- if err := buildIndex(syscalls, program); err != nil {
+ if err := buildIndex(rules, program); err != nil {
return nil, err
}
@@ -96,41 +92,34 @@ func buildProgram(syscalls []uintptr, kill bool) ([]linux.BPFInstruction, error)
}
program.AddStmt(bpf.Ret|bpf.K, violationAction)
- // allow: return SECCOMP_RET_ALLOW
- if err := program.AddLabel(allowLabel); err != nil {
- return nil, err
- }
- program.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW)
-
return program.Instructions()
}
-// filterUnique filters unique system calls.
-//
-// Precondition: syscalls must be sorted.
-func filterUnique(syscalls []uintptr) []uintptr {
- filtered := make([]uintptr, 0, len(syscalls))
- for i := 0; i < len(syscalls); i++ {
- if len(filtered) > 0 && syscalls[i] == filtered[len(filtered)-1] {
- // This call has already been inserted, skip.
- continue
- }
- filtered = append(filtered, syscalls[i])
+// buildIndex builds a BST to quickly search through all syscalls that are whitelisted.
+func buildIndex(rules SyscallRules, program *bpf.ProgramBuilder) error {
+ syscalls := []uintptr{}
+ for sysno, _ := range rules {
+ syscalls = append(syscalls, sysno)
+ }
+
+ t, ok := strace.Lookup(abi.Linux, arch.AMD64)
+ if !ok {
+ panic("Can't find amd64 Linux syscall table")
+ }
+
+ sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] })
+ for _, s := range syscalls {
+ log.Infof("syscall filter: %v (%v): %s", s, t.Name(s), rules[s])
}
- return filtered
-}
-// buildIndex builds a BST to quickly search through all syscalls that are whitelisted.
-//
-// Precondition: syscalls must be sorted and unique.
-func buildIndex(syscalls []uintptr, program *bpf.ProgramBuilder) error {
root := createBST(syscalls)
+ root.root = true
// Load syscall number into A and run through BST.
//
// A = seccomp_data.nr
- program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, 0)
- return root.buildBSTProgram(program, true)
+ program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetNR)
+ return root.traverse(buildBSTProgram, program, rules)
}
// createBST converts sorted syscall slice into a balanced BST.
@@ -147,64 +136,128 @@ func createBST(syscalls []uintptr) *node {
return &parent
}
-// node represents a tree node.
-type node struct {
- value uintptr
- left *node
- right *node
+func ruleViolationLabel(sysno uintptr, idx int) string {
+ return fmt.Sprintf("ruleViolation_%v_%v", sysno, idx)
}
-// label returns the label corresponding to this node. If node is nil (syscall not present),
-// violationLabel is returned for convenience.
-func (n *node) label() string {
- if n == nil {
- return violationLabel
+func checkArgsLabel(sysno uintptr) string {
+ return fmt.Sprintf("checkArgs_%v", sysno)
+}
+
+func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, sysno uintptr) error {
+ for ruleidx, rule := range rules {
+ labelled := false
+ for i, arg := range rule {
+ if arg != nil {
+ switch a := arg.(type) {
+ case AllowAny:
+ case AllowValue:
+ high, low := uint32(a>>32), uint32(a)
+ // assert arg_low == low
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i))
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(sysno, ruleidx))
+ // assert arg_high == high
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i))
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(sysno, ruleidx))
+ labelled = true
+
+ default:
+ return fmt.Errorf("unknown syscall rule type: %v", reflect.TypeOf(a))
+ }
+ }
+ }
+ // Matched, allow the syscall.
+ p.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW)
+ // Label the end of the rule if necessary.
+ if labelled {
+ if err := p.AddLabel(ruleViolationLabel(sysno, ruleidx)); err != nil {
+ return err
+ }
+ }
}
- return fmt.Sprintf("index_%v", n.value)
+ // Not matched?
+ p.AddDirectJumpLabel(violationLabel)
+ return nil
}
// buildBSTProgram converts a binary tree started in 'root' into BPF code. The ouline of the code
// is as follows:
//
// // SYS_PIPE(22), root
-// (A == 22) ? goto allow : continue
+// (A == 22) ? goto argument check : continue
// (A > 22) ? goto index_35 : goto index_9
//
// index_9: // SYS_MMAP(9), leaf
-// (A == 9) ? goto allow : goto violation
+// (A == 9) ? goto argument check : goto violation
//
// index_35: // SYS_NANOSLEEP(35), single child
-// (A == 35) ? goto allow : continue
+// (A == 35) ? goto argument check : continue
// (A > 35) ? goto index_50 : goto violation
//
// index_50: // SYS_LISTEN(50), leaf
-// (A == 50) ? goto allow : goto violation
+// (A == 50) ? goto argument check : goto violation
//
-func (n *node) buildBSTProgram(program *bpf.ProgramBuilder, root bool) error {
- if n == nil {
- return nil
- }
-
+func buildBSTProgram(program *bpf.ProgramBuilder, rules SyscallRules, n *node) error {
// Root node is never referenced by label, skip it.
- if !root {
+ if !n.root {
if err := program.AddLabel(n.label()); err != nil {
return err
}
}
- // Leaf nodes don't require extra check, they either allow or violate!
+ sysno := n.value
+ program.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, uint32(sysno), checkArgsLabel(sysno), 0)
if n.left == nil && n.right == nil {
- program.AddJumpLabels(bpf.Jmp|bpf.Jeq|bpf.K, uint32(n.value), allowLabel, violationLabel)
+ // Leaf nodes don't require extra check.
+ program.AddDirectJumpLabel(violationLabel)
+ } else {
+ // Non-leaf node. Check which turn to take otherwise. Direct jumps are used
+ // because the offset may exceed the limit of a conditional jump (255).
+ // Note that 'violationLabel' is returned for nil children.
+ program.AddJump(bpf.Jmp|bpf.Jgt|bpf.K, uint32(sysno), 0, skipOneInst)
+ program.AddDirectJumpLabel(n.right.label())
+ program.AddDirectJumpLabel(n.left.label())
+ }
+
+ if err := program.AddLabel(checkArgsLabel(sysno)); err != nil {
+ return err
+ }
+ // No rules, just allow it and save one jmp.
+ if len(rules[sysno]) == 0 {
+ program.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW)
return nil
}
+ return addSyscallArgsCheck(program, rules[sysno], sysno)
+}
- // Non-leaf node. Allows syscall if it matches, check which turn to take otherwise. Note
- // that 'violationLabel' is returned for nil children.
- program.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, uint32(n.value), allowLabel, 0)
- program.AddJumpLabels(bpf.Jmp|bpf.Jgt|bpf.K, uint32(n.value), n.right.label(), n.left.label())
+// node represents a tree node.
+type node struct {
+ value uintptr
+ left *node
+ right *node
+ root bool
+}
+
+// label returns the label corresponding to this node. If node is nil (syscall not present),
+// violationLabel is returned for convenience.
+func (n *node) label() string {
+ if n == nil {
+ return violationLabel
+ }
+ return fmt.Sprintf("index_%v", n.value)
+}
- if err := n.left.buildBSTProgram(program, false); err != nil {
+type traverseFunc func(*bpf.ProgramBuilder, SyscallRules, *node) error
+
+func (n *node) traverse(fn traverseFunc, p *bpf.ProgramBuilder, rules SyscallRules) error {
+ if n == nil {
+ return nil
+ }
+ if err := fn(p, rules, n); err != nil {
+ return err
+ }
+ if err := n.left.traverse(fn, p, rules); err != nil {
return err
}
- return n.right.buildBSTProgram(program, false)
+ return n.right.traverse(fn, p, rules)
}
diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go
new file mode 100644
index 000000000..892ccabb4
--- /dev/null
+++ b/pkg/seccomp/seccomp_rules.go
@@ -0,0 +1,119 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seccomp
+
+import "fmt"
+
+// The offsets are based on the following struct in include/linux/seccomp.h.
+// struct seccomp_data {
+// int nr;
+// __u32 arch;
+// __u64 instruction_pointer;
+// __u64 args[6];
+// };
+const (
+ seccompDataOffsetNR = 0
+ seccompDataOffsetArch = 4
+ seccompDataOffsetArgs = 16
+)
+
+func seccompDataOffsetArgLow(i int) uint32 {
+ return uint32(seccompDataOffsetArgs + i*8)
+}
+
+func seccompDataOffsetArgHigh(i int) uint32 {
+ return uint32(seccompDataOffsetArgs + i*8 + 4)
+}
+
+// AllowAny is a marker indicating that any value will be accepted.
+type AllowAny struct{}
+
+func (a AllowAny) String() (s string) {
+ return "*"
+}
+
+// AllowValue specifies a value that needs to be strictly matched.
+type AllowValue uintptr
+
+func (a AllowValue) String() (s string) {
+ return fmt.Sprintf("%#x ", uintptr(a))
+}
+
+// Rule stores the whitelist of syscall arguments.
+//
+// For example:
+// rule := Rule {
+// AllowValue(linux.ARCH_GET_FS | linux.ARCH_SET_FS), // arg0
+// }
+type Rule [6]interface{}
+
+func (r Rule) String() (s string) {
+ if len(r) == 0 {
+ return
+ }
+ s += "( "
+ for _, arg := range r {
+ if arg != nil {
+ s += fmt.Sprintf("%v ", arg)
+ }
+ }
+ s += ")"
+ return
+}
+
+// SyscallRules stores a map of OR'ed whitelist rules indexed by the syscall number.
+// If the rule list for a syscall is empty, any arguments are allowed for it.
+//
+// For example:
+// rules := SyscallRules{
+// syscall.SYS_FUTEX: []Rule{
+// {
+// AllowAny{},
+// AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
+// }, // OR
+// {
+// AllowAny{},
+// AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
+// },
+// },
+// syscall.SYS_GETPID: []Rule{},
+// }
+type SyscallRules map[uintptr][]Rule
+
+// NewSyscallRules returns a new SyscallRules.
+func NewSyscallRules() SyscallRules {
+ return make(map[uintptr][]Rule)
+}
+
+// AddRule adds the given rule. It will create a new entry for a new syscall, otherwise
+// it will append to the existing rules.
+func (sr SyscallRules) AddRule(sysno uintptr, r Rule) {
+ if _, ok := sr[sysno]; ok {
+ sr[sysno] = append(sr[sysno], r)
+ } else {
+ sr[sysno] = []Rule{r}
+ }
+}
+
+// Merge merges the given SyscallRules.
+func (sr SyscallRules) Merge(rules SyscallRules) {
+ for sysno, rs := range rules {
+ if _, ok := sr[sysno]; ok {
+ sr[sysno] = append(sr[sysno], rs...)
+ } else {
+ sr[sysno] = rs
+ }
+ }
+}
diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go
index c700d88d6..d3aca7ee9 100644
--- a/pkg/seccomp/seccomp_test.go
+++ b/pkg/seccomp/seccomp_test.go
@@ -19,10 +19,10 @@ import (
"fmt"
"io"
"io/ioutil"
+ "math"
"math/rand"
"os"
"os/exec"
- "sort"
"strings"
"testing"
"time"
@@ -77,12 +77,12 @@ func TestBasic(t *testing.T) {
for _, test := range []struct {
// filters are the set of syscall that are allowed.
- filters []uintptr
+ filters SyscallRules
kill bool
specs []spec
}{
{
- filters: []uintptr{1},
+ filters: SyscallRules{1: {}},
kill: false,
specs: []spec{
{
@@ -98,8 +98,12 @@ func TestBasic(t *testing.T) {
},
},
{
- filters: []uintptr{1, 3, 5},
- kill: false,
+ filters: SyscallRules{
+ 1: {},
+ 3: {},
+ 5: {},
+ },
+ kill: false,
specs: []spec{
{
desc: "Multiple syscalls allowed (1)",
@@ -144,7 +148,7 @@ func TestBasic(t *testing.T) {
},
},
{
- filters: []uintptr{1},
+ filters: SyscallRules{1: {}},
kill: false,
specs: []spec{
{
@@ -155,7 +159,7 @@ func TestBasic(t *testing.T) {
},
},
{
- filters: []uintptr{1},
+ filters: SyscallRules{1: {}},
kill: true,
specs: []spec{
{
@@ -165,8 +169,96 @@ func TestBasic(t *testing.T) {
},
},
},
+ {
+ filters: SyscallRules{
+ 1: []Rule{
+ {
+ AllowAny{},
+ AllowValue(0xf),
+ },
+ },
+ },
+ kill: false,
+ specs: []spec{
+ {
+ desc: "Syscall argument allowed",
+ data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0xf, 0xf}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "Syscall argument disallowed",
+ data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0xf, 0xe}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
+ {
+ filters: SyscallRules{
+ 1: []Rule{
+ {
+ AllowValue(0xf),
+ },
+ {
+ AllowValue(0xe),
+ },
+ },
+ },
+ kill: false,
+ specs: []spec{
+ {
+ desc: "Syscall argument allowed, two rules",
+ data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0xf}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "Syscall argument allowed, two rules",
+ data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{0xe}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ },
+ {
+ filters: SyscallRules{
+ 1: []Rule{
+ {
+ AllowValue(0),
+ AllowValue(math.MaxUint64 - 1),
+ AllowValue(math.MaxUint32),
+ },
+ },
+ },
+ kill: false,
+ specs: []spec{
+ {
+ desc: "64bit syscall argument allowed",
+ data: seccompData{
+ nr: 1,
+ arch: linux.AUDIT_ARCH_X86_64,
+ args: [6]uint64{0, math.MaxUint64 - 1, math.MaxUint32},
+ },
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "64bit syscall argument disallowed",
+ data: seccompData{
+ nr: 1,
+ arch: linux.AUDIT_ARCH_X86_64,
+ args: [6]uint64{0, math.MaxUint64, math.MaxUint32},
+ },
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "64bit syscall argument disallowed",
+ data: seccompData{
+ nr: 1,
+ arch: linux.AUDIT_ARCH_X86_64,
+ args: [6]uint64{0, math.MaxUint64, math.MaxUint32 - 1},
+ },
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
} {
- sort.Slice(test.filters, func(i, j int) bool { return test.filters[i] < test.filters[j] })
instrs, err := buildProgram(test.filters, test.kill)
if err != nil {
t.Errorf("%s: buildProgram() got error: %v", test.specs[0].desc, err)
@@ -193,19 +285,16 @@ func TestBasic(t *testing.T) {
func TestRandom(t *testing.T) {
rand.Seed(time.Now().UnixNano())
size := rand.Intn(50) + 1
- syscalls := make([]uintptr, 0, size)
- syscallMap := make(map[uintptr]struct{})
- for len(syscalls) < size {
+ syscallRules := make(map[uintptr][]Rule)
+ for len(syscallRules) < size {
n := uintptr(rand.Intn(200))
- if _, ok := syscallMap[n]; !ok {
- syscalls = append(syscalls, n)
- syscallMap[n] = struct{}{}
+ if _, ok := syscallRules[n]; !ok {
+ syscallRules[n] = []Rule{}
}
}
- sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] })
- fmt.Printf("Testing filters: %v", syscalls)
- instrs, err := buildProgram(syscalls, false)
+ fmt.Printf("Testing filters: %v", syscallRules)
+ instrs, err := buildProgram(syscallRules, false)
if err != nil {
t.Fatalf("buildProgram() got error: %v", err)
}
@@ -221,7 +310,7 @@ func TestRandom(t *testing.T) {
continue
}
want := uint32(linux.SECCOMP_RET_TRAP)
- if _, ok := syscallMap[uintptr(i)]; ok {
+ if _, ok := syscallRules[uintptr(i)]; ok {
want = linux.SECCOMP_RET_ALLOW
}
if got != want {
diff --git a/pkg/seccomp/seccomp_test_victim.go b/pkg/seccomp/seccomp_test_victim.go
index fe3f96901..4f2ae4dac 100644
--- a/pkg/seccomp/seccomp_test_victim.go
+++ b/pkg/seccomp/seccomp_test_victim.go
@@ -29,76 +29,81 @@ func main() {
dieFlag := flag.Bool("die", false, "trips over the filter if true")
flag.Parse()
- syscalls := []uintptr{
- syscall.SYS_ACCEPT,
- syscall.SYS_ARCH_PRCTL,
- syscall.SYS_BIND,
- syscall.SYS_BRK,
- syscall.SYS_CLOCK_GETTIME,
- syscall.SYS_CLONE,
- syscall.SYS_CLOSE,
- syscall.SYS_DUP,
- syscall.SYS_DUP2,
- syscall.SYS_EPOLL_CREATE1,
- syscall.SYS_EPOLL_CTL,
- syscall.SYS_EPOLL_WAIT,
- syscall.SYS_EXIT,
- syscall.SYS_EXIT_GROUP,
- syscall.SYS_FALLOCATE,
- syscall.SYS_FCHMOD,
- syscall.SYS_FCNTL,
- syscall.SYS_FSTAT,
- syscall.SYS_FSYNC,
- syscall.SYS_FTRUNCATE,
- syscall.SYS_FUTEX,
- syscall.SYS_GETDENTS64,
- syscall.SYS_GETPEERNAME,
- syscall.SYS_GETPID,
- syscall.SYS_GETSOCKNAME,
- syscall.SYS_GETSOCKOPT,
- syscall.SYS_GETTID,
- syscall.SYS_GETTIMEOFDAY,
- syscall.SYS_LISTEN,
- syscall.SYS_LSEEK,
- syscall.SYS_MADVISE,
- syscall.SYS_MINCORE,
- syscall.SYS_MMAP,
- syscall.SYS_MPROTECT,
- syscall.SYS_MUNLOCK,
- syscall.SYS_MUNMAP,
- syscall.SYS_NANOSLEEP,
- syscall.SYS_NEWFSTATAT,
- syscall.SYS_OPEN,
- syscall.SYS_POLL,
- syscall.SYS_PREAD64,
- syscall.SYS_PSELECT6,
- syscall.SYS_PWRITE64,
- syscall.SYS_READ,
- syscall.SYS_READLINKAT,
- syscall.SYS_READV,
- syscall.SYS_RECVMSG,
- syscall.SYS_RENAMEAT,
- syscall.SYS_RESTART_SYSCALL,
- syscall.SYS_RT_SIGACTION,
- syscall.SYS_RT_SIGPROCMASK,
- syscall.SYS_RT_SIGRETURN,
- syscall.SYS_SCHED_YIELD,
- syscall.SYS_SENDMSG,
- syscall.SYS_SETITIMER,
- syscall.SYS_SET_ROBUST_LIST,
- syscall.SYS_SETSOCKOPT,
- syscall.SYS_SHUTDOWN,
- syscall.SYS_SIGALTSTACK,
- syscall.SYS_SOCKET,
- syscall.SYS_SYNC_FILE_RANGE,
- syscall.SYS_TGKILL,
- syscall.SYS_UTIMENSAT,
- syscall.SYS_WRITE,
- syscall.SYS_WRITEV,
+ syscalls := seccomp.SyscallRules{
+ syscall.SYS_ACCEPT: {},
+ syscall.SYS_ARCH_PRCTL: {},
+ syscall.SYS_BIND: {},
+ syscall.SYS_BRK: {},
+ syscall.SYS_CLOCK_GETTIME: {},
+ syscall.SYS_CLONE: {},
+ syscall.SYS_CLOSE: {},
+ syscall.SYS_DUP: {},
+ syscall.SYS_DUP2: {},
+ syscall.SYS_EPOLL_CREATE1: {},
+ syscall.SYS_EPOLL_CTL: {},
+ syscall.SYS_EPOLL_WAIT: {},
+ syscall.SYS_EPOLL_PWAIT: {},
+ syscall.SYS_EXIT: {},
+ syscall.SYS_EXIT_GROUP: {},
+ syscall.SYS_FALLOCATE: {},
+ syscall.SYS_FCHMOD: {},
+ syscall.SYS_FCNTL: {},
+ syscall.SYS_FSTAT: {},
+ syscall.SYS_FSYNC: {},
+ syscall.SYS_FTRUNCATE: {},
+ syscall.SYS_FUTEX: {},
+ syscall.SYS_GETDENTS64: {},
+ syscall.SYS_GETPEERNAME: {},
+ syscall.SYS_GETPID: {},
+ syscall.SYS_GETSOCKNAME: {},
+ syscall.SYS_GETSOCKOPT: {},
+ syscall.SYS_GETTID: {},
+ syscall.SYS_GETTIMEOFDAY: {},
+ syscall.SYS_LISTEN: {},
+ syscall.SYS_LSEEK: {},
+ syscall.SYS_MADVISE: {},
+ syscall.SYS_MINCORE: {},
+ syscall.SYS_MMAP: {},
+ syscall.SYS_MPROTECT: {},
+ syscall.SYS_MUNLOCK: {},
+ syscall.SYS_MUNMAP: {},
+ syscall.SYS_NANOSLEEP: {},
+ syscall.SYS_NEWFSTATAT: {},
+ syscall.SYS_OPEN: {},
+ syscall.SYS_POLL: {},
+ syscall.SYS_PREAD64: {},
+ syscall.SYS_PSELECT6: {},
+ syscall.SYS_PWRITE64: {},
+ syscall.SYS_READ: {},
+ syscall.SYS_READLINKAT: {},
+ syscall.SYS_READV: {},
+ syscall.SYS_RECVMSG: {},
+ syscall.SYS_RENAMEAT: {},
+ syscall.SYS_RESTART_SYSCALL: {},
+ syscall.SYS_RT_SIGACTION: {},
+ syscall.SYS_RT_SIGPROCMASK: {},
+ syscall.SYS_RT_SIGRETURN: {},
+ syscall.SYS_SCHED_YIELD: {},
+ syscall.SYS_SENDMSG: {},
+ syscall.SYS_SETITIMER: {},
+ syscall.SYS_SET_ROBUST_LIST: {},
+ syscall.SYS_SETSOCKOPT: {},
+ syscall.SYS_SHUTDOWN: {},
+ syscall.SYS_SIGALTSTACK: {},
+ syscall.SYS_SOCKET: {},
+ syscall.SYS_SYNC_FILE_RANGE: {},
+ syscall.SYS_TGKILL: {},
+ syscall.SYS_UTIMENSAT: {},
+ syscall.SYS_WRITE: {},
+ syscall.SYS_WRITEV: {},
}
die := *dieFlag
if !die {
- syscalls = append(syscalls, syscall.SYS_OPENAT)
+ syscalls[syscall.SYS_OPENAT] = []seccomp.Rule{
+ {
+ seccomp.AllowValue(10),
+ },
+ }
}
if err := seccomp.Install(syscalls, false); err != nil {
@@ -107,6 +112,6 @@ func main() {
}
fmt.Printf("Filters installed\n")
- syscall.RawSyscall(syscall.SYS_OPENAT, 0, 0, 0)
+ syscall.RawSyscall(syscall.SYS_OPENAT, 10, 0, 0)
fmt.Printf("Syscall was allowed!!!\n")
}
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 130e987df..86c256c5b 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -18,77 +18,78 @@ import (
"syscall"
"golang.org/x/sys/unix"
+ "gvisor.googlesource.com/gvisor/pkg/seccomp"
)
// allowedSyscalls is the set of syscalls executed by the Sentry
// to the host OS.
-var allowedSyscalls = []uintptr{
- syscall.SYS_ACCEPT,
- syscall.SYS_ARCH_PRCTL,
- syscall.SYS_CLOCK_GETTIME,
- syscall.SYS_CLONE,
- syscall.SYS_CLOSE,
- syscall.SYS_DUP,
- syscall.SYS_DUP2,
- syscall.SYS_EPOLL_CREATE1,
- syscall.SYS_EPOLL_CTL,
- syscall.SYS_EPOLL_PWAIT,
- syscall.SYS_EPOLL_WAIT,
- syscall.SYS_EVENTFD2,
- syscall.SYS_EXIT,
- syscall.SYS_EXIT_GROUP,
- syscall.SYS_FALLOCATE,
- syscall.SYS_FCHMOD,
- syscall.SYS_FCNTL,
- syscall.SYS_FSTAT,
- syscall.SYS_FSYNC,
- syscall.SYS_FTRUNCATE,
- syscall.SYS_FUTEX,
- syscall.SYS_GETDENTS64,
- syscall.SYS_GETPID,
- unix.SYS_GETRANDOM,
- syscall.SYS_GETSOCKOPT,
- syscall.SYS_GETTID,
- syscall.SYS_GETTIMEOFDAY,
- syscall.SYS_LISTEN,
- syscall.SYS_LSEEK,
- syscall.SYS_MADVISE,
- syscall.SYS_MINCORE,
- syscall.SYS_MMAP,
- syscall.SYS_MPROTECT,
- syscall.SYS_MUNMAP,
- syscall.SYS_NEWFSTATAT,
- syscall.SYS_POLL,
- syscall.SYS_PREAD64,
- syscall.SYS_PSELECT6,
- syscall.SYS_PWRITE64,
- syscall.SYS_READ,
- syscall.SYS_READLINKAT,
- syscall.SYS_READV,
- syscall.SYS_RECVMSG,
- syscall.SYS_RENAMEAT,
- syscall.SYS_RESTART_SYSCALL,
- syscall.SYS_RT_SIGACTION,
- syscall.SYS_RT_SIGPROCMASK,
- syscall.SYS_RT_SIGRETURN,
- syscall.SYS_SCHED_YIELD,
- syscall.SYS_SENDMSG,
- syscall.SYS_SETITIMER,
- syscall.SYS_SHUTDOWN,
- syscall.SYS_SIGALTSTACK,
- syscall.SYS_SYNC_FILE_RANGE,
- syscall.SYS_TGKILL,
- syscall.SYS_UTIMENSAT,
- syscall.SYS_WRITE,
- syscall.SYS_WRITEV,
+var allowedSyscalls = seccomp.SyscallRules{
+ syscall.SYS_ACCEPT: {},
+ syscall.SYS_ARCH_PRCTL: {},
+ syscall.SYS_CLOCK_GETTIME: {},
+ syscall.SYS_CLONE: {},
+ syscall.SYS_CLOSE: {},
+ syscall.SYS_DUP: {},
+ syscall.SYS_DUP2: {},
+ syscall.SYS_EPOLL_CREATE1: {},
+ syscall.SYS_EPOLL_CTL: {},
+ syscall.SYS_EPOLL_PWAIT: {},
+ syscall.SYS_EPOLL_WAIT: {},
+ syscall.SYS_EVENTFD2: {},
+ syscall.SYS_EXIT: {},
+ syscall.SYS_EXIT_GROUP: {},
+ syscall.SYS_FALLOCATE: {},
+ syscall.SYS_FCHMOD: {},
+ syscall.SYS_FCNTL: {},
+ syscall.SYS_FSTAT: {},
+ syscall.SYS_FSYNC: {},
+ syscall.SYS_FTRUNCATE: {},
+ syscall.SYS_FUTEX: {},
+ syscall.SYS_GETDENTS64: {},
+ syscall.SYS_GETPID: {},
+ unix.SYS_GETRANDOM: {},
+ syscall.SYS_GETSOCKOPT: {},
+ syscall.SYS_GETTID: {},
+ syscall.SYS_GETTIMEOFDAY: {},
+ syscall.SYS_LISTEN: {},
+ syscall.SYS_LSEEK: {},
+ syscall.SYS_MADVISE: {},
+ syscall.SYS_MINCORE: {},
+ syscall.SYS_MMAP: {},
+ syscall.SYS_MPROTECT: {},
+ syscall.SYS_MUNMAP: {},
+ syscall.SYS_NEWFSTATAT: {},
+ syscall.SYS_POLL: {},
+ syscall.SYS_PREAD64: {},
+ syscall.SYS_PSELECT6: {},
+ syscall.SYS_PWRITE64: {},
+ syscall.SYS_READ: {},
+ syscall.SYS_READLINKAT: {},
+ syscall.SYS_READV: {},
+ syscall.SYS_RECVMSG: {},
+ syscall.SYS_RENAMEAT: {},
+ syscall.SYS_RESTART_SYSCALL: {},
+ syscall.SYS_RT_SIGACTION: {},
+ syscall.SYS_RT_SIGPROCMASK: {},
+ syscall.SYS_RT_SIGRETURN: {},
+ syscall.SYS_SCHED_YIELD: {},
+ syscall.SYS_SENDMSG: {},
+ syscall.SYS_SETITIMER: {},
+ syscall.SYS_SHUTDOWN: {},
+ syscall.SYS_SIGALTSTACK: {},
+ syscall.SYS_SYNC_FILE_RANGE: {},
+ syscall.SYS_TGKILL: {},
+ syscall.SYS_UTIMENSAT: {},
+ syscall.SYS_WRITE: {},
+ syscall.SYS_WRITEV: {},
}
// TODO: Ioctl is needed in order to support tty consoles.
// Once filters support argument-checking, we should only allow ioctl
// with tty-related arguments.
-func consoleFilters() []uintptr {
- return []uintptr{
- syscall.SYS_IOCTL,
+func consoleFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ syscall.SYS_IOCTL: {},
}
}
@@ -97,79 +98,79 @@ func consoleFilters() []uintptr {
// file operations that would otherwise be disabled by seccomp when a Gofer is
// used. When whitelistFS is not used, opening new FDs in the Sentry is
// disallowed.
-func whitelistFSFilters() []uintptr {
- return []uintptr{
- syscall.SYS_ACCESS,
- syscall.SYS_FCHMOD,
- syscall.SYS_FSTAT,
- syscall.SYS_FSYNC,
- syscall.SYS_FTRUNCATE,
- syscall.SYS_GETCWD,
- syscall.SYS_GETDENTS,
- syscall.SYS_GETDENTS64,
- syscall.SYS_LSEEK,
- syscall.SYS_LSTAT,
- syscall.SYS_MKDIR,
- syscall.SYS_MKDIRAT,
- syscall.SYS_NEWFSTATAT,
- syscall.SYS_OPEN,
- syscall.SYS_OPENAT,
- syscall.SYS_PREAD64,
- syscall.SYS_PWRITE64,
- syscall.SYS_READ,
- syscall.SYS_READLINK,
- syscall.SYS_READLINKAT,
- syscall.SYS_RENAMEAT,
- syscall.SYS_STAT,
- syscall.SYS_SYMLINK,
- syscall.SYS_SYMLINKAT,
- syscall.SYS_SYNC_FILE_RANGE,
- syscall.SYS_UNLINK,
- syscall.SYS_UNLINKAT,
- syscall.SYS_UTIMENSAT,
- syscall.SYS_WRITE,
+func whitelistFSFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ syscall.SYS_ACCESS: {},
+ syscall.SYS_FCHMOD: {},
+ syscall.SYS_FSTAT: {},
+ syscall.SYS_FSYNC: {},
+ syscall.SYS_FTRUNCATE: {},
+ syscall.SYS_GETCWD: {},
+ syscall.SYS_GETDENTS: {},
+ syscall.SYS_GETDENTS64: {},
+ syscall.SYS_LSEEK: {},
+ syscall.SYS_LSTAT: {},
+ syscall.SYS_MKDIR: {},
+ syscall.SYS_MKDIRAT: {},
+ syscall.SYS_NEWFSTATAT: {},
+ syscall.SYS_OPEN: {},
+ syscall.SYS_OPENAT: {},
+ syscall.SYS_PREAD64: {},
+ syscall.SYS_PWRITE64: {},
+ syscall.SYS_READ: {},
+ syscall.SYS_READLINK: {},
+ syscall.SYS_READLINKAT: {},
+ syscall.SYS_RENAMEAT: {},
+ syscall.SYS_STAT: {},
+ syscall.SYS_SYMLINK: {},
+ syscall.SYS_SYMLINKAT: {},
+ syscall.SYS_SYNC_FILE_RANGE: {},
+ syscall.SYS_UNLINK: {},
+ syscall.SYS_UNLINKAT: {},
+ syscall.SYS_UTIMENSAT: {},
+ syscall.SYS_WRITE: {},
}
}
// hostInetFilters contains syscalls that are needed by sentry/socket/hostinet.
-func hostInetFilters() []uintptr {
- return []uintptr{
- syscall.SYS_ACCEPT4,
- syscall.SYS_BIND,
- syscall.SYS_CONNECT,
- syscall.SYS_GETPEERNAME,
- syscall.SYS_GETSOCKNAME,
- syscall.SYS_GETSOCKOPT,
- syscall.SYS_IOCTL,
- syscall.SYS_LISTEN,
- syscall.SYS_READV,
- syscall.SYS_RECVFROM,
- syscall.SYS_RECVMSG,
- syscall.SYS_SENDMSG,
- syscall.SYS_SENDTO,
- syscall.SYS_SETSOCKOPT,
- syscall.SYS_SHUTDOWN,
- syscall.SYS_SOCKET,
- syscall.SYS_WRITEV,
+func hostInetFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ syscall.SYS_ACCEPT4: {},
+ syscall.SYS_BIND: {},
+ syscall.SYS_CONNECT: {},
+ syscall.SYS_GETPEERNAME: {},
+ syscall.SYS_GETSOCKNAME: {},
+ syscall.SYS_GETSOCKOPT: {},
+ syscall.SYS_IOCTL: {},
+ syscall.SYS_LISTEN: {},
+ syscall.SYS_READV: {},
+ syscall.SYS_RECVFROM: {},
+ syscall.SYS_RECVMSG: {},
+ syscall.SYS_SENDMSG: {},
+ syscall.SYS_SENDTO: {},
+ syscall.SYS_SETSOCKOPT: {},
+ syscall.SYS_SHUTDOWN: {},
+ syscall.SYS_SOCKET: {},
+ syscall.SYS_WRITEV: {},
}
}
// ptraceFilters returns syscalls made exclusively by the ptrace platform.
-func ptraceFilters() []uintptr {
- return []uintptr{
- syscall.SYS_PTRACE,
- syscall.SYS_WAIT4,
- unix.SYS_GETCPU,
- unix.SYS_SCHED_SETAFFINITY,
+func ptraceFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ syscall.SYS_PTRACE: {},
+ syscall.SYS_WAIT4: {},
+ unix.SYS_GETCPU: {},
+ unix.SYS_SCHED_SETAFFINITY: {},
}
}
// kvmFilters returns syscalls made exclusively by the KVM platform.
-func kvmFilters() []uintptr {
- return []uintptr{
- syscall.SYS_IOCTL,
- syscall.SYS_RT_SIGSUSPEND,
- syscall.SYS_RT_SIGTIMEDWAIT,
- 0xffffffffffffffff, // KVM uses syscall -1 to transition to host.
+func kvmFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ syscall.SYS_IOCTL: {},
+ syscall.SYS_RT_SIGSUSPEND: {},
+ syscall.SYS_RT_SIGTIMEDWAIT: {},
+ 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host.
}
}
diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go
index e10d9bf4c..82cf00dfb 100644
--- a/runsc/boot/filter/extra_filters.go
+++ b/runsc/boot/filter/extra_filters.go
@@ -16,9 +16,13 @@
package filter
+import (
+ "gvisor.googlesource.com/gvisor/pkg/seccomp"
+)
+
// instrumentationFilters returns additional filters for syscalls used by
// Go instrumentation tools, e.g. -race, -msan.
// Returns empty when disabled.
-func instrumentationFilters() []uintptr {
+func instrumentationFilters() seccomp.SyscallRules {
return nil
}
diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go
index a862340f6..76f3f6865 100644
--- a/runsc/boot/filter/extra_filters_msan.go
+++ b/runsc/boot/filter/extra_filters_msan.go
@@ -18,13 +18,15 @@ package filter
import (
"syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by MSAN.
-func instrumentationFilters() []uintptr {
+func instrumentationFilters() seccomp.SyscallRules {
Report("MSAN is enabled: syscall filters less restrictive!")
- return []uintptr{
- syscall.SYS_SCHED_GETAFFINITY,
- syscall.SYS_SET_ROBUST_LIST,
+ return seccomp.SyscallRules{
+ syscall.SYS_SCHED_GETAFFINITY: {},
+ syscall.SYS_SET_ROBUST_LIST: {},
}
}
diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go
index b0c74a58a..c810773df 100644
--- a/runsc/boot/filter/extra_filters_race.go
+++ b/runsc/boot/filter/extra_filters_race.go
@@ -18,16 +18,21 @@ package filter
import (
"syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by TSAN.
-func instrumentationFilters() []uintptr {
+func instrumentationFilters() seccomp.SyscallRules {
Report("TSAN is enabled: syscall filters less restrictive!")
- return []uintptr{
- syscall.SYS_BRK,
- syscall.SYS_MUNLOCK,
- syscall.SYS_NANOSLEEP,
- syscall.SYS_OPEN,
- syscall.SYS_SET_ROBUST_LIST,
+ return seccomp.SyscallRules{
+ syscall.SYS_BRK: {},
+ syscall.SYS_CLONE: {},
+ syscall.SYS_FUTEX: {},
+ syscall.SYS_MMAP: {},
+ syscall.SYS_MUNLOCK: {},
+ syscall.SYS_NANOSLEEP: {},
+ syscall.SYS_OPEN: {},
+ syscall.SYS_SET_ROBUST_LIST: {},
}
}
diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go
index 3ba56a318..6ea9c464e 100644
--- a/runsc/boot/filter/filter.go
+++ b/runsc/boot/filter/filter.go
@@ -33,26 +33,26 @@ func Install(p platform.Platform, whitelistFS, console, hostNetwork bool) error
// Set of additional filters used by -race and -msan. Returns empty
// when not enabled.
- s = append(s, instrumentationFilters()...)
+ s.Merge(instrumentationFilters())
if whitelistFS {
Report("direct file access allows unrestricted file access!")
- s = append(s, whitelistFSFilters()...)
+ s.Merge(whitelistFSFilters())
}
if console {
Report("console is enabled: syscall filters less restrictive!")
- s = append(s, consoleFilters()...)
+ s.Merge(consoleFilters())
}
if hostNetwork {
Report("host networking enabled: syscall filters less restrictive!")
- s = append(s, hostInetFilters()...)
+ s.Merge(hostInetFilters())
}
switch p := p.(type) {
case *ptrace.PTrace:
- s = append(s, ptraceFilters()...)
+ s.Merge(ptraceFilters())
case *kvm.KVM:
- s = append(s, kvmFilters()...)
+ s.Merge(kvmFilters())
default:
return fmt.Errorf("unknown platform type %T", p)
}