27 files changed, 1943 insertions, 455 deletions
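Illustrative context, not part of the diff: this change renames the argument matchers (AllowAny becomes MatchAny, AllowValue becomes EqualTo) and adds NotEqual, GreaterThanOrEqual, LessThan, LessThanOrEqual, and MaskedEqual. The sketch below shows what a rule set looks like against the new names, assuming the gVisor seccomp and abi/linux packages; the SYS_OPENAT rule and its O_CLOEXEC/O_NONBLOCK mask are hypothetical examples of the new MaskedEqual(mask, value) matcher, which asserts (arg & mask) == value.

// Sketch only: rules written against the renamed and new matchers.
rules := seccomp.SyscallRules{
	syscall.SYS_FUTEX: []seccomp.Rule{
		{
			seccomp.MatchAny{},
			seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
		},
	},
	// Hypothetical: arg2 (flags) must have O_CLOEXEC set and O_NONBLOCK clear.
	syscall.SYS_OPENAT: []seccomp.Rule{
		{
			seccomp.MatchAny{},
			seccomp.MatchAny{},
			seccomp.MaskedEqual(syscall.O_CLOEXEC|syscall.O_NONBLOCK, syscall.O_CLOEXEC),
		},
	},
}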
diff --git a/pkg/abi/linux/seccomp.go b/pkg/abi/linux/seccomp.go index d0607e256..b07cafe12 100644 --- a/pkg/abi/linux/seccomp.go +++ b/pkg/abi/linux/seccomp.go @@ -34,11 +34,11 @@ type BPFAction uint32 const ( SECCOMP_RET_KILL_PROCESS BPFAction = 0x80000000 - SECCOMP_RET_KILL_THREAD = 0x00000000 - SECCOMP_RET_TRAP = 0x00030000 - SECCOMP_RET_ERRNO = 0x00050000 - SECCOMP_RET_TRACE = 0x7ff00000 - SECCOMP_RET_ALLOW = 0x7fff0000 + SECCOMP_RET_KILL_THREAD BPFAction = 0x00000000 + SECCOMP_RET_TRAP BPFAction = 0x00030000 + SECCOMP_RET_ERRNO BPFAction = 0x00050000 + SECCOMP_RET_TRACE BPFAction = 0x7ff00000 + SECCOMP_RET_ALLOW BPFAction = 0x7fff0000 ) func (a BPFAction) String() string { @@ -64,6 +64,19 @@ func (a BPFAction) Data() uint16 { return uint16(a & SECCOMP_RET_DATA) } +// WithReturnCode sets the lower 16 bits of the SECCOMP_RET_ERRNO or +// SECCOMP_RET_TRACE actions to the provided return code, overwriting the previous +// action, and returns a new BPFAction. If not SECCOMP_RET_ERRNO or +// SECCOMP_RET_TRACE then this panics. +func (a BPFAction) WithReturnCode(code uint16) BPFAction { + // mask out the previous return value + baseAction := a & SECCOMP_RET_ACTION_FULL + if baseAction == SECCOMP_RET_ERRNO || baseAction == SECCOMP_RET_TRACE { + return BPFAction(uint32(baseAction) | uint32(code)) + } + panic("WithReturnCode only valid for SECCOMP_RET_ERRNO and SECCOMP_RET_TRACE") +} + // SockFprog is sock_fprog taken from <linux/filter.h>. type SockFprog struct { Len uint16 diff --git a/pkg/bpf/decoder.go b/pkg/bpf/decoder.go index c8ee0c3b1..069d0395d 100644 --- a/pkg/bpf/decoder.go +++ b/pkg/bpf/decoder.go @@ -21,10 +21,15 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" ) -// DecodeProgram translates an array of BPF instructions into text format. -func DecodeProgram(program []linux.BPFInstruction) (string, error) { +// DecodeProgram translates a compiled BPF program into text format. +func DecodeProgram(p Program) (string, error) { + return DecodeInstructions(p.instructions) +} + +// DecodeInstructions translates an array of BPF instructions into text format. +func DecodeInstructions(instns []linux.BPFInstruction) (string, error) { var ret bytes.Buffer - for line, s := range program { + for line, s := range instns { ret.WriteString(fmt.Sprintf("%v: ", line)) if err := decode(s, line, &ret); err != nil { return "", err @@ -34,7 +39,7 @@ func DecodeProgram(program []linux.BPFInstruction) (string, error) { return ret.String(), nil } -// Decode translates BPF instruction into text format. +// Decode translates a single BPF instruction into text format. func Decode(inst linux.BPFInstruction) (string, error) { var ret bytes.Buffer err := decode(inst, -1, &ret) diff --git a/pkg/bpf/decoder_test.go b/pkg/bpf/decoder_test.go index 6a023f0c0..bb971ce21 100644 --- a/pkg/bpf/decoder_test.go +++ b/pkg/bpf/decoder_test.go @@ -93,7 +93,7 @@ func TestDecode(t *testing.T) { } } -func TestDecodeProgram(t *testing.T) { +func TestDecodeInstructions(t *testing.T) { for _, test := range []struct { name string program []linux.BPFInstruction @@ -126,7 +126,7 @@ func TestDecodeProgram(t *testing.T) { program: []linux.BPFInstruction{Stmt(Ld+Abs+W, 10), Stmt(Ld+Len+Mem, 0)}, fail: true}, } { - got, err := DecodeProgram(test.program) + got, err := DecodeInstructions(test.program) if test.fail { if err == nil { t.Errorf("%s: Decode(...) 
failed, expected: 'error', got: %q", test.name, got) diff --git a/pkg/bpf/program_builder.go b/pkg/bpf/program_builder.go index 7992044d0..caaf99c83 100644 --- a/pkg/bpf/program_builder.go +++ b/pkg/bpf/program_builder.go @@ -32,13 +32,21 @@ type ProgramBuilder struct { // Maps label names to label objects. labels map[string]*label + // unusableLabels are labels that are added before being referenced in a + // jump. Any labels added this way cannot be referenced later in order to + // avoid backwards references. + unusableLabels map[string]bool + // Array of BPF instructions that makes up the program. instructions []linux.BPFInstruction } // NewProgramBuilder creates a new ProgramBuilder instance. func NewProgramBuilder() *ProgramBuilder { - return &ProgramBuilder{labels: map[string]*label{}} + return &ProgramBuilder{ + labels: map[string]*label{}, + unusableLabels: map[string]bool{}, + } } // label contains information to resolve a label to an offset. @@ -108,9 +116,12 @@ func (b *ProgramBuilder) AddJumpLabels(code uint16, k uint32, jtLabel, jfLabel s func (b *ProgramBuilder) AddLabel(name string) error { l, ok := b.labels[name] if !ok { - // This is done to catch jump backwards cases, but it's not strictly wrong - // to have unused labels. - return fmt.Errorf("Adding a label that hasn't been used is not allowed: %v", name) + if _, ok = b.unusableLabels[name]; ok { + return fmt.Errorf("label %q already set", name) + } + // Mark the label as unusable. This is done to catch backwards jumps. + b.unusableLabels[name] = true + return nil } if l.target != -1 { return fmt.Errorf("label %q target already set: %v", name, l.target) @@ -141,6 +152,10 @@ func (b *ProgramBuilder) addLabelSource(labelName string, t jmpType) { func (b *ProgramBuilder) resolveLabels() error { for key, v := range b.labels { + if _, ok := b.unusableLabels[key]; ok { + return fmt.Errorf("backwards reference detected for label: %q", key) + } + if v.target == -1 { return fmt.Errorf("label target not set: %v", key) } diff --git a/pkg/bpf/program_builder_test.go b/pkg/bpf/program_builder_test.go index 92ca5f4c3..37f684f25 100644 --- a/pkg/bpf/program_builder_test.go +++ b/pkg/bpf/program_builder_test.go @@ -26,16 +26,16 @@ func validate(p *ProgramBuilder, expected []linux.BPFInstruction) error { if err != nil { return fmt.Errorf("Instructions() failed: %v", err) } - got, err := DecodeProgram(instructions) + got, err := DecodeInstructions(instructions) if err != nil { - return fmt.Errorf("DecodeProgram('instructions') failed: %v", err) + return fmt.Errorf("DecodeInstructions('instructions') failed: %v", err) } - expectedDecoded, err := DecodeProgram(expected) + expectedDecoded, err := DecodeInstructions(expected) if err != nil { - return fmt.Errorf("DecodeProgram('expected') failed: %v", err) + return fmt.Errorf("DecodeInstructions('expected') failed: %v", err) } if got != expectedDecoded { - return fmt.Errorf("DecodeProgram() failed, expected: %q, got: %q", expectedDecoded, got) + return fmt.Errorf("DecodeInstructions() failed, expected: %q, got: %q", expectedDecoded, got) } return nil } @@ -124,10 +124,38 @@ func TestProgramBuilderLabelWithNoInstruction(t *testing.T) { } } +// TestProgramBuilderUnusedLabel tests that adding an unused label doesn't +// cause program generation to fail. 
func TestProgramBuilderUnusedLabel(t *testing.T) { p := NewProgramBuilder() - if err := p.AddLabel("unused"); err == nil { - t.Errorf("AddLabel(unused) should have failed") + p.AddStmt(Ld+Abs+W, 10) + p.AddJump(Jmp+Ja, 10, 0, 0) + + expected := []linux.BPFInstruction{ + Stmt(Ld+Abs+W, 10), + Jump(Jmp+Ja, 10, 0, 0), + } + + if err := p.AddLabel("unused"); err != nil { + t.Errorf("AddLabel(unused) should have succeeded") + } + + if err := validate(p, expected); err != nil { + t.Errorf("Validate() failed: %v", err) + } +} + +// TestProgramBuilderBackwardsReference tests that including a backwards +// reference to a label in a program causes a failure. +func TestProgramBuilderBackwardsReference(t *testing.T) { + p := NewProgramBuilder() + if err := p.AddLabel("bw_label"); err != nil { + t.Errorf("failed to add label") + } + p.AddStmt(Ld+Abs+W, 10) + p.AddJumpTrueLabel(Jmp+Jeq+K, 10, "bw_label", 0) + if _, err := p.Instructions(); err == nil { + t.Errorf("Instructions() should have failed") } } diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index 55fd6967e..752e2dc32 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package seccomp provides basic seccomp filters for x86_64 (little endian). +// Package seccomp provides generation of basic seccomp filters. Currently, +// only little endian systems are supported. package seccomp import ( @@ -64,9 +65,9 @@ func Install(rules SyscallRules) error { Rules: rules, Action: linux.SECCOMP_RET_ALLOW, }, - }, defaultAction) + }, defaultAction, defaultAction) if log.IsLogging(log.Debug) { - programStr, errDecode := bpf.DecodeProgram(instrs) + programStr, errDecode := bpf.DecodeInstructions(instrs) if errDecode != nil { programStr = fmt.Sprintf("Error: %v\n%s", errDecode, programStr) } @@ -117,7 +118,7 @@ var SyscallName = func(sysno uintptr) string { // BuildProgram builds a BPF program from the given map of actions to matching // SyscallRules. The single generated program covers all provided RuleSets. -func BuildProgram(rules []RuleSet, defaultAction linux.BPFAction) ([]linux.BPFInstruction, error) { +func BuildProgram(rules []RuleSet, defaultAction, badArchAction linux.BPFAction) ([]linux.BPFInstruction, error) { program := bpf.NewProgramBuilder() // Be paranoid and check that syscall is done in the expected architecture. @@ -128,7 +129,7 @@ func BuildProgram(rules []RuleSet, defaultAction linux.BPFAction) ([]linux.BPFIn // defaultLabel is at the bottom of the program. The size of program // may exceeds 255 lines, which is the limit of a condition jump. program.AddJump(bpf.Jmp|bpf.Jeq|bpf.K, LINUX_AUDIT_ARCH, skipOneInst, 0) - program.AddDirectJumpLabel(defaultLabel) + program.AddStmt(bpf.Ret|bpf.K, uint32(badArchAction)) if err := buildIndex(rules, program); err != nil { return nil, err } @@ -144,6 +145,11 @@ func BuildProgram(rules []RuleSet, defaultAction linux.BPFAction) ([]linux.BPFIn // buildIndex builds a BST to quickly search through all syscalls. func buildIndex(rules []RuleSet, program *bpf.ProgramBuilder) error { + // Do nothing if rules is empty. + if len(rules) == 0 { + return nil + } + // Build a list of all application system calls, across all given rule // sets. We have a simple BST, but may dispatch individual matchers // with different actions. The matchers are evaluated linearly. 
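The next hunk generates the per-argument comparisons. Because seccomp_data arguments are 64-bit values while classic BPF loads and compares 32-bit words, each operator is assembled from two 32-bit checks on the high and low halves of the argument. A minimal Go sketch of the same split-word logic the generated filter performs (plain Go rather than BPF; function names are illustrative only):

// splitEqual mirrors EqualTo: both 32-bit halves must match.
func splitEqual(arg, want uint64) bool {
	argHigh, argLow := uint32(arg>>32), uint32(arg)
	wantHigh, wantLow := uint32(want>>32), uint32(want)
	return argHigh == wantHigh && argLow == wantLow
}

// splitGreaterThan mirrors GreaterThan: the high words decide the result
// unless they are equal, in which case the low words decide.
func splitGreaterThan(arg, want uint64) bool {
	argHigh, argLow := uint32(arg>>32), uint32(arg)
	wantHigh, wantLow := uint32(want>>32), uint32(want)
	if argHigh != wantHigh {
		return argHigh > wantHigh
	}
	return argLow > wantLow
}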
@@ -216,42 +222,163 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action linux.BPFAc labelled := false for i, arg := range rule { if arg != nil { + // Break out early if using MatchAny since no further + // instructions are required. + if _, ok := arg.(MatchAny); ok { + continue + } + + // Determine the data offset for low and high bits of input. + dataOffsetLow := seccompDataOffsetArgLow(i) + dataOffsetHigh := seccompDataOffsetArgHigh(i) + if i == RuleIP { + dataOffsetLow = seccompDataOffsetIPLow + dataOffsetHigh = seccompDataOffsetIPHigh + } + + // Add the conditional operation. Input values to the BPF + // program are 64bit values. However, comparisons in BPF can + // only be done on 32bit values. This means that we need to do + // multiple BPF comparisons in order to do one logical 64bit + // comparison. switch a := arg.(type) { - case AllowAny: - case AllowValue: - dataOffsetLow := seccompDataOffsetArgLow(i) - dataOffsetHigh := seccompDataOffsetArgHigh(i) - if i == RuleIP { - dataOffsetLow = seccompDataOffsetIPLow - dataOffsetHigh = seccompDataOffsetIPHigh - } + case EqualTo: + // EqualTo checks that both the higher and lower 32bits are equal. high, low := uint32(a>>32), uint32(a) - // assert arg_low == low + + // Assert that the lower 32bits are equal. + // arg_low == low ? continue : violation p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow) p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) - // assert arg_high == high + + // Assert that the lower 32bits are also equal. + // arg_high == high ? continue/success : violation p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh) p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) labelled = true + case NotEqual: + // NotEqual checks that either the higher or lower 32bits + // are *not* equal. + high, low := uint32(a>>32), uint32(a) + labelGood := fmt.Sprintf("ne%v", i) + + // Check if the higher 32bits are (not) equal. + // arg_low == low ? continue : success + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) + + // Assert that the lower 32bits are not equal (assuming + // higher bits are equal). + // arg_high == high ? violation : continue/success + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh) + p.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0) + p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) + labelled = true case GreaterThan: - dataOffsetLow := seccompDataOffsetArgLow(i) - dataOffsetHigh := seccompDataOffsetArgHigh(i) - if i == RuleIP { - dataOffsetLow = seccompDataOffsetIPLow - dataOffsetHigh = seccompDataOffsetIPHigh - } - labelGood := fmt.Sprintf("gt%v", i) + // GreaterThan checks that the higher 32bits is greater + // *or* that the higher 32bits are equal and the lower + // 32bits are greater. high, low := uint32(a>>32), uint32(a) - // assert arg_high < high + labelGood := fmt.Sprintf("gt%v", i) + + // Assert the higher 32bits are greater than or equal. + // arg_high >= high ? continue : violation (arg_high < high) p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh) p.AddJumpFalseLabel(bpf.Jmp|bpf.Jge|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) - // arg_high > high + + // Assert that the lower 32bits are greater. + // arg_high == high ? 
continue : success (arg_high > high) p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) - // arg_low < low + // arg_low > low ? continue/success : violation (arg_high == high and arg_low <= low) p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow) p.AddJumpFalseLabel(bpf.Jmp|bpf.Jgt|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) labelled = true + case GreaterThanOrEqual: + // GreaterThanOrEqual checks that the higher 32bits is + // greater *or* that the higher 32bits are equal and the + // lower 32bits are greater than or equal. + high, low := uint32(a>>32), uint32(a) + labelGood := fmt.Sprintf("ge%v", i) + + // Assert the higher 32bits are greater than or equal. + // arg_high >= high ? continue : violation (arg_high < high) + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jge|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) + // arg_high == high ? continue : success (arg_high > high) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) + + // Assert that the lower 32bits are greater (assuming the + // higher bits are equal). + // arg_low >= low ? continue/success : violation (arg_high == high and arg_low < low) + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jge|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) + p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) + labelled = true + case LessThan: + // LessThan checks that the higher 32bits is less *or* that + // the higher 32bits are equal and the lower 32bits are + // less. + high, low := uint32(a>>32), uint32(a) + labelGood := fmt.Sprintf("lt%v", i) + + // Assert the higher 32bits are less than or equal. + // arg_high > high ? violation : continue + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh) + p.AddJumpTrueLabel(bpf.Jmp|bpf.Jgt|bpf.K, high, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0) + // arg_high == high ? continue : success (arg_high < high) + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) + + // Assert that the lower 32bits are less (assuming the + // higher bits are equal). + // arg_low >= low ? violation : continue + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow) + p.AddJumpTrueLabel(bpf.Jmp|bpf.Jge|bpf.K, low, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0) + p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) + labelled = true + case LessThanOrEqual: + // LessThan checks that the higher 32bits is less *or* that + // the higher 32bits are equal and the lower 32bits are + // less than or equal. + high, low := uint32(a>>32), uint32(a) + labelGood := fmt.Sprintf("le%v", i) + + // Assert the higher 32bits are less than or equal. + // assert arg_high > high ? violation : continue + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh) + p.AddJumpTrueLabel(bpf.Jmp|bpf.Jgt|bpf.K, high, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0) + // arg_high == high ? continue : success + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) + + // Assert the lower bits are less than or equal (assuming + // the higher bits are equal). + // arg_low > low ? 
violation : success + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow) + p.AddJumpTrueLabel(bpf.Jmp|bpf.Jgt|bpf.K, low, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0) + p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) + labelled = true + case maskedEqual: + // MaskedEqual checks that the bitwise AND of the value and + // mask are equal for both the higher and lower 32bits. + high, low := uint32(a.value>>32), uint32(a.value) + maskHigh, maskLow := uint32(a.mask>>32), uint32(a.mask) + + // Assert that the lower 32bits are equal when masked. + // A <- arg_low. + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow) + // A <- arg_low & maskLow + p.AddStmt(bpf.Alu|bpf.And|bpf.K, maskLow) + // Assert that arg_low & maskLow == low. + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) + + // Assert that the higher 32bits are equal when masked. + // A <- arg_high + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh) + // A <- arg_high & maskHigh + p.AddStmt(bpf.Alu|bpf.And|bpf.K, maskHigh) + // Assert that arg_high & maskHigh == high. + p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) + labelled = true default: return fmt.Errorf("unknown syscall rule type: %v", reflect.TypeOf(a)) } diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go index a52dc1b4e..daf165bbf 100644 --- a/pkg/seccomp/seccomp_rules.go +++ b/pkg/seccomp/seccomp_rules.go @@ -39,28 +39,79 @@ func seccompDataOffsetArgHigh(i int) uint32 { return seccompDataOffsetArgLow(i) + 4 } -// AllowAny is marker to indicate any value will be accepted. -type AllowAny struct{} +// MatchAny is marker to indicate any value will be accepted. +type MatchAny struct{} -func (a AllowAny) String() (s string) { +func (a MatchAny) String() (s string) { return "*" } -// AllowValue specifies a value that needs to be strictly matched. -type AllowValue uintptr +// EqualTo specifies a value that needs to be strictly matched. +type EqualTo uintptr + +func (a EqualTo) String() (s string) { + return fmt.Sprintf("== %#x", uintptr(a)) +} + +// NotEqual specifies a value that is strictly not equal. +type NotEqual uintptr + +func (a NotEqual) String() (s string) { + return fmt.Sprintf("!= %#x", uintptr(a)) +} // GreaterThan specifies a value that needs to be strictly smaller. type GreaterThan uintptr -func (a AllowValue) String() (s string) { - return fmt.Sprintf("%#x ", uintptr(a)) +func (a GreaterThan) String() (s string) { + return fmt.Sprintf("> %#x", uintptr(a)) +} + +// GreaterThanOrEqual specifies a value that needs to be smaller or equal. +type GreaterThanOrEqual uintptr + +func (a GreaterThanOrEqual) String() (s string) { + return fmt.Sprintf(">= %#x", uintptr(a)) +} + +// LessThan specifies a value that needs to be strictly greater. +type LessThan uintptr + +func (a LessThan) String() (s string) { + return fmt.Sprintf("< %#x", uintptr(a)) +} + +// LessThanOrEqual specifies a value that needs to be greater or equal. +type LessThanOrEqual uintptr + +func (a LessThanOrEqual) String() (s string) { + return fmt.Sprintf("<= %#x", uintptr(a)) +} + +type maskedEqual struct { + mask uintptr + value uintptr +} + +func (a maskedEqual) String() (s string) { + return fmt.Sprintf("& %#x == %#x", a.mask, a.value) +} + +// MaskedEqual specifies a value that matches the input after the input is +// masked (bitwise &) against the given mask. Can be used to verify that input +// only includes certain approved flags. 
+func MaskedEqual(mask, value uintptr) interface{} { + return maskedEqual{ + mask: mask, + value: value, + } } // Rule stores the allowed syscall arguments. // // For example: // rule := Rule { -// AllowValue(linux.ARCH_GET_FS | linux.ARCH_SET_FS), // arg0 +// EqualTo(linux.ARCH_GET_FS | linux.ARCH_SET_FS), // arg0 // } type Rule [7]interface{} // 6 arguments + RIP @@ -89,12 +140,12 @@ func (r Rule) String() (s string) { // rules := SyscallRules{ // syscall.SYS_FUTEX: []Rule{ // { -// AllowAny{}, -// AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), +// MatchAny{}, +// EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), // }, // OR // { -// AllowAny{}, -// AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), +// MatchAny{}, +// EqualTo(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), // }, // }, // syscall.SYS_GETPID: []Rule{}, diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go index 5238df8bd..23f30678d 100644 --- a/pkg/seccomp/seccomp_test.go +++ b/pkg/seccomp/seccomp_test.go @@ -76,11 +76,14 @@ func TestBasic(t *testing.T) { } for _, test := range []struct { + name string ruleSets []RuleSet defaultAction linux.BPFAction + badArchAction linux.BPFAction specs []spec }{ { + name: "Single syscall", ruleSets: []RuleSet{ { Rules: SyscallRules{1: {}}, @@ -88,26 +91,28 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "Single syscall allowed", + desc: "syscall allowed", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "Single syscall disallowed", + desc: "syscall disallowed", data: seccompData{nr: 2, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_TRAP, }, }, }, { + name: "Multiple rulesets", ruleSets: []RuleSet{ { Rules: SyscallRules{ 1: []Rule{ { - AllowValue(0x1), + EqualTo(0x1), }, }, }, @@ -122,30 +127,32 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_KILL_THREAD, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "Multiple rulesets allowed (1a)", + desc: "allowed (1a)", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x1}}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "Multiple rulesets allowed (1b)", + desc: "allowed (1b)", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_TRAP, }, { - desc: "Multiple rulesets allowed (2)", + desc: "syscall 1 matched 2nd rule", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_TRAP, }, { - desc: "Multiple rulesets allowed (2)", + desc: "no match", data: seccompData{nr: 0, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_KILL_THREAD, }, }, }, { + name: "Multiple syscalls", ruleSets: []RuleSet{ { Rules: SyscallRules{ @@ -157,50 +164,52 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "Multiple syscalls allowed (1)", + desc: "allowed (1)", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "Multiple syscalls allowed (3)", + desc: "allowed (3)", data: seccompData{nr: 3, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "Multiple syscalls allowed (5)", + desc: "allowed (5)", data: seccompData{nr: 5, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "Multiple syscalls disallowed (0)", + desc: "disallowed (0)", data: seccompData{nr: 0, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_TRAP, }, { - 
desc: "Multiple syscalls disallowed (2)", + desc: "disallowed (2)", data: seccompData{nr: 2, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_TRAP, }, { - desc: "Multiple syscalls disallowed (4)", + desc: "disallowed (4)", data: seccompData{nr: 4, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_TRAP, }, { - desc: "Multiple syscalls disallowed (6)", + desc: "disallowed (6)", data: seccompData{nr: 6, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_TRAP, }, { - desc: "Multiple syscalls disallowed (100)", + desc: "disallowed (100)", data: seccompData{nr: 100, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_TRAP, }, }, }, { + name: "Wrong architecture", ruleSets: []RuleSet{ { Rules: SyscallRules{ @@ -210,15 +219,17 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "Wrong architecture", + desc: "arch (123)", data: seccompData{nr: 1, arch: 123}, - want: linux.SECCOMP_RET_TRAP, + want: linux.SECCOMP_RET_KILL_THREAD, }, }, }, { + name: "Syscall disallowed", ruleSets: []RuleSet{ { Rules: SyscallRules{ @@ -228,22 +239,24 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "Syscall disallowed, action trap", + desc: "action trap", data: seccompData{nr: 2, arch: LINUX_AUDIT_ARCH}, want: linux.SECCOMP_RET_TRAP, }, }, }, { + name: "Syscall arguments", ruleSets: []RuleSet{ { Rules: SyscallRules{ 1: []Rule{ { - AllowAny{}, - AllowValue(0xf), + MatchAny{}, + EqualTo(0xf), }, }, }, @@ -251,29 +264,31 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "Syscall argument allowed", + desc: "allowed", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf, 0xf}}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "Syscall argument disallowed", + desc: "disallowed", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf, 0xe}}, want: linux.SECCOMP_RET_TRAP, }, }, }, { + name: "Multiple arguments", ruleSets: []RuleSet{ { Rules: SyscallRules{ 1: []Rule{ { - AllowValue(0xf), + EqualTo(0xf), }, { - AllowValue(0xe), + EqualTo(0xe), }, }, }, @@ -281,28 +296,30 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "Syscall argument allowed, two rules", + desc: "match first rule", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf}}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "Syscall argument allowed, two rules", + desc: "match 2nd rule", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xe}}, want: linux.SECCOMP_RET_ALLOW, }, }, }, { + name: "EqualTo", ruleSets: []RuleSet{ { Rules: SyscallRules{ 1: []Rule{ { - AllowValue(0), - AllowValue(math.MaxUint64 - 1), - AllowValue(math.MaxUint32), + EqualTo(0), + EqualTo(math.MaxUint64 - 1), + EqualTo(math.MaxUint32), }, }, }, @@ -310,9 +327,10 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "64bit syscall argument allowed", + desc: "argument allowed (all match)", data: seccompData{ nr: 1, arch: LINUX_AUDIT_ARCH, @@ -321,7 +339,7 @@ func TestBasic(t *testing.T) { want: linux.SECCOMP_RET_ALLOW, }, { - desc: "64bit syscall argument disallowed", + desc: "argument disallowed (one mismatch)", data: seccompData{ nr: 1, arch: 
LINUX_AUDIT_ARCH, @@ -330,7 +348,7 @@ func TestBasic(t *testing.T) { want: linux.SECCOMP_RET_TRAP, }, { - desc: "64bit syscall argument disallowed", + desc: "argument disallowed (multiple mismatch)", data: seccompData{ nr: 1, arch: LINUX_AUDIT_ARCH, @@ -341,6 +359,103 @@ func TestBasic(t *testing.T) { }, }, { + name: "NotEqual", + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + NotEqual(0x7aabbccdd), + NotEqual(math.MaxUint64 - 1), + NotEqual(math.MaxUint32), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, + specs: []spec{ + { + desc: "arg allowed", + data: seccompData{ + nr: 1, + arch: LINUX_AUDIT_ARCH, + args: [6]uint64{0, math.MaxUint64, math.MaxUint32 - 1}, + }, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg disallowed (one equal)", + data: seccompData{ + nr: 1, + arch: LINUX_AUDIT_ARCH, + args: [6]uint64{0x7aabbccdd, math.MaxUint64, math.MaxUint32 - 1}, + }, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg disallowed (all equal)", + data: seccompData{ + nr: 1, + arch: LINUX_AUDIT_ARCH, + args: [6]uint64{0x7aabbccdd, math.MaxUint64 - 1, math.MaxUint32}, + }, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + name: "GreaterThan", + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + // 4294967298 + // Both upper 32 bits and lower 32 bits are non-zero. + // 00000000000000000000000000000010 + // 00000000000000000000000000000010 + GreaterThan(0x00000002_00000002), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, + specs: []spec{ + { + desc: "high 32bits greater", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000003_00000002}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "high 32bits equal, low 32bits greater", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000003}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "high 32bits equal, low 32bits equal", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000002}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "high 32bits equal, low 32bits less", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000001}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "high 32bits less", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000001_00000003}}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + name: "GreaterThan (multi)", ruleSets: []RuleSet{ { Rules: SyscallRules{ @@ -355,46 +470,145 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "GreaterThan: Syscall argument allowed", + desc: "arg allowed", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xffffffff}}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "GreaterThan: Syscall argument disallowed (equal)", + desc: "arg disallowed (first arg equal)", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf, 0xffffffff}}, want: linux.SECCOMP_RET_TRAP, }, { - desc: "Syscall argument disallowed (smaller)", + desc: "arg disallowed (first arg smaller)", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xffffffff}}, want: linux.SECCOMP_RET_TRAP, }, { - desc: "GreaterThan2: Syscall argument allowed", - data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: 
[6]uint64{0x10, 0xfbcd000d}}, + desc: "arg disallowed (second arg equal)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xabcd000d}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg disallowed (second arg smaller)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xa000ffff}}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + name: "GreaterThanOrEqual", + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + // 4294967298 + // Both upper 32 bits and lower 32 bits are non-zero. + // 00000000000000000000000000000010 + // 00000000000000000000000000000010 + GreaterThanOrEqual(0x00000002_00000002), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, + specs: []spec{ + { + desc: "high 32bits greater", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000003_00000002}}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "GreaterThan2: Syscall argument disallowed (equal)", - data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xabcd000d}}, + desc: "high 32bits equal, low 32bits greater", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000003}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "high 32bits equal, low 32bits equal", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000002}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "high 32bits equal, low 32bits less", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000001}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "high 32bits less", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000001_00000002}}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + name: "GreaterThanOrEqual (multi)", + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + GreaterThanOrEqual(0xf), + GreaterThanOrEqual(0xabcd000d), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, + specs: []spec{ + { + desc: "arg allowed (both greater)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xffffffff}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg allowed (first arg equal)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf, 0xffffffff}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg disallowed (first arg smaller)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xffffffff}}, want: linux.SECCOMP_RET_TRAP, }, { - desc: "GreaterThan2: Syscall argument disallowed (smaller)", + desc: "arg allowed (second arg equal)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xabcd000d}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg disallowed (second arg smaller)", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xa000ffff}}, want: linux.SECCOMP_RET_TRAP, }, + { + desc: "arg disallowed (both arg smaller)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xa000ffff}}, + want: linux.SECCOMP_RET_TRAP, + }, }, }, { + name: "LessThan", ruleSets: []RuleSet{ { Rules: SyscallRules{ 1: []Rule{ { - RuleIP: AllowValue(0x7aabbccdd), + // 4294967298 + // Both upper 32 bits and lower 32 bits are non-zero. 
+ // 00000000000000000000000000000010 + // 00000000000000000000000000000010 + LessThan(0x00000002_00000002), }, }, }, @@ -402,40 +616,307 @@ func TestBasic(t *testing.T) { }, }, defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { - desc: "IP: Syscall instruction pointer allowed", + desc: "high 32bits greater", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000003_00000002}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "high 32bits equal, low 32bits greater", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000003}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "high 32bits equal, low 32bits equal", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000002}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "high 32bits equal, low 32bits less", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000001}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "high 32bits less", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000001_00000002}}, + want: linux.SECCOMP_RET_ALLOW, + }, + }, + }, + { + name: "LessThan (multi)", + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + LessThan(0x1), + LessThan(0xabcd000d), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, + specs: []spec{ + { + desc: "arg allowed", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0x0}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg disallowed (first arg equal)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x1, 0x0}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg disallowed (first arg greater)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x2, 0x0}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg disallowed (second arg equal)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xabcd000d}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg disallowed (second arg greater)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xffffffff}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg disallowed (both arg greater)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x2, 0xffffffff}}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + name: "LessThanOrEqual", + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + // 4294967298 + // Both upper 32 bits and lower 32 bits are non-zero. 
+ // 00000000000000000000000000000010 + // 00000000000000000000000000000010 + LessThanOrEqual(0x00000002_00000002), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, + specs: []spec{ + { + desc: "high 32bits greater", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000003_00000002}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "high 32bits equal, low 32bits greater", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000003}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "high 32bits equal, low 32bits equal", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000002}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "high 32bits equal, low 32bits less", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000001}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "high 32bits less", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000001_00000002}}, + want: linux.SECCOMP_RET_ALLOW, + }, + }, + }, + + { + name: "LessThanOrEqual (multi)", + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + LessThanOrEqual(0x1), + LessThanOrEqual(0xabcd000d), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, + specs: []spec{ + { + desc: "arg allowed", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0x0}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg allowed (first arg equal)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x1, 0x0}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg disallowed (first arg greater)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x2, 0x0}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg allowed (second arg equal)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xabcd000d}}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg disallowed (second arg greater)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xffffffff}}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg disallowed (both arg greater)", + data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x2, 0xffffffff}}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + name: "MaskedEqual", + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + // x & 00000001 00000011 (0x103) == 00000000 00000001 (0x1) + // Input x must have lowest order bit set and + // must *not* have 8th or second lowest order bit set. 
+ MaskedEqual(0x103, 0x1), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, + specs: []spec{ + { + desc: "arg allowed (low order mandatory bit)", + data: seccompData{ + nr: 1, + arch: LINUX_AUDIT_ARCH, + // 00000000 00000000 00000000 00000001 + args: [6]uint64{0x1}, + }, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg allowed (low order optional bit)", + data: seccompData{ + nr: 1, + arch: LINUX_AUDIT_ARCH, + // 00000000 00000000 00000000 00000101 + args: [6]uint64{0x5}, + }, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "arg disallowed (lowest order bit not set)", + data: seccompData{ + nr: 1, + arch: LINUX_AUDIT_ARCH, + // 00000000 00000000 00000000 00000010 + args: [6]uint64{0x2}, + }, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg disallowed (second lowest order bit set)", + data: seccompData{ + nr: 1, + arch: LINUX_AUDIT_ARCH, + // 00000000 00000000 00000000 00000011 + args: [6]uint64{0x3}, + }, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "arg disallowed (8th bit set)", + data: seccompData{ + nr: 1, + arch: LINUX_AUDIT_ARCH, + // 00000000 00000000 00000001 00000000 + args: [6]uint64{0x100}, + }, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + name: "Instruction Pointer", + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + RuleIP: EqualTo(0x7aabbccdd), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + badArchAction: linux.SECCOMP_RET_KILL_THREAD, + specs: []spec{ + { + desc: "allowed", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{}, instructionPointer: 0x7aabbccdd}, want: linux.SECCOMP_RET_ALLOW, }, { - desc: "IP: Syscall instruction pointer disallowed", + desc: "disallowed", data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{}, instructionPointer: 0x711223344}, want: linux.SECCOMP_RET_TRAP, }, }, }, } { - instrs, err := BuildProgram(test.ruleSets, test.defaultAction) - if err != nil { - t.Errorf("%s: buildProgram() got error: %v", test.specs[0].desc, err) - continue - } - p, err := bpf.Compile(instrs) - if err != nil { - t.Errorf("%s: bpf.Compile() got error: %v", test.specs[0].desc, err) - continue - } - for _, spec := range test.specs { - got, err := bpf.Exec(p, spec.data.asInput()) + t.Run(test.name, func(t *testing.T) { + instrs, err := BuildProgram(test.ruleSets, test.defaultAction, test.badArchAction) if err != nil { - t.Errorf("%s: bpf.Exec() got error: %v", spec.desc, err) - continue + t.Fatalf("BuildProgram() got error: %v", err) + } + p, err := bpf.Compile(instrs) + if err != nil { + t.Fatalf("bpf.Compile() got error: %v", err) } - if got != uint32(spec.want) { - t.Errorf("%s: bpd.Exec() = %d, want: %d", spec.desc, got, spec.want) + for _, spec := range test.specs { + got, err := bpf.Exec(p, spec.data.asInput()) + if err != nil { + t.Fatalf("%s: bpf.Exec() got error: %v", spec.desc, err) + } + if got != uint32(spec.want) { + // Include a decoded version of the program in output for debugging purposes. 
+ decoded, _ := bpf.DecodeInstructions(instrs) + t.Fatalf("%s: got: %d, want: %d\nBPF Program\n%s", spec.desc, got, spec.want, decoded) + } } - } + }) } } @@ -457,7 +938,7 @@ func TestRandom(t *testing.T) { Rules: syscallRules, Action: linux.SECCOMP_RET_ALLOW, }, - }, linux.SECCOMP_RET_TRAP) + }, linux.SECCOMP_RET_TRAP, linux.SECCOMP_RET_KILL_THREAD) if err != nil { t.Fatalf("buildProgram() got error: %v", err) } diff --git a/pkg/seccomp/seccomp_test_victim.go b/pkg/seccomp/seccomp_test_victim.go index fe157f539..7f33e0d9e 100644 --- a/pkg/seccomp/seccomp_test_victim.go +++ b/pkg/seccomp/seccomp_test_victim.go @@ -100,7 +100,7 @@ func main() { if !die { syscalls[syscall.SYS_OPENAT] = []seccomp.Rule{ { - seccomp.AllowValue(10), + seccomp.EqualTo(10), }, } } diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go index 413111faf..332bdb8e8 100644 --- a/pkg/sentry/kernel/syscalls.go +++ b/pkg/sentry/kernel/syscalls.go @@ -348,6 +348,16 @@ func (s *SyscallTable) LookupName(sysno uintptr) string { return fmt.Sprintf("sys_%d", sysno) // Unlikely. } +// LookupNo looks up a syscall number by name. +func (s *SyscallTable) LookupNo(name string) (uintptr, error) { + for i, syscall := range s.Table { + if syscall.Name == name { + return uintptr(i), nil + } + } + return 0, fmt.Errorf("syscall %q not found", name) +} + // LookupEmulate looks up an emulation syscall number. func (s *SyscallTable) LookupEmulate(addr usermem.Addr) (uintptr, bool) { sysno, ok := s.Emulate[addr] diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go index 84b699f0d..020bbda79 100644 --- a/pkg/sentry/platform/ptrace/subprocess_amd64.go +++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go @@ -201,7 +201,7 @@ func appendArchSeccompRules(rules []seccomp.RuleSet, defaultAction linux.BPFActi seccomp.RuleSet{ Rules: seccomp.SyscallRules{ syscall.SYS_ARCH_PRCTL: []seccomp.Rule{ - {seccomp.AllowValue(linux.ARCH_SET_CPUID), seccomp.AllowValue(0)}, + {seccomp.EqualTo(linux.ARCH_SET_CPUID), seccomp.EqualTo(0)}, }, }, Action: linux.SECCOMP_RET_ALLOW, diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go index 2ce528601..8548853da 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux.go @@ -80,9 +80,9 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro Rules: seccomp.SyscallRules{ syscall.SYS_CLONE: []seccomp.Rule{ // Allow creation of new subprocesses (used by the master). - {seccomp.AllowValue(syscall.CLONE_FILES | syscall.SIGKILL)}, + {seccomp.EqualTo(syscall.CLONE_FILES | syscall.SIGKILL)}, // Allow creation of new threads within a single address space (used by addresss spaces). - {seccomp.AllowValue( + {seccomp.EqualTo( syscall.CLONE_FILES | syscall.CLONE_FS | syscall.CLONE_SIGHAND | @@ -97,14 +97,14 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro // For the stub prctl dance (all). syscall.SYS_PRCTL: []seccomp.Rule{ - {seccomp.AllowValue(syscall.PR_SET_PDEATHSIG), seccomp.AllowValue(syscall.SIGKILL)}, + {seccomp.EqualTo(syscall.PR_SET_PDEATHSIG), seccomp.EqualTo(syscall.SIGKILL)}, }, syscall.SYS_GETPPID: {}, // For the stub to stop itself (all). syscall.SYS_GETPID: {}, syscall.SYS_KILL: []seccomp.Rule{ - {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SIGSTOP)}, + {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SIGSTOP)}, }, // Injected to support the address space operations. 
@@ -115,7 +115,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro }) } rules = appendArchSeccompRules(rules, defaultAction) - instrs, err := seccomp.BuildProgram(rules, defaultAction) + instrs, err := seccomp.BuildProgram(rules, defaultAction, defaultAction) if err != nil { return nil, err } diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 704c66742..01f62d50a 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -26,6 +26,7 @@ go_library( deps = [ "//pkg/abi", "//pkg/abi/linux", + "//pkg/bpf", "//pkg/context", "//pkg/control/server", "//pkg/cpuid", @@ -107,6 +108,7 @@ go_library( "//runsc/boot/pprof", "//runsc/config", "//runsc/specutils", + "//runsc/specutils/seccomp", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", "@org_golang_x_sys//unix:go_default_library", diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 149eb0b1b..4ed28b5cd 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -29,7 +29,7 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_CLOCK_GETTIME: {}, syscall.SYS_CLONE: []seccomp.Rule{ { - seccomp.AllowValue( + seccomp.EqualTo( syscall.CLONE_VM | syscall.CLONE_FS | syscall.CLONE_FILES | @@ -42,26 +42,26 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_DUP: {}, syscall.SYS_DUP3: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.O_CLOEXEC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.O_CLOEXEC), }, }, syscall.SYS_EPOLL_CREATE1: {}, syscall.SYS_EPOLL_CTL: {}, syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, }, syscall.SYS_EVENTFD2: []seccomp.Rule{ { - seccomp.AllowValue(0), - seccomp.AllowValue(0), + seccomp.EqualTo(0), + seccomp.EqualTo(0), }, }, syscall.SYS_EXIT: {}, @@ -70,16 +70,16 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_FCHMOD: {}, syscall.SYS_FCNTL: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_GETFL), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_GETFL), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_SETFL), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_SETFL), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_GETFD), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_GETFD), }, }, syscall.SYS_FSTAT: {}, @@ -87,52 +87,52 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_FTRUNCATE: {}, syscall.SYS_FUTEX: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), + seccomp.MatchAny{}, + seccomp.MatchAny{}, }, { - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), + seccomp.MatchAny{}, }, // Non-private variants are included for flipcall support. They are otherwise // unncessary, as the sentry will use only private futexes internally. 
{ - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAIT), - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAIT), + seccomp.MatchAny{}, + seccomp.MatchAny{}, }, { - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAKE), - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAKE), + seccomp.MatchAny{}, }, }, syscall.SYS_GETPID: {}, unix.SYS_GETRANDOM: {}, syscall.SYS_GETSOCKOPT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_DOMAIN), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_DOMAIN), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_TYPE), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_TYPE), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_ERROR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_ERROR), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_SNDBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_SNDBUF), }, }, syscall.SYS_GETTID: {}, @@ -141,34 +141,34 @@ var allowedSyscalls = seccomp.SyscallRules{ // setting/getting termios and winsize. syscall.SYS_IOCTL: []seccomp.Rule{ { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TCGETS), - seccomp.AllowAny{}, /* termios struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TCGETS), + seccomp.MatchAny{}, /* termios struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TCSETS), - seccomp.AllowAny{}, /* termios struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TCSETS), + seccomp.MatchAny{}, /* termios struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TCSETSF), - seccomp.AllowAny{}, /* termios struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TCSETSF), + seccomp.MatchAny{}, /* termios struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TCSETSW), - seccomp.AllowAny{}, /* termios struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TCSETSW), + seccomp.MatchAny{}, /* termios struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TIOCSWINSZ), - seccomp.AllowAny{}, /* winsize struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TIOCSWINSZ), + seccomp.MatchAny{}, /* winsize struct */ }, { - seccomp.AllowAny{}, /* fd */ - seccomp.AllowValue(linux.TIOCGWINSZ), - seccomp.AllowAny{}, /* winsize struct */ + seccomp.MatchAny{}, /* fd */ + seccomp.EqualTo(linux.TIOCGWINSZ), + seccomp.MatchAny{}, /* winsize struct */ }, }, syscall.SYS_LSEEK: {}, @@ -182,46 +182,46 @@ var allowedSyscalls = seccomp.SyscallRules{ // TODO(b/148688965): Remove once this is gone from Go. 
syscall.SYS_MLOCK: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(4096), + seccomp.MatchAny{}, + seccomp.EqualTo(4096), }, }, syscall.SYS_MMAP: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_SHARED), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_SHARED), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.PROT_WRITE | syscall.PROT_READ), - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.PROT_WRITE | syscall.PROT_READ), + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), }, }, syscall.SYS_MPROTECT: {}, @@ -237,32 +237,32 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_READ: {}, syscall.SYS_RECVMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), }, }, syscall.SYS_RECVMMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(fdbased.MaxMsgsPerRecv), - seccomp.AllowValue(syscall.MSG_DONTWAIT), - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(fdbased.MaxMsgsPerRecv), + seccomp.EqualTo(syscall.MSG_DONTWAIT), + seccomp.EqualTo(0), }, }, unix.SYS_SENDMMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT), - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT), + seccomp.EqualTo(0), }, }, syscall.SYS_RESTART_SYSCALL: {}, @@ -272,49 +272,49 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_SCHED_YIELD: {}, syscall.SYS_SENDMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | 
syscall.MSG_NOSIGNAL), }, }, syscall.SYS_SETITIMER: {}, syscall.SYS_SHUTDOWN: []seccomp.Rule{ // Used by fs/host to shutdown host sockets. - {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RD)}, - {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_WR)}, + {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RD)}, + {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_WR)}, // Used by unet to shutdown connections. - {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)}, + {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)}, }, syscall.SYS_SIGALTSTACK: {}, unix.SYS_STATX: {}, syscall.SYS_SYNC_FILE_RANGE: {}, syscall.SYS_TEE: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(1), /* len */ - seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */ + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(1), /* len */ + seccomp.EqualTo(unix.SPLICE_F_NONBLOCK), /* flags */ }, }, syscall.SYS_TGKILL: []seccomp.Rule{ { - seccomp.AllowValue(uint64(os.Getpid())), + seccomp.EqualTo(uint64(os.Getpid())), }, }, syscall.SYS_UTIMENSAT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(0), /* null pathname */ - seccomp.AllowAny{}, - seccomp.AllowValue(0), /* flags */ + seccomp.MatchAny{}, + seccomp.EqualTo(0), /* null pathname */ + seccomp.MatchAny{}, + seccomp.EqualTo(0), /* flags */ }, }, syscall.SYS_WRITE: {}, // For rawfile.NonBlockingWriteIovec. syscall.SYS_WRITEV: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, seccomp.GreaterThan(0), }, }, @@ -325,10 +325,10 @@ func hostInetFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_ACCEPT4: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), }, }, syscall.SYS_BIND: {}, @@ -337,84 +337,84 @@ func hostInetFilters() seccomp.SyscallRules { syscall.SYS_GETSOCKNAME: {}, syscall.SYS_GETSOCKOPT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IP), - seccomp.AllowValue(syscall.IP_TOS), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_TOS), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IP), - seccomp.AllowValue(syscall.IP_RECVTOS), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_RECVTOS), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_TCLASS), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_TCLASS), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_RECVTCLASS), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_RECVTCLASS), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_V6ONLY), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_V6ONLY), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_ERROR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_ERROR), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_KEEPALIVE), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + 
seccomp.EqualTo(syscall.SO_KEEPALIVE), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_SNDBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_SNDBUF), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_RCVBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_RCVBUF), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_REUSEADDR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_REUSEADDR), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_TYPE), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_TYPE), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_LINGER), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_LINGER), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_TCP), - seccomp.AllowValue(syscall.TCP_NODELAY), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(syscall.TCP_NODELAY), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_TCP), - seccomp.AllowValue(syscall.TCP_INFO), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(syscall.TCP_INFO), }, }, syscall.SYS_IOCTL: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.TIOCOUTQ), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.TIOCOUTQ), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.TIOCINQ), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.TIOCINQ), }, }, syscall.SYS_LISTEN: {}, @@ -425,103 +425,103 @@ func hostInetFilters() seccomp.SyscallRules { syscall.SYS_SENDTO: {}, syscall.SYS_SETSOCKOPT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_V6ONLY), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_V6ONLY), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_SNDBUF), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_SNDBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_RCVBUF), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_RCVBUF), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_REUSEADDR), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_REUSEADDR), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_TCP), - seccomp.AllowValue(syscall.TCP_NODELAY), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(syscall.TCP_NODELAY), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IP), - seccomp.AllowValue(syscall.IP_TOS), - 
seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_TOS), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IP), - seccomp.AllowValue(syscall.IP_RECVTOS), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_RECVTOS), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_TCLASS), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_TCLASS), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_IPV6), - seccomp.AllowValue(syscall.IPV6_RECVTCLASS), - seccomp.AllowAny{}, - seccomp.AllowValue(4), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_RECVTCLASS), + seccomp.MatchAny{}, + seccomp.EqualTo(4), }, }, syscall.SYS_SHUTDOWN: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SHUT_RD), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SHUT_RD), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SHUT_WR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SHUT_WR), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SHUT_RDWR), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SHUT_RDWR), }, }, syscall.SYS_SOCKET: []seccomp.Rule{ { - seccomp.AllowValue(syscall.AF_INET), - seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_INET), + seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_INET), - seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_INET), + seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_INET6), - seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_INET6), + seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_INET6), - seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_INET6), + seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, }, syscall.SYS_WRITEV: {}, @@ -532,20 +532,20 @@ func controlServerFilters(fd int) seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_ACCEPT: []seccomp.Rule{ { - seccomp.AllowValue(fd), + seccomp.EqualTo(fd), }, }, syscall.SYS_LISTEN: []seccomp.Rule{ { - seccomp.AllowValue(fd), - seccomp.AllowValue(16 /* unet.backlog */), + seccomp.EqualTo(fd), + seccomp.EqualTo(16 /* unet.backlog */), }, }, syscall.SYS_GETSOCKOPT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.SOL_SOCKET), - seccomp.AllowValue(syscall.SO_PEERCRED), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_PEERCRED), }, }, } diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go index 5335ff82c..9b1799416 100644 --- 
a/runsc/boot/filter/config_amd64.go +++ b/runsc/boot/filter/config_amd64.go @@ -25,7 +25,7 @@ import ( func init() { allowedSyscalls[syscall.SYS_ARCH_PRCTL] = append(allowedSyscalls[syscall.SYS_ARCH_PRCTL], - seccomp.Rule{seccomp.AllowValue(linux.ARCH_GET_FS)}, - seccomp.Rule{seccomp.AllowValue(linux.ARCH_SET_FS)}, + seccomp.Rule{seccomp.EqualTo(linux.ARCH_GET_FS)}, + seccomp.Rule{seccomp.EqualTo(linux.ARCH_SET_FS)}, ) } diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go index 194952a7b..7b8669595 100644 --- a/runsc/boot/filter/config_profile.go +++ b/runsc/boot/filter/config_profile.go @@ -25,9 +25,9 @@ func profileFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_OPENAT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), }, }, } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index a136da21a..4940ea96a 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -27,6 +27,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/fd" @@ -70,6 +71,7 @@ import ( "gvisor.dev/gvisor/runsc/boot/pprof" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/specutils/seccomp" // Include supported socket providers. "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" @@ -507,6 +509,7 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { return mf, nil } +// installSeccompFilters installs sandbox seccomp filters with the host. func (l *Loader) installSeccompFilters() error { if l.root.conf.DisableSeccomp { filter.Report("syscall filter is DISABLED. Running in less secure mode.") @@ -577,6 +580,7 @@ func (l *Loader) run() error { if _, err := l.createContainerProcess(true, l.sandboxID, &l.root, ep); err != nil { return err } + } ep.tg = l.k.GlobalInit() @@ -764,6 +768,31 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn } } + // Install seccomp filters with the new task if there are any. + if info.conf.OCISeccomp { + if info.spec.Linux != nil && info.spec.Linux.Seccomp != nil { + program, err := seccomp.BuildProgram(info.spec.Linux.Seccomp) + if err != nil { + return nil, fmt.Errorf("building seccomp program: %v", err) + } + + if log.IsLogging(log.Debug) { + out, _ := bpf.DecodeProgram(program) + log.Debugf("Installing OCI seccomp filters\nProgram:\n%s", out) + } + + task := tg.Leader() + // NOTE: It seems Flags are ignored by runc so we ignore them too. + if err := task.AppendSyscallFilter(program, true); err != nil { + return nil, fmt.Errorf("appending seccomp filters: %v", err) + } + } + } else { + if info.spec.Linux != nil && info.spec.Linux.Seccomp != nil { + log.Warningf("Seccomp spec is being ignored") + } + } + return tg, nil } diff --git a/runsc/config/config.go b/runsc/config/config.go index df134bb2f..f30f79f68 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -157,8 +157,12 @@ type Config struct { // Enables FUSE usage. FUSE bool `flag:"fuse"` + // Allows overriding of flags in OCI annotations. AllowFlagOverride bool `flag:"allow-flag-override"` + // Enables seccomp inside the sandbox. 
+ OCISeccomp bool `flag:"oci-seccomp"` + // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in // tests. It allows runsc to start the sandbox process as the current // user, and without chrooting the sandbox process. This can be diff --git a/runsc/config/flags.go b/runsc/config/flags.go index eff46e938..a5f25cfa2 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -63,6 +63,7 @@ func RegisterFlags() { flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.") flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.") flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)") + flag.Bool("oci-seccomp", false, "Enables loading OCI seccomp filters inside the sandbox.") // Flags that control sandbox runtime behavior: FS related. flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index 88814b83c..0cb9b1cae 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -29,7 +29,7 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_CLOCK_GETTIME: {}, syscall.SYS_CLONE: []seccomp.Rule{ { - seccomp.AllowValue( + seccomp.EqualTo( syscall.CLONE_VM | syscall.CLONE_FS | syscall.CLONE_FILES | @@ -43,46 +43,46 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_EPOLL_CTL: {}, syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, }, syscall.SYS_EVENTFD2: []seccomp.Rule{ { - seccomp.AllowValue(0), - seccomp.AllowValue(0), + seccomp.EqualTo(0), + seccomp.EqualTo(0), }, }, syscall.SYS_EXIT: {}, syscall.SYS_EXIT_GROUP: {}, syscall.SYS_FALLOCATE: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, }, syscall.SYS_FCHMOD: {}, syscall.SYS_FCHOWNAT: {}, syscall.SYS_FCNTL: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_GETFL), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_GETFL), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_SETFL), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_SETFL), }, { - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.F_GETFD), + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.F_GETFD), }, // Used by flipcall.PacketWindowAllocator.Init(). 
{ - seccomp.AllowAny{}, - seccomp.AllowValue(unix.F_ADD_SEALS), + seccomp.MatchAny{}, + seccomp.EqualTo(unix.F_ADD_SEALS), }, }, syscall.SYS_FSTAT: {}, @@ -91,31 +91,31 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_FTRUNCATE: {}, syscall.SYS_FUTEX: { seccomp.Rule{ - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, seccomp.Rule{ - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, // Non-private futex used for flipcall. seccomp.Rule{ - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAIT), - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAIT), + seccomp.MatchAny{}, + seccomp.MatchAny{}, }, seccomp.Rule{ - seccomp.AllowAny{}, - seccomp.AllowValue(linux.FUTEX_WAKE), - seccomp.AllowAny{}, - seccomp.AllowAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(linux.FUTEX_WAKE), + seccomp.MatchAny{}, + seccomp.MatchAny{}, }, }, syscall.SYS_GETDENTS64: {}, @@ -137,28 +137,28 @@ var allowedSyscalls = seccomp.SyscallRules{ // TODO(b/148688965): Remove once this is gone from Go. syscall.SYS_MLOCK: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowValue(4096), + seccomp.MatchAny{}, + seccomp.EqualTo(4096), }, }, syscall.SYS_MMAP: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_SHARED), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_SHARED), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), }, }, syscall.SYS_MPROTECT: {}, @@ -172,14 +172,14 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_READLINKAT: {}, syscall.SYS_RECVMSG: []seccomp.Rule{ { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), }, { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), }, }, syscall.SYS_RENAMEAT: {}, @@ -190,33 +190,33 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_SENDMSG: []seccomp.Rule{ // Used by fdchannel.Endpoint.SendFD(). { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(0), }, // Used by unet.SocketWriter.WriteVec(). 
{ - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), }, }, syscall.SYS_SHUTDOWN: []seccomp.Rule{ - {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)}, + {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)}, }, syscall.SYS_SIGALTSTACK: {}, // Used by fdchannel.NewConnectedSockets(). syscall.SYS_SOCKETPAIR: { { - seccomp.AllowValue(syscall.AF_UNIX), - seccomp.AllowValue(syscall.SOCK_SEQPACKET | syscall.SOCK_CLOEXEC), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_UNIX), + seccomp.EqualTo(syscall.SOCK_SEQPACKET | syscall.SOCK_CLOEXEC), + seccomp.EqualTo(0), }, }, syscall.SYS_SYMLINKAT: {}, syscall.SYS_TGKILL: []seccomp.Rule{ { - seccomp.AllowValue(uint64(os.Getpid())), + seccomp.EqualTo(uint64(os.Getpid())), }, }, syscall.SYS_UNLINKAT: {}, @@ -227,24 +227,24 @@ var allowedSyscalls = seccomp.SyscallRules{ var udsSyscalls = seccomp.SyscallRules{ syscall.SYS_SOCKET: []seccomp.Rule{ { - seccomp.AllowValue(syscall.AF_UNIX), - seccomp.AllowValue(syscall.SOCK_STREAM), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_UNIX), + seccomp.EqualTo(syscall.SOCK_STREAM), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_UNIX), - seccomp.AllowValue(syscall.SOCK_DGRAM), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_UNIX), + seccomp.EqualTo(syscall.SOCK_DGRAM), + seccomp.EqualTo(0), }, { - seccomp.AllowValue(syscall.AF_UNIX), - seccomp.AllowValue(syscall.SOCK_SEQPACKET), - seccomp.AllowValue(0), + seccomp.EqualTo(syscall.AF_UNIX), + seccomp.EqualTo(syscall.SOCK_SEQPACKET), + seccomp.EqualTo(0), }, }, syscall.SYS_CONNECT: []seccomp.Rule{ { - seccomp.AllowAny{}, + seccomp.MatchAny{}, }, }, } diff --git a/runsc/fsgofer/filter/config_amd64.go b/runsc/fsgofer/filter/config_amd64.go index a4b28cb8b..53506b5e1 100644 --- a/runsc/fsgofer/filter/config_amd64.go +++ b/runsc/fsgofer/filter/config_amd64.go @@ -25,8 +25,8 @@ import ( func init() { allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{ - {seccomp.AllowValue(linux.ARCH_GET_FS)}, - {seccomp.AllowValue(linux.ARCH_SET_FS)}, + {seccomp.EqualTo(linux.ARCH_GET_FS)}, + {seccomp.EqualTo(linux.ARCH_SET_FS)}, } allowedSyscalls[syscall.SYS_NEWFSTATAT] = []seccomp.Rule{} diff --git a/runsc/specutils/seccomp/BUILD b/runsc/specutils/seccomp/BUILD new file mode 100644 index 000000000..3520f2d6d --- /dev/null +++ b/runsc/specutils/seccomp/BUILD @@ -0,0 +1,34 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "seccomp", + srcs = [ + "audit_amd64.go", + "audit_arm64.go", + "seccomp.go", + ], + visibility = ["//:sandbox"], + deps = [ + "//pkg/abi/linux", + "//pkg/bpf", + "//pkg/log", + "//pkg/seccomp", + "//pkg/sentry/kernel", + "//pkg/sentry/syscalls/linux", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + ], +) + +go_test( + name = "seccomp_test", + size = "small", + srcs = ["seccomp_test.go"], + library = ":seccomp", + deps = [ + "//pkg/binary", + "//pkg/bpf", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + ], +) diff --git a/runsc/specutils/seccomp/audit_amd64.go b/runsc/specutils/seccomp/audit_amd64.go new file mode 100644 index 000000000..417cf4a7a --- /dev/null +++ b/runsc/specutils/seccomp/audit_amd64.go @@ -0,0 +1,25 @@ +// Copyright 2020 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package seccomp + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" +) + +const ( + nativeArchAuditNo = linux.AUDIT_ARCH_X86_64 +) diff --git a/runsc/specutils/seccomp/audit_arm64.go b/runsc/specutils/seccomp/audit_arm64.go new file mode 100644 index 000000000..b727ceff2 --- /dev/null +++ b/runsc/specutils/seccomp/audit_arm64.go @@ -0,0 +1,25 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package seccomp + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" +) + +const ( + nativeArchAuditNo = linux.AUDIT_ARCH_AARCH64 +) diff --git a/runsc/specutils/seccomp/seccomp.go b/runsc/specutils/seccomp/seccomp.go new file mode 100644 index 000000000..5932f7a41 --- /dev/null +++ b/runsc/specutils/seccomp/seccomp.go @@ -0,0 +1,229 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package seccomp implements some features of libseccomp in order to support +// OCI. +package seccomp + +import ( + "fmt" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/sentry/kernel" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" +) + +var ( + killThreadAction = linux.SECCOMP_RET_KILL_THREAD + trapAction = linux.SECCOMP_RET_TRAP + // runc always returns EPERM as the errorcode for SECCOMP_RET_ERRNO + errnoAction = linux.SECCOMP_RET_ERRNO.WithReturnCode(uint16(syscall.EPERM)) + // runc always returns EPERM as the errorcode for SECCOMP_RET_TRACE + traceAction = linux.SECCOMP_RET_TRACE.WithReturnCode(uint16(syscall.EPERM)) + allowAction = linux.SECCOMP_RET_ALLOW +) + +// BuildProgram generates a bpf program based on the given OCI seccomp +// config. 
+func BuildProgram(s *specs.LinuxSeccomp) (bpf.Program, error) { + defaultAction, err := convertAction(s.DefaultAction) + if err != nil { + return bpf.Program{}, fmt.Errorf("secomp default action: %w", err) + } + ruleset, err := convertRules(s) + if err != nil { + return bpf.Program{}, fmt.Errorf("invalid seccomp rules: %w", err) + } + + instrs, err := seccomp.BuildProgram(ruleset, defaultAction, killThreadAction) + if err != nil { + return bpf.Program{}, fmt.Errorf("building seccomp program: %w", err) + } + + program, err := bpf.Compile(instrs) + if err != nil { + return bpf.Program{}, fmt.Errorf("compiling seccomp program: %w", err) + } + + return program, nil +} + +// lookupSyscallNo gets the syscall number for the syscall with the given name +// for the given architecture. +func lookupSyscallNo(arch uint32, name string) (uint32, error) { + var table *kernel.SyscallTable + switch arch { + case linux.AUDIT_ARCH_X86_64: + table = slinux.AMD64 + case linux.AUDIT_ARCH_AARCH64: + table = slinux.ARM64 + } + if table == nil { + return 0, fmt.Errorf("unsupported architecture: %d", arch) + } + n, err := table.LookupNo(name) + if err != nil { + return 0, err + } + return uint32(n), nil +} + +// convertAction converts a LinuxSeccompAction to BPFAction +func convertAction(act specs.LinuxSeccompAction) (linux.BPFAction, error) { + // TODO(gvisor.dev/issue/3124): Update specs package to include ActLog and ActKillProcess. + switch act { + case specs.ActKill: + return killThreadAction, nil + case specs.ActTrap: + return trapAction, nil + case specs.ActErrno: + return errnoAction, nil + case specs.ActTrace: + return traceAction, nil + case specs.ActAllow: + return allowAction, nil + default: + return 0, fmt.Errorf("invalid action: %v", act) + } +} + +// convertRules converts OCI linux seccomp rules into RuleSets that can be used by +// the seccomp package to build a seccomp program. +func convertRules(s *specs.LinuxSeccomp) ([]seccomp.RuleSet, error) { + // NOTE: Architectures are only really relevant when calling 32bit syscalls + // on a 64bit system. Since we don't support that in gVisor anyway, we + // ignore Architectures and only test against the native architecture. + + ruleset := []seccomp.RuleSet{} + + for _, syscall := range s.Syscalls { + sysRules := seccomp.NewSyscallRules() + + action, err := convertAction(syscall.Action) + if err != nil { + return nil, err + } + + // Args + rules, err := convertArgs(syscall.Args) + if err != nil { + return nil, err + } + + for _, name := range syscall.Names { + syscallNo, err := lookupSyscallNo(nativeArchAuditNo, name) + if err != nil { + // If there is an error looking up the syscall number, assume it is + // not supported on this architecture and ignore it. This is, for + // better or worse, what runc does. + log.Warningf("OCI seccomp: ignoring syscall %q", name) + continue + } + + for _, rule := range rules { + sysRules.AddRule(uintptr(syscallNo), rule) + } + } + + ruleset = append(ruleset, seccomp.RuleSet{ + Rules: sysRules, + Action: action, + }) + } + + return ruleset, nil +} + +// convertArgs converts an OCI seccomp argument rule to a list of seccomp.Rule. +func convertArgs(args []specs.LinuxSeccompArg) ([]seccomp.Rule, error) { + argCounts := make([]uint, 6) + + for _, arg := range args { + if arg.Index > 6 { + return nil, fmt.Errorf("invalid index: %d", arg.Index) + } + + argCounts[arg.Index]++ + } + + // NOTE: If multiple rules apply to the same argument (same index) the + // action is triggered if any one of the rules matches (OR). 
If not, then + // all rules much match in order to trigger the action (AND). This appears to + // be some kind of legacy behavior of runc that nevertheless needs to be + // supported to maintain compatibility. + + hasMultipleArgs := false + for _, count := range argCounts { + if count > 1 { + hasMultipleArgs = true + break + } + } + + if hasMultipleArgs { + rules := []seccomp.Rule{} + + // Old runc behavior - do this for compatibility. + // Add rules as ORs by adding separate Rules. + for _, arg := range args { + rule := seccomp.Rule{nil, nil, nil, nil, nil, nil} + + if err := convertRule(arg, &rule); err != nil { + return nil, err + } + + rules = append(rules, rule) + } + + return rules, nil + } + + // Add rules as ANDs by adding to the same Rule. + rule := seccomp.Rule{nil, nil, nil, nil, nil, nil} + for _, arg := range args { + if err := convertRule(arg, &rule); err != nil { + return nil, err + } + } + + return []seccomp.Rule{rule}, nil +} + +// convertRule converts and adds the arg to a rule. +func convertRule(arg specs.LinuxSeccompArg, rule *seccomp.Rule) error { + switch arg.Op { + case specs.OpEqualTo: + rule[arg.Index] = seccomp.EqualTo(arg.Value) + case specs.OpNotEqual: + rule[arg.Index] = seccomp.NotEqual(arg.Value) + case specs.OpGreaterThan: + rule[arg.Index] = seccomp.GreaterThan(arg.Value) + case specs.OpGreaterEqual: + rule[arg.Index] = seccomp.GreaterThanOrEqual(arg.Value) + case specs.OpLessThan: + rule[arg.Index] = seccomp.LessThan(arg.Value) + case specs.OpLessEqual: + rule[arg.Index] = seccomp.LessThanOrEqual(arg.Value) + case specs.OpMaskedEqual: + rule[arg.Index] = seccomp.MaskedEqual(uintptr(arg.Value), uintptr(arg.ValueTwo)) + default: + return fmt.Errorf("unsupported operand: %q", arg.Op) + } + return nil +} diff --git a/runsc/specutils/seccomp/seccomp_test.go b/runsc/specutils/seccomp/seccomp_test.go new file mode 100644 index 000000000..2079cd2e9 --- /dev/null +++ b/runsc/specutils/seccomp/seccomp_test.go @@ -0,0 +1,414 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package seccomp + +import ( + "fmt" + "syscall" + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/bpf" +) + +type seccompData struct { + nr uint32 + arch uint32 + instructionPointer uint64 + args [6]uint64 +} + +// asInput converts a seccompData to a bpf.Input. +func asInput(d seccompData) bpf.Input { + return bpf.InputBytes{binary.Marshal(nil, binary.LittleEndian, d), binary.LittleEndian} +} + +// testInput creates an Input struct with given seccomp input values. +func testInput(arch uint32, syscallName string, args *[6]uint64) bpf.Input { + syscallNo, err := lookupSyscallNo(arch, syscallName) + if err != nil { + // Assume tests set valid syscall names. 
+ panic(err) + } + + if args == nil { + argArray := [6]uint64{0, 0, 0, 0, 0, 0} + args = &argArray + } + + data := seccompData{ + nr: syscallNo, + arch: arch, + args: *args, + } + + return asInput(data) +} + +// testCase holds a seccomp test case. +type testCase struct { + name string + config specs.LinuxSeccomp + input bpf.Input + expected uint32 +} + +var ( + // seccompTests is a list of speccomp test cases. + seccompTests = []testCase{ + { + name: "default_allow", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + }, + input: testInput(nativeArchAuditNo, "read", nil), + expected: uint32(allowAction), + }, + { + name: "default_deny", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActErrno, + }, + input: testInput(nativeArchAuditNo, "read", nil), + expected: uint32(errnoAction), + }, + { + name: "deny_arch", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getcwd", + }, + Action: specs.ActErrno, + }, + }, + }, + // Syscall matches but the arch is AUDIT_ARCH_X86 so the return + // value is the bad arch action. + input: asInput(seccompData{nr: 183, arch: 0x40000003}), // + expected: uint32(killThreadAction), + }, + { + name: "match_name_errno", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getcwd", + "chmod", + }, + Action: specs.ActErrno, + }, + { + Names: []string{ + "write", + }, + Action: specs.ActTrace, + }, + }, + }, + input: testInput(nativeArchAuditNo, "getcwd", nil), + expected: uint32(errnoAction), + }, + { + name: "match_name_trace", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getcwd", + "chmod", + }, + Action: specs.ActErrno, + }, + { + Names: []string{ + "write", + }, + Action: specs.ActTrace, + }, + }, + }, + input: testInput(nativeArchAuditNo, "write", nil), + expected: uint32(traceAction), + }, + { + name: "no_match_name_allow", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getcwd", + "chmod", + }, + Action: specs.ActErrno, + }, + { + Names: []string{ + "write", + }, + Action: specs.ActTrace, + }, + }, + }, + input: testInput(nativeArchAuditNo, "open", nil), + expected: uint32(allowAction), + }, + { + name: "simple_match_args", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}), + expected: uint32(errnoAction), + }, + { + name: "match_args_or", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS, + Op: specs.OpEqualTo, + }, + { + Index: 0, + Value: syscall.CLONE_VM, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}), + expected: uint32(errnoAction), + }, + { + name: "match_args_and", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getsockopt", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 1, + Value: 
syscall.SOL_SOCKET, + Op: specs.OpEqualTo, + }, + { + Index: 2, + Value: syscall.SO_PEERCRED, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET, syscall.SO_PEERCRED}), + expected: uint32(errnoAction), + }, + { + name: "no_match_args_and", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "getsockopt", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 1, + Value: syscall.SOL_SOCKET, + Op: specs.OpEqualTo, + }, + { + Index: 2, + Value: syscall.SO_PEERCRED, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET}), + expected: uint32(allowAction), + }, + { + name: "Simple args (no match)", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS, + Op: specs.OpEqualTo, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_VM}), + expected: uint32(allowAction), + }, + { + name: "OpMaskedEqual (match)", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS, + ValueTwo: syscall.CLONE_FS, + Op: specs.OpMaskedEqual, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS | syscall.CLONE_VM}), + expected: uint32(errnoAction), + }, + { + name: "OpMaskedEqual (no match)", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActAllow, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: syscall.CLONE_FS | syscall.CLONE_VM, + ValueTwo: syscall.CLONE_FS | syscall.CLONE_VM, + Op: specs.OpMaskedEqual, + }, + }, + Action: specs.ActErrno, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}), + expected: uint32(allowAction), + }, + { + name: "OpMaskedEqual (clone)", + config: specs.LinuxSeccomp{ + DefaultAction: specs.ActErrno, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{ + "clone", + }, + // This comes from the Docker default seccomp + // profile for clone. + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: 0x7e020000, + ValueTwo: 0x0, + Op: specs.OpMaskedEqual, + }, + }, + Action: specs.ActAllow, + }, + }, + }, + input: testInput(nativeArchAuditNo, "clone", &[6]uint64{0x50f00}), + expected: uint32(allowAction), + }, + } +) + +// TestRunscSeccomp generates seccomp programs from OCI config and executes +// them using runsc's library, comparing against expected results. +func TestRunscSeccomp(t *testing.T) { + for _, tc := range seccompTests { + t.Run(tc.name, func(t *testing.T) { + runscProgram, err := BuildProgram(&tc.config) + if err != nil { + t.Fatalf("generating runsc BPF: %v", err) + } + + if err := checkProgram(runscProgram, tc.input, tc.expected); err != nil { + t.Fatalf("running runsc BPF: %v", err) + } + }) + } +} + +// checkProgram runs the given program over the given input and checks the +// result against the expected output. 
+func checkProgram(p bpf.Program, in bpf.Input, expected uint32) error { + result, err := bpf.Exec(p, in) + if err != nil { + return err + } + + if result != expected { + // Include a decoded version of the program in output for debugging purposes. + decoded, _ := bpf.DecodeProgram(p) + return fmt.Errorf("Unexpected result: got: %d, expected: %d\nBPF Program\n%s", result, expected, decoded) + } + + return nil +} diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index a2275398a..0392e3e83 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -111,11 +111,6 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.") } - // TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox. - if spec.Linux != nil && spec.Linux.Seccomp != nil { - log.Warningf("Seccomp spec is being ignored") - } - if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { if err := validateRootfsPropagation(spec.Linux.RootfsPropagation); err != nil { return err |
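
The new runsc/specutils/seccomp package introduced above turns an OCI spec's Linux.Seccomp section into a compiled BPF program, which the loader then attaches to the container's leader task via AppendSyscallFilter when the sandbox is run with the new --oci-seccomp flag (previously the seccomp section was ignored with a warning). The sketch below is a minimal, self-contained illustration of that flow using only APIs added or exercised in this change (BuildProgram, bpf.Exec, bpf.InputBytes, binary.Marshal); the seccompData layout and the hard-coded clone(2) syscall number mirror the test harness and assume an amd64 host, so treat it as an example under those assumptions rather than part of the change itself.

package main

import (
	"fmt"
	"syscall"

	specs "github.com/opencontainers/runtime-spec/specs-go"
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/binary"
	"gvisor.dev/gvisor/pkg/bpf"
	"gvisor.dev/gvisor/runsc/specutils/seccomp"
)

// seccompData mirrors the kernel's struct seccomp_data, the input that the
// generated BPF program inspects (same layout as in seccomp_test.go above).
type seccompData struct {
	nr                 uint32
	arch               uint32
	instructionPointer uint64
	args               [6]uint64
}

func main() {
	// OCI policy: allow everything, but return EPERM for any clone(2) whose
	// flags argument is not exactly CLONE_FS.
	cfg := &specs.LinuxSeccomp{
		DefaultAction: specs.ActAllow,
		Syscalls: []specs.LinuxSyscall{{
			Names:  []string{"clone"},
			Action: specs.ActErrno,
			Args: []specs.LinuxSeccompArg{{
				Index: 0,
				Value: syscall.CLONE_FS,
				Op:    specs.OpNotEqual,
			}},
		}},
	}

	// Compile the OCI config into a BPF program, as the loader does before
	// calling task.AppendSyscallFilter on the container's leader task.
	program, err := seccomp.BuildProgram(cfg)
	if err != nil {
		panic(err)
	}

	// Evaluate the program against a synthetic clone(CLONE_VM) call.
	// 56 is clone(2) on amd64; the arch field must match the native
	// architecture or the program returns the bad-arch (kill thread) action.
	data := seccompData{
		nr:   56,
		arch: linux.AUDIT_ARCH_X86_64,
		args: [6]uint64{syscall.CLONE_VM},
	}
	in := bpf.InputBytes{binary.Marshal(nil, binary.LittleEndian, data), binary.LittleEndian}
	ret, err := bpf.Exec(program, in)
	if err != nil {
		panic(err)
	}
	fmt.Printf("clone(CLONE_VM) -> %v\n", linux.BPFAction(ret)) // expect errno(EPERM)
}

With this change in place, starting the sandbox as "runsc --oci-seccomp run ..." applies the container image's OCI seccomp profile inside the sandbox along these lines; without the flag the previous behavior (logging "Seccomp spec is being ignored") is preserved.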