author    Ian Lewis <ianlewis@google.com>  2020-09-15 23:17:36 -0700
committer gVisor bot <gvisor-bot@google.com>  2020-09-15 23:19:17 -0700
commit    dcd532e2e416aa81ca9ac42dc153731855f91418 (patch)
tree      1fd10c9c150d8a0aec67e36d8f87c6910e16ff70
parent    c053c4bb03819a9b9bb4d485000789cb653cd9c7 (diff)
Add support for OCI seccomp filters in the sandbox.
OCI configuration includes support for specifying seccomp filters. In runc,
these filter configurations are converted into seccomp BPF programs and loaded
into the kernel via libseccomp. runsc needs to be a static binary, so for runsc
we cannot rely on a C library and need to implement the functionality in Go.

The generator added here implements basic support for taking OCI seccomp
configuration and converting it into a seccomp BPF program with the same
behavior as a program generated by libseccomp.

- New conditional operations were added to pkg/seccomp to support operations
  available in OCI.
- AllowAny and AllowValue were renamed to MatchAny and EqualTo to better
  reflect that syscalls matching the conditionals result in the provided
  action, not simply SCMP_RET_ALLOW.
- BuildProgram in pkg/seccomp no longer panics if provided an empty list of
  rules. It now builds a program with the architecture sanity check only.
- ProgramBuilder now allows adding labels that are unused. However, backwards
  jumps are still not permitted.

Fixes #510

PiperOrigin-RevId: 331938697
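As an illustration of the points above, here is a minimal, hypothetical sketch of how the renamed matchers and the new three-argument BuildProgram signature fit together. The syscalls, matcher values, and actions are arbitrary examples, not taken from this change.

package main

import (
	"fmt"
	"syscall"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/seccomp"
)

func main() {
	// Within a single Rule, every specified argument matcher must match;
	// multiple Rule entries for the same syscall are ORed together.
	rules := []seccomp.RuleSet{
		{
			Rules: seccomp.SyscallRules{
				syscall.SYS_FUTEX: []seccomp.Rule{
					{
						seccomp.MatchAny{}, // was AllowAny
						seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), // was AllowValue
					},
				},
				syscall.SYS_GETPID: {},
			},
			Action: linux.SECCOMP_RET_ALLOW,
		},
	}

	// BuildProgram now takes a separate action for the architecture
	// mismatch case (badArchAction) in addition to the default action.
	instrs, err := seccomp.BuildProgram(rules, linux.SECCOMP_RET_TRAP, linux.SECCOMP_RET_KILL_THREAD)
	if err != nil {
		panic(err)
	}
	fmt.Printf("generated %d BPF instructions\n", len(instrs))
}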
-rw-r--r--  pkg/abi/linux/seccomp.go | 23
-rw-r--r--  pkg/bpf/decoder.go | 13
-rw-r--r--  pkg/bpf/decoder_test.go | 4
-rw-r--r--  pkg/bpf/program_builder.go | 23
-rw-r--r--  pkg/bpf/program_builder_test.go | 42
-rw-r--r--  pkg/seccomp/seccomp.go | 177
-rw-r--r--  pkg/seccomp/seccomp_rules.go | 75
-rw-r--r--  pkg/seccomp/seccomp_test.go | 603
-rw-r--r--  pkg/seccomp/seccomp_test_victim.go | 2
-rw-r--r--  pkg/sentry/kernel/syscalls.go | 10
-rw-r--r--  pkg/sentry/platform/ptrace/subprocess_amd64.go | 2
-rw-r--r--  pkg/sentry/platform/ptrace/subprocess_linux.go | 10
-rw-r--r--  runsc/boot/BUILD | 2
-rw-r--r--  runsc/boot/filter/config.go | 478
-rw-r--r--  runsc/boot/filter/config_amd64.go | 4
-rw-r--r--  runsc/boot/filter/config_profile.go | 6
-rw-r--r--  runsc/boot/loader.go | 29
-rw-r--r--  runsc/config/config.go | 4
-rw-r--r--  runsc/config/flags.go | 1
-rw-r--r--  runsc/fsgofer/filter/config.go | 154
-rw-r--r--  runsc/fsgofer/filter/config_amd64.go | 4
-rw-r--r--  runsc/specutils/seccomp/BUILD | 34
-rw-r--r--  runsc/specutils/seccomp/audit_amd64.go | 25
-rw-r--r--  runsc/specutils/seccomp/audit_arm64.go | 25
-rw-r--r--  runsc/specutils/seccomp/seccomp.go | 229
-rw-r--r--  runsc/specutils/seccomp/seccomp_test.go | 414
-rw-r--r--  runsc/specutils/specutils.go | 5
27 files changed, 1943 insertions, 455 deletions
diff --git a/pkg/abi/linux/seccomp.go b/pkg/abi/linux/seccomp.go
index d0607e256..b07cafe12 100644
--- a/pkg/abi/linux/seccomp.go
+++ b/pkg/abi/linux/seccomp.go
@@ -34,11 +34,11 @@ type BPFAction uint32
const (
SECCOMP_RET_KILL_PROCESS BPFAction = 0x80000000
- SECCOMP_RET_KILL_THREAD = 0x00000000
- SECCOMP_RET_TRAP = 0x00030000
- SECCOMP_RET_ERRNO = 0x00050000
- SECCOMP_RET_TRACE = 0x7ff00000
- SECCOMP_RET_ALLOW = 0x7fff0000
+ SECCOMP_RET_KILL_THREAD BPFAction = 0x00000000
+ SECCOMP_RET_TRAP BPFAction = 0x00030000
+ SECCOMP_RET_ERRNO BPFAction = 0x00050000
+ SECCOMP_RET_TRACE BPFAction = 0x7ff00000
+ SECCOMP_RET_ALLOW BPFAction = 0x7fff0000
)
func (a BPFAction) String() string {
@@ -64,6 +64,19 @@ func (a BPFAction) Data() uint16 {
return uint16(a & SECCOMP_RET_DATA)
}
+// WithReturnCode sets the lower 16 bits of the SECCOMP_RET_ERRNO or
+// SECCOMP_RET_TRACE actions to the provided return code, overwriting the previous
+// action, and returns a new BPFAction. If not SECCOMP_RET_ERRNO or
+// SECCOMP_RET_TRACE then this panics.
+func (a BPFAction) WithReturnCode(code uint16) BPFAction {
+ // mask out the previous return value
+ baseAction := a & SECCOMP_RET_ACTION_FULL
+ if baseAction == SECCOMP_RET_ERRNO || baseAction == SECCOMP_RET_TRACE {
+ return BPFAction(uint32(baseAction) | uint32(code))
+ }
+ panic("WithReturnCode only valid for SECCOMP_RET_ERRNO and SECCOMP_RET_TRACE")
+}
+
// SockFprog is sock_fprog taken from <linux/filter.h>.
type SockFprog struct {
Len uint16
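A short sketch of the new helper in use; only the API shown in this hunk is assumed, and the EPERM value is just an example.

package main

import (
	"fmt"
	"syscall"

	"gvisor.dev/gvisor/pkg/abi/linux"
)

func main() {
	// Start from the base SECCOMP_RET_ERRNO action and attach EPERM as the
	// return code carried in the lower 16 bits (SECCOMP_RET_DATA).
	act := linux.SECCOMP_RET_ERRNO.WithReturnCode(uint16(syscall.EPERM))
	fmt.Printf("action=%#x data=%d\n", uint32(act), act.Data())
}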
diff --git a/pkg/bpf/decoder.go b/pkg/bpf/decoder.go
index c8ee0c3b1..069d0395d 100644
--- a/pkg/bpf/decoder.go
+++ b/pkg/bpf/decoder.go
@@ -21,10 +21,15 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
)
-// DecodeProgram translates an array of BPF instructions into text format.
-func DecodeProgram(program []linux.BPFInstruction) (string, error) {
+// DecodeProgram translates a compiled BPF program into text format.
+func DecodeProgram(p Program) (string, error) {
+ return DecodeInstructions(p.instructions)
+}
+
+// DecodeInstructions translates an array of BPF instructions into text format.
+func DecodeInstructions(instns []linux.BPFInstruction) (string, error) {
var ret bytes.Buffer
- for line, s := range program {
+ for line, s := range instns {
ret.WriteString(fmt.Sprintf("%v: ", line))
if err := decode(s, line, &ret); err != nil {
return "", err
@@ -34,7 +39,7 @@ func DecodeProgram(program []linux.BPFInstruction) (string, error) {
return ret.String(), nil
}
-// Decode translates BPF instruction into text format.
+// Decode translates a single BPF instruction into text format.
func Decode(inst linux.BPFInstruction) (string, error) {
var ret bytes.Buffer
err := decode(inst, -1, &ret)
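A sketch of the two entry points side by side, assuming pkg/bpf's exported Stmt and Compile helpers (which the tests below also use):

package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/bpf"
)

func main() {
	// Two trivial instructions: load a word from the input, then allow.
	instrs := []linux.BPFInstruction{
		bpf.Stmt(bpf.Ld|bpf.Abs|bpf.W, 0),
		bpf.Stmt(bpf.Ret|bpf.K, uint32(linux.SECCOMP_RET_ALLOW)),
	}

	// DecodeInstructions works on the raw instruction slice...
	text, err := bpf.DecodeInstructions(instrs)
	if err != nil {
		panic(err)
	}
	fmt.Println(text)

	// ...while DecodeProgram now takes a compiled Program.
	prog, err := bpf.Compile(instrs)
	if err != nil {
		panic(err)
	}
	text, err = bpf.DecodeProgram(prog)
	if err != nil {
		panic(err)
	}
	fmt.Println(text)
}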
diff --git a/pkg/bpf/decoder_test.go b/pkg/bpf/decoder_test.go
index 6a023f0c0..bb971ce21 100644
--- a/pkg/bpf/decoder_test.go
+++ b/pkg/bpf/decoder_test.go
@@ -93,7 +93,7 @@ func TestDecode(t *testing.T) {
}
}
-func TestDecodeProgram(t *testing.T) {
+func TestDecodeInstructions(t *testing.T) {
for _, test := range []struct {
name string
program []linux.BPFInstruction
@@ -126,7 +126,7 @@ func TestDecodeProgram(t *testing.T) {
program: []linux.BPFInstruction{Stmt(Ld+Abs+W, 10), Stmt(Ld+Len+Mem, 0)},
fail: true},
} {
- got, err := DecodeProgram(test.program)
+ got, err := DecodeInstructions(test.program)
if test.fail {
if err == nil {
t.Errorf("%s: Decode(...) failed, expected: 'error', got: %q", test.name, got)
diff --git a/pkg/bpf/program_builder.go b/pkg/bpf/program_builder.go
index 7992044d0..caaf99c83 100644
--- a/pkg/bpf/program_builder.go
+++ b/pkg/bpf/program_builder.go
@@ -32,13 +32,21 @@ type ProgramBuilder struct {
// Maps label names to label objects.
labels map[string]*label
+ // unusableLabels are labels that are added before being referenced in a
+ // jump. Any labels added this way cannot be referenced later in order to
+ // avoid backwards references.
+ unusableLabels map[string]bool
+
// Array of BPF instructions that makes up the program.
instructions []linux.BPFInstruction
}
// NewProgramBuilder creates a new ProgramBuilder instance.
func NewProgramBuilder() *ProgramBuilder {
- return &ProgramBuilder{labels: map[string]*label{}}
+ return &ProgramBuilder{
+ labels: map[string]*label{},
+ unusableLabels: map[string]bool{},
+ }
}
// label contains information to resolve a label to an offset.
@@ -108,9 +116,12 @@ func (b *ProgramBuilder) AddJumpLabels(code uint16, k uint32, jtLabel, jfLabel s
func (b *ProgramBuilder) AddLabel(name string) error {
l, ok := b.labels[name]
if !ok {
- // This is done to catch jump backwards cases, but it's not strictly wrong
- // to have unused labels.
- return fmt.Errorf("Adding a label that hasn't been used is not allowed: %v", name)
+ if _, ok = b.unusableLabels[name]; ok {
+ return fmt.Errorf("label %q already set", name)
+ }
+ // Mark the label as unusable. This is done to catch backwards jumps.
+ b.unusableLabels[name] = true
+ return nil
}
if l.target != -1 {
return fmt.Errorf("label %q target already set: %v", name, l.target)
@@ -141,6 +152,10 @@ func (b *ProgramBuilder) addLabelSource(labelName string, t jmpType) {
func (b *ProgramBuilder) resolveLabels() error {
for key, v := range b.labels {
+ if _, ok := b.unusableLabels[key]; ok {
+ return fmt.Errorf("backwards reference detected for label: %q", key)
+ }
+
if v.target == -1 {
return fmt.Errorf("label target not set: %v", key)
}
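The tests that follow exercise the unused-label and backwards-reference cases separately; as a combined usage sketch (instruction choices are arbitrary), forward references and genuinely unused labels are accepted, while a label defined before any jump references it can no longer be targeted:

package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/bpf"
)

func main() {
	p := bpf.NewProgramBuilder()

	// Load a word and jump forward to "allow" when it equals 1.
	p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, 0)
	p.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, 1, "allow", 0)

	// An unused label is now accepted instead of returning an error.
	if err := p.AddLabel("unused"); err != nil {
		panic(err)
	}
	p.AddStmt(bpf.Ret|bpf.K, uint32(linux.SECCOMP_RET_TRAP))

	// Resolve the forward reference.
	if err := p.AddLabel("allow"); err != nil {
		panic(err)
	}
	p.AddStmt(bpf.Ret|bpf.K, uint32(linux.SECCOMP_RET_ALLOW))

	instrs, err := p.Instructions()
	if err != nil {
		panic(err)
	}
	fmt.Printf("%d instructions\n", len(instrs))
}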
diff --git a/pkg/bpf/program_builder_test.go b/pkg/bpf/program_builder_test.go
index 92ca5f4c3..37f684f25 100644
--- a/pkg/bpf/program_builder_test.go
+++ b/pkg/bpf/program_builder_test.go
@@ -26,16 +26,16 @@ func validate(p *ProgramBuilder, expected []linux.BPFInstruction) error {
if err != nil {
return fmt.Errorf("Instructions() failed: %v", err)
}
- got, err := DecodeProgram(instructions)
+ got, err := DecodeInstructions(instructions)
if err != nil {
- return fmt.Errorf("DecodeProgram('instructions') failed: %v", err)
+ return fmt.Errorf("DecodeInstructions('instructions') failed: %v", err)
}
- expectedDecoded, err := DecodeProgram(expected)
+ expectedDecoded, err := DecodeInstructions(expected)
if err != nil {
- return fmt.Errorf("DecodeProgram('expected') failed: %v", err)
+ return fmt.Errorf("DecodeInstructions('expected') failed: %v", err)
}
if got != expectedDecoded {
- return fmt.Errorf("DecodeProgram() failed, expected: %q, got: %q", expectedDecoded, got)
+ return fmt.Errorf("DecodeInstructions() failed, expected: %q, got: %q", expectedDecoded, got)
}
return nil
}
@@ -124,10 +124,38 @@ func TestProgramBuilderLabelWithNoInstruction(t *testing.T) {
}
}
+// TestProgramBuilderUnusedLabel tests that adding an unused label doesn't
+// cause program generation to fail.
func TestProgramBuilderUnusedLabel(t *testing.T) {
p := NewProgramBuilder()
- if err := p.AddLabel("unused"); err == nil {
- t.Errorf("AddLabel(unused) should have failed")
+ p.AddStmt(Ld+Abs+W, 10)
+ p.AddJump(Jmp+Ja, 10, 0, 0)
+
+ expected := []linux.BPFInstruction{
+ Stmt(Ld+Abs+W, 10),
+ Jump(Jmp+Ja, 10, 0, 0),
+ }
+
+ if err := p.AddLabel("unused"); err != nil {
+ t.Errorf("AddLabel(unused) should have succeeded")
+ }
+
+ if err := validate(p, expected); err != nil {
+ t.Errorf("Validate() failed: %v", err)
+ }
+}
+
+// TestProgramBuilderBackwardsReference tests that including a backwards
+// reference to a label in a program causes a failure.
+func TestProgramBuilderBackwardsReference(t *testing.T) {
+ p := NewProgramBuilder()
+ if err := p.AddLabel("bw_label"); err != nil {
+ t.Errorf("failed to add label")
+ }
+ p.AddStmt(Ld+Abs+W, 10)
+ p.AddJumpTrueLabel(Jmp+Jeq+K, 10, "bw_label", 0)
+ if _, err := p.Instructions(); err == nil {
+ t.Errorf("Instructions() should have failed")
}
}
diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go
index 55fd6967e..752e2dc32 100644
--- a/pkg/seccomp/seccomp.go
+++ b/pkg/seccomp/seccomp.go
@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package seccomp provides basic seccomp filters for x86_64 (little endian).
+// Package seccomp provides generation of basic seccomp filters. Currently,
+// only little endian systems are supported.
package seccomp
import (
@@ -64,9 +65,9 @@ func Install(rules SyscallRules) error {
Rules: rules,
Action: linux.SECCOMP_RET_ALLOW,
},
- }, defaultAction)
+ }, defaultAction, defaultAction)
if log.IsLogging(log.Debug) {
- programStr, errDecode := bpf.DecodeProgram(instrs)
+ programStr, errDecode := bpf.DecodeInstructions(instrs)
if errDecode != nil {
programStr = fmt.Sprintf("Error: %v\n%s", errDecode, programStr)
}
@@ -117,7 +118,7 @@ var SyscallName = func(sysno uintptr) string {
// BuildProgram builds a BPF program from the given map of actions to matching
// SyscallRules. The single generated program covers all provided RuleSets.
-func BuildProgram(rules []RuleSet, defaultAction linux.BPFAction) ([]linux.BPFInstruction, error) {
+func BuildProgram(rules []RuleSet, defaultAction, badArchAction linux.BPFAction) ([]linux.BPFInstruction, error) {
program := bpf.NewProgramBuilder()
// Be paranoid and check that syscall is done in the expected architecture.
@@ -128,7 +129,7 @@ func BuildProgram(rules []RuleSet, defaultAction linux.BPFAction) ([]linux.BPFIn
// defaultLabel is at the bottom of the program. The size of program
// may exceeds 255 lines, which is the limit of a condition jump.
program.AddJump(bpf.Jmp|bpf.Jeq|bpf.K, LINUX_AUDIT_ARCH, skipOneInst, 0)
- program.AddDirectJumpLabel(defaultLabel)
+ program.AddStmt(bpf.Ret|bpf.K, uint32(badArchAction))
if err := buildIndex(rules, program); err != nil {
return nil, err
}
@@ -144,6 +145,11 @@ func BuildProgram(rules []RuleSet, defaultAction linux.BPFAction) ([]linux.BPFIn
// buildIndex builds a BST to quickly search through all syscalls.
func buildIndex(rules []RuleSet, program *bpf.ProgramBuilder) error {
+ // Do nothing if rules is empty.
+ if len(rules) == 0 {
+ return nil
+ }
+
// Build a list of all application system calls, across all given rule
// sets. We have a simple BST, but may dispatch individual matchers
// with different actions. The matchers are evaluated linearly.
@@ -216,42 +222,163 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action linux.BPFAc
labelled := false
for i, arg := range rule {
if arg != nil {
+ // Break out early if using MatchAny since no further
+ // instructions are required.
+ if _, ok := arg.(MatchAny); ok {
+ continue
+ }
+
+ // Determine the data offset for low and high bits of input.
+ dataOffsetLow := seccompDataOffsetArgLow(i)
+ dataOffsetHigh := seccompDataOffsetArgHigh(i)
+ if i == RuleIP {
+ dataOffsetLow = seccompDataOffsetIPLow
+ dataOffsetHigh = seccompDataOffsetIPHigh
+ }
+
+ // Add the conditional operation. Input values to the BPF
+ // program are 64bit values. However, comparisons in BPF can
+ // only be done on 32bit values. This means that we need to do
+ // multiple BPF comparisons in order to do one logical 64bit
+ // comparison.
switch a := arg.(type) {
- case AllowAny:
- case AllowValue:
- dataOffsetLow := seccompDataOffsetArgLow(i)
- dataOffsetHigh := seccompDataOffsetArgHigh(i)
- if i == RuleIP {
- dataOffsetLow = seccompDataOffsetIPLow
- dataOffsetHigh = seccompDataOffsetIPHigh
- }
+ case EqualTo:
+ // EqualTo checks that both the higher and lower 32bits are equal.
high, low := uint32(a>>32), uint32(a)
- // assert arg_low == low
+
+ // Assert that the lower 32bits are equal.
+ // arg_low == low ? continue : violation
p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow)
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
- // assert arg_high == high
+
+ // Assert that the higher 32bits are also equal.
+ // arg_high == high ? continue/success : violation
p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh)
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
labelled = true
+ case NotEqual:
+ // NotEqual checks that either the higher or lower 32bits
+ // are *not* equal.
+ high, low := uint32(a>>32), uint32(a)
+ labelGood := fmt.Sprintf("ne%v", i)
+
+ // Check if the lower 32bits are (not) equal.
+ // arg_low == low ? continue : success
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow)
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
+
+ // Assert that the higher 32bits are not equal (assuming
+ // lower bits are equal).
+ // arg_high == high ? violation : continue/success
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh)
+ p.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0)
+ p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
+ labelled = true
case GreaterThan:
- dataOffsetLow := seccompDataOffsetArgLow(i)
- dataOffsetHigh := seccompDataOffsetArgHigh(i)
- if i == RuleIP {
- dataOffsetLow = seccompDataOffsetIPLow
- dataOffsetHigh = seccompDataOffsetIPHigh
- }
- labelGood := fmt.Sprintf("gt%v", i)
+ // GreaterThan checks that the higher 32bits is greater
+ // *or* that the higher 32bits are equal and the lower
+ // 32bits are greater.
high, low := uint32(a>>32), uint32(a)
- // assert arg_high < high
+ labelGood := fmt.Sprintf("gt%v", i)
+
+ // Assert the higher 32bits are greater than or equal.
+ // arg_high >= high ? continue : violation (arg_high < high)
p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh)
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jge|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
- // arg_high > high
+
+ // Assert that the lower 32bits are greater.
+ // arg_high == high ? continue : success (arg_high > high)
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
- // arg_low < low
+ // arg_low > low ? continue/success : violation (arg_high == high and arg_low <= low)
p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow)
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jgt|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
labelled = true
+ case GreaterThanOrEqual:
+ // GreaterThanOrEqual checks that the higher 32bits is
+ // greater *or* that the higher 32bits are equal and the
+ // lower 32bits are greater than or equal.
+ high, low := uint32(a>>32), uint32(a)
+ labelGood := fmt.Sprintf("ge%v", i)
+
+ // Assert the higher 32bits are greater than or equal.
+ // arg_high >= high ? continue : violation (arg_high < high)
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh)
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jge|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
+ // arg_high == high ? continue : success (arg_high > high)
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
+
+ // Assert that the lower 32bits are greater than or equal (assuming the
+ // higher bits are equal).
+ // arg_low >= low ? continue/success : violation (arg_high == high and arg_low < low)
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow)
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jge|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
+ p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
+ labelled = true
+ case LessThan:
+ // LessThan checks that the higher 32bits is less *or* that
+ // the higher 32bits are equal and the lower 32bits are
+ // less.
+ high, low := uint32(a>>32), uint32(a)
+ labelGood := fmt.Sprintf("lt%v", i)
+
+ // Assert the higher 32bits are less than or equal.
+ // arg_high > high ? violation : continue
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh)
+ p.AddJumpTrueLabel(bpf.Jmp|bpf.Jgt|bpf.K, high, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0)
+ // arg_high == high ? continue : success (arg_high < high)
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
+
+ // Assert that the lower 32bits are less (assuming the
+ // higher bits are equal).
+ // arg_low >= low ? violation : continue
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow)
+ p.AddJumpTrueLabel(bpf.Jmp|bpf.Jge|bpf.K, low, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0)
+ p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
+ labelled = true
+ case LessThanOrEqual:
+ // LessThanOrEqual checks that the higher 32bits is less *or* that
+ // the higher 32bits are equal and the lower 32bits are
+ // less than or equal.
+ high, low := uint32(a>>32), uint32(a)
+ labelGood := fmt.Sprintf("le%v", i)
+
+ // Assert the higher 32bits are less than or equal.
+ // assert arg_high > high ? violation : continue
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh)
+ p.AddJumpTrueLabel(bpf.Jmp|bpf.Jgt|bpf.K, high, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0)
+ // arg_high == high ? continue : success
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
+
+ // Assert the lower bits are less than or equal (assuming
+ // the higher bits are equal).
+ // arg_low > low ? violation : success
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow)
+ p.AddJumpTrueLabel(bpf.Jmp|bpf.Jgt|bpf.K, low, ruleViolationLabel(ruleSetIdx, sysno, ruleidx), 0)
+ p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
+ labelled = true
+ case maskedEqual:
+ // MaskedEqual checks that the bitwise AND of the value and
+ // mask are equal for both the higher and lower 32bits.
+ high, low := uint32(a.value>>32), uint32(a.value)
+ maskHigh, maskLow := uint32(a.mask>>32), uint32(a.mask)
+
+ // Assert that the lower 32bits are equal when masked.
+ // A <- arg_low.
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow)
+ // A <- arg_low & maskLow
+ p.AddStmt(bpf.Alu|bpf.And|bpf.K, maskLow)
+ // Assert that arg_low & maskLow == low.
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
+
+ // Assert that the higher 32bits are equal when masked.
+ // A <- arg_high
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh)
+ // A <- arg_high & maskHigh
+ p.AddStmt(bpf.Alu|bpf.And|bpf.K, maskHigh)
+ // Assert that arg_high & maskHigh == high.
+ p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
+ labelled = true
default:
return fmt.Errorf("unknown syscall rule type: %v", reflect.TypeOf(a))
}
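Because BPF can only compare 32-bit words, each 64-bit matcher above is lowered to a pair of checks on the high and low halves of the argument. A small, hypothetical sketch that makes the lowering visible by decoding a one-rule program (the syscall number and threshold are arbitrary):

package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/bpf"
	"gvisor.dev/gvisor/pkg/seccomp"
)

func main() {
	// Allow syscall 1 only when arg0 > 0x00000002_00000002; everything
	// else traps, and a foreign architecture kills the thread.
	rules := []seccomp.RuleSet{
		{
			Rules: seccomp.SyscallRules{
				1: []seccomp.Rule{
					{seccomp.GreaterThan(0x00000002_00000002)},
				},
			},
			Action: linux.SECCOMP_RET_ALLOW,
		},
	}
	instrs, err := seccomp.BuildProgram(rules, linux.SECCOMP_RET_TRAP, linux.SECCOMP_RET_KILL_THREAD)
	if err != nil {
		panic(err)
	}

	// The decoded listing shows the high-word jge/jeq pair followed by the
	// low-word jgt check described in the comments above.
	text, err := bpf.DecodeInstructions(instrs)
	if err != nil {
		panic(err)
	}
	fmt.Println(text)
}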
diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go
index a52dc1b4e..daf165bbf 100644
--- a/pkg/seccomp/seccomp_rules.go
+++ b/pkg/seccomp/seccomp_rules.go
@@ -39,28 +39,79 @@ func seccompDataOffsetArgHigh(i int) uint32 {
return seccompDataOffsetArgLow(i) + 4
}
-// AllowAny is marker to indicate any value will be accepted.
-type AllowAny struct{}
+// MatchAny is marker to indicate any value will be accepted.
+type MatchAny struct{}
-func (a AllowAny) String() (s string) {
+func (a MatchAny) String() (s string) {
return "*"
}
-// AllowValue specifies a value that needs to be strictly matched.
-type AllowValue uintptr
+// EqualTo specifies a value that needs to be strictly matched.
+type EqualTo uintptr
+
+func (a EqualTo) String() (s string) {
+ return fmt.Sprintf("== %#x", uintptr(a))
+}
+
+// NotEqual specifies a value that is strictly not equal.
+type NotEqual uintptr
+
+func (a NotEqual) String() (s string) {
+ return fmt.Sprintf("!= %#x", uintptr(a))
+}
// GreaterThan specifies a value that needs to be strictly smaller.
type GreaterThan uintptr
-func (a AllowValue) String() (s string) {
- return fmt.Sprintf("%#x ", uintptr(a))
+func (a GreaterThan) String() (s string) {
+ return fmt.Sprintf("> %#x", uintptr(a))
+}
+
+// GreaterThanOrEqual specifies a value that needs to be smaller or equal.
+type GreaterThanOrEqual uintptr
+
+func (a GreaterThanOrEqual) String() (s string) {
+ return fmt.Sprintf(">= %#x", uintptr(a))
+}
+
+// LessThan specifies a value that needs to be strictly greater.
+type LessThan uintptr
+
+func (a LessThan) String() (s string) {
+ return fmt.Sprintf("< %#x", uintptr(a))
+}
+
+// LessThanOrEqual specifies a value that needs to be greater or equal.
+type LessThanOrEqual uintptr
+
+func (a LessThanOrEqual) String() (s string) {
+ return fmt.Sprintf("<= %#x", uintptr(a))
+}
+
+type maskedEqual struct {
+ mask uintptr
+ value uintptr
+}
+
+func (a maskedEqual) String() (s string) {
+ return fmt.Sprintf("& %#x == %#x", a.mask, a.value)
+}
+
+// MaskedEqual specifies a value that matches the input after the input is
+// masked (bitwise &) against the given mask. Can be used to verify that input
+// only includes certain approved flags.
+func MaskedEqual(mask, value uintptr) interface{} {
+ return maskedEqual{
+ mask: mask,
+ value: value,
+ }
}
// Rule stores the allowed syscall arguments.
//
// For example:
// rule := Rule {
-// AllowValue(linux.ARCH_GET_FS | linux.ARCH_SET_FS), // arg0
+// EqualTo(linux.ARCH_GET_FS | linux.ARCH_SET_FS), // arg0
// }
type Rule [7]interface{} // 6 arguments + RIP
@@ -89,12 +140,12 @@ func (r Rule) String() (s string) {
// rules := SyscallRules{
// syscall.SYS_FUTEX: []Rule{
// {
-// AllowAny{},
-// AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
+// MatchAny{},
+// EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
// }, // OR
// {
-// AllowAny{},
-// AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
+// MatchAny{},
+// EqualTo(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
// },
// },
// syscall.SYS_GETPID: []Rule{},
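As a usage sketch for the new matcher types: the argument layout and flag set below are hypothetical, chosen only to show how per-argument matchers compose within a single Rule.

package main

import (
	"fmt"
	"syscall"

	"gvisor.dev/gvisor/pkg/seccomp"
)

func main() {
	// arg0: anything; arg1: must not be zero; arg2: may only contain the
	// O_CLOEXEC and O_NONBLOCK bits (every other bit must be clear).
	rule := seccomp.Rule{
		seccomp.MatchAny{},
		seccomp.NotEqual(0),
		seccomp.MaskedEqual(^uintptr(syscall.O_CLOEXEC|syscall.O_NONBLOCK), 0),
	}
	fmt.Println(rule.String())
}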
diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go
index 5238df8bd..23f30678d 100644
--- a/pkg/seccomp/seccomp_test.go
+++ b/pkg/seccomp/seccomp_test.go
@@ -76,11 +76,14 @@ func TestBasic(t *testing.T) {
}
for _, test := range []struct {
+ name string
ruleSets []RuleSet
defaultAction linux.BPFAction
+ badArchAction linux.BPFAction
specs []spec
}{
{
+ name: "Single syscall",
ruleSets: []RuleSet{
{
Rules: SyscallRules{1: {}},
@@ -88,26 +91,28 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "Single syscall allowed",
+ desc: "syscall allowed",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "Single syscall disallowed",
+ desc: "syscall disallowed",
data: seccompData{nr: 2, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_TRAP,
},
},
},
{
+ name: "Multiple rulesets",
ruleSets: []RuleSet{
{
Rules: SyscallRules{
1: []Rule{
{
- AllowValue(0x1),
+ EqualTo(0x1),
},
},
},
@@ -122,30 +127,32 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_KILL_THREAD,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "Multiple rulesets allowed (1a)",
+ desc: "allowed (1a)",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x1}},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "Multiple rulesets allowed (1b)",
+ desc: "allowed (1b)",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "Multiple rulesets allowed (2)",
+ desc: "syscall 1 matched 2nd rule",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "Multiple rulesets allowed (2)",
+ desc: "no match",
data: seccompData{nr: 0, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_KILL_THREAD,
},
},
},
{
+ name: "Multiple syscalls",
ruleSets: []RuleSet{
{
Rules: SyscallRules{
@@ -157,50 +164,52 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "Multiple syscalls allowed (1)",
+ desc: "allowed (1)",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "Multiple syscalls allowed (3)",
+ desc: "allowed (3)",
data: seccompData{nr: 3, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "Multiple syscalls allowed (5)",
+ desc: "allowed (5)",
data: seccompData{nr: 5, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "Multiple syscalls disallowed (0)",
+ desc: "disallowed (0)",
data: seccompData{nr: 0, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "Multiple syscalls disallowed (2)",
+ desc: "disallowed (2)",
data: seccompData{nr: 2, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "Multiple syscalls disallowed (4)",
+ desc: "disallowed (4)",
data: seccompData{nr: 4, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "Multiple syscalls disallowed (6)",
+ desc: "disallowed (6)",
data: seccompData{nr: 6, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "Multiple syscalls disallowed (100)",
+ desc: "disallowed (100)",
data: seccompData{nr: 100, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_TRAP,
},
},
},
{
+ name: "Wrong architecture",
ruleSets: []RuleSet{
{
Rules: SyscallRules{
@@ -210,15 +219,17 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "Wrong architecture",
+ desc: "arch (123)",
data: seccompData{nr: 1, arch: 123},
- want: linux.SECCOMP_RET_TRAP,
+ want: linux.SECCOMP_RET_KILL_THREAD,
},
},
},
{
+ name: "Syscall disallowed",
ruleSets: []RuleSet{
{
Rules: SyscallRules{
@@ -228,22 +239,24 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "Syscall disallowed, action trap",
+ desc: "action trap",
data: seccompData{nr: 2, arch: LINUX_AUDIT_ARCH},
want: linux.SECCOMP_RET_TRAP,
},
},
},
{
+ name: "Syscall arguments",
ruleSets: []RuleSet{
{
Rules: SyscallRules{
1: []Rule{
{
- AllowAny{},
- AllowValue(0xf),
+ MatchAny{},
+ EqualTo(0xf),
},
},
},
@@ -251,29 +264,31 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "Syscall argument allowed",
+ desc: "allowed",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf, 0xf}},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "Syscall argument disallowed",
+ desc: "disallowed",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf, 0xe}},
want: linux.SECCOMP_RET_TRAP,
},
},
},
{
+ name: "Multiple arguments",
ruleSets: []RuleSet{
{
Rules: SyscallRules{
1: []Rule{
{
- AllowValue(0xf),
+ EqualTo(0xf),
},
{
- AllowValue(0xe),
+ EqualTo(0xe),
},
},
},
@@ -281,28 +296,30 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "Syscall argument allowed, two rules",
+ desc: "match first rule",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf}},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "Syscall argument allowed, two rules",
+ desc: "match 2nd rule",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xe}},
want: linux.SECCOMP_RET_ALLOW,
},
},
},
{
+ name: "EqualTo",
ruleSets: []RuleSet{
{
Rules: SyscallRules{
1: []Rule{
{
- AllowValue(0),
- AllowValue(math.MaxUint64 - 1),
- AllowValue(math.MaxUint32),
+ EqualTo(0),
+ EqualTo(math.MaxUint64 - 1),
+ EqualTo(math.MaxUint32),
},
},
},
@@ -310,9 +327,10 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "64bit syscall argument allowed",
+ desc: "argument allowed (all match)",
data: seccompData{
nr: 1,
arch: LINUX_AUDIT_ARCH,
@@ -321,7 +339,7 @@ func TestBasic(t *testing.T) {
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "64bit syscall argument disallowed",
+ desc: "argument disallowed (one mismatch)",
data: seccompData{
nr: 1,
arch: LINUX_AUDIT_ARCH,
@@ -330,7 +348,7 @@ func TestBasic(t *testing.T) {
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "64bit syscall argument disallowed",
+ desc: "argument disallowed (multiple mismatch)",
data: seccompData{
nr: 1,
arch: LINUX_AUDIT_ARCH,
@@ -341,6 +359,103 @@ func TestBasic(t *testing.T) {
},
},
{
+ name: "NotEqual",
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ NotEqual(0x7aabbccdd),
+ NotEqual(math.MaxUint64 - 1),
+ NotEqual(math.MaxUint32),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
+ specs: []spec{
+ {
+ desc: "arg allowed",
+ data: seccompData{
+ nr: 1,
+ arch: LINUX_AUDIT_ARCH,
+ args: [6]uint64{0, math.MaxUint64, math.MaxUint32 - 1},
+ },
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg disallowed (one equal)",
+ data: seccompData{
+ nr: 1,
+ arch: LINUX_AUDIT_ARCH,
+ args: [6]uint64{0x7aabbccdd, math.MaxUint64, math.MaxUint32 - 1},
+ },
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg disallowed (all equal)",
+ data: seccompData{
+ nr: 1,
+ arch: LINUX_AUDIT_ARCH,
+ args: [6]uint64{0x7aabbccdd, math.MaxUint64 - 1, math.MaxUint32},
+ },
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
+ {
+ name: "GreaterThan",
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ // 4294967298
+ // Both upper 32 bits and lower 32 bits are non-zero.
+ // 00000000000000000000000000000010
+ // 00000000000000000000000000000010
+ GreaterThan(0x00000002_00000002),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
+ specs: []spec{
+ {
+ desc: "high 32bits greater",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000003_00000002}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "high 32bits equal, low 32bits greater",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000003}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "high 32bits equal, low 32bits equal",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000002}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "high 32bits equal, low 32bits less",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000001}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "high 32bits less",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000001_00000003}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
+ {
+ name: "GreaterThan (multi)",
ruleSets: []RuleSet{
{
Rules: SyscallRules{
@@ -355,46 +470,145 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "GreaterThan: Syscall argument allowed",
+ desc: "arg allowed",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xffffffff}},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "GreaterThan: Syscall argument disallowed (equal)",
+ desc: "arg disallowed (first arg equal)",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf, 0xffffffff}},
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "Syscall argument disallowed (smaller)",
+ desc: "arg disallowed (first arg smaller)",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xffffffff}},
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "GreaterThan2: Syscall argument allowed",
- data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xfbcd000d}},
+ desc: "arg disallowed (second arg equal)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xabcd000d}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg disallowed (second arg smaller)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xa000ffff}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
+ {
+ name: "GreaterThanOrEqual",
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ // 4294967298
+ // Both upper 32 bits and lower 32 bits are non-zero.
+ // 00000000000000000000000000000010
+ // 00000000000000000000000000000010
+ GreaterThanOrEqual(0x00000002_00000002),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
+ specs: []spec{
+ {
+ desc: "high 32bits greater",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000003_00000002}},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "GreaterThan2: Syscall argument disallowed (equal)",
- data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xabcd000d}},
+ desc: "high 32bits equal, low 32bits greater",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000003}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "high 32bits equal, low 32bits equal",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000002}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "high 32bits equal, low 32bits less",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000001}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "high 32bits less",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000001_00000002}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
+ {
+ name: "GreaterThanOrEqual (multi)",
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ GreaterThanOrEqual(0xf),
+ GreaterThanOrEqual(0xabcd000d),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
+ specs: []spec{
+ {
+ desc: "arg allowed (both greater)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xffffffff}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg allowed (first arg equal)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0xf, 0xffffffff}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg disallowed (first arg smaller)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xffffffff}},
want: linux.SECCOMP_RET_TRAP,
},
{
- desc: "GreaterThan2: Syscall argument disallowed (smaller)",
+ desc: "arg allowed (second arg equal)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xabcd000d}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg disallowed (second arg smaller)",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x10, 0xa000ffff}},
want: linux.SECCOMP_RET_TRAP,
},
+ {
+ desc: "arg disallowed (both arg smaller)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xa000ffff}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
},
},
{
+ name: "LessThan",
ruleSets: []RuleSet{
{
Rules: SyscallRules{
1: []Rule{
{
- RuleIP: AllowValue(0x7aabbccdd),
+ // 4294967298
+ // Both upper 32 bits and lower 32 bits are non-zero.
+ // 00000000000000000000000000000010
+ // 00000000000000000000000000000010
+ LessThan(0x00000002_00000002),
},
},
},
@@ -402,40 +616,307 @@ func TestBasic(t *testing.T) {
},
},
defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
specs: []spec{
{
- desc: "IP: Syscall instruction pointer allowed",
+ desc: "high 32bits greater",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000003_00000002}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "high 32bits equal, low 32bits greater",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000003}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "high 32bits equal, low 32bits equal",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000002}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "high 32bits equal, low 32bits less",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000001}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "high 32bits less",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000001_00000002}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ },
+ {
+ name: "LessThan (multi)",
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ LessThan(0x1),
+ LessThan(0xabcd000d),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
+ specs: []spec{
+ {
+ desc: "arg allowed",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0x0}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg disallowed (first arg equal)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x1, 0x0}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg disallowed (first arg greater)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x2, 0x0}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg disallowed (second arg equal)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xabcd000d}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg disallowed (second arg greater)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xffffffff}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg disallowed (both arg greater)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x2, 0xffffffff}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
+ {
+ name: "LessThanOrEqual",
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ // 4294967298
+ // Both upper 32 bits and lower 32 bits are non-zero.
+ // 00000000000000000000000000000010
+ // 00000000000000000000000000000010
+ LessThanOrEqual(0x00000002_00000002),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
+ specs: []spec{
+ {
+ desc: "high 32bits greater",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000003_00000002}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "high 32bits equal, low 32bits greater",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000003}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "high 32bits equal, low 32bits equal",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000002}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "high 32bits equal, low 32bits less",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000002_00000001}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "high 32bits less",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x00000001_00000002}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ },
+
+ {
+ name: "LessThanOrEqual (multi)",
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ LessThanOrEqual(0x1),
+ LessThanOrEqual(0xabcd000d),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
+ specs: []spec{
+ {
+ desc: "arg allowed",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0x0}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg allowed (first arg equal)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x1, 0x0}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg disallowed (first arg greater)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x2, 0x0}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg allowed (second arg equal)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xabcd000d}},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg disallowed (second arg greater)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x0, 0xffffffff}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg disallowed (both arg greater)",
+ data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{0x2, 0xffffffff}},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
+ {
+ name: "MaskedEqual",
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ // x & 00000001 00000011 (0x103) == 00000000 00000001 (0x1)
+ // Input x must have lowest order bit set and
+ // must *not* have 8th or second lowest order bit set.
+ MaskedEqual(0x103, 0x1),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
+ specs: []spec{
+ {
+ desc: "arg allowed (low order mandatory bit)",
+ data: seccompData{
+ nr: 1,
+ arch: LINUX_AUDIT_ARCH,
+ // 00000000 00000000 00000000 00000001
+ args: [6]uint64{0x1},
+ },
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg allowed (low order optional bit)",
+ data: seccompData{
+ nr: 1,
+ arch: LINUX_AUDIT_ARCH,
+ // 00000000 00000000 00000000 00000101
+ args: [6]uint64{0x5},
+ },
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "arg disallowed (lowest order bit not set)",
+ data: seccompData{
+ nr: 1,
+ arch: LINUX_AUDIT_ARCH,
+ // 00000000 00000000 00000000 00000010
+ args: [6]uint64{0x2},
+ },
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg disallowed (second lowest order bit set)",
+ data: seccompData{
+ nr: 1,
+ arch: LINUX_AUDIT_ARCH,
+ // 00000000 00000000 00000000 00000011
+ args: [6]uint64{0x3},
+ },
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ {
+ desc: "arg disallowed (8th bit set)",
+ data: seccompData{
+ nr: 1,
+ arch: LINUX_AUDIT_ARCH,
+ // 00000000 00000000 00000001 00000000
+ args: [6]uint64{0x100},
+ },
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
+ {
+ name: "Instruction Pointer",
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ RuleIP: EqualTo(0x7aabbccdd),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ badArchAction: linux.SECCOMP_RET_KILL_THREAD,
+ specs: []spec{
+ {
+ desc: "allowed",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{}, instructionPointer: 0x7aabbccdd},
want: linux.SECCOMP_RET_ALLOW,
},
{
- desc: "IP: Syscall instruction pointer disallowed",
+ desc: "disallowed",
data: seccompData{nr: 1, arch: LINUX_AUDIT_ARCH, args: [6]uint64{}, instructionPointer: 0x711223344},
want: linux.SECCOMP_RET_TRAP,
},
},
},
} {
- instrs, err := BuildProgram(test.ruleSets, test.defaultAction)
- if err != nil {
- t.Errorf("%s: buildProgram() got error: %v", test.specs[0].desc, err)
- continue
- }
- p, err := bpf.Compile(instrs)
- if err != nil {
- t.Errorf("%s: bpf.Compile() got error: %v", test.specs[0].desc, err)
- continue
- }
- for _, spec := range test.specs {
- got, err := bpf.Exec(p, spec.data.asInput())
+ t.Run(test.name, func(t *testing.T) {
+ instrs, err := BuildProgram(test.ruleSets, test.defaultAction, test.badArchAction)
if err != nil {
- t.Errorf("%s: bpf.Exec() got error: %v", spec.desc, err)
- continue
+ t.Fatalf("BuildProgram() got error: %v", err)
+ }
+ p, err := bpf.Compile(instrs)
+ if err != nil {
+ t.Fatalf("bpf.Compile() got error: %v", err)
}
- if got != uint32(spec.want) {
- t.Errorf("%s: bpd.Exec() = %d, want: %d", spec.desc, got, spec.want)
+ for _, spec := range test.specs {
+ got, err := bpf.Exec(p, spec.data.asInput())
+ if err != nil {
+ t.Fatalf("%s: bpf.Exec() got error: %v", spec.desc, err)
+ }
+ if got != uint32(spec.want) {
+ // Include a decoded version of the program in output for debugging purposes.
+ decoded, _ := bpf.DecodeInstructions(instrs)
+ t.Fatalf("%s: got: %d, want: %d\nBPF Program\n%s", spec.desc, got, spec.want, decoded)
+ }
}
- }
+ })
}
}
@@ -457,7 +938,7 @@ func TestRandom(t *testing.T) {
Rules: syscallRules,
Action: linux.SECCOMP_RET_ALLOW,
},
- }, linux.SECCOMP_RET_TRAP)
+ }, linux.SECCOMP_RET_TRAP, linux.SECCOMP_RET_KILL_THREAD)
if err != nil {
t.Fatalf("buildProgram() got error: %v", err)
}
diff --git a/pkg/seccomp/seccomp_test_victim.go b/pkg/seccomp/seccomp_test_victim.go
index fe157f539..7f33e0d9e 100644
--- a/pkg/seccomp/seccomp_test_victim.go
+++ b/pkg/seccomp/seccomp_test_victim.go
@@ -100,7 +100,7 @@ func main() {
if !die {
syscalls[syscall.SYS_OPENAT] = []seccomp.Rule{
{
- seccomp.AllowValue(10),
+ seccomp.EqualTo(10),
},
}
}
diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go
index 413111faf..332bdb8e8 100644
--- a/pkg/sentry/kernel/syscalls.go
+++ b/pkg/sentry/kernel/syscalls.go
@@ -348,6 +348,16 @@ func (s *SyscallTable) LookupName(sysno uintptr) string {
return fmt.Sprintf("sys_%d", sysno) // Unlikely.
}
+// LookupNo looks up a syscall number by name.
+func (s *SyscallTable) LookupNo(name string) (uintptr, error) {
+ for i, syscall := range s.Table {
+ if syscall.Name == name {
+ return uintptr(i), nil
+ }
+ }
+ return 0, fmt.Errorf("syscall %q not found", name)
+}
+
// LookupEmulate looks up an emulation syscall number.
func (s *SyscallTable) LookupEmulate(addr usermem.Addr) (uintptr, bool) {
sysno, ok := s.Emulate[addr]
diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go
index 84b699f0d..020bbda79 100644
--- a/pkg/sentry/platform/ptrace/subprocess_amd64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go
@@ -201,7 +201,7 @@ func appendArchSeccompRules(rules []seccomp.RuleSet, defaultAction linux.BPFActi
seccomp.RuleSet{
Rules: seccomp.SyscallRules{
syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
- {seccomp.AllowValue(linux.ARCH_SET_CPUID), seccomp.AllowValue(0)},
+ {seccomp.EqualTo(linux.ARCH_SET_CPUID), seccomp.EqualTo(0)},
},
},
Action: linux.SECCOMP_RET_ALLOW,
diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go
index 2ce528601..8548853da 100644
--- a/pkg/sentry/platform/ptrace/subprocess_linux.go
+++ b/pkg/sentry/platform/ptrace/subprocess_linux.go
@@ -80,9 +80,9 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
Rules: seccomp.SyscallRules{
syscall.SYS_CLONE: []seccomp.Rule{
// Allow creation of new subprocesses (used by the master).
- {seccomp.AllowValue(syscall.CLONE_FILES | syscall.SIGKILL)},
+ {seccomp.EqualTo(syscall.CLONE_FILES | syscall.SIGKILL)},
// Allow creation of new threads within a single address space (used by addresss spaces).
- {seccomp.AllowValue(
+ {seccomp.EqualTo(
syscall.CLONE_FILES |
syscall.CLONE_FS |
syscall.CLONE_SIGHAND |
@@ -97,14 +97,14 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
// For the stub prctl dance (all).
syscall.SYS_PRCTL: []seccomp.Rule{
- {seccomp.AllowValue(syscall.PR_SET_PDEATHSIG), seccomp.AllowValue(syscall.SIGKILL)},
+ {seccomp.EqualTo(syscall.PR_SET_PDEATHSIG), seccomp.EqualTo(syscall.SIGKILL)},
},
syscall.SYS_GETPPID: {},
// For the stub to stop itself (all).
syscall.SYS_GETPID: {},
syscall.SYS_KILL: []seccomp.Rule{
- {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SIGSTOP)},
+ {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SIGSTOP)},
},
// Injected to support the address space operations.
@@ -115,7 +115,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
})
}
rules = appendArchSeccompRules(rules, defaultAction)
- instrs, err := seccomp.BuildProgram(rules, defaultAction)
+ instrs, err := seccomp.BuildProgram(rules, defaultAction, defaultAction)
if err != nil {
return nil, err
}
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 704c66742..01f62d50a 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -26,6 +26,7 @@ go_library(
deps = [
"//pkg/abi",
"//pkg/abi/linux",
+ "//pkg/bpf",
"//pkg/context",
"//pkg/control/server",
"//pkg/cpuid",
@@ -107,6 +108,7 @@ go_library(
"//runsc/boot/pprof",
"//runsc/config",
"//runsc/specutils",
+ "//runsc/specutils/seccomp",
"@com_github_golang_protobuf//proto:go_default_library",
"@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 149eb0b1b..4ed28b5cd 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -29,7 +29,7 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_CLOCK_GETTIME: {},
syscall.SYS_CLONE: []seccomp.Rule{
{
- seccomp.AllowValue(
+ seccomp.EqualTo(
syscall.CLONE_VM |
syscall.CLONE_FS |
syscall.CLONE_FILES |
@@ -42,26 +42,26 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_DUP: {},
syscall.SYS_DUP3: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.O_CLOEXEC),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.O_CLOEXEC),
},
},
syscall.SYS_EPOLL_CREATE1: {},
syscall.SYS_EPOLL_CTL: {},
syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(0),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(0),
},
},
syscall.SYS_EVENTFD2: []seccomp.Rule{
{
- seccomp.AllowValue(0),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(0),
+ seccomp.EqualTo(0),
},
},
syscall.SYS_EXIT: {},
@@ -70,16 +70,16 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_FCHMOD: {},
syscall.SYS_FCNTL: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.F_GETFL),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.F_GETFL),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.F_SETFL),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.F_SETFL),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.F_GETFD),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.F_GETFD),
},
},
syscall.SYS_FSTAT: {},
@@ -87,52 +87,52 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_FTRUNCATE: {},
syscall.SYS_FUTEX: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
- seccomp.AllowAny{},
- seccomp.AllowAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
- seccomp.AllowAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
+ seccomp.MatchAny{},
},
// Non-private variants are included for flipcall support. They are otherwise
// unncessary, as the sentry will use only private futexes internally.
{
- seccomp.AllowAny{},
- seccomp.AllowValue(linux.FUTEX_WAIT),
- seccomp.AllowAny{},
- seccomp.AllowAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(linux.FUTEX_WAIT),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(linux.FUTEX_WAKE),
- seccomp.AllowAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(linux.FUTEX_WAKE),
+ seccomp.MatchAny{},
},
},
syscall.SYS_GETPID: {},
unix.SYS_GETRANDOM: {},
syscall.SYS_GETSOCKOPT: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_DOMAIN),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_DOMAIN),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_TYPE),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_TYPE),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_ERROR),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_ERROR),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_SNDBUF),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_SNDBUF),
},
},
syscall.SYS_GETTID: {},
@@ -141,34 +141,34 @@ var allowedSyscalls = seccomp.SyscallRules{
// setting/getting termios and winsize.
syscall.SYS_IOCTL: []seccomp.Rule{
{
- seccomp.AllowAny{}, /* fd */
- seccomp.AllowValue(linux.TCGETS),
- seccomp.AllowAny{}, /* termios struct */
+ seccomp.MatchAny{}, /* fd */
+ seccomp.EqualTo(linux.TCGETS),
+ seccomp.MatchAny{}, /* termios struct */
},
{
- seccomp.AllowAny{}, /* fd */
- seccomp.AllowValue(linux.TCSETS),
- seccomp.AllowAny{}, /* termios struct */
+ seccomp.MatchAny{}, /* fd */
+ seccomp.EqualTo(linux.TCSETS),
+ seccomp.MatchAny{}, /* termios struct */
},
{
- seccomp.AllowAny{}, /* fd */
- seccomp.AllowValue(linux.TCSETSF),
- seccomp.AllowAny{}, /* termios struct */
+ seccomp.MatchAny{}, /* fd */
+ seccomp.EqualTo(linux.TCSETSF),
+ seccomp.MatchAny{}, /* termios struct */
},
{
- seccomp.AllowAny{}, /* fd */
- seccomp.AllowValue(linux.TCSETSW),
- seccomp.AllowAny{}, /* termios struct */
+ seccomp.MatchAny{}, /* fd */
+ seccomp.EqualTo(linux.TCSETSW),
+ seccomp.MatchAny{}, /* termios struct */
},
{
- seccomp.AllowAny{}, /* fd */
- seccomp.AllowValue(linux.TIOCSWINSZ),
- seccomp.AllowAny{}, /* winsize struct */
+ seccomp.MatchAny{}, /* fd */
+ seccomp.EqualTo(linux.TIOCSWINSZ),
+ seccomp.MatchAny{}, /* winsize struct */
},
{
- seccomp.AllowAny{}, /* fd */
- seccomp.AllowValue(linux.TIOCGWINSZ),
- seccomp.AllowAny{}, /* winsize struct */
+ seccomp.MatchAny{}, /* fd */
+ seccomp.EqualTo(linux.TIOCGWINSZ),
+ seccomp.MatchAny{}, /* winsize struct */
},
},
syscall.SYS_LSEEK: {},
@@ -182,46 +182,46 @@ var allowedSyscalls = seccomp.SyscallRules{
// TODO(b/148688965): Remove once this is gone from Go.
syscall.SYS_MLOCK: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(4096),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4096),
},
},
syscall.SYS_MMAP: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MAP_SHARED),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MAP_SHARED),
},
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MAP_PRIVATE),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MAP_PRIVATE),
},
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS),
},
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK),
},
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE),
},
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.PROT_WRITE | syscall.PROT_READ),
- seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.PROT_WRITE | syscall.PROT_READ),
+ seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED),
},
},
syscall.SYS_MPROTECT: {},
@@ -237,32 +237,32 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_READ: {},
syscall.SYS_RECVMSG: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC),
},
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK),
},
},
syscall.SYS_RECVMMSG: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(fdbased.MaxMsgsPerRecv),
- seccomp.AllowValue(syscall.MSG_DONTWAIT),
- seccomp.AllowValue(0),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(fdbased.MaxMsgsPerRecv),
+ seccomp.EqualTo(syscall.MSG_DONTWAIT),
+ seccomp.EqualTo(0),
},
},
unix.SYS_SENDMMSG: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MSG_DONTWAIT),
- seccomp.AllowValue(0),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MSG_DONTWAIT),
+ seccomp.EqualTo(0),
},
},
syscall.SYS_RESTART_SYSCALL: {},
@@ -272,49 +272,49 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_SCHED_YIELD: {},
syscall.SYS_SENDMSG: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL),
},
},
syscall.SYS_SETITIMER: {},
syscall.SYS_SHUTDOWN: []seccomp.Rule{
// Used by fs/host to shutdown host sockets.
- {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RD)},
- {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_WR)},
+ {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RD)},
+ {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_WR)},
// Used by unet to shutdown connections.
- {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
+ {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)},
},
syscall.SYS_SIGALTSTACK: {},
unix.SYS_STATX: {},
syscall.SYS_SYNC_FILE_RANGE: {},
syscall.SYS_TEE: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(1), /* len */
- seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(1), /* len */
+ seccomp.EqualTo(unix.SPLICE_F_NONBLOCK), /* flags */
},
},
syscall.SYS_TGKILL: []seccomp.Rule{
{
- seccomp.AllowValue(uint64(os.Getpid())),
+ seccomp.EqualTo(uint64(os.Getpid())),
},
},
syscall.SYS_UTIMENSAT: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(0), /* null pathname */
- seccomp.AllowAny{},
- seccomp.AllowValue(0), /* flags */
+ seccomp.MatchAny{},
+ seccomp.EqualTo(0), /* null pathname */
+ seccomp.MatchAny{},
+ seccomp.EqualTo(0), /* flags */
},
},
syscall.SYS_WRITE: {},
// For rawfile.NonBlockingWriteIovec.
syscall.SYS_WRITEV: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
seccomp.GreaterThan(0),
},
},
@@ -325,10 +325,10 @@ func hostInetFilters() seccomp.SyscallRules {
return seccomp.SyscallRules{
syscall.SYS_ACCEPT4: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
},
},
syscall.SYS_BIND: {},
@@ -337,84 +337,84 @@ func hostInetFilters() seccomp.SyscallRules {
syscall.SYS_GETSOCKNAME: {},
syscall.SYS_GETSOCKOPT: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IP),
- seccomp.AllowValue(syscall.IP_TOS),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IP),
+ seccomp.EqualTo(syscall.IP_TOS),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IP),
- seccomp.AllowValue(syscall.IP_RECVTOS),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IP),
+ seccomp.EqualTo(syscall.IP_RECVTOS),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IPV6),
- seccomp.AllowValue(syscall.IPV6_TCLASS),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IPV6),
+ seccomp.EqualTo(syscall.IPV6_TCLASS),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IPV6),
- seccomp.AllowValue(syscall.IPV6_RECVTCLASS),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IPV6),
+ seccomp.EqualTo(syscall.IPV6_RECVTCLASS),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IPV6),
- seccomp.AllowValue(syscall.IPV6_V6ONLY),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IPV6),
+ seccomp.EqualTo(syscall.IPV6_V6ONLY),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_ERROR),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_ERROR),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_KEEPALIVE),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_KEEPALIVE),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_SNDBUF),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_SNDBUF),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_RCVBUF),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_RCVBUF),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_REUSEADDR),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_REUSEADDR),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_TYPE),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_TYPE),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_LINGER),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_LINGER),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_TCP),
- seccomp.AllowValue(syscall.TCP_NODELAY),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_TCP),
+ seccomp.EqualTo(syscall.TCP_NODELAY),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_TCP),
- seccomp.AllowValue(syscall.TCP_INFO),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_TCP),
+ seccomp.EqualTo(syscall.TCP_INFO),
},
},
syscall.SYS_IOCTL: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.TIOCOUTQ),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.TIOCOUTQ),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.TIOCINQ),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.TIOCINQ),
},
},
syscall.SYS_LISTEN: {},
@@ -425,103 +425,103 @@ func hostInetFilters() seccomp.SyscallRules {
syscall.SYS_SENDTO: {},
syscall.SYS_SETSOCKOPT: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IPV6),
- seccomp.AllowValue(syscall.IPV6_V6ONLY),
- seccomp.AllowAny{},
- seccomp.AllowValue(4),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IPV6),
+ seccomp.EqualTo(syscall.IPV6_V6ONLY),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_SNDBUF),
- seccomp.AllowAny{},
- seccomp.AllowValue(4),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_SNDBUF),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_RCVBUF),
- seccomp.AllowAny{},
- seccomp.AllowValue(4),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_RCVBUF),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_REUSEADDR),
- seccomp.AllowAny{},
- seccomp.AllowValue(4),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_REUSEADDR),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_TCP),
- seccomp.AllowValue(syscall.TCP_NODELAY),
- seccomp.AllowAny{},
- seccomp.AllowValue(4),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_TCP),
+ seccomp.EqualTo(syscall.TCP_NODELAY),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IP),
- seccomp.AllowValue(syscall.IP_TOS),
- seccomp.AllowAny{},
- seccomp.AllowValue(4),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IP),
+ seccomp.EqualTo(syscall.IP_TOS),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IP),
- seccomp.AllowValue(syscall.IP_RECVTOS),
- seccomp.AllowAny{},
- seccomp.AllowValue(4),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IP),
+ seccomp.EqualTo(syscall.IP_RECVTOS),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IPV6),
- seccomp.AllowValue(syscall.IPV6_TCLASS),
- seccomp.AllowAny{},
- seccomp.AllowValue(4),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IPV6),
+ seccomp.EqualTo(syscall.IPV6_TCLASS),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_IPV6),
- seccomp.AllowValue(syscall.IPV6_RECVTCLASS),
- seccomp.AllowAny{},
- seccomp.AllowValue(4),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_IPV6),
+ seccomp.EqualTo(syscall.IPV6_RECVTCLASS),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4),
},
},
syscall.SYS_SHUTDOWN: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SHUT_RD),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SHUT_RD),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SHUT_WR),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SHUT_WR),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SHUT_RDWR),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SHUT_RDWR),
},
},
syscall.SYS_SOCKET: []seccomp.Rule{
{
- seccomp.AllowValue(syscall.AF_INET),
- seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(syscall.AF_INET),
+ seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(0),
},
{
- seccomp.AllowValue(syscall.AF_INET),
- seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(syscall.AF_INET),
+ seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(0),
},
{
- seccomp.AllowValue(syscall.AF_INET6),
- seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(syscall.AF_INET6),
+ seccomp.EqualTo(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(0),
},
{
- seccomp.AllowValue(syscall.AF_INET6),
- seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(syscall.AF_INET6),
+ seccomp.EqualTo(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(0),
},
},
syscall.SYS_WRITEV: {},
@@ -532,20 +532,20 @@ func controlServerFilters(fd int) seccomp.SyscallRules {
return seccomp.SyscallRules{
syscall.SYS_ACCEPT: []seccomp.Rule{
{
- seccomp.AllowValue(fd),
+ seccomp.EqualTo(fd),
},
},
syscall.SYS_LISTEN: []seccomp.Rule{
{
- seccomp.AllowValue(fd),
- seccomp.AllowValue(16 /* unet.backlog */),
+ seccomp.EqualTo(fd),
+ seccomp.EqualTo(16 /* unet.backlog */),
},
},
syscall.SYS_GETSOCKOPT: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_PEERCRED),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.SOL_SOCKET),
+ seccomp.EqualTo(syscall.SO_PEERCRED),
},
},
}
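
For reference, a minimal sketch (not part of this change) of how the renamed matchers read: MatchAny accepts any value for an argument position, while EqualTo requires an exact value. The package paths and rule syntax mirror the configuration above.

package example

import (
	"syscall"

	"gvisor.dev/gvisor/pkg/seccomp"
)

// exampleRules allows shutdown(2) with any fd (first argument) but only with
// SHUT_RDWR as the second argument; any other argument combination falls
// through to the ruleset's default action.
var exampleRules = seccomp.SyscallRules{
	syscall.SYS_SHUTDOWN: []seccomp.Rule{
		{seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)},
	},
}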
diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go
index 5335ff82c..9b1799416 100644
--- a/runsc/boot/filter/config_amd64.go
+++ b/runsc/boot/filter/config_amd64.go
@@ -25,7 +25,7 @@ import (
func init() {
allowedSyscalls[syscall.SYS_ARCH_PRCTL] = append(allowedSyscalls[syscall.SYS_ARCH_PRCTL],
- seccomp.Rule{seccomp.AllowValue(linux.ARCH_GET_FS)},
- seccomp.Rule{seccomp.AllowValue(linux.ARCH_SET_FS)},
+ seccomp.Rule{seccomp.EqualTo(linux.ARCH_GET_FS)},
+ seccomp.Rule{seccomp.EqualTo(linux.ARCH_SET_FS)},
)
}
diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go
index 194952a7b..7b8669595 100644
--- a/runsc/boot/filter/config_profile.go
+++ b/runsc/boot/filter/config_profile.go
@@ -25,9 +25,9 @@ func profileFilters() seccomp.SyscallRules {
return seccomp.SyscallRules{
syscall.SYS_OPENAT: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC),
},
},
}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index a136da21a..4940ea96a 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -27,6 +27,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bpf"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/fd"
@@ -70,6 +71,7 @@ import (
"gvisor.dev/gvisor/runsc/boot/pprof"
"gvisor.dev/gvisor/runsc/config"
"gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/specutils/seccomp"
// Include supported socket providers.
"gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
@@ -507,6 +509,7 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
return mf, nil
}
+// installSeccompFilters installs sandbox seccomp filters with the host.
func (l *Loader) installSeccompFilters() error {
if l.root.conf.DisableSeccomp {
filter.Report("syscall filter is DISABLED. Running in less secure mode.")
@@ -577,6 +580,7 @@ func (l *Loader) run() error {
if _, err := l.createContainerProcess(true, l.sandboxID, &l.root, ep); err != nil {
return err
}
+
}
ep.tg = l.k.GlobalInit()
@@ -764,6 +768,31 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
}
}
+ // Install seccomp filters with the new task if there are any.
+ if info.conf.OCISeccomp {
+ if info.spec.Linux != nil && info.spec.Linux.Seccomp != nil {
+ program, err := seccomp.BuildProgram(info.spec.Linux.Seccomp)
+ if err != nil {
+ return nil, fmt.Errorf("building seccomp program: %v", err)
+ }
+
+ if log.IsLogging(log.Debug) {
+ out, _ := bpf.DecodeProgram(program)
+ log.Debugf("Installing OCI seccomp filters\nProgram:\n%s", out)
+ }
+
+ task := tg.Leader()
+ // NOTE: It seems Flags are ignored by runc so we ignore them too.
+ if err := task.AppendSyscallFilter(program, true); err != nil {
+ return nil, fmt.Errorf("appending seccomp filters: %v", err)
+ }
+ }
+ } else {
+ if info.spec.Linux != nil && info.spec.Linux.Seccomp != nil {
+ log.Warningf("Seccomp spec is being ignored")
+ }
+ }
+
return tg, nil
}
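
For context, a sketch (not part of this change) of the OCI structure carried by spec.Linux.Seccomp and consumed by the specutils/seccomp.BuildProgram call above; the values mirror the test cases added later in this change.

package example

import (
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// exampleSeccomp allows every syscall by default and returns EPERM for
// getcwd(2), the same shape exercised by the new seccomp_test.go cases.
var exampleSeccomp = &specs.LinuxSeccomp{
	DefaultAction: specs.ActAllow,
	Syscalls: []specs.LinuxSyscall{
		{
			Names:  []string{"getcwd"},
			Action: specs.ActErrno,
		},
	},
}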
diff --git a/runsc/config/config.go b/runsc/config/config.go
index df134bb2f..f30f79f68 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -157,8 +157,12 @@ type Config struct {
// Enables FUSE usage.
FUSE bool `flag:"fuse"`
+ // Allows overriding of flags in OCI annotations.
AllowFlagOverride bool `flag:"allow-flag-override"`
+ // OCISeccomp enables loading OCI seccomp filters inside the sandbox.
+ OCISeccomp bool `flag:"oci-seccomp"`
+
// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
// tests. It allows runsc to start the sandbox process as the current
// user, and without chrooting the sandbox process. This can be
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index eff46e938..a5f25cfa2 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -63,6 +63,7 @@ func RegisterFlags() {
flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.")
flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
+ flag.Bool("oci-seccomp", false, "Enables loading OCI seccomp filters inside the sandbox.")
// Flags that control sandbox runtime behavior: FS related.
flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
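
Usage note (an assumption about a typical install, not part of this change): like other runsc flags, --oci-seccomp is passed before the subcommand, for example

	runsc --oci-seccomp run <container-id>

or, when runsc is registered as a Docker runtime, it can be added to that runtime's runtimeArgs.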
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index 88814b83c..0cb9b1cae 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -29,7 +29,7 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_CLOCK_GETTIME: {},
syscall.SYS_CLONE: []seccomp.Rule{
{
- seccomp.AllowValue(
+ seccomp.EqualTo(
syscall.CLONE_VM |
syscall.CLONE_FS |
syscall.CLONE_FILES |
@@ -43,46 +43,46 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_EPOLL_CTL: {},
syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(0),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(0),
},
},
syscall.SYS_EVENTFD2: []seccomp.Rule{
{
- seccomp.AllowValue(0),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(0),
+ seccomp.EqualTo(0),
},
},
syscall.SYS_EXIT: {},
syscall.SYS_EXIT_GROUP: {},
syscall.SYS_FALLOCATE: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(0),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(0),
},
},
syscall.SYS_FCHMOD: {},
syscall.SYS_FCHOWNAT: {},
syscall.SYS_FCNTL: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.F_GETFL),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.F_GETFL),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.F_SETFL),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.F_SETFL),
},
{
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.F_GETFD),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.F_GETFD),
},
// Used by flipcall.PacketWindowAllocator.Init().
{
- seccomp.AllowAny{},
- seccomp.AllowValue(unix.F_ADD_SEALS),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(unix.F_ADD_SEALS),
},
},
syscall.SYS_FSTAT: {},
@@ -91,31 +91,31 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_FTRUNCATE: {},
syscall.SYS_FUTEX: {
seccomp.Rule{
- seccomp.AllowAny{},
- seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(0),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(0),
},
seccomp.Rule{
- seccomp.AllowAny{},
- seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(0),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(0),
},
// Non-private futex used for flipcall.
seccomp.Rule{
- seccomp.AllowAny{},
- seccomp.AllowValue(linux.FUTEX_WAIT),
- seccomp.AllowAny{},
- seccomp.AllowAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(linux.FUTEX_WAIT),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
},
seccomp.Rule{
- seccomp.AllowAny{},
- seccomp.AllowValue(linux.FUTEX_WAKE),
- seccomp.AllowAny{},
- seccomp.AllowAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(linux.FUTEX_WAKE),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
},
},
syscall.SYS_GETDENTS64: {},
@@ -137,28 +137,28 @@ var allowedSyscalls = seccomp.SyscallRules{
// TODO(b/148688965): Remove once this is gone from Go.
syscall.SYS_MLOCK: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowValue(4096),
+ seccomp.MatchAny{},
+ seccomp.EqualTo(4096),
},
},
syscall.SYS_MMAP: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MAP_SHARED),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MAP_SHARED),
},
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS),
},
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED),
},
},
syscall.SYS_MPROTECT: {},
@@ -172,14 +172,14 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_READLINKAT: {},
syscall.SYS_RECVMSG: []seccomp.Rule{
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC),
},
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK),
},
},
syscall.SYS_RENAMEAT: {},
@@ -190,33 +190,33 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_SENDMSG: []seccomp.Rule{
// Used by fdchannel.Endpoint.SendFD().
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(0),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(0),
},
// Used by unet.SocketWriter.WriteVec().
{
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL),
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.EqualTo(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL),
},
},
syscall.SYS_SHUTDOWN: []seccomp.Rule{
- {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
+ {seccomp.MatchAny{}, seccomp.EqualTo(syscall.SHUT_RDWR)},
},
syscall.SYS_SIGALTSTACK: {},
// Used by fdchannel.NewConnectedSockets().
syscall.SYS_SOCKETPAIR: {
{
- seccomp.AllowValue(syscall.AF_UNIX),
- seccomp.AllowValue(syscall.SOCK_SEQPACKET | syscall.SOCK_CLOEXEC),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(syscall.AF_UNIX),
+ seccomp.EqualTo(syscall.SOCK_SEQPACKET | syscall.SOCK_CLOEXEC),
+ seccomp.EqualTo(0),
},
},
syscall.SYS_SYMLINKAT: {},
syscall.SYS_TGKILL: []seccomp.Rule{
{
- seccomp.AllowValue(uint64(os.Getpid())),
+ seccomp.EqualTo(uint64(os.Getpid())),
},
},
syscall.SYS_UNLINKAT: {},
@@ -227,24 +227,24 @@ var allowedSyscalls = seccomp.SyscallRules{
var udsSyscalls = seccomp.SyscallRules{
syscall.SYS_SOCKET: []seccomp.Rule{
{
- seccomp.AllowValue(syscall.AF_UNIX),
- seccomp.AllowValue(syscall.SOCK_STREAM),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(syscall.AF_UNIX),
+ seccomp.EqualTo(syscall.SOCK_STREAM),
+ seccomp.EqualTo(0),
},
{
- seccomp.AllowValue(syscall.AF_UNIX),
- seccomp.AllowValue(syscall.SOCK_DGRAM),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(syscall.AF_UNIX),
+ seccomp.EqualTo(syscall.SOCK_DGRAM),
+ seccomp.EqualTo(0),
},
{
- seccomp.AllowValue(syscall.AF_UNIX),
- seccomp.AllowValue(syscall.SOCK_SEQPACKET),
- seccomp.AllowValue(0),
+ seccomp.EqualTo(syscall.AF_UNIX),
+ seccomp.EqualTo(syscall.SOCK_SEQPACKET),
+ seccomp.EqualTo(0),
},
},
syscall.SYS_CONNECT: []seccomp.Rule{
{
- seccomp.AllowAny{},
+ seccomp.MatchAny{},
},
},
}
diff --git a/runsc/fsgofer/filter/config_amd64.go b/runsc/fsgofer/filter/config_amd64.go
index a4b28cb8b..53506b5e1 100644
--- a/runsc/fsgofer/filter/config_amd64.go
+++ b/runsc/fsgofer/filter/config_amd64.go
@@ -25,8 +25,8 @@ import (
func init() {
allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{
- {seccomp.AllowValue(linux.ARCH_GET_FS)},
- {seccomp.AllowValue(linux.ARCH_SET_FS)},
+ {seccomp.EqualTo(linux.ARCH_GET_FS)},
+ {seccomp.EqualTo(linux.ARCH_SET_FS)},
}
allowedSyscalls[syscall.SYS_NEWFSTATAT] = []seccomp.Rule{}
diff --git a/runsc/specutils/seccomp/BUILD b/runsc/specutils/seccomp/BUILD
new file mode 100644
index 000000000..3520f2d6d
--- /dev/null
+++ b/runsc/specutils/seccomp/BUILD
@@ -0,0 +1,34 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "seccomp",
+ srcs = [
+ "audit_amd64.go",
+ "audit_arm64.go",
+ "seccomp.go",
+ ],
+ visibility = ["//:sandbox"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/bpf",
+ "//pkg/log",
+ "//pkg/seccomp",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/syscalls/linux",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
+ ],
+)
+
+go_test(
+ name = "seccomp_test",
+ size = "small",
+ srcs = ["seccomp_test.go"],
+ library = ":seccomp",
+ deps = [
+ "//pkg/binary",
+ "//pkg/bpf",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
+ ],
+)
diff --git a/runsc/specutils/seccomp/audit_amd64.go b/runsc/specutils/seccomp/audit_amd64.go
new file mode 100644
index 000000000..417cf4a7a
--- /dev/null
+++ b/runsc/specutils/seccomp/audit_amd64.go
@@ -0,0 +1,25 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package seccomp
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+)
+
+const (
+ nativeArchAuditNo = linux.AUDIT_ARCH_X86_64
+)
diff --git a/runsc/specutils/seccomp/audit_arm64.go b/runsc/specutils/seccomp/audit_arm64.go
new file mode 100644
index 000000000..b727ceff2
--- /dev/null
+++ b/runsc/specutils/seccomp/audit_arm64.go
@@ -0,0 +1,25 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package seccomp
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+)
+
+const (
+ nativeArchAuditNo = linux.AUDIT_ARCH_AARCH64
+)
diff --git a/runsc/specutils/seccomp/seccomp.go b/runsc/specutils/seccomp/seccomp.go
new file mode 100644
index 000000000..5932f7a41
--- /dev/null
+++ b/runsc/specutils/seccomp/seccomp.go
@@ -0,0 +1,229 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package seccomp implements some features of libseccomp in order to support
+// OCI.
+package seccomp
+
+import (
+ "fmt"
+ "syscall"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+)
+
+var (
+ killThreadAction = linux.SECCOMP_RET_KILL_THREAD
+ trapAction = linux.SECCOMP_RET_TRAP
+ // runc always returns EPERM as the error code for SECCOMP_RET_ERRNO.
+ errnoAction = linux.SECCOMP_RET_ERRNO.WithReturnCode(uint16(syscall.EPERM))
+ // runc always returns EPERM as the error code for SECCOMP_RET_TRACE.
+ traceAction = linux.SECCOMP_RET_TRACE.WithReturnCode(uint16(syscall.EPERM))
+ allowAction = linux.SECCOMP_RET_ALLOW
+)
+
+// BuildProgram generates a bpf program based on the given OCI seccomp
+// config.
+func BuildProgram(s *specs.LinuxSeccomp) (bpf.Program, error) {
+ defaultAction, err := convertAction(s.DefaultAction)
+ if err != nil {
+ return bpf.Program{}, fmt.Errorf("seccomp default action: %w", err)
+ }
+ ruleset, err := convertRules(s)
+ if err != nil {
+ return bpf.Program{}, fmt.Errorf("invalid seccomp rules: %w", err)
+ }
+
+ instrs, err := seccomp.BuildProgram(ruleset, defaultAction, killThreadAction)
+ if err != nil {
+ return bpf.Program{}, fmt.Errorf("building seccomp program: %w", err)
+ }
+
+ program, err := bpf.Compile(instrs)
+ if err != nil {
+ return bpf.Program{}, fmt.Errorf("compiling seccomp program: %w", err)
+ }
+
+ return program, nil
+}
+
+// lookupSyscallNo gets the syscall number for the syscall with the given name
+// for the given architecture.
+func lookupSyscallNo(arch uint32, name string) (uint32, error) {
+ var table *kernel.SyscallTable
+ switch arch {
+ case linux.AUDIT_ARCH_X86_64:
+ table = slinux.AMD64
+ case linux.AUDIT_ARCH_AARCH64:
+ table = slinux.ARM64
+ }
+ if table == nil {
+ return 0, fmt.Errorf("unsupported architecture: %d", arch)
+ }
+ n, err := table.LookupNo(name)
+ if err != nil {
+ return 0, err
+ }
+ return uint32(n), nil
+}
+
+// convertAction converts a specs.LinuxSeccompAction to a linux.BPFAction.
+func convertAction(act specs.LinuxSeccompAction) (linux.BPFAction, error) {
+ // TODO(gvisor.dev/issue/3124): Update specs package to include ActLog and ActKillProcess.
+ switch act {
+ case specs.ActKill:
+ return killThreadAction, nil
+ case specs.ActTrap:
+ return trapAction, nil
+ case specs.ActErrno:
+ return errnoAction, nil
+ case specs.ActTrace:
+ return traceAction, nil
+ case specs.ActAllow:
+ return allowAction, nil
+ default:
+ return 0, fmt.Errorf("invalid action: %v", act)
+ }
+}
+
+// convertRules converts OCI linux seccomp rules into RuleSets that can be used by
+// the seccomp package to build a seccomp program.
+func convertRules(s *specs.LinuxSeccomp) ([]seccomp.RuleSet, error) {
+ // NOTE: Architectures are only really relevant when calling 32-bit syscalls
+ // on a 64-bit system. Since we don't support that in gVisor anyway, we
+ // ignore Architectures and only test against the native architecture.
+
+ ruleset := []seccomp.RuleSet{}
+
+ for _, syscall := range s.Syscalls {
+ sysRules := seccomp.NewSyscallRules()
+
+ action, err := convertAction(syscall.Action)
+ if err != nil {
+ return nil, err
+ }
+
+ // Args
+ rules, err := convertArgs(syscall.Args)
+ if err != nil {
+ return nil, err
+ }
+
+ for _, name := range syscall.Names {
+ syscallNo, err := lookupSyscallNo(nativeArchAuditNo, name)
+ if err != nil {
+ // If there is an error looking up the syscall number, assume it is
+ // not supported on this architecture and ignore it. This is, for
+ // better or worse, what runc does.
+ log.Warningf("OCI seccomp: ignoring syscall %q", name)
+ continue
+ }
+
+ for _, rule := range rules {
+ sysRules.AddRule(uintptr(syscallNo), rule)
+ }
+ }
+
+ ruleset = append(ruleset, seccomp.RuleSet{
+ Rules: sysRules,
+ Action: action,
+ })
+ }
+
+ return ruleset, nil
+}
+
+// convertArgs converts an OCI seccomp argument rule to a list of seccomp.Rule.
+func convertArgs(args []specs.LinuxSeccompArg) ([]seccomp.Rule, error) {
+ argCounts := make([]uint, 6)
+
+ for _, arg := range args {
+ if arg.Index >= 6 {
+ return nil, fmt.Errorf("invalid index: %d", arg.Index)
+ }
+
+ argCounts[arg.Index]++
+ }
+
+ // NOTE: If multiple rules apply to the same argument (same index) the
+ // action is triggered if any one of the rules matches (OR). If not, then
+ // all rules must match in order to trigger the action (AND). This appears to
+ // be some kind of legacy behavior of runc that nevertheless needs to be
+ // supported to maintain compatibility.
+
+ hasMultipleArgs := false
+ for _, count := range argCounts {
+ if count > 1 {
+ hasMultipleArgs = true
+ break
+ }
+ }
+
+ if hasMultipleArgs {
+ rules := []seccomp.Rule{}
+
+ // Old runc behavior - do this for compatibility.
+ // Add rules as ORs by adding separate Rules.
+ for _, arg := range args {
+ rule := seccomp.Rule{nil, nil, nil, nil, nil, nil}
+
+ if err := convertRule(arg, &rule); err != nil {
+ return nil, err
+ }
+
+ rules = append(rules, rule)
+ }
+
+ return rules, nil
+ }
+
+ // Add rules as ANDs by adding to the same Rule.
+ rule := seccomp.Rule{nil, nil, nil, nil, nil, nil}
+ for _, arg := range args {
+ if err := convertRule(arg, &rule); err != nil {
+ return nil, err
+ }
+ }
+
+ return []seccomp.Rule{rule}, nil
+}
+
+// convertRule converts a single OCI seccomp argument condition and sets it on
+// the given rule.
+func convertRule(arg specs.LinuxSeccompArg, rule *seccomp.Rule) error {
+ switch arg.Op {
+ case specs.OpEqualTo:
+ rule[arg.Index] = seccomp.EqualTo(arg.Value)
+ case specs.OpNotEqual:
+ rule[arg.Index] = seccomp.NotEqual(arg.Value)
+ case specs.OpGreaterThan:
+ rule[arg.Index] = seccomp.GreaterThan(arg.Value)
+ case specs.OpGreaterEqual:
+ rule[arg.Index] = seccomp.GreaterThanOrEqual(arg.Value)
+ case specs.OpLessThan:
+ rule[arg.Index] = seccomp.LessThan(arg.Value)
+ case specs.OpLessEqual:
+ rule[arg.Index] = seccomp.LessThanOrEqual(arg.Value)
+ case specs.OpMaskedEqual:
+ rule[arg.Index] = seccomp.MaskedEqual(uintptr(arg.Value), uintptr(arg.ValueTwo))
+ default:
+ return fmt.Errorf("unsupported operand: %q", arg.Op)
+ }
+ return nil
+}
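
To make the OR/AND behavior in convertArgs concrete, a sketch using the same operators and values as the tests below (not part of this change):

package example

import (
	"syscall"

	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// Two conditions on the same argument index are OR'ed: convertArgs emits one
// Rule per condition, so clone(2) matches if arg0 equals CLONE_FS or CLONE_VM.
var orArgs = []specs.LinuxSeccompArg{
	{Index: 0, Value: syscall.CLONE_FS, Op: specs.OpEqualTo},
	{Index: 0, Value: syscall.CLONE_VM, Op: specs.OpEqualTo},
}

// Conditions on different indexes are AND'ed into a single Rule: getsockopt(2)
// matches only when arg1 is SOL_SOCKET and arg2 is SO_PEERCRED.
var andArgs = []specs.LinuxSeccompArg{
	{Index: 1, Value: syscall.SOL_SOCKET, Op: specs.OpEqualTo},
	{Index: 2, Value: syscall.SO_PEERCRED, Op: specs.OpEqualTo},
}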
diff --git a/runsc/specutils/seccomp/seccomp_test.go b/runsc/specutils/seccomp/seccomp_test.go
new file mode 100644
index 000000000..2079cd2e9
--- /dev/null
+++ b/runsc/specutils/seccomp/seccomp_test.go
@@ -0,0 +1,414 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seccomp
+
+import (
+ "fmt"
+ "syscall"
+ "testing"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/bpf"
+)
+
+type seccompData struct {
+ nr uint32
+ arch uint32
+ instructionPointer uint64
+ args [6]uint64
+}
+
+// asInput converts a seccompData to a bpf.Input.
+func asInput(d seccompData) bpf.Input {
+ return bpf.InputBytes{binary.Marshal(nil, binary.LittleEndian, d), binary.LittleEndian}
+}
+
+// testInput creates an Input struct with given seccomp input values.
+func testInput(arch uint32, syscallName string, args *[6]uint64) bpf.Input {
+ syscallNo, err := lookupSyscallNo(arch, syscallName)
+ if err != nil {
+ // Assume tests set valid syscall names.
+ panic(err)
+ }
+
+ if args == nil {
+ argArray := [6]uint64{0, 0, 0, 0, 0, 0}
+ args = &argArray
+ }
+
+ data := seccompData{
+ nr: syscallNo,
+ arch: arch,
+ args: *args,
+ }
+
+ return asInput(data)
+}
+
+// testCase holds a seccomp test case.
+type testCase struct {
+ name string
+ config specs.LinuxSeccomp
+ input bpf.Input
+ expected uint32
+}
+
+var (
+ // seccompTests is a list of seccomp test cases.
+ seccompTests = []testCase{
+ {
+ name: "default_allow",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ },
+ input: testInput(nativeArchAuditNo, "read", nil),
+ expected: uint32(allowAction),
+ },
+ {
+ name: "default_deny",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActErrno,
+ },
+ input: testInput(nativeArchAuditNo, "read", nil),
+ expected: uint32(errnoAction),
+ },
+ {
+ name: "deny_arch",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "getcwd",
+ },
+ Action: specs.ActErrno,
+ },
+ },
+ },
+ // The syscall number matches (getcwd on i386), but the arch is
+ // AUDIT_ARCH_I386, not the native arch, so the result is the bad-arch action.
+ input: asInput(seccompData{nr: 183, arch: 0x40000003}),
+ expected: uint32(killThreadAction),
+ },
+ {
+ name: "match_name_errno",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "getcwd",
+ "chmod",
+ },
+ Action: specs.ActErrno,
+ },
+ {
+ Names: []string{
+ "write",
+ },
+ Action: specs.ActTrace,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "getcwd", nil),
+ expected: uint32(errnoAction),
+ },
+ {
+ name: "match_name_trace",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "getcwd",
+ "chmod",
+ },
+ Action: specs.ActErrno,
+ },
+ {
+ Names: []string{
+ "write",
+ },
+ Action: specs.ActTrace,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "write", nil),
+ expected: uint32(traceAction),
+ },
+ {
+ name: "no_match_name_allow",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "getcwd",
+ "chmod",
+ },
+ Action: specs.ActErrno,
+ },
+ {
+ Names: []string{
+ "write",
+ },
+ Action: specs.ActTrace,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "open", nil),
+ expected: uint32(allowAction),
+ },
+ {
+ name: "simple_match_args",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "clone",
+ },
+ Args: []specs.LinuxSeccompArg{
+ {
+ Index: 0,
+ Value: syscall.CLONE_FS,
+ Op: specs.OpEqualTo,
+ },
+ },
+ Action: specs.ActErrno,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}),
+ expected: uint32(errnoAction),
+ },
+ {
+ name: "match_args_or",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "clone",
+ },
+ Args: []specs.LinuxSeccompArg{
+ {
+ Index: 0,
+ Value: syscall.CLONE_FS,
+ Op: specs.OpEqualTo,
+ },
+ {
+ Index: 0,
+ Value: syscall.CLONE_VM,
+ Op: specs.OpEqualTo,
+ },
+ },
+ Action: specs.ActErrno,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}),
+ expected: uint32(errnoAction),
+ },
+ {
+ name: "match_args_and",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "getsockopt",
+ },
+ Args: []specs.LinuxSeccompArg{
+ {
+ Index: 1,
+ Value: syscall.SOL_SOCKET,
+ Op: specs.OpEqualTo,
+ },
+ {
+ Index: 2,
+ Value: syscall.SO_PEERCRED,
+ Op: specs.OpEqualTo,
+ },
+ },
+ Action: specs.ActErrno,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET, syscall.SO_PEERCRED}),
+ expected: uint32(errnoAction),
+ },
+ {
+ name: "no_match_args_and",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "getsockopt",
+ },
+ Args: []specs.LinuxSeccompArg{
+ {
+ Index: 1,
+ Value: syscall.SOL_SOCKET,
+ Op: specs.OpEqualTo,
+ },
+ {
+ Index: 2,
+ Value: syscall.SO_PEERCRED,
+ Op: specs.OpEqualTo,
+ },
+ },
+ Action: specs.ActErrno,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "getsockopt", &[6]uint64{0, syscall.SOL_SOCKET}),
+ expected: uint32(allowAction),
+ },
+ {
+ name: "Simple args (no match)",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "clone",
+ },
+ Args: []specs.LinuxSeccompArg{
+ {
+ Index: 0,
+ Value: syscall.CLONE_FS,
+ Op: specs.OpEqualTo,
+ },
+ },
+ Action: specs.ActErrno,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_VM}),
+ expected: uint32(allowAction),
+ },
+ {
+ name: "OpMaskedEqual (match)",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "clone",
+ },
+ Args: []specs.LinuxSeccompArg{
+ {
+ Index: 0,
+ Value: syscall.CLONE_FS,
+ ValueTwo: syscall.CLONE_FS,
+ Op: specs.OpMaskedEqual,
+ },
+ },
+ Action: specs.ActErrno,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS | syscall.CLONE_VM}),
+ expected: uint32(errnoAction),
+ },
+ {
+ name: "OpMaskedEqual (no match)",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActAllow,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "clone",
+ },
+ Args: []specs.LinuxSeccompArg{
+ {
+ Index: 0,
+ Value: syscall.CLONE_FS | syscall.CLONE_VM,
+ ValueTwo: syscall.CLONE_FS | syscall.CLONE_VM,
+ Op: specs.OpMaskedEqual,
+ },
+ },
+ Action: specs.ActErrno,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{syscall.CLONE_FS}),
+ expected: uint32(allowAction),
+ },
+ {
+ name: "OpMaskedEqual (clone)",
+ config: specs.LinuxSeccomp{
+ DefaultAction: specs.ActErrno,
+ Syscalls: []specs.LinuxSyscall{
+ {
+ Names: []string{
+ "clone",
+ },
+ // This comes from the Docker default seccomp
+ // profile for clone.
+ Args: []specs.LinuxSeccompArg{
+ {
+ Index: 0,
+ Value: 0x7e020000,
+ ValueTwo: 0x0,
+ Op: specs.OpMaskedEqual,
+ },
+ },
+ Action: specs.ActAllow,
+ },
+ },
+ },
+ input: testInput(nativeArchAuditNo, "clone", &[6]uint64{0x50f00}),
+ expected: uint32(allowAction),
+ },
+ }
+)
+
+// TestRunscSeccomp generates seccomp programs from OCI config and executes
+// them using runsc's library, comparing against expected results.
+func TestRunscSeccomp(t *testing.T) {
+ for _, tc := range seccompTests {
+ t.Run(tc.name, func(t *testing.T) {
+ runscProgram, err := BuildProgram(&tc.config)
+ if err != nil {
+ t.Fatalf("generating runsc BPF: %v", err)
+ }
+
+ if err := checkProgram(runscProgram, tc.input, tc.expected); err != nil {
+ t.Fatalf("running runsc BPF: %v", err)
+ }
+ })
+ }
+}
+
+// checkProgram runs the given program over the given input and checks the
+// result against the expected output.
+func checkProgram(p bpf.Program, in bpf.Input, expected uint32) error {
+ result, err := bpf.Exec(p, in)
+ if err != nil {
+ return err
+ }
+
+ if result != expected {
+ // Include a decoded version of the program in output for debugging purposes.
+ decoded, _ := bpf.DecodeProgram(p)
+ return fmt.Errorf("Unexpected result: got: %d, expected: %d\nBPF Program\n%s", result, expected, decoded)
+ }
+
+ return nil
+}
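
With the BUILD target above, the new tests can be run from a gVisor checkout (a usage note, assuming the standard Bazel setup; not part of this change):

	bazel test //runsc/specutils/seccomp:seccomp_test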
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index a2275398a..0392e3e83 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -111,11 +111,6 @@ func ValidateSpec(spec *specs.Spec) error {
log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.")
}
- // TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox.
- if spec.Linux != nil && spec.Linux.Seccomp != nil {
- log.Warningf("Seccomp spec is being ignored")
- }
-
if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
if err := validateRootfsPropagation(spec.Linux.RootfsPropagation); err != nil {
return err