| author | Brian Geffon <bgeffon@google.com> | 2018-08-02 08:09:03 -0700 |
|---|---|---|
| committer | Shentubot <shentubot@google.com> | 2018-08-02 08:10:30 -0700 |
| commit | cf44aff6e08b0e19935d5cd98455b4af98fd8794 (patch) | |
| tree | b4c95523871f54a8ec739a426bb0cc84f7f11b48 /pkg/sentry/kernel/seccomp.go | |
| parent | 3cd7824410302da00d1c8c8323db8959a124814a (diff) | |
Add seccomp(2) support.
Add support for the seccomp syscall and the flag SECCOMP_FILTER_FLAG_TSYNC.
PiperOrigin-RevId: 207101507
Change-Id: I5eb8ba9d5ef71b0e683930a6429182726dc23175
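For context, SECCOMP_FILTER_FLAG_TSYNC asks the kernel (here, the sentry) to apply the newly installed filter to every thread in the calling thread group rather than only the calling thread. The sketch below is purely illustrative and is not part of this change: it shows roughly how a sandboxed Go program might exercise this path, assuming linux/amd64 and the golang.org/x/sys/unix package.

```go
package main

import (
	"fmt"
	"unsafe"

	"golang.org/x/sys/unix"
)

func main() {
	// A minimal classic-BPF filter: allow every syscall. A real filter
	// would also check seccomp_data.arch and the syscall number.
	filter := []unix.SockFilter{
		{Code: unix.BPF_RET | unix.BPF_K, K: unix.SECCOMP_RET_ALLOW},
	}
	prog := unix.SockFprog{
		Len:    uint16(len(filter)),
		Filter: &filter[0],
	}

	// Installing a filter without CAP_SYS_ADMIN requires no_new_privs.
	if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
		fmt.Println("prctl:", err)
		return
	}

	// seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog):
	// TSYNC asks the kernel to synchronize the filter to every thread in
	// the thread group, not just the calling thread.
	if _, _, errno := unix.Syscall(
		unix.SYS_SECCOMP,
		unix.SECCOMP_SET_MODE_FILTER,
		unix.SECCOMP_FILTER_FLAG_TSYNC,
		uintptr(unsafe.Pointer(&prog)),
	); errno != 0 {
		fmt.Println("seccomp:", errno)
		return
	}
	fmt.Println("filter installed and synced to the whole thread group")
}
```

The sentry-side half of that TSYNC behaviour is the new SyncSyscallFiltersToThreadGroup helper added in the diff below, which copies the caller's filter list to every other task in the thread group.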
Diffstat (limited to 'pkg/sentry/kernel/seccomp.go')
-rw-r--r-- | pkg/sentry/kernel/seccomp.go | 62
1 file changed, 52 insertions, 10 deletions
diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go
index b7c4a507f..d77c05e2f 100644
--- a/pkg/sentry/kernel/seccomp.go
+++ b/pkg/sentry/kernel/seccomp.go
@@ -144,10 +144,15 @@ func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, i
 	input := data.asBPFInput()

 	ret := uint32(linux.SECCOMP_RET_ALLOW)
+	f := t.syscallFilters.Load()
+	if f == nil {
+		return ret
+	}
+
 	// "Every filter successfully installed will be evaluated (in reverse
 	// order) for each system call the task makes." - kernel/seccomp.c
-	for i := len(t.syscallFilters) - 1; i >= 0; i-- {
-		thisRet, err := bpf.Exec(t.syscallFilters[i], input)
+	for i := len(f.([]bpf.Program)) - 1; i >= 0; i-- {
+		thisRet, err := bpf.Exec(f.([]bpf.Program)[i], input)
 		if err != nil {
 			t.Debugf("seccomp-bpf filter %d returned error: %v", i, err)
 			thisRet = linux.SECCOMP_RET_KILL
@@ -180,15 +185,53 @@ func (t *Task) AppendSyscallFilter(p bpf.Program) error {
 	// maxSyscallFilterInstructions. (This restriction is inherited from
 	// Linux.)
 	totalLength := p.Length()
-	for _, f := range t.syscallFilters {
-		totalLength += f.Length() + 4
+	var newFilters []bpf.Program
+
+	// While syscallFilters are an atomic.Value we must take the mutex to
+	// prevent our read-copy-update from happening while another task
+	// is syncing syscall filters to us, this keeps the filters in a
+	// consistent state.
+	t.mu.Lock()
+	defer t.mu.Unlock()
+	if sf := t.syscallFilters.Load(); sf != nil {
+		oldFilters := sf.([]bpf.Program)
+		for _, f := range oldFilters {
+			totalLength += f.Length() + 4
+		}
+		newFilters = append(newFilters, oldFilters...)
 	}
+
 	if totalLength > maxSyscallFilterInstructions {
 		return syserror.ENOMEM
 	}
-	t.mu.Lock()
-	defer t.mu.Unlock()
-	t.syscallFilters = append(t.syscallFilters, p)
+
+	newFilters = append(newFilters, p)
+	t.syscallFilters.Store(newFilters)
+	return nil
+}
+
+// SyncSyscallFiltersToThreadGroup will copy this task's filters to all other
+// threads in our thread group.
+func (t *Task) SyncSyscallFiltersToThreadGroup() error {
+	f := t.syscallFilters.Load()
+
+	t.tg.pidns.owner.mu.RLock()
+	defer t.tg.pidns.owner.mu.RUnlock()
+
+	// Note: No new privs is always assumed to be set.
+	for ot := t.tg.tasks.Front(); ot != nil; ot = ot.Next() {
+		if ot.ThreadID() != t.ThreadID() {
+			// We must take the other task's mutex to prevent it from
+			// appending to its own syscall filters while we're syncing.
+			ot.mu.Lock()
+			var copiedFilters []bpf.Program
+			if f != nil {
+				copiedFilters = append(copiedFilters, f.([]bpf.Program)...)
+			}
+			ot.syscallFilters.Store(copiedFilters)
+			ot.mu.Unlock()
+		}
+	}
 	return nil
 }

@@ -196,9 +239,8 @@ func (t *Task) AppendSyscallFilter(p bpf.Program) error {
 // seccomp syscall filtering mode, appropriate for both prctl(PR_GET_SECCOMP)
 // and /proc/[pid]/status.
 func (t *Task) SeccompMode() int {
-	t.mu.Lock()
-	defer t.mu.Unlock()
-	if len(t.syscallFilters) > 0 {
+	f := t.syscallFilters.Load()
+	if f != nil && len(f.([]bpf.Program)) > 0 {
 		return linux.SECCOMP_MODE_FILTER
 	}
 	return linux.SECCOMP_MODE_NONE
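The core of the diff above is a read-copy-update pattern: t.syscallFilters becomes an atomic.Value holding an immutable []bpf.Program, so the per-syscall hot path (evaluateSyscallFilters, SeccompMode) can Load() the slice without taking t.mu, while writers (AppendSyscallFilter and the TSYNC sync) lock the mutex, build a fresh slice, and Store() it. Below is a standalone sketch of the same pattern using hypothetical Filter and Task types rather than the sentry's own; it is an illustration, not code from this change.

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// Filter stands in for bpf.Program in this sketch.
type Filter struct{ name string }

// Task stands in for the sentry's Task type.
type Task struct {
	mu      sync.Mutex   // serializes writers only
	filters atomic.Value // holds an immutable []Filter
}

// loadFilters is the lock-free read used on the hot path.
func (t *Task) loadFilters() []Filter {
	if f := t.filters.Load(); f != nil {
		return f.([]Filter)
	}
	return nil
}

// appendFilter is the read-copy-update write: copy the current slice
// under the mutex, append, and publish the new slice atomically.
func (t *Task) appendFilter(f Filter) {
	t.mu.Lock()
	defer t.mu.Unlock()
	old := t.loadFilters()
	newFilters := make([]Filter, 0, len(old)+1)
	newFilters = append(newFilters, old...)
	newFilters = append(newFilters, f)
	t.filters.Store(newFilters)
}

func main() {
	t := &Task{}
	t.appendFilter(Filter{name: "first"})
	t.appendFilter(Filter{name: "second"})

	// Filters are evaluated most-recently-installed first, mirroring
	// the reverse-order walk in evaluateSyscallFilters.
	fs := t.loadFilters()
	for i := len(fs) - 1; i >= 0; i-- {
		fmt.Println("evaluating", fs[i].name)
	}
}
```

Because every write publishes a brand-new slice, a reader that loaded the old value keeps iterating over a consistent snapshot even while another thread appends a filter or, in the TSYNC case, wholesale replaces the filter list.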