diff options
author | Googler <noreply@google.com> | 2018-04-27 10:37:02 -0700 |
---|---|---|
committer | Adin Scannell <ascannell@google.com> | 2018-04-28 01:44:26 -0400 |
commit | d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch) | |
tree | 54f95eef73aee6bacbfc736fffc631be2605ed53 /pkg/seccomp | |
parent | f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff) |
Check in gVisor.
PiperOrigin-RevId: 194583126
Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'pkg/seccomp')
-rw-r--r-- | pkg/seccomp/BUILD | 48 | ||||
-rw-r--r-- | pkg/seccomp/seccomp.go | 210 | ||||
-rw-r--r-- | pkg/seccomp/seccomp_test.go | 268 | ||||
-rw-r--r-- | pkg/seccomp/seccomp_test_victim.go | 112 | ||||
-rw-r--r-- | pkg/seccomp/seccomp_unsafe.go | 49 |
5 files changed, 687 insertions, 0 deletions
diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD new file mode 100644 index 000000000..1e19b1d25 --- /dev/null +++ b/pkg/seccomp/BUILD @@ -0,0 +1,48 @@ +package(licenses = ["notice"]) # Apache 2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_embed_data", "go_library", "go_test") + +go_binary( + name = "victim", + testonly = 1, + srcs = ["seccomp_test_victim.go"], + deps = [":seccomp"], +) + +go_embed_data( + name = "victim_data", + testonly = 1, + src = "victim", + package = "seccomp", + var = "victimData", +) + +go_library( + name = "seccomp", + srcs = [ + "seccomp.go", + "seccomp_unsafe.go", + ], + importpath = "gvisor.googlesource.com/gvisor/pkg/seccomp", + visibility = ["//visibility:public"], + deps = [ + "//pkg/abi/linux", + "//pkg/bpf", + "//pkg/log", + ], +) + +go_test( + name = "seccomp_test", + size = "small", + srcs = [ + "seccomp_test.go", + ":victim_data", + ], + embed = [":seccomp"], + deps = [ + "//pkg/abi/linux", + "//pkg/binary", + "//pkg/bpf", + ], +) diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go new file mode 100644 index 000000000..7ee63140c --- /dev/null +++ b/pkg/seccomp/seccomp.go @@ -0,0 +1,210 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package seccomp provides basic seccomp filters. 
+package seccomp + +import ( + "fmt" + "sort" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/bpf" + "gvisor.googlesource.com/gvisor/pkg/log" +) + +const ( + // violationLabel is added to the program to take action on a violation. + violationLabel = "violation" + + // allowLabel is added to the program to allow the syscall to take place. + allowLabel = "allow" +) + +// Install generates BPF code based on the set of syscalls provided. It only +// allows syscalls that conform to the specification (*) and generates SIGSYS +// trap unless kill is set. +// +// (*) The current implementation only checks the syscall number. It does NOT +// validate any of the arguments. +func Install(syscalls []uintptr, kill bool) error { + // Sort syscalls and remove duplicates to build the BST. + sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] }) + syscalls = filterUnique(syscalls) + + log.Infof("Installing seccomp filters for %d syscalls (kill=%t)", len(syscalls), kill) + for _, s := range syscalls { + log.Infof("syscall filter: %v", s) + } + + instrs, err := buildProgram(syscalls, kill) + if err != nil { + return err + } + if log.IsLogging(log.Debug) { + programStr, err := bpf.DecodeProgram(instrs) + if err != nil { + programStr = fmt.Sprintf("Error: %v\n%s", err, programStr) + } + log.Debugf("Seccomp program dump:\n%s", programStr) + } + + if err := seccomp(instrs); err != nil { + return err + } + + log.Infof("Seccomp filters installed.") + return nil +} + +// buildProgram builds a BPF program that whitelists all given syscalls. +// +// Precondition: syscalls must be sorted and unique. 
+func buildProgram(syscalls []uintptr, kill bool) ([]linux.BPFInstruction, error) { + const archOffset = 4 // offsetof(seccomp_data, arch) + program := bpf.NewProgramBuilder() + violationAction := uint32(linux.SECCOMP_RET_KILL) + if !kill { + violationAction = linux.SECCOMP_RET_TRAP + } + + // Be paranoid and check that syscall is done in the expected architecture. + // + // A = seccomp_data.arch + // if (A != AUDIT_ARCH_X86_64) goto violation + program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, archOffset) + program.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, linux.AUDIT_ARCH_X86_64, 0, violationLabel) + + if err := buildIndex(syscalls, program); err != nil { + return nil, err + } + + // violation: return violationAction + if err := program.AddLabel(violationLabel); err != nil { + return nil, err + } + program.AddStmt(bpf.Ret|bpf.K, violationAction) + + // allow: return SECCOMP_RET_ALLOW + if err := program.AddLabel(allowLabel); err != nil { + return nil, err + } + program.AddStmt(bpf.Ret|bpf.K, linux.SECCOMP_RET_ALLOW) + + return program.Instructions() +} + +// filterUnique filters unique system calls. +// +// Precondition: syscalls must be sorted. +func filterUnique(syscalls []uintptr) []uintptr { + filtered := make([]uintptr, 0, len(syscalls)) + for i := 0; i < len(syscalls); i++ { + if len(filtered) > 0 && syscalls[i] == filtered[len(filtered)-1] { + // This call has already been inserted, skip. + continue + } + filtered = append(filtered, syscalls[i]) + } + return filtered +} + +// buildIndex builds a BST to quickly search through all syscalls that are whitelisted. +// +// Precondition: syscalls must be sorted and unique. +func buildIndex(syscalls []uintptr, program *bpf.ProgramBuilder) error { + root := createBST(syscalls) + + // Load syscall number into A and run through BST. + // + // A = seccomp_data.nr + program.AddStmt(bpf.Ld|bpf.Abs|bpf.W, 0) + return root.buildBSTProgram(program, true) +} + +// createBST converts sorted syscall slice into a balanced BST. 
+// Panics if syscalls is empty. +func createBST(syscalls []uintptr) *node { + i := len(syscalls) / 2 + parent := node{value: syscalls[i]} + if i > 0 { + parent.left = createBST(syscalls[:i]) + } + if i+1 < len(syscalls) { + parent.right = createBST(syscalls[i+1:]) + } + return &parent +} + +// node represents a tree node. +type node struct { + value uintptr + left *node + right *node +} + +// label returns the label corresponding to this node. If node is nil (syscall not present), +// violationLabel is returned for convenience. +func (n *node) label() string { + if n == nil { + return violationLabel + } + return fmt.Sprintf("index_%v", n.value) +} + +// buildBSTProgram converts a binary tree started in 'root' into BPF code. The outline of the code +// is as follows: +// +// // SYS_PIPE(22), root +// (A == 22) ? goto allow : continue +// (A > 22) ? goto index_35 : goto index_9 +// +// index_9: // SYS_MMAP(9), leaf +// (A == 9) ? goto allow : goto violation +// +// index_35: // SYS_NANOSLEEP(35), single child +// (A == 35) ? goto allow : continue +// (A > 35) ? goto index_50 : goto violation +// +// index_50: // SYS_LISTEN(50), leaf +// (A == 50) ? goto allow : goto violation +// +func (n *node) buildBSTProgram(program *bpf.ProgramBuilder, root bool) error { + if n == nil { + return nil + } + + // Root node is never referenced by label, skip it. + if !root { + if err := program.AddLabel(n.label()); err != nil { + return err + } + } + + // Leaf nodes don't require extra check, they either allow or violate! + if n.left == nil && n.right == nil { + program.AddJumpLabels(bpf.Jmp|bpf.Jeq|bpf.K, uint32(n.value), allowLabel, violationLabel) + return nil + } + + // Non-leaf node. Allows syscall if it matches, check which turn to take otherwise. Note + // that 'violationLabel' is returned for nil children. 
+ program.AddJumpTrueLabel(bpf.Jmp|bpf.Jeq|bpf.K, uint32(n.value), allowLabel, 0) + program.AddJumpLabels(bpf.Jmp|bpf.Jgt|bpf.K, uint32(n.value), n.right.label(), n.left.label()) + + if err := n.left.buildBSTProgram(program, false); err != nil { + return err + } + return n.right.buildBSTProgram(program, false) +} diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go new file mode 100644 index 000000000..c700d88d6 --- /dev/null +++ b/pkg/seccomp/seccomp_test.go @@ -0,0 +1,268 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package seccomp + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "math/rand" + "os" + "os/exec" + "sort" + "strings" + "testing" + "time" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/binary" + "gvisor.googlesource.com/gvisor/pkg/bpf" +) + +type seccompData struct { + nr uint32 + arch uint32 + instructionPointer uint64 + args [6]uint64 +} + +// newVictim makes a victim binary. +func newVictim() (string, error) { + f, err := ioutil.TempFile("", "victim") + if err != nil { + return "", err + } + defer f.Close() + path := f.Name() + if _, err := io.Copy(f, bytes.NewBuffer(victimData)); err != nil { + os.Remove(path) + return "", err + } + if err := os.Chmod(path, 0755); err != nil { + os.Remove(path) + return "", err + } + return path, nil +} + +// asInput converts a seccompData to a bpf.Input. 
+func (d *seccompData) asInput() bpf.Input { + return bpf.InputBytes{binary.Marshal(nil, binary.LittleEndian, d), binary.LittleEndian} +} + +func TestBasic(t *testing.T) { + type spec struct { + // desc is the test's description. + desc string + + // data is the input data. + data seccompData + + // want is the expected return value of the BPF program. + want uint32 + } + + for _, test := range []struct { + // filters are the set of syscall that are allowed. + filters []uintptr + kill bool + specs []spec + }{ + { + filters: []uintptr{1}, + kill: false, + specs: []spec{ + { + desc: "Single syscall allowed", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "Single syscall disallowed", + data: seccompData{nr: 2, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + filters: []uintptr{1, 3, 5}, + kill: false, + specs: []spec{ + { + desc: "Multiple syscalls allowed (1)", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "Multiple syscalls allowed (3)", + data: seccompData{nr: 3, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "Multiple syscalls allowed (5)", + data: seccompData{nr: 5, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "Multiple syscalls disallowed (0)", + data: seccompData{nr: 0, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "Multiple syscalls disallowed (2)", + data: seccompData{nr: 2, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "Multiple syscalls disallowed (4)", + data: seccompData{nr: 4, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "Multiple syscalls disallowed (6)", + data: seccompData{nr: 6, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_TRAP, + }, + { + desc: "Multiple syscalls disallowed (100)", + data: 
seccompData{nr: 100, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + filters: []uintptr{1}, + kill: false, + specs: []spec{ + { + desc: "Wrong architecture", + data: seccompData{nr: 1, arch: 123}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, + { + filters: []uintptr{1}, + kill: true, + specs: []spec{ + { + desc: "Syscall disallowed, action kill", + data: seccompData{nr: 2, arch: linux.AUDIT_ARCH_X86_64}, + want: linux.SECCOMP_RET_KILL, + }, + }, + }, + } { + sort.Slice(test.filters, func(i, j int) bool { return test.filters[i] < test.filters[j] }) + instrs, err := buildProgram(test.filters, test.kill) + if err != nil { + t.Errorf("%s: buildProgram() got error: %v", test.specs[0].desc, err) + continue + } + p, err := bpf.Compile(instrs) + if err != nil { + t.Errorf("%s: bpf.Compile() got error: %v", test.specs[0].desc, err) + continue + } + for _, spec := range test.specs { + got, err := bpf.Exec(p, spec.data.asInput()) + if err != nil { + t.Errorf("%s: bpf.Exec() got error: %v", spec.desc, err) + continue + } + if got != spec.want { + t.Errorf("%s: bpd.Exec() = %d, want: %d", spec.desc, got, spec.want) + } + } + } +} + +func TestRandom(t *testing.T) { + rand.Seed(time.Now().UnixNano()) + size := rand.Intn(50) + 1 + syscalls := make([]uintptr, 0, size) + syscallMap := make(map[uintptr]struct{}) + for len(syscalls) < size { + n := uintptr(rand.Intn(200)) + if _, ok := syscallMap[n]; !ok { + syscalls = append(syscalls, n) + syscallMap[n] = struct{}{} + } + } + + sort.Slice(syscalls, func(i, j int) bool { return syscalls[i] < syscalls[j] }) + fmt.Printf("Testing filters: %v", syscalls) + instrs, err := buildProgram(syscalls, false) + if err != nil { + t.Fatalf("buildProgram() got error: %v", err) + } + p, err := bpf.Compile(instrs) + if err != nil { + t.Fatalf("bpf.Compile() got error: %v", err) + } + for i := uint32(0); i < 200; i++ { + data := seccompData{nr: i, arch: linux.AUDIT_ARCH_X86_64} + got, err := bpf.Exec(p, 
data.asInput()) + if err != nil { + t.Errorf("bpf.Exec() got error: %v, for syscall %d", err, i) + continue + } + want := uint32(linux.SECCOMP_RET_TRAP) + if _, ok := syscallMap[uintptr(i)]; ok { + want = linux.SECCOMP_RET_ALLOW + } + if got != want { + t.Errorf("bpf.Exec() = %d, want: %d, for syscall %d", got, want, i) + } + } +} + +// TestRealDeal checks that a process dies when it trips over the filter and that it +// doesn't die when the filter is not triggered. +func TestRealDeal(t *testing.T) { + for _, test := range []struct { + die bool + want string + }{ + {die: true, want: "bad system call"}, + {die: false, want: "Syscall was allowed!!!"}, + } { + victim, err := newVictim() + if err != nil { + t.Fatalf("unable to get victim: %v", err) + } + defer os.Remove(victim) + dieFlag := fmt.Sprintf("-die=%v", test.die) + cmd := exec.Command(victim, dieFlag) + + out, err := cmd.CombinedOutput() + if test.die { + if err == nil { + t.Errorf("victim was not killed as expected, output: %s", out) + continue + } + } else { + if err != nil { + t.Errorf("victim failed to execute, err: %v", err) + continue + } + } + if !strings.Contains(string(out), test.want) { + t.Errorf("Victim output is wrong, got: %v, want: %v", err, test.want) + continue + } + } +} diff --git a/pkg/seccomp/seccomp_test_victim.go b/pkg/seccomp/seccomp_test_victim.go new file mode 100644 index 000000000..fe3f96901 --- /dev/null +++ b/pkg/seccomp/seccomp_test_victim.go @@ -0,0 +1,112 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Test binary used to test that seccomp filters are properly constructed and +// indeed kill the process on violation. +package main + +import ( + "flag" + "fmt" + "os" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/seccomp" +) + +func main() { + dieFlag := flag.Bool("die", false, "trips over the filter if true") + flag.Parse() + + syscalls := []uintptr{ + syscall.SYS_ACCEPT, + syscall.SYS_ARCH_PRCTL, + syscall.SYS_BIND, + syscall.SYS_BRK, + syscall.SYS_CLOCK_GETTIME, + syscall.SYS_CLONE, + syscall.SYS_CLOSE, + syscall.SYS_DUP, + syscall.SYS_DUP2, + syscall.SYS_EPOLL_CREATE1, + syscall.SYS_EPOLL_CTL, + syscall.SYS_EPOLL_WAIT, + syscall.SYS_EXIT, + syscall.SYS_EXIT_GROUP, + syscall.SYS_FALLOCATE, + syscall.SYS_FCHMOD, + syscall.SYS_FCNTL, + syscall.SYS_FSTAT, + syscall.SYS_FSYNC, + syscall.SYS_FTRUNCATE, + syscall.SYS_FUTEX, + syscall.SYS_GETDENTS64, + syscall.SYS_GETPEERNAME, + syscall.SYS_GETPID, + syscall.SYS_GETSOCKNAME, + syscall.SYS_GETSOCKOPT, + syscall.SYS_GETTID, + syscall.SYS_GETTIMEOFDAY, + syscall.SYS_LISTEN, + syscall.SYS_LSEEK, + syscall.SYS_MADVISE, + syscall.SYS_MINCORE, + syscall.SYS_MMAP, + syscall.SYS_MPROTECT, + syscall.SYS_MUNLOCK, + syscall.SYS_MUNMAP, + syscall.SYS_NANOSLEEP, + syscall.SYS_NEWFSTATAT, + syscall.SYS_OPEN, + syscall.SYS_POLL, + syscall.SYS_PREAD64, + syscall.SYS_PSELECT6, + syscall.SYS_PWRITE64, + syscall.SYS_READ, + syscall.SYS_READLINKAT, + syscall.SYS_READV, + syscall.SYS_RECVMSG, + syscall.SYS_RENAMEAT, + syscall.SYS_RESTART_SYSCALL, + syscall.SYS_RT_SIGACTION, + syscall.SYS_RT_SIGPROCMASK, + syscall.SYS_RT_SIGRETURN, + syscall.SYS_SCHED_YIELD, + syscall.SYS_SENDMSG, + syscall.SYS_SETITIMER, + syscall.SYS_SET_ROBUST_LIST, + syscall.SYS_SETSOCKOPT, + syscall.SYS_SHUTDOWN, + syscall.SYS_SIGALTSTACK, + syscall.SYS_SOCKET, + syscall.SYS_SYNC_FILE_RANGE, + syscall.SYS_TGKILL, + syscall.SYS_UTIMENSAT, + 
syscall.SYS_WRITE, + syscall.SYS_WRITEV, + } + die := *dieFlag + if !die { + syscalls = append(syscalls, syscall.SYS_OPENAT) + } + + if err := seccomp.Install(syscalls, false); err != nil { + fmt.Printf("Failed to install seccomp: %v", err) + os.Exit(1) + } + fmt.Printf("Filters installed\n") + + syscall.RawSyscall(syscall.SYS_OPENAT, 0, 0, 0) + fmt.Printf("Syscall was allowed!!!\n") +} diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go new file mode 100644 index 000000000..6682f8d9b --- /dev/null +++ b/pkg/seccomp/seccomp_unsafe.go @@ -0,0 +1,49 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package seccomp + +import ( + "fmt" + "syscall" + "unsafe" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" +) + +// sockFprog is sock_fprog taken from <linux/filter.h>. +type sockFprog struct { + Len uint16 + pad [6]byte + Filter *linux.BPFInstruction +} + +func seccomp(instrs []linux.BPFInstruction) error { + // SYS_SECCOMP is not available in syscall package. + const SYS_SECCOMP = 317 + + // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See seccomp(2) for details. 
+ if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0); err != 0 { + return fmt.Errorf("failed to set PR_SET_NO_NEW_PRIVS: %v", err) + } + sockProg := sockFprog{Len: uint16(len(instrs)), Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0]))} + + // TODO: Use SECCOMP_FILTER_FLAG_KILL_PROCESS when available. + if _, _, err := syscall.RawSyscall(SYS_SECCOMP, linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, uintptr(unsafe.Pointer(&sockProg))); err != 0 { + return fmt.Errorf("failed to set seccomp filter: %v", err) + } + return nil +} |