From d29e59af9fbd420e34378bcbf7ae543134070217 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 27 Jan 2020 10:04:07 -0800 Subject: Standardize on tools directory. PiperOrigin-RevId: 291745021 --- runsc/specutils/BUILD | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'runsc/specutils') diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD index 205638803..4ccd77f63 100644 --- a/runsc/specutils/BUILD +++ b/runsc/specutils/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +10,6 @@ go_library( "namespace.go", "specutils.go", ], - importpath = "gvisor.dev/gvisor/runsc/specutils", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", @@ -28,6 +27,6 @@ go_test( name = "specutils_test", size = "small", srcs = ["specutils_test.go"], - embed = [":specutils"], + library = ":specutils", deps = ["@com_github_opencontainers_runtime-spec//specs-go:go_default_library"], ) -- cgit v1.2.3 From f2e4b5ab932a3816e4957171b303db645fd04a94 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 12 Mar 2020 12:31:16 -0700 Subject: Kill sandbox process when parent process terminates When the sandbox runs in attached more, e.g. runsc do, runsc run, the sandbox lifetime is controlled by the parent process. This wasn't working in all cases because PR_GET_PDEATHSIG doesn't propagate through execve when the process changes uid/gid. So it was getting dropped when the sandbox execve's to change to user nobody. PiperOrigin-RevId: 300601247 --- runsc/cmd/boot.go | 75 +++++++++++++------- runsc/container/container_test.go | 17 ----- runsc/sandbox/sandbox.go | 12 ++-- runsc/specutils/namespace.go | 3 + runsc/testutil/testutil.go | 23 +++--- test/root/BUILD | 3 + test/root/runsc_test.go | 146 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 221 insertions(+), 58 deletions(-) create mode 100644 test/root/runsc_test.go (limited to 'runsc/specutils') diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 0f3da69a0..0938944a6 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -23,6 +23,7 @@ import ( "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" @@ -82,8 +83,13 @@ type Boot struct { // sandbox (e.g. gofer) and sent through this FD. mountsFD int - // pidns is set if the sanadbox is in its own pid namespace. + // pidns is set if the sandbox is in its own pid namespace. pidns bool + + // attached is set to true to kill the sandbox process when the parent process + // terminates. This flag is set when the command execve's itself because + // parent death signal doesn't propagate through execve when uid/gid changes. + attached bool } // Name implements subcommands.Command.Name. @@ -118,6 +124,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) { f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup") f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).") + f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates") } // Execute implements subcommands.Command.Execute. It starts a sandbox in a @@ -133,29 +140,32 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) conf := args[0].(*boot.Config) + if b.attached { + // Ensure this process is killed after parent process terminates when + // attached mode is enabled. In the unfortunate event that the parent + // terminates before this point, this process leaks. + if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil { + Fatalf("error setting parent death signal: %v", err) + } + } + if b.setUpRoot { if err := setUpChroot(b.pidns); err != nil { Fatalf("error setting up chroot: %v", err) } - if !b.applyCaps { - // Remove --setup-root arg to call myself. - var args []string - for _, arg := range os.Args { - if !strings.Contains(arg, "setup-root") { - args = append(args, arg) - } - } - if !conf.Rootless { - // Note that we've already read the spec from the spec FD, and - // we will read it again after the exec call. This works - // because the ReadSpecFromFile function seeks to the beginning - // of the file before reading. - if err := callSelfAsNobody(args); err != nil { - Fatalf("%v", err) - } - panic("callSelfAsNobody must never return success") + if !b.applyCaps && !conf.Rootless { + // Remove --apply-caps arg to call myself. It has already been done. + args := prepareArgs(b.attached, "setup-root") + + // Note that we've already read the spec from the spec FD, and + // we will read it again after the exec call. This works + // because the ReadSpecFromFile function seeks to the beginning + // of the file before reading. + if err := callSelfAsNobody(args); err != nil { + Fatalf("%v", err) } + panic("callSelfAsNobody must never return success") } } @@ -181,13 +191,9 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) caps.Permitted = append(caps.Permitted, c) } - // Remove --apply-caps arg to call myself. - var args []string - for _, arg := range os.Args { - if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") { - args = append(args, arg) - } - } + // Remove --apply-caps and --setup-root arg to call myself. Both have + // already been done. + args := prepareArgs(b.attached, "setup-root", "apply-caps") // Note that we've already read the spec from the spec FD, and // we will read it again after the exec call. This works @@ -258,3 +264,22 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) l.Destroy() return subcommands.ExitSuccess } + +func prepareArgs(attached bool, exclude ...string) []string { + var args []string + for _, arg := range os.Args { + for _, excl := range exclude { + if strings.Contains(arg, excl) { + goto skip + } + } + args = append(args, arg) + if attached && arg == "boot" { + // Strategicaly place "--attached" after the command. This is needed + // to ensure the new process is killed when the parent process terminates. + args = append(args, "--attached") + } + skip: + } + return args +} diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index c7eea85b3..442e80ac0 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -124,23 +124,6 @@ func procListsEqual(got, want []*control.Process) (bool, error) { return true, nil } -// getAndCheckProcLists is similar to waitForProcessList, but does not wait and retry the -// test for equality. This is because we already confirmed that exec occurred. -func getAndCheckProcLists(cont *Container, want []*control.Process) error { - got, err := cont.Processes() - if err != nil { - return fmt.Errorf("error getting process data from container: %v", err) - } - equal, err := procListsEqual(got, want) - if err != nil { - return err - } - if equal { - return nil - } - return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want)) -} - func procListToString(pl []*control.Process) string { strs := make([]string, 0, len(pl)) for _, p := range pl { diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 6177d6aa7..8de75ae57 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -701,6 +701,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } + if args.Attached { + // Kill sandbox if parent process exits in attached mode. + cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + // Tells boot that any process it creates must have pdeathsig set. + cmd.Args = append(cmd.Args, "--attached") + } + // Add container as the last argument. cmd.Args = append(cmd.Args, s.ID) @@ -709,11 +716,6 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF log.Debugf("Donating FD %d: %q", i+3, f.Name()) } - if args.Attached { - // Kill sandbox if parent process exits in attached mode. - cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL - } - log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr) if err := specutils.StartInNS(cmd, nss); err != nil { diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index c7dd3051c..60bb7b7ee 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -252,6 +252,9 @@ func MaybeRunAsRoot() error { }, Credential: &syscall.Credential{Uid: 0, Gid: 0}, GidMappingsEnableSetgroups: false, + + // Make sure child is killed when the parent terminates. + Pdeathsig: syscall.SIGKILL, } cmd.Env = os.Environ() diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go index 92d677e71..51e487715 100644 --- a/runsc/testutil/testutil.go +++ b/runsc/testutil/testutil.go @@ -87,18 +87,19 @@ func TestConfig() *boot.Config { logDir = dir + "/" } return &boot.Config{ - Debug: true, - DebugLog: logDir, - LogFormat: "text", - DebugLogFormat: "text", - AlsoLogToStderr: true, - LogPackets: true, - Network: boot.NetworkNone, - Strace: true, - Platform: "ptrace", - FileAccess: boot.FileAccessExclusive, + Debug: true, + DebugLog: logDir, + LogFormat: "text", + DebugLogFormat: "text", + AlsoLogToStderr: true, + LogPackets: true, + Network: boot.NetworkNone, + Strace: true, + Platform: "ptrace", + FileAccess: boot.FileAccessExclusive, + NumNetworkChannels: 1, + TestOnlyAllowRunAsCurrentUserWithoutChroot: true, - NumNetworkChannels: 1, } } diff --git a/test/root/BUILD b/test/root/BUILD index 23ce2a70f..ddc9b4955 100644 --- a/test/root/BUILD +++ b/test/root/BUILD @@ -16,6 +16,7 @@ go_test( "crictl_test.go", "main_test.go", "oom_score_adj_test.go", + "runsc_test.go", ], data = [ "//runsc", @@ -37,7 +38,9 @@ go_test( "//runsc/specutils", "//runsc/testutil", "//test/root/testdata", + "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", "@com_github_syndtr_gocapability//capability:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/test/root/runsc_test.go b/test/root/runsc_test.go new file mode 100644 index 000000000..28bb60a12 --- /dev/null +++ b/test/root/runsc_test.go @@ -0,0 +1,146 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package root + +import ( + "bytes" + "fmt" + "io/ioutil" + "os/exec" + "path/filepath" + "strconv" + "strings" + "testing" + "time" + + "github.com/cenkalti/backoff" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/testutil" +) + +// TestDoKill checks that when "runsc do..." is killed, the sandbox process is +// also terminated. This ensures that parent death signal is propagate to the +// sandbox process correctly. +func TestDoKill(t *testing.T) { + // Make the sandbox process be reparented here when it's killed, so we can + // wait for it. + if err := unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0); err != nil { + t.Fatalf("prctl(PR_SET_CHILD_SUBREAPER): %v", err) + } + + cmd := exec.Command(specutils.ExePath, "do", "sleep", "10000") + buf := &bytes.Buffer{} + cmd.Stdout = buf + cmd.Stderr = buf + cmd.Start() + + var pid int + findSandbox := func() error { + var err error + pid, err = sandboxPid(cmd.Process.Pid) + if err != nil { + return &backoff.PermanentError{Err: err} + } + if pid == 0 { + return fmt.Errorf("sandbox process not found") + } + return nil + } + if err := testutil.Poll(findSandbox, 10*time.Second); err != nil { + t.Fatalf("failed to find sandbox: %v", err) + } + t.Logf("Found sandbox, pid: %d", pid) + + if err := cmd.Process.Kill(); err != nil { + t.Fatalf("failed to kill run process: %v", err) + } + cmd.Wait() + t.Logf("Parent process killed (%d). Output: %s", cmd.Process.Pid, buf.String()) + + ch := make(chan struct{}) + go func() { + defer func() { ch <- struct{}{} }() + t.Logf("Waiting for sandbox process (%d) termination", pid) + if _, err := unix.Wait4(pid, nil, 0, nil); err != nil { + t.Errorf("error waiting for sandbox process (%d): %v", pid, err) + } + }() + select { + case <-ch: + // Done + case <-time.After(5 * time.Second): + t.Fatalf("timeout waiting for sandbox process (%d) to exit", pid) + } +} + +// sandboxPid looks for the sandbox process inside the process tree starting +// from "pid". It returns 0 and no error if no sandbox process is found. It +// returns error if anything failed. +func sandboxPid(pid int) (int, error) { + cmd := exec.Command("pgrep", "-P", strconv.Itoa(pid)) + buf := &bytes.Buffer{} + cmd.Stdout = buf + if err := cmd.Start(); err != nil { + return 0, err + } + ps, err := cmd.Process.Wait() + if err != nil { + return 0, err + } + if ps.ExitCode() == 1 { + // pgrep returns 1 when no process is found. + return 0, nil + } + + var children []int + for _, line := range strings.Split(buf.String(), "\n") { + if len(line) == 0 { + continue + } + child, err := strconv.Atoi(line) + if err != nil { + return 0, err + } + + cmdline, err := ioutil.ReadFile(filepath.Join("/proc", line, "cmdline")) + if err != nil { + return 0, err + } + args := strings.SplitN(string(cmdline), "\x00", 2) + if len(args) == 0 { + return 0, fmt.Errorf("malformed cmdline file: %q", cmdline) + } + // The sandbox process has the first argument set to "runsc-sandbox". + if args[0] == "runsc-sandbox" { + return child, nil + } + + children = append(children, child) + } + + // Sandbox process wasn't found, try another level down. + for _, pid := range children { + sand, err := sandboxPid(pid) + if err != nil { + return 0, err + } + if sand != 0 { + return sand, nil + } + // Not found, continue the search. + } + return 0, nil +} -- cgit v1.2.3 From 56054fc1fb0b92cb985f96467f9059e202d8095c Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Tue, 7 Apr 2020 18:49:52 -0700 Subject: Add friendlier messages for frequently encountered errors. Issue #2270 Issue #1765 PiperOrigin-RevId: 305385436 --- runsc/boot/fs.go | 15 +++++++++++- runsc/sandbox/sandbox.go | 58 ++++++++++++++++++++++++++++++++++++++++++-- runsc/specutils/specutils.go | 5 ++++ 3 files changed, 75 insertions(+), 3 deletions(-) (limited to 'runsc/specutils') diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 0f62842ea..82cc612d2 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -824,7 +824,20 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil) if err != nil { - return fmt.Errorf("creating mount with source %q: %v", m.Source, err) + err := fmt.Errorf("creating mount with source %q: %v", m.Source, err) + // Check to see if this is a common error due to a Linux bug. + // This error is generated here in order to cause it to be + // printed to the user using Docker via 'runsc create' etc. rather + // than simply printed to the logs for the 'runsc boot' command. + // + // We check the error message string rather than type because the + // actual error types (syscall.EIO, syscall.EPIPE) are lost by file system + // implementation (e.g. p9). + // TODO(gvisor.dev/issue/1765): Remove message when bug is resolved. + if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) { + return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug")) + } + return err } // If there are submounts, we need to overlay the mount on top of a ramfs diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 3b06da98b..2d464b1bf 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -18,10 +18,12 @@ package sandbox import ( "context" "fmt" + "io" "math" "os" "os/exec" "strconv" + "strings" "syscall" "time" @@ -142,7 +144,19 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) { // Wait until the sandbox has booted. b := make([]byte, 1) if l, err := clientSyncFile.Read(b); err != nil || l != 1 { - return nil, fmt.Errorf("waiting for sandbox to start: %v", err) + err := fmt.Errorf("waiting for sandbox to start: %v", err) + // If the sandbox failed to start, it may be because the binary + // permissions were incorrect. Check the bits and return a more helpful + // error message. + // + // NOTE: The error message is checked because error types are lost over + // rpc calls. + if strings.Contains(err.Error(), io.EOF.Error()) { + if permsErr := checkBinaryPermissions(conf); permsErr != nil { + return nil, fmt.Errorf("%v: %v", err, permsErr) + } + } + return nil, err } c.Release() @@ -706,7 +720,19 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr) if err := specutils.StartInNS(cmd, nss); err != nil { - return fmt.Errorf("Sandbox: %v", err) + err := fmt.Errorf("starting sandbox: %v", err) + // If the sandbox failed to start, it may be because the binary + // permissions were incorrect. Check the bits and return a more helpful + // error message. + // + // NOTE: The error message is checked because error types are lost over + // rpc calls. + if strings.Contains(err.Error(), syscall.EACCES.Error()) { + if permsErr := checkBinaryPermissions(conf); permsErr != nil { + return fmt.Errorf("%v: %v", err, permsErr) + } + } + return err } s.child = true s.Pid = cmd.Process.Pid @@ -1169,3 +1195,31 @@ func deviceFileForPlatform(name string) (*os.File, error) { } return f, nil } + +// checkBinaryPermissions verifies that the required binary bits are set on +// the runsc executable. +func checkBinaryPermissions(conf *boot.Config) error { + // All platforms need the other exe bit + neededBits := os.FileMode(0001) + if conf.Platform == platforms.Ptrace { + // Ptrace needs the other read bit + neededBits |= os.FileMode(0004) + } + + exePath, err := os.Executable() + if err != nil { + return fmt.Errorf("getting exe path: %v", err) + } + + // Check the permissions of the runsc binary and print an error if it + // doesn't match expectations. + info, err := os.Stat(exePath) + if err != nil { + return fmt.Errorf("stat file: %v", err) + } + + if info.Mode().Perm()&neededBits != neededBits { + return fmt.Errorf(specutils.FaqErrorMsg("runsc-perms", fmt.Sprintf("%s does not have the correct permissions", exePath))) + } + return nil +} diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index d3c2e4e78..0f4a9cf6d 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -528,3 +528,8 @@ func EnvVar(env []string, name string) (string, bool) { } return "", false } + +// FaqErrorMsg returns an error message pointing to the FAQ. +func FaqErrorMsg(anchor, msg string) string { + return fmt.Sprintf("%s; see https://gvisor.dev/faq#%s for more details", msg, anchor) +} -- cgit v1.2.3 From daf3322498b698518a3c8545ad05f790deb3848c Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Fri, 10 Apr 2020 20:31:07 -0700 Subject: Add logging message for noNewPrivileges OCI option. noNewPrivileges is ignored if set to false since gVisor assumes that PR_SET_NO_NEW_PRIVS is always enabled. PiperOrigin-RevId: 305991947 --- pkg/sentry/kernel/task_identity.go | 2 +- pkg/sentry/syscalls/linux/sys_prctl.go | 4 ++-- runsc/specutils/specutils.go | 6 ++++++ 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'runsc/specutils') diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go index ce3e6ef28..0325967e4 100644 --- a/pkg/sentry/kernel/task_identity.go +++ b/pkg/sentry/kernel/task_identity.go @@ -455,7 +455,7 @@ func (t *Task) SetKeepCaps(k bool) { t.creds.Store(creds) } -// updateCredsForExec updates t.creds to reflect an execve(). +// updateCredsForExecLocked updates t.creds to reflect an execve(). // // NOTE(b/30815691): We currently do not implement privileged executables // (set-user/group-ID bits and file capabilities). This allows us to make a lot diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index 9c6728530..f92bf8096 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -161,8 +161,8 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if args[1].Int() != 1 || args[2].Int() != 0 || args[3].Int() != 0 || args[4].Int() != 0 { return 0, nil, syserror.EINVAL } - // no_new_privs is assumed to always be set. See - // kernel.Task.updateCredsForExec. + // PR_SET_NO_NEW_PRIVS is assumed to always be set. + // See kernel.Task.updateCredsForExecLocked. return 0, nil, nil case linux.PR_GET_NO_NEW_PRIVS: diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 0f4a9cf6d..837d5e238 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -92,6 +92,12 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile) } + // PR_SET_NO_NEW_PRIVS is assumed to always be set. + // See kernel.Task.updateCredsForExecLocked. + if !spec.Process.NoNewPrivileges { + log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.") + } + // TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox. if spec.Linux != nil && spec.Linux.Seccomp != nil { log.Warningf("Seccomp spec is being ignored") -- cgit v1.2.3