diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/compat.go | 2 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 2 | ||||
-rw-r--r-- | runsc/boot/fs.go | 15 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 5 | ||||
-rw-r--r-- | runsc/container/container.go | 4 | ||||
-rw-r--r-- | runsc/container/test_app/test_app.go | 2 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer.go | 4 | ||||
-rw-r--r-- | runsc/main.go | 8 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 97 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 11 |
10 files changed, 107 insertions, 43 deletions
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index 8995d678e..b7cfb35bf 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -65,7 +65,7 @@ func newCompatEmitter(logFD int) (*compatEmitter, error) { if logFD > 0 { f := os.NewFile(uintptr(logFD), "user log file") - target := &log.MultiEmitter{c.sink, &log.K8sJSONEmitter{log.Writer{Next: f}}} + target := &log.MultiEmitter{c.sink, log.K8sJSONEmitter{&log.Writer{Next: f}}} c.sink = &log.BasicLogger{Level: log.Info, Emitter: target} } return c, nil diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 06b9f888a..1828d116a 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -44,7 +44,7 @@ var allowedSyscalls = seccomp.SyscallRules{ { seccomp.AllowAny{}, seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.AllowValue(syscall.O_CLOEXEC), }, }, syscall.SYS_EPOLL_CREATE1: {}, diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 0f62842ea..82cc612d2 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -824,7 +824,20 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil) if err != nil { - return fmt.Errorf("creating mount with source %q: %v", m.Source, err) + err := fmt.Errorf("creating mount with source %q: %v", m.Source, err) + // Check to see if this is a common error due to a Linux bug. + // This error is generated here in order to cause it to be + // printed to the user using Docker via 'runsc create' etc. rather + // than simply printed to the logs for the 'runsc boot' command. + // + // We check the error message string rather than type because the + // actual error types (syscall.EIO, syscall.EPIPE) are lost by file system + // implementation (e.g. p9). + // TODO(gvisor.dev/issue/1765): Remove message when bug is resolved. + if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) { + return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug")) + } + return err } // If there are submounts, we need to overlay the mount on top of a ramfs diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 02e5af3d3..28f0d54b9 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -272,9 +272,8 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error { root := spec.Root.Path if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - // FIXME: runsc can't be re-executed without - // /proc, so we create a tmpfs mount, mount ./proc and ./root - // there, then move this mount to the root and after + // runsc can't be re-executed without /proc, so we create a tmpfs mount, + // mount ./proc and ./root there, then move this mount to the root and after // setCapsAndCallSelf, runsc will chroot into /root. // // We need a directory to construct a new root and we know that diff --git a/runsc/container/container.go b/runsc/container/container.go index c9839044c..7233659b1 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -1077,9 +1077,9 @@ func (c *Container) adjustGoferOOMScoreAdj() error { // oom_score_adj is set to the lowest oom_score_adj among the containers // running in the sandbox. // -// TODO(gvisor.dev/issue/512): This call could race with other containers being +// TODO(gvisor.dev/issue/238): This call could race with other containers being // created at the same time and end up setting the wrong oom_score_adj to the -// sandbox. +// sandbox. Use rpc client to synchronize. func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) error { containers, err := loadSandbox(rootDir, s.ID) if err != nil { diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go index 01c47c79f..5f1c4b7d6 100644 --- a/runsc/container/test_app/test_app.go +++ b/runsc/container/test_app/test_app.go @@ -96,7 +96,7 @@ func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) listener, err := net.Listen("unix", c.socketPath) if err != nil { - log.Fatal("error listening on socket %q:", c.socketPath, err) + log.Fatalf("error listening on socket %q: %v", c.socketPath, err) } go server(listener, outputFile) diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index cadd83273..1942f50d7 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -767,22 +767,18 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { return err } -// TODO(b/127675828): support getxattr. func (*localFile) GetXattr(string, uint64) (string, error) { return "", syscall.EOPNOTSUPP } -// TODO(b/127675828): support setxattr. func (*localFile) SetXattr(string, string, uint32) error { return syscall.EOPNOTSUPP } -// TODO(b/148303075): support listxattr. func (*localFile) ListXattr(uint64) (map[string]struct{}, error) { return nil, syscall.EOPNOTSUPP } -// TODO(b/148303075): support removexattr. func (*localFile) RemoveXattr(string) error { return syscall.EOPNOTSUPP } diff --git a/runsc/main.go b/runsc/main.go index 62e184ec9..59f624842 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -291,7 +291,7 @@ func main() { // want with them. Since Docker and Containerd both eat boot's stderr, we // dup our stderr to the provided log FD so that panics will appear in the // logs, rather than just disappear. - if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { + if err := syscall.Dup3(fd, int(os.Stderr.Fd()), syscall.O_CLOEXEC); err != nil { cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err) } } @@ -342,11 +342,11 @@ func main() { func newEmitter(format string, logFile io.Writer) log.Emitter { switch format { case "text": - return &log.GoogleEmitter{log.Writer{Next: logFile}} + return log.GoogleEmitter{&log.Writer{Next: logFile}} case "json": - return &log.JSONEmitter{log.Writer{Next: logFile}} + return log.JSONEmitter{&log.Writer{Next: logFile}} case "json-k8s": - return &log.K8sJSONEmitter{log.Writer{Next: logFile}} + return log.K8sJSONEmitter{&log.Writer{Next: logFile}} } cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format) panic("unreachable") diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 6c15727fa..e82bcef6f 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -18,10 +18,12 @@ package sandbox import ( "context" "fmt" + "io" "math" "os" "os/exec" "strconv" + "strings" "syscall" "time" @@ -142,7 +144,19 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) { // Wait until the sandbox has booted. b := make([]byte, 1) if l, err := clientSyncFile.Read(b); err != nil || l != 1 { - return nil, fmt.Errorf("waiting for sandbox to start: %v", err) + err := fmt.Errorf("waiting for sandbox to start: %v", err) + // If the sandbox failed to start, it may be because the binary + // permissions were incorrect. Check the bits and return a more helpful + // error message. + // + // NOTE: The error message is checked because error types are lost over + // rpc calls. + if strings.Contains(err.Error(), io.EOF.Error()) { + if permsErr := checkBinaryPermissions(conf); permsErr != nil { + return nil, fmt.Errorf("%v: %v", err, permsErr) + } + } + return nil, err } c.Release() @@ -388,8 +402,6 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } - cmd.Args = append(cmd.Args, "--panic-signal="+strconv.Itoa(int(syscall.SIGTERM))) - // Add the "boot" command to the args. // // All flags after this must be for the boot command @@ -444,6 +456,12 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } + // TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff + // isn't set. + if conf.Platform == "kvm" { + cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1") + } + // The current process' stdio must be passed to the application via the // --stdio-fds flag. The stdio of the sandbox process itself must not // be connected to the same FDs, otherwise we risk leaking sandbox @@ -582,45 +600,32 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace}) cmd.Args = append(cmd.Args, "--setup-root") + const nobody = 65534 if conf.Rootless { - log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid()) + log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid()) cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ { - ContainerID: 0, + ContainerID: nobody, HostID: os.Getuid(), Size: 1, }, } cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ { - ContainerID: 0, + ContainerID: nobody, HostID: os.Getgid(), Size: 1, }, } - cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0} } else { // Map nobody in the new namespace to nobody in the parent namespace. // // A sandbox process will construct an empty - // root for itself, so it has to have the CAP_SYS_ADMIN - // capability. - // - // FIXME(b/122554829): The current implementations of - // os/exec doesn't allow to set ambient capabilities if - // a process is started in a new user namespace. As a - // workaround, we start the sandbox process with the 0 - // UID and then it constructs a chroot and sets UID to - // nobody. https://github.com/golang/go/issues/2315 - const nobody = 65534 + // root for itself, so it has to have + // CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities. cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ { - ContainerID: 0, - HostID: nobody - 1, - Size: 1, - }, - { ContainerID: nobody, HostID: nobody, Size: 1, @@ -633,11 +638,11 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF Size: 1, }, } - - // Set credentials to run as user and group nobody. - cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody} } + // Set credentials to run as user and group nobody. + cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody} + cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT)) } else { return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID") } @@ -713,7 +718,19 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr) if err := specutils.StartInNS(cmd, nss); err != nil { - return fmt.Errorf("Sandbox: %v", err) + err := fmt.Errorf("starting sandbox: %v", err) + // If the sandbox failed to start, it may be because the binary + // permissions were incorrect. Check the bits and return a more helpful + // error message. + // + // NOTE: The error message is checked because error types are lost over + // rpc calls. + if strings.Contains(err.Error(), syscall.EACCES.Error()) { + if permsErr := checkBinaryPermissions(conf); permsErr != nil { + return fmt.Errorf("%v: %v", err, permsErr) + } + } + return err } s.child = true s.Pid = cmd.Process.Pid @@ -1176,3 +1193,31 @@ func deviceFileForPlatform(name string) (*os.File, error) { } return f, nil } + +// checkBinaryPermissions verifies that the required binary bits are set on +// the runsc executable. +func checkBinaryPermissions(conf *boot.Config) error { + // All platforms need the other exe bit + neededBits := os.FileMode(0001) + if conf.Platform == platforms.Ptrace { + // Ptrace needs the other read bit + neededBits |= os.FileMode(0004) + } + + exePath, err := os.Executable() + if err != nil { + return fmt.Errorf("getting exe path: %v", err) + } + + // Check the permissions of the runsc binary and print an error if it + // doesn't match expectations. + info, err := os.Stat(exePath) + if err != nil { + return fmt.Errorf("stat file: %v", err) + } + + if info.Mode().Perm()&neededBits != neededBits { + return fmt.Errorf(specutils.FaqErrorMsg("runsc-perms", fmt.Sprintf("%s does not have the correct permissions", exePath))) + } + return nil +} diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index d3c2e4e78..837d5e238 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -92,6 +92,12 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile) } + // PR_SET_NO_NEW_PRIVS is assumed to always be set. + // See kernel.Task.updateCredsForExecLocked. + if !spec.Process.NoNewPrivileges { + log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.") + } + // TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox. if spec.Linux != nil && spec.Linux.Seccomp != nil { log.Warningf("Seccomp spec is being ignored") @@ -528,3 +534,8 @@ func EnvVar(env []string, name string) (string, bool) { } return "", false } + +// FaqErrorMsg returns an error message pointing to the FAQ. +func FaqErrorMsg(anchor, msg string) string { + return fmt.Sprintf("%s; see https://gvisor.dev/faq#%s for more details", msg, anchor) +} |