summary | refs | log | tree | commit | diff | homepage
path: root/runsc/sandbox/sandbox.go
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/sandbox/sandbox.go')
-rw-r--r-- runsc/sandbox/sandbox.go 232
1 files changed, 151 insertions, 81 deletions
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 032190636..4a11f617d 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -28,16 +28,17 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/control/client"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/console"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/control/client"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot/platforms"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/console"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Sandbox wraps a sandbox process.
@@ -73,10 +74,46 @@ type Sandbox struct {
statusMu sync.Mutex
}
+// Args is used to configure a new sandbox.
+type Args struct {
+ // ID is the sandbox unique identifier.
+ ID string
+
+ // Spec is the OCI spec that describes the container.
+ Spec *specs.Spec
+
+ // BundleDir is the directory containing the container bundle.
+ BundleDir string
+
+ // ConsoleSocket is the path to a unix domain socket that will receive
+ // the console FD. It may be empty.
+ ConsoleSocket string
+
+ // UserLog is the filename to send user-visible logs to. It may be empty.
+ UserLog string
+
+ // IOFiles is the list of files that connect to a 9P endpoint for the mount
+ // points using Gofers. They must be in the same order as mounts appear in
+ // the spec.
+ IOFiles []*os.File
+
+ // MountsFile is a file containing mount information from the spec. It's
+ // equivalent to the mounts from the spec, except that all paths have been
+ // resolved to their final absolute location.
+ MountsFile *os.File
+
+ // Cgroup is the cgroup that the sandbox is part of.
+ Cgroup *cgroup.Cgroup
+
+ // Attached indicates that the sandbox lifecycle is tied to the caller.
+ // If the caller exits, the sandbox should exit too.
+ Attached bool
+}
+
// New creates the sandbox process. The caller must call Destroy() on the
// sandbox.
-func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, specFile *os.File, cg *cgroup.Cgroup) (*Sandbox, error) {
- s := &Sandbox{ID: id, Cgroup: cg}
+func New(conf *boot.Config, args *Args) (*Sandbox, error) {
+ s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
c := specutils.MakeCleanup(func() {
@@ -93,7 +130,7 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
defer clientSyncFile.Close()
// Create the sandbox process.
- err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, sandboxSyncFile)
+ err = s.createSandboxProcess(conf, args, sandboxSyncFile)
// sandboxSyncFile has to be closed to be able to detect when the sandbox
// process exits unexpectedly.
sandboxSyncFile.Close()
@@ -291,7 +328,7 @@ func (s *Sandbox) connError(err error) error {
// createSandboxProcess starts the sandbox as a subprocess by running the "boot"
// command, passing in the bundle dir.
-func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, mountsFile, startSyncFile *os.File) error {
+func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncFile *os.File) error {
// nextFD is used to get unused FDs that we can pass to the sandbox. It
// starts at 3 because 0, 1, and 2 are taken by stdin/out/err.
nextFD := 3
@@ -327,7 +364,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// Add the "boot" command to the args.
//
// All flags after this must be for the boot command
- cmd.Args = append(cmd.Args, "boot", "--bundle="+bundleDir)
+ cmd.Args = append(cmd.Args, "boot", "--bundle="+args.BundleDir)
// Create a socket for the control server and donate it to the sandbox.
addr := boot.ControlSocketAddr(s.ID)
@@ -342,12 +379,12 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD))
nextFD++
- defer mountsFile.Close()
- cmd.ExtraFiles = append(cmd.ExtraFiles, mountsFile)
+ defer args.MountsFile.Close()
+ cmd.ExtraFiles = append(cmd.ExtraFiles, args.MountsFile)
cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD))
nextFD++
- specFile, err := specutils.OpenSpec(bundleDir)
+ specFile, err := specutils.OpenSpec(args.BundleDir)
if err != nil {
return err
}
@@ -361,7 +398,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
nextFD++
// If there is a gofer, sends all socket ends to the sandbox.
- for _, f := range ioFiles {
+ for _, f := range args.IOFiles {
defer f.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
cmd.Args = append(cmd.Args, "--io-fds="+strconv.Itoa(nextFD))
@@ -389,23 +426,22 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// If the console control socket file is provided, then create a new
// pty master/slave pair and set the TTY on the sandbox process.
- if consoleSocket != "" {
+ if args.ConsoleSocket != "" {
cmd.Args = append(cmd.Args, "--console=true")
// console.NewWithSocket will send the master on the given
// socket, and return the slave.
- tty, err := console.NewWithSocket(consoleSocket)
+ tty, err := console.NewWithSocket(args.ConsoleSocket)
if err != nil {
- return fmt.Errorf("setting up console with socket %q: %v", consoleSocket, err)
+ return fmt.Errorf("setting up console with socket %q: %v", args.ConsoleSocket, err)
}
defer tty.Close()
// Set the TTY as a controlling TTY on the sandbox process.
- // Note that the Ctty field must be the FD of the TTY in the
- // *new* process, not this process. Since we are about to
- // assign the TTY to nextFD, we can use that value here.
- // stdin, we can use FD 0 here.
cmd.SysProcAttr.Setctty = true
+ // The Ctty FD must be the FD in the child process's FD table,
+ // which will be nextFD in this case.
+ // See https://github.com/golang/go/issues/29458.
cmd.SysProcAttr.Ctty = nextFD
// Pass the tty as all stdio fds to sandbox.
@@ -456,7 +492,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
{Type: specs.UTSNamespace},
}
- if conf.Platform == boot.PlatformPtrace {
+ if conf.Platform == platforms.Ptrace {
// TODO(b/75837838): Also set a new PID namespace so that we limit
// access to other host processes.
log.Infof("Sandbox will be started in the current PID namespace")
@@ -469,7 +505,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// Joins the network namespace if network is enabled. the sandbox talks
// directly to the host network, which may have been configured in the
// namespace.
- if ns, ok := specutils.GetNS(specs.NetworkNamespace, spec); ok && conf.Network != boot.NetworkNone {
+ if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != boot.NetworkNone {
log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
nss = append(nss, ns)
} else if conf.Network == boot.NetworkHost {
@@ -483,10 +519,10 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// inside the user namespace specified in the spec or the current namespace
// if none is configured.
if conf.Network == boot.NetworkHost {
- if userns, ok := specutils.GetNS(specs.UserNamespace, spec); ok {
+ if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
nss = append(nss, userns)
- specutils.SetUIDGIDMappings(cmd, spec)
+ specutils.SetUIDGIDMappings(cmd, args.Spec)
} else {
log.Infof("Sandbox will be started in the current user namespace")
}
@@ -515,46 +551,64 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
+ cmd.Args = append(cmd.Args, "--setup-root")
- // Map nobody in the new namespace to nobody in the parent namespace.
- //
- // A sandbox process will construct an empty
- // root for itself, so it has to have the CAP_SYS_ADMIN
- // capability.
- //
- // FIXME(b/122554829): The current implementations of
- // os/exec doesn't allow to set ambient capabilities if
- // a process is started in a new user namespace. As a
- // workaround, we start the sandbox process with the 0
- // UID and then it constructs a chroot and sets UID to
- // nobody. https://github.com/golang/go/issues/2315
- const nobody = 65534
- cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(0),
- HostID: int(nobody - 1),
- Size: int(1),
- },
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
- }
- cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
+ if conf.Rootless {
+ log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getuid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getgid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
+
+ } else {
+ // Map nobody in the new namespace to nobody in the parent namespace.
+ //
+ // A sandbox process will construct an empty
+ // root for itself, so it has to have the CAP_SYS_ADMIN
+ // capability.
+ //
+ // FIXME(b/122554829): The current implementations of
+ // os/exec doesn't allow to set ambient capabilities if
+ // a process is started in a new user namespace. As a
+ // workaround, we start the sandbox process with the 0
+ // UID and then it constructs a chroot and sets UID to
+ // nobody. https://github.com/golang/go/issues/2315
+ const nobody = 65534
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: nobody - 1,
+ Size: 1,
+ },
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+
+ // Set credentials to run as user and group nobody.
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
}
- // Set credentials to run as user and group nobody.
- cmd.SysProcAttr.Credential = &syscall.Credential{
- Uid: 0,
- Gid: nobody,
- }
- cmd.Args = append(cmd.Args, "--setup-root")
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
@@ -580,8 +634,8 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
}
}
- if userLog != "" {
- f, err := os.OpenFile(userLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664)
+ if args.UserLog != "" {
+ f, err := os.OpenFile(args.UserLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664)
if err != nil {
return fmt.Errorf("opening compat log file: %v", err)
}
@@ -600,6 +654,11 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
log.Debugf("Donating FD %d: %q", i+3, f.Name())
}
+ if args.Attached {
+ // Kill sandbox if parent process exits in attached mode.
+ cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
+ }
+
log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr)
if err := specutils.StartInNS(cmd, nss); err != nil {
@@ -902,7 +961,7 @@ func (s *Sandbox) StartTrace(f *os.File) error {
return nil
}
-// StopTrace stops a previously started trace..
+// StopTrace stops a previously started trace.
func (s *Sandbox) StopTrace() error {
log.Debugf("Trace stop %q", s.ID)
conn, err := s.sandboxConnect()
@@ -917,6 +976,21 @@ func (s *Sandbox) StopTrace() error {
return nil
}
+// ChangeLogging connects to the sandbox and asks it to apply the logging options in args.
+func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error {
+ log.Debugf("Change logging start %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ if err := conn.Call(boot.ChangeLogging, &args, nil); err != nil {
+ return fmt.Errorf("changing sandbox %q logging: %v", s.ID, err)
+ }
+ return nil
+}
+
// DestroyContainer destroys the given container. If it is the root container,
// then the entire sandbox is destroyed.
func (s *Sandbox) DestroyContainer(cid string) error {
@@ -973,19 +1047,15 @@ func (s *Sandbox) waitForStopped() error {
// deviceFileForPlatform opens the device file for the given platform. If the
// platform does not need a device file, then nil is returned.
-func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) {
- var (
- f *os.File
- err error
- )
- switch p {
- case boot.PlatformKVM:
- f, err = kvm.OpenDevice()
- default:
- return nil, nil
+func deviceFileForPlatform(name string) (*os.File, error) {
+ p, err := platform.Lookup(name)
+ if err != nil {
+ return nil, err
}
+
+ f, err := p.OpenDevice()
if err != nil {
- return nil, fmt.Errorf("opening device file for platform %q: %v", p, err)
+ return nil, fmt.Errorf("opening device file for platform %q: %v", name, err)
}
-return f, err
+return f, nil
}