summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot')
-rw-r--r--runsc/boot/controller.go45
-rw-r--r--runsc/boot/loader.go193
2 files changed, 176 insertions, 62 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index ae727f144..1a598199d 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -15,9 +15,12 @@
package boot
import (
+ "errors"
"fmt"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.googlesource.com/gvisor/pkg/control/server"
+ "gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/control"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
@@ -50,6 +53,10 @@ const (
// ContainerSignal is used to send a signal to a container.
ContainerSignal = "containerManager.Signal"
+ // ContainerStart is the URPC endpoint for running a non-root container
+ // within a sandbox.
+ ContainerStart = "containerManager.Start"
+
// ContainerWait is used to wait on the init process of the container
// and return its ExitStatus.
ContainerWait = "containerManager.Wait"
@@ -127,10 +134,14 @@ type containerManager struct {
// watchdog is the kernel watchdog.
watchdog *watchdog.Watchdog
+
+ // l is the loader that creates containers and sandboxes.
+ l *Loader
}
// StartRoot will start the root container process.
func (cm *containerManager) StartRoot(_, _ *struct{}) error {
+ log.Debugf("containerManager.StartRoot")
// Tell the root container to start and wait for the result.
cm.startChan <- struct{}{}
return <-cm.startResultChan
@@ -138,11 +149,42 @@ func (cm *containerManager) StartRoot(_, _ *struct{}) error {
// Processes retrieves information about processes running in the sandbox.
func (cm *containerManager) Processes(_, out *[]*control.Process) error {
+ log.Debugf("containerManager.Processes")
return control.Processes(cm.k, out)
}
+// StartArgs contains arguments to the Start method.
+type StartArgs struct {
+ // Spec is the spec of the container to start.
+ Spec *specs.Spec
+
+ // TODO: Separate sandbox and container configs.
+ // Config is the runsc-specific configuration for the sandbox.
+ Conf *Config
+}
+
+// Start runs a created container within a sandbox.
+func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
+ log.Debugf("containerManager.Start")
+
+ // Validate arguments.
+ if args == nil {
+ return errors.New("start missing arguments")
+ }
+ if args.Spec == nil {
+ return errors.New("start arguments missing spec")
+ }
+ if args.Conf == nil {
+ return errors.New("start arguments missing config")
+ }
+
+ cm.l.startContainer(args, cm.k)
+ return nil
+}
+
// Execute runs a command on a created or running sandbox.
func (cm *containerManager) Execute(e *control.ExecArgs, waitStatus *uint32) error {
+ log.Debugf("containerManager.Execute")
proc := control.Proc{Kernel: cm.k}
if err := proc.Exec(e, waitStatus); err != nil {
return fmt.Errorf("error executing: %+v: %v", e, err)
@@ -152,6 +194,7 @@ func (cm *containerManager) Execute(e *control.ExecArgs, waitStatus *uint32) err
// Checkpoint pauses a sandbox and saves its state.
func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error {
+ log.Debugf("containerManager.Checkpoint")
state := control.State{
Kernel: cm.k,
Watchdog: cm.watchdog,
@@ -173,6 +216,7 @@ func (cm *containerManager) Resume(_, _ *struct{}) error {
// Wait waits for the init process in the given container.
func (cm *containerManager) Wait(cid *string, waitStatus *uint32) error {
+ log.Debugf("containerManager.Wait")
// TODO: Use the cid and wait on the init process in that
// container. Currently we just wait on PID 1 in the sandbox.
tg := cm.k.TaskSet().Root.ThreadGroupWithID(1)
@@ -195,6 +239,7 @@ type SignalArgs struct {
// Signal sends a signal to the init process of the container.
func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error {
+ log.Debugf("containerManager.Signal")
// TODO: Use the cid and send the signal to the init
// process in theat container. Currently we just signal PID 1 in the
// sandbox.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 526e8f8bb..d1a413cc7 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package boot loads the kernel and runs a container..
+// Package boot loads the kernel and runs a container.
package boot
import (
@@ -79,8 +79,8 @@ type Loader struct {
// container. It should be called when a sandbox is destroyed.
stopSignalForwarding func()
- // procArgs refers to the root container task.
- procArgs kernel.CreateProcessArgs
+ // rootProcArgs refers to the root sandbox init task.
+ rootProcArgs kernel.CreateProcessArgs
}
func init() {
@@ -117,12 +117,6 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
}
tk.SetClocks(time.NewCalibratedClocks())
- // Create initial limits.
- ls, err := createLimitSet(spec)
- if err != nil {
- return nil, fmt.Errorf("error creating limits: %v", err)
- }
-
// Create capabilities.
caps, err := specutils.Capabilities(spec.Process.Capabilities)
if err != nil {
@@ -154,13 +148,6 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
return nil, fmt.Errorf("failed to enable strace: %v", err)
}
- // Get the executable path, which is a bit tricky because we have to
- // inspect the environment PATH which is relative to the root path.
- exec, err := specutils.GetExecutablePath(spec.Process.Args[0], spec.Root.Path, spec.Process.Env)
- if err != nil {
- return nil, fmt.Errorf("error getting executable path: %v", err)
- }
-
// Create an empty network stack because the network namespace may be empty at
// this point. Netns is configured before Run() is called. Netstack is
// configured using a control uRPC message. Host network is configured inside
@@ -223,16 +210,56 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
return nil, fmt.Errorf("error creating control server: %v", err)
}
+ // We don't care about child signals; some platforms can generate a
+ // tremendous number of useless ones (I'm looking at you, ptrace).
+ if err := sighandling.IgnoreChildStop(); err != nil {
+ return nil, fmt.Errorf("failed to ignore child stop signals: %v", err)
+ }
+ // Ensure that most signals received in sentry context are forwarded to
+ // the emulated kernel.
+ stopSignalForwarding := sighandling.StartForwarding(k)
+
+ procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create root process: %v", err)
+ }
+
+ l := &Loader{
+ k: k,
+ ctrl: ctrl,
+ conf: conf,
+ console: console,
+ watchdog: watchdog,
+ stopSignalForwarding: stopSignalForwarding,
+ rootProcArgs: procArgs,
+ }
+ ctrl.manager.l = l
+ return l, nil
+}
+
+// newProcess creates a process that can be run with kernel.CreateProcess.
+func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) {
+ // Create initial limits.
+ ls, err := createLimitSet(spec)
+ if err != nil {
+ return kernel.CreateProcessArgs{}, fmt.Errorf("error creating limits: %v", err)
+ }
+
+ // Get the executable path, which is a bit tricky because we have to
+ // inspect the environment PATH which is relative to the root path.
+ exec, err := specutils.GetExecutablePath(spec.Process.Args[0], spec.Root.Path, spec.Process.Env)
+ if err != nil {
+ return kernel.CreateProcessArgs{}, fmt.Errorf("error getting executable path: %v", err)
+ }
+
// Create the process arguments.
procArgs := kernel.CreateProcessArgs{
- Filename: exec,
- Argv: spec.Process.Args,
- Envv: spec.Process.Env,
- WorkingDirectory: spec.Process.Cwd,
- Credentials: creds,
- // Creating the FDMap requires that we have kernel.Kernel.fdMapUids, so
- // it must wait until we have a Kernel.
- Umask: uint(syscall.Umask(0)),
+ Filename: exec,
+ Argv: spec.Process.Args,
+ Envv: spec.Process.Env,
+ WorkingDirectory: spec.Process.Cwd,
+ Credentials: creds,
+ Umask: uint(0022),
Limits: ls,
MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
UTSNamespace: utsns,
@@ -240,52 +267,42 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
}
ctx := procArgs.NewContext(k)
- // Use root user to configure mounts. The current user might not have
- // permission to do so.
- rootProcArgs := kernel.CreateProcessArgs{
- WorkingDirectory: "/",
- Credentials: auth.NewRootCredentials(creds.UserNamespace),
- Umask: uint(syscall.Umask(0022)),
- MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
- }
- rootCtx := rootProcArgs.NewContext(k)
-
- // Create the virtual filesystem.
- mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs)
- if err != nil {
- return nil, fmt.Errorf("error creating mounts: %v", err)
- }
- k.SetRootMountNamespace(mns)
-
- // Create the FD map, which will set stdin, stdout, and stderr. If console
- // is true, then ioctl calls will be passed through to the host fd.
+ // Create the FD map, which will set stdin, stdout, and stderr. If
+ // console is true, then ioctl calls will be passed through to the host
+ // fd.
fdm, err := createFDMap(ctx, k, ls, console)
if err != nil {
- return nil, fmt.Errorf("error importing fds: %v", err)
+ return kernel.CreateProcessArgs{}, fmt.Errorf("error importing fds: %v", err)
}
// CreateProcess takes a reference on FDMap if successful. We
// won't need ours either way.
procArgs.FDMap = fdm
- // We don't care about child signals; some platforms can generate a
- // tremendous number of useless ones (I'm looking at you, ptrace).
- if err := sighandling.IgnoreChildStop(); err != nil {
- return nil, fmt.Errorf("failed to ignore child stop signals: %v", err)
+ // If this is the root container, we also need to setup the root mount
+ // namespace.
+ if k.RootMountNamespace() == nil {
+ // Use root user to configure mounts. The current user might not have
+ // permission to do so.
+ rootProcArgs := kernel.CreateProcessArgs{
+ WorkingDirectory: "/",
+ Credentials: auth.NewRootCredentials(creds.UserNamespace),
+ // The sentry should run with a umask of 0.
+ Umask: uint(syscall.Umask(0)),
+ MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
+ }
+ rootCtx := rootProcArgs.NewContext(k)
+
+ // Create the virtual filesystem.
+ mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs)
+ if err != nil {
+ return kernel.CreateProcessArgs{}, fmt.Errorf("error creating mounts: %v", err)
+ }
+
+ k.SetRootMountNamespace(mns)
}
- // Ensure that most signals received in sentry context are forwarded to
- // the emulated kernel.
- stopSignalForwarding := sighandling.StartForwarding(k)
- return &Loader{
- k: k,
- ctrl: ctrl,
- conf: conf,
- console: console,
- watchdog: watchdog,
- stopSignalForwarding: stopSignalForwarding,
- procArgs: procArgs,
- }, nil
+ return procArgs, nil
}
// Destroy cleans up all resources used by the loader.
@@ -350,17 +367,69 @@ func (l *Loader) run() error {
}
// Create the root container init task.
- if _, err := l.k.CreateProcess(l.procArgs); err != nil {
+ if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
return fmt.Errorf("failed to create init process: %v", err)
}
// CreateProcess takes a reference on FDMap if successful.
- l.procArgs.FDMap.DecRef()
+ l.rootProcArgs.FDMap.DecRef()
l.watchdog.Start()
return l.k.Start()
}
+func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) error {
+ spec := args.Spec
+ // Create capabilities.
+ caps, err := specutils.Capabilities(spec.Process.Capabilities)
+ if err != nil {
+ return fmt.Errorf("error creating capabilities: %v", err)
+ }
+
+ // Convert the spec's additional GIDs to KGIDs.
+ extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids))
+ for _, GID := range spec.Process.User.AdditionalGids {
+ extraKGIDs = append(extraKGIDs, auth.KGID(GID))
+ }
+
+ // Create credentials. We reuse the root user namespace because the
+ // sentry currently supports only 1 mount namespace, which is tied to a
+ // single user namespace. Thus we must run in the same user namespace
+ // to access mounts.
+ // TODO: Create a new mount namespace for the container.
+ creds := auth.NewUserCredentials(
+ auth.KUID(spec.Process.User.UID),
+ auth.KGID(spec.Process.User.GID),
+ extraKGIDs,
+ caps,
+ l.k.RootUserNamespace())
+
+ // TODO New containers should be started in new PID namespaces
+ // when indicated by the spec.
+
+ procArgs, err := newProcess(
+ args.Spec,
+ args.Conf,
+ nil, // ioFDs
+ false, // console
+ creds,
+ k.RootUTSNamespace(),
+ k.RootIPCNamespace(),
+ k)
+ if err != nil {
+ return fmt.Errorf("failed to create new process: %v", err)
+ }
+
+ if _, err := l.k.CreateProcess(procArgs); err != nil {
+ return fmt.Errorf("failed to create process in sentry: %v", err)
+ }
+
+ // CreateProcess takes a reference on FDMap if successful.
+ procArgs.FDMap.DecRef()
+
+ return nil
+}
+
// WaitForStartSignal waits for a start signal from the control server.
func (l *Loader) WaitForStartSignal() {
<-l.ctrl.manager.startChan