diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/controller.go | 45 | ||||
-rw-r--r-- | runsc/boot/loader.go | 193 |
2 files changed, 176 insertions, 62 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index ae727f144..1a598199d 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -15,9 +15,12 @@ package boot import ( + "errors" "fmt" + specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.googlesource.com/gvisor/pkg/control/server" + "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/control" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" @@ -50,6 +53,10 @@ const ( // ContainerSignal is used to send a signal to a container. ContainerSignal = "containerManager.Signal" + // ContainerStart is the URPC endpoint for running a non-root container + // within a sandbox. + ContainerStart = "containerManager.Start" + // ContainerWait is used to wait on the init process of the container // and return its ExitStatus. ContainerWait = "containerManager.Wait" @@ -127,10 +134,14 @@ type containerManager struct { // watchdog is the kernel watchdog. watchdog *watchdog.Watchdog + + // l is the loader that creates containers and sandboxes. + l *Loader } // StartRoot will start the root container process. func (cm *containerManager) StartRoot(_, _ *struct{}) error { + log.Debugf("containerManager.StartRoot") // Tell the root container to start and wait for the result. cm.startChan <- struct{}{} return <-cm.startResultChan @@ -138,11 +149,42 @@ func (cm *containerManager) StartRoot(_, _ *struct{}) error { // Processes retrieves information about processes running in the sandbox. func (cm *containerManager) Processes(_, out *[]*control.Process) error { + log.Debugf("containerManager.Processes") return control.Processes(cm.k, out) } +// StartArgs contains arguments to the Start method. +type StartArgs struct { + // Spec is the spec of the container to start. + Spec *specs.Spec + + // TODO: Separate sandbox and container configs. + // Config is the runsc-specific configuration for the sandbox. + Conf *Config +} + +// Start runs a created container within a sandbox. +func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { + log.Debugf("containerManager.Start") + + // Validate arguments. + if args == nil { + return errors.New("start missing arguments") + } + if args.Spec == nil { + return errors.New("start arguments missing spec") + } + if args.Conf == nil { + return errors.New("start arguments missing config") + } + + cm.l.startContainer(args, cm.k) + return nil +} + // Execute runs a command on a created or running sandbox. func (cm *containerManager) Execute(e *control.ExecArgs, waitStatus *uint32) error { + log.Debugf("containerManager.Execute") proc := control.Proc{Kernel: cm.k} if err := proc.Exec(e, waitStatus); err != nil { return fmt.Errorf("error executing: %+v: %v", e, err) @@ -152,6 +194,7 @@ func (cm *containerManager) Execute(e *control.ExecArgs, waitStatus *uint32) err // Checkpoint pauses a sandbox and saves its state. func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error { + log.Debugf("containerManager.Checkpoint") state := control.State{ Kernel: cm.k, Watchdog: cm.watchdog, @@ -173,6 +216,7 @@ func (cm *containerManager) Resume(_, _ *struct{}) error { // Wait waits for the init process in the given container. func (cm *containerManager) Wait(cid *string, waitStatus *uint32) error { + log.Debugf("containerManager.Wait") // TODO: Use the cid and wait on the init process in that // container. Currently we just wait on PID 1 in the sandbox. tg := cm.k.TaskSet().Root.ThreadGroupWithID(1) @@ -195,6 +239,7 @@ type SignalArgs struct { // Signal sends a signal to the init process of the container. func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error { + log.Debugf("containerManager.Signal") // TODO: Use the cid and send the signal to the init // process in theat container. Currently we just signal PID 1 in the // sandbox. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 526e8f8bb..d1a413cc7 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package boot loads the kernel and runs a container.. +// Package boot loads the kernel and runs a container. package boot import ( @@ -79,8 +79,8 @@ type Loader struct { // container. It should be called when a sandbox is destroyed. stopSignalForwarding func() - // procArgs refers to the root container task. - procArgs kernel.CreateProcessArgs + // rootProcArgs refers to the root sandbox init task. + rootProcArgs kernel.CreateProcessArgs } func init() { @@ -117,12 +117,6 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in } tk.SetClocks(time.NewCalibratedClocks()) - // Create initial limits. - ls, err := createLimitSet(spec) - if err != nil { - return nil, fmt.Errorf("error creating limits: %v", err) - } - // Create capabilities. caps, err := specutils.Capabilities(spec.Process.Capabilities) if err != nil { @@ -154,13 +148,6 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in return nil, fmt.Errorf("failed to enable strace: %v", err) } - // Get the executable path, which is a bit tricky because we have to - // inspect the environment PATH which is relative to the root path. - exec, err := specutils.GetExecutablePath(spec.Process.Args[0], spec.Root.Path, spec.Process.Env) - if err != nil { - return nil, fmt.Errorf("error getting executable path: %v", err) - } - // Create an empty network stack because the network namespace may be empty at // this point. Netns is configured before Run() is called. Netstack is // configured using a control uRPC message. Host network is configured inside @@ -223,16 +210,56 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in return nil, fmt.Errorf("error creating control server: %v", err) } + // We don't care about child signals; some platforms can generate a + // tremendous number of useless ones (I'm looking at you, ptrace). + if err := sighandling.IgnoreChildStop(); err != nil { + return nil, fmt.Errorf("failed to ignore child stop signals: %v", err) + } + // Ensure that most signals received in sentry context are forwarded to + // the emulated kernel. + stopSignalForwarding := sighandling.StartForwarding(k) + + procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k) + if err != nil { + return nil, fmt.Errorf("failed to create root process: %v", err) + } + + l := &Loader{ + k: k, + ctrl: ctrl, + conf: conf, + console: console, + watchdog: watchdog, + stopSignalForwarding: stopSignalForwarding, + rootProcArgs: procArgs, + } + ctrl.manager.l = l + return l, nil +} + +// newProcess creates a process that can be run with kernel.CreateProcess. +func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) { + // Create initial limits. + ls, err := createLimitSet(spec) + if err != nil { + return kernel.CreateProcessArgs{}, fmt.Errorf("error creating limits: %v", err) + } + + // Get the executable path, which is a bit tricky because we have to + // inspect the environment PATH which is relative to the root path. + exec, err := specutils.GetExecutablePath(spec.Process.Args[0], spec.Root.Path, spec.Process.Env) + if err != nil { + return kernel.CreateProcessArgs{}, fmt.Errorf("error getting executable path: %v", err) + } + // Create the process arguments. procArgs := kernel.CreateProcessArgs{ - Filename: exec, - Argv: spec.Process.Args, - Envv: spec.Process.Env, - WorkingDirectory: spec.Process.Cwd, - Credentials: creds, - // Creating the FDMap requires that we have kernel.Kernel.fdMapUids, so - // it must wait until we have a Kernel. - Umask: uint(syscall.Umask(0)), + Filename: exec, + Argv: spec.Process.Args, + Envv: spec.Process.Env, + WorkingDirectory: spec.Process.Cwd, + Credentials: creds, + Umask: uint(0022), Limits: ls, MaxSymlinkTraversals: linux.MaxSymlinkTraversals, UTSNamespace: utsns, @@ -240,52 +267,42 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in } ctx := procArgs.NewContext(k) - // Use root user to configure mounts. The current user might not have - // permission to do so. - rootProcArgs := kernel.CreateProcessArgs{ - WorkingDirectory: "/", - Credentials: auth.NewRootCredentials(creds.UserNamespace), - Umask: uint(syscall.Umask(0022)), - MaxSymlinkTraversals: linux.MaxSymlinkTraversals, - } - rootCtx := rootProcArgs.NewContext(k) - - // Create the virtual filesystem. - mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs) - if err != nil { - return nil, fmt.Errorf("error creating mounts: %v", err) - } - k.SetRootMountNamespace(mns) - - // Create the FD map, which will set stdin, stdout, and stderr. If console - // is true, then ioctl calls will be passed through to the host fd. + // Create the FD map, which will set stdin, stdout, and stderr. If + // console is true, then ioctl calls will be passed through to the host + // fd. fdm, err := createFDMap(ctx, k, ls, console) if err != nil { - return nil, fmt.Errorf("error importing fds: %v", err) + return kernel.CreateProcessArgs{}, fmt.Errorf("error importing fds: %v", err) } // CreateProcess takes a reference on FDMap if successful. We // won't need ours either way. procArgs.FDMap = fdm - // We don't care about child signals; some platforms can generate a - // tremendous number of useless ones (I'm looking at you, ptrace). - if err := sighandling.IgnoreChildStop(); err != nil { - return nil, fmt.Errorf("failed to ignore child stop signals: %v", err) + // If this is the root container, we also need to setup the root mount + // namespace. + if k.RootMountNamespace() == nil { + // Use root user to configure mounts. The current user might not have + // permission to do so. + rootProcArgs := kernel.CreateProcessArgs{ + WorkingDirectory: "/", + Credentials: auth.NewRootCredentials(creds.UserNamespace), + // The sentry should run with a umask of 0. + Umask: uint(syscall.Umask(0)), + MaxSymlinkTraversals: linux.MaxSymlinkTraversals, + } + rootCtx := rootProcArgs.NewContext(k) + + // Create the virtual filesystem. + mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs) + if err != nil { + return kernel.CreateProcessArgs{}, fmt.Errorf("error creating mounts: %v", err) + } + + k.SetRootMountNamespace(mns) } - // Ensure that most signals received in sentry context are forwarded to - // the emulated kernel. - stopSignalForwarding := sighandling.StartForwarding(k) - return &Loader{ - k: k, - ctrl: ctrl, - conf: conf, - console: console, - watchdog: watchdog, - stopSignalForwarding: stopSignalForwarding, - procArgs: procArgs, - }, nil + return procArgs, nil } // Destroy cleans up all resources used by the loader. @@ -350,17 +367,69 @@ func (l *Loader) run() error { } // Create the root container init task. - if _, err := l.k.CreateProcess(l.procArgs); err != nil { + if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil { return fmt.Errorf("failed to create init process: %v", err) } // CreateProcess takes a reference on FDMap if successful. - l.procArgs.FDMap.DecRef() + l.rootProcArgs.FDMap.DecRef() l.watchdog.Start() return l.k.Start() } +func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) error { + spec := args.Spec + // Create capabilities. + caps, err := specutils.Capabilities(spec.Process.Capabilities) + if err != nil { + return fmt.Errorf("error creating capabilities: %v", err) + } + + // Convert the spec's additional GIDs to KGIDs. + extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids)) + for _, GID := range spec.Process.User.AdditionalGids { + extraKGIDs = append(extraKGIDs, auth.KGID(GID)) + } + + // Create credentials. We reuse the root user namespace because the + // sentry currently supports only 1 mount namespace, which is tied to a + // single user namespace. Thus we must run in the same user namespace + // to access mounts. + // TODO: Create a new mount namespace for the container. + creds := auth.NewUserCredentials( + auth.KUID(spec.Process.User.UID), + auth.KGID(spec.Process.User.GID), + extraKGIDs, + caps, + l.k.RootUserNamespace()) + + // TODO New containers should be started in new PID namespaces + // when indicated by the spec. + + procArgs, err := newProcess( + args.Spec, + args.Conf, + nil, // ioFDs + false, // console + creds, + k.RootUTSNamespace(), + k.RootIPCNamespace(), + k) + if err != nil { + return fmt.Errorf("failed to create new process: %v", err) + } + + if _, err := l.k.CreateProcess(procArgs); err != nil { + return fmt.Errorf("failed to create process in sentry: %v", err) + } + + // CreateProcess takes a reference on FDMap if successful. + procArgs.FDMap.DecRef() + + return nil +} + // WaitForStartSignal waits for a start signal from the control server. func (l *Loader) WaitForStartSignal() { <-l.ctrl.manager.startChan |