summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot/loader.go
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot/loader.go')
-rw-r--r--runsc/boot/loader.go193
1 files changed, 131 insertions, 62 deletions
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 526e8f8bb..d1a413cc7 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package boot loads the kernel and runs a container..
+// Package boot loads the kernel and runs a container.
package boot
import (
@@ -79,8 +79,8 @@ type Loader struct {
// container. It should be called when a sandbox is destroyed.
stopSignalForwarding func()
- // procArgs refers to the root container task.
- procArgs kernel.CreateProcessArgs
+ // rootProcArgs refers to the root sandbox init task.
+ rootProcArgs kernel.CreateProcessArgs
}
func init() {
@@ -117,12 +117,6 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
}
tk.SetClocks(time.NewCalibratedClocks())
- // Create initial limits.
- ls, err := createLimitSet(spec)
- if err != nil {
- return nil, fmt.Errorf("error creating limits: %v", err)
- }
-
// Create capabilities.
caps, err := specutils.Capabilities(spec.Process.Capabilities)
if err != nil {
@@ -154,13 +148,6 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
return nil, fmt.Errorf("failed to enable strace: %v", err)
}
- // Get the executable path, which is a bit tricky because we have to
- // inspect the environment PATH which is relative to the root path.
- exec, err := specutils.GetExecutablePath(spec.Process.Args[0], spec.Root.Path, spec.Process.Env)
- if err != nil {
- return nil, fmt.Errorf("error getting executable path: %v", err)
- }
-
// Create an empty network stack because the network namespace may be empty at
// this point. Netns is configured before Run() is called. Netstack is
// configured using a control uRPC message. Host network is configured inside
@@ -223,16 +210,56 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
return nil, fmt.Errorf("error creating control server: %v", err)
}
+ // We don't care about child signals; some platforms can generate a
+ // tremendous number of useless ones (I'm looking at you, ptrace).
+ if err := sighandling.IgnoreChildStop(); err != nil {
+ return nil, fmt.Errorf("failed to ignore child stop signals: %v", err)
+ }
+ // Ensure that most signals received in sentry context are forwarded to
+ // the emulated kernel.
+ stopSignalForwarding := sighandling.StartForwarding(k)
+
+ procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create root process: %v", err)
+ }
+
+ l := &Loader{
+ k: k,
+ ctrl: ctrl,
+ conf: conf,
+ console: console,
+ watchdog: watchdog,
+ stopSignalForwarding: stopSignalForwarding,
+ rootProcArgs: procArgs,
+ }
+ ctrl.manager.l = l
+ return l, nil
+}
+
+// newProcess creates a process that can be run with kernel.CreateProcess.
+func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) {
+ // Create initial limits.
+ ls, err := createLimitSet(spec)
+ if err != nil {
+ return kernel.CreateProcessArgs{}, fmt.Errorf("error creating limits: %v", err)
+ }
+
+ // Get the executable path, which is a bit tricky because we have to
+ // inspect the environment PATH which is relative to the root path.
+ exec, err := specutils.GetExecutablePath(spec.Process.Args[0], spec.Root.Path, spec.Process.Env)
+ if err != nil {
+ return kernel.CreateProcessArgs{}, fmt.Errorf("error getting executable path: %v", err)
+ }
+
// Create the process arguments.
procArgs := kernel.CreateProcessArgs{
- Filename: exec,
- Argv: spec.Process.Args,
- Envv: spec.Process.Env,
- WorkingDirectory: spec.Process.Cwd,
- Credentials: creds,
- // Creating the FDMap requires that we have kernel.Kernel.fdMapUids, so
- // it must wait until we have a Kernel.
- Umask: uint(syscall.Umask(0)),
+ Filename: exec,
+ Argv: spec.Process.Args,
+ Envv: spec.Process.Env,
+ WorkingDirectory: spec.Process.Cwd,
+ Credentials: creds,
+ Umask: uint(0022),
Limits: ls,
MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
UTSNamespace: utsns,
@@ -240,52 +267,42 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
}
ctx := procArgs.NewContext(k)
- // Use root user to configure mounts. The current user might not have
- // permission to do so.
- rootProcArgs := kernel.CreateProcessArgs{
- WorkingDirectory: "/",
- Credentials: auth.NewRootCredentials(creds.UserNamespace),
- Umask: uint(syscall.Umask(0022)),
- MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
- }
- rootCtx := rootProcArgs.NewContext(k)
-
- // Create the virtual filesystem.
- mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs)
- if err != nil {
- return nil, fmt.Errorf("error creating mounts: %v", err)
- }
- k.SetRootMountNamespace(mns)
-
- // Create the FD map, which will set stdin, stdout, and stderr. If console
- // is true, then ioctl calls will be passed through to the host fd.
+ // Create the FD map, which will set stdin, stdout, and stderr. If
+ // console is true, then ioctl calls will be passed through to the host
+ // fd.
fdm, err := createFDMap(ctx, k, ls, console)
if err != nil {
- return nil, fmt.Errorf("error importing fds: %v", err)
+ return kernel.CreateProcessArgs{}, fmt.Errorf("error importing fds: %v", err)
}
// CreateProcess takes a reference on FDMap if successful. We
// won't need ours either way.
procArgs.FDMap = fdm
- // We don't care about child signals; some platforms can generate a
- // tremendous number of useless ones (I'm looking at you, ptrace).
- if err := sighandling.IgnoreChildStop(); err != nil {
- return nil, fmt.Errorf("failed to ignore child stop signals: %v", err)
+ // If this is the root container, we also need to setup the root mount
+ // namespace.
+ if k.RootMountNamespace() == nil {
+ // Use root user to configure mounts. The current user might not have
+ // permission to do so.
+ rootProcArgs := kernel.CreateProcessArgs{
+ WorkingDirectory: "/",
+ Credentials: auth.NewRootCredentials(creds.UserNamespace),
+ // The sentry should run with a umask of 0.
+ Umask: uint(syscall.Umask(0)),
+ MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
+ }
+ rootCtx := rootProcArgs.NewContext(k)
+
+ // Create the virtual filesystem.
+ mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs)
+ if err != nil {
+ return kernel.CreateProcessArgs{}, fmt.Errorf("error creating mounts: %v", err)
+ }
+
+ k.SetRootMountNamespace(mns)
}
- // Ensure that most signals received in sentry context are forwarded to
- // the emulated kernel.
- stopSignalForwarding := sighandling.StartForwarding(k)
- return &Loader{
- k: k,
- ctrl: ctrl,
- conf: conf,
- console: console,
- watchdog: watchdog,
- stopSignalForwarding: stopSignalForwarding,
- procArgs: procArgs,
- }, nil
+ return procArgs, nil
}
// Destroy cleans up all resources used by the loader.
@@ -350,17 +367,69 @@ func (l *Loader) run() error {
}
// Create the root container init task.
- if _, err := l.k.CreateProcess(l.procArgs); err != nil {
+ if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
return fmt.Errorf("failed to create init process: %v", err)
}
// CreateProcess takes a reference on FDMap if successful.
- l.procArgs.FDMap.DecRef()
+ l.rootProcArgs.FDMap.DecRef()
l.watchdog.Start()
return l.k.Start()
}
+func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) error {
+ spec := args.Spec
+ // Create capabilities.
+ caps, err := specutils.Capabilities(spec.Process.Capabilities)
+ if err != nil {
+ return fmt.Errorf("error creating capabilities: %v", err)
+ }
+
+ // Convert the spec's additional GIDs to KGIDs.
+ extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids))
+ for _, GID := range spec.Process.User.AdditionalGids {
+ extraKGIDs = append(extraKGIDs, auth.KGID(GID))
+ }
+
+ // Create credentials. We reuse the root user namespace because the
+ // sentry currently supports only 1 mount namespace, which is tied to a
+ // single user namespace. Thus we must run in the same user namespace
+ // to access mounts.
+ // TODO: Create a new mount namespace for the container.
+ creds := auth.NewUserCredentials(
+ auth.KUID(spec.Process.User.UID),
+ auth.KGID(spec.Process.User.GID),
+ extraKGIDs,
+ caps,
+ l.k.RootUserNamespace())
+
+ // TODO New containers should be started in new PID namespaces
+ // when indicated by the spec.
+
+ procArgs, err := newProcess(
+ args.Spec,
+ args.Conf,
+ nil, // ioFDs
+ false, // console
+ creds,
+ k.RootUTSNamespace(),
+ k.RootIPCNamespace(),
+ k)
+ if err != nil {
+ return fmt.Errorf("failed to create new process: %v", err)
+ }
+
+ if _, err := l.k.CreateProcess(procArgs); err != nil {
+ return fmt.Errorf("failed to create process in sentry: %v", err)
+ }
+
+ // CreateProcess takes a reference on FDMap if successful.
+ procArgs.FDMap.DecRef()
+
+ return nil
+}
+
// WaitForStartSignal waits for a start signal from the control server.
func (l *Loader) WaitForStartSignal() {
<-l.ctrl.manager.startChan