diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/controller.go | 131 | ||||
-rw-r--r-- | runsc/boot/events.go | 4 | ||||
-rw-r--r-- | runsc/boot/fs.go | 45 | ||||
-rw-r--r-- | runsc/boot/loader.go | 222 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 3 |
5 files changed, 253 insertions, 152 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index ff75a382e..c6e934e66 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -23,9 +23,13 @@ import ( "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/control" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" + "gvisor.googlesource.com/gvisor/pkg/sentry/state" + "gvisor.googlesource.com/gvisor/pkg/sentry/time" "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" + "gvisor.googlesource.com/gvisor/pkg/urpc" ) const ( @@ -47,9 +51,15 @@ const ( // processes running in a container. ContainerProcesses = "containerManager.Processes" + // ContainerRestore restores a container from a statefile. + ContainerRestore = "containerManager.Restore" + // ContainerResume unpauses the paused container. ContainerResume = "containerManager.Resume" + // ContainerWaitForLoader blocks until the container's loader has been created. + ContainerWaitForLoader = "containerManager.WaitForLoader" + // ContainerSignal is used to send a signal to a container. ContainerSignal = "containerManager.Signal" @@ -85,7 +95,7 @@ func ControlSocketAddr(id string) string { // controller holds the control server, and is used for communication into the // sandbox. type controller struct { - // srv is the contorl server. + // srv is the control server. srv *server.Server // manager holds the containerManager methods. @@ -100,10 +110,9 @@ func newController(fd int, k *kernel.Kernel, w *watchdog.Watchdog) (*controller, } manager := &containerManager{ - startChan: make(chan struct{}), - startResultChan: make(chan error), - k: k, - watchdog: w, + startChan: make(chan struct{}), + startResultChan: make(chan error), + loaderCreatedChan: make(chan struct{}), } srv.Register(manager) @@ -137,15 +146,13 @@ type containerManager struct { // channel. A nil value indicates success. startResultChan chan error - // k is the emulated linux kernel on which the sandboxed - // containers run. - k *kernel.Kernel - - // watchdog is the kernel watchdog. - watchdog *watchdog.Watchdog - // l is the loader that creates containers and sandboxes. l *Loader + + // loaderCreatedChan is used to signal when the loader has been created. + // After a loader is created, a notify method is called that writes to + // this channel. + loaderCreatedChan chan struct{} } // StartRoot will start the root container process. @@ -160,7 +167,7 @@ func (cm *containerManager) StartRoot(cid *string, _ *struct{}) error { // Processes retrieves information about processes running in the sandbox. func (cm *containerManager) Processes(_, out *[]*control.Process) error { log.Debugf("containerManager.Processes") - return control.Processes(cm.k, out) + return control.Processes(cm.l.k, out) } // StartArgs contains arguments to the Start method. @@ -194,7 +201,7 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { return errors.New("start argument missing container ID") } - tgid, err := cm.l.startContainer(args, cm.k) + tgid, err := cm.l.startContainer(args, cm.l.k) if err != nil { return err } @@ -206,7 +213,7 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { // Execute runs a command on a created or running sandbox. func (cm *containerManager) Execute(e *control.ExecArgs, waitStatus *uint32) error { log.Debugf("containerManager.Execute") - proc := control.Proc{Kernel: cm.k} + proc := control.Proc{Kernel: cm.l.k} if err := proc.Exec(e, waitStatus); err != nil { return fmt.Errorf("error executing: %+v: %v", e, err) } @@ -217,21 +224,105 @@ func (cm *containerManager) Execute(e *control.ExecArgs, waitStatus *uint32) err func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error { log.Debugf("containerManager.Checkpoint") state := control.State{ - Kernel: cm.k, - Watchdog: cm.watchdog, + Kernel: cm.l.k, + Watchdog: cm.l.watchdog, } return state.Save(o, nil) } // Pause suspends a container. func (cm *containerManager) Pause(_, _ *struct{}) error { - cm.k.Pause() + cm.l.k.Pause() return nil } +// WaitForLoader blocks until the container's loader has been created. +func (cm *containerManager) WaitForLoader(_, _ *struct{}) error { + log.Debugf("containerManager.WaitForLoader") + <-cm.loaderCreatedChan + return nil +} + +// RestoreOpts contains options related to restoring a container's file system. +type RestoreOpts struct { + // FilePayload contains the state file to be restored. + urpc.FilePayload + + // SandboxID contains the ID of the sandbox. + SandboxID string +} + +// Restore loads a container from a statefile. +// The container's current kernel is destroyed, a restore environment is created, +// and the kernel is recreated with the restore state file. The container then sends the +// signal to start. +func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { + log.Debugf("containerManager.Restore") + if len(o.FilePayload.Files) != 1 { + return fmt.Errorf("exactly one file must be provided") + } + defer o.FilePayload.Files[0].Close() + + // Destroy the old kernel and create a new kernel. + cm.l.k.Pause() + cm.l.k.Destroy() + + p, err := createPlatform(cm.l.conf) + if err != nil { + return fmt.Errorf("error creating platform: %v", err) + } + k := &kernel.Kernel{ + Platform: p, + } + cm.l.k = k + + // Set up the restore environment. + fds := &fdDispenser{fds: cm.l.ioFDs} + renv, err := createRestoreEnvironment(cm.l.spec, cm.l.conf, fds) + if err != nil { + return fmt.Errorf("error creating RestoreEnvironment: %v", err) + } + fs.SetRestoreEnvironment(*renv) + + // Prepare to load from the state file. + networkStack := newEmptyNetworkStack(cm.l.conf, k) + info, err := o.FilePayload.Files[0].Stat() + if err != nil { + return err + } + if info.Size() == 0 { + return fmt.Errorf("error file was empty") + } + + // Load the state. + loadOpts := state.LoadOpts{ + Source: o.FilePayload.Files[0], + } + if err := loadOpts.Load(k, p, networkStack); err != nil { + return err + } + + // Set timekeeper. + k.Timekeeper().SetClocks(time.NewCalibratedClocks()) + + // Since we have a new kernel we also must make a new watchdog. + watchdog := watchdog.New(k, watchdog.DefaultTimeout, cm.l.conf.WatchdogAction) + + // Change the loader fields to reflect the changes made when restoring. + cm.l.k = k + cm.l.watchdog = watchdog + cm.l.rootProcArgs = kernel.CreateProcessArgs{} + cm.l.setRootContainerID(o.SandboxID) + cm.l.restore = true + + // Tell the root container to start and wait for the result. + cm.startChan <- struct{}{} + return <-cm.startResultChan +} + // Resume unpauses a container. func (cm *containerManager) Resume(_, _ *struct{}) error { - cm.k.Unpause() + cm.l.k.Unpause() return nil } @@ -272,7 +363,7 @@ func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error { // process in theat container. Currently we just signal PID 1 in the // sandbox. si := arch.SignalInfo{Signo: args.Signo} - t := cm.k.TaskSet().Root.TaskWithID(1) + t := cm.l.k.TaskSet().Root.TaskWithID(1) if t == nil { return fmt.Errorf("cannot signal: no task with id 1") } diff --git a/runsc/boot/events.go b/runsc/boot/events.go index 0eb75c14c..832339cf4 100644 --- a/runsc/boot/events.go +++ b/runsc/boot/events.go @@ -62,8 +62,8 @@ type Memory struct { // Event gets the events from the container. func (cm *containerManager) Event(_ *struct{}, out *Event) error { stats := &Stats{} - stats.populateMemory(cm.k) - stats.populatePIDs(cm.k) + stats.populateMemory(cm.l.k) + stats.populatePIDs(cm.l.k) *out = Event{Type: "stats", Data: stats} return nil } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 51c8d620d..e596c739f 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -27,6 +27,9 @@ import ( _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys" _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs" _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/limits" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.googlesource.com/gvisor/pkg/abi/linux" @@ -563,3 +566,45 @@ func subtargets(root string, mnts []specs.Mount) []string { } return targets } + +// setFileSystemForProcess is used to set up the file system and amend the procArgs accordingly. +// procArgs are passed by reference and the FDMap field is modified. +func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel) error { + ctx := procArgs.NewContext(k) + + // Create the FD map, which will set stdin, stdout, and stderr. If + // console is true, then ioctl calls will be passed through to the host + // fd. + fdm, err := createFDMap(ctx, k, ls, console) + if err != nil { + return fmt.Errorf("error importing fds: %v", err) + } + + // CreateProcess takes a reference on FDMap if successful. We + // won't need ours either way. + procArgs.FDMap = fdm + + // If this is the root container, we also need to setup the root mount + // namespace. + if k.RootMountNamespace() == nil { + // Use root user to configure mounts. The current user might not have + // permission to do so. + rootProcArgs := kernel.CreateProcessArgs{ + WorkingDirectory: "/", + Credentials: auth.NewRootCredentials(creds.UserNamespace), + Umask: 0022, + MaxSymlinkTraversals: linux.MaxSymlinkTraversals, + } + rootCtx := rootProcArgs.NewContext(k) + + // Create the virtual filesystem. + mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs) + if err != nil { + return fmt.Errorf("error creating mounts: %v", err) + } + + k.SetRootMountNamespace(mns) + } + + return nil +} diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 706910d8a..66394cdf8 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -19,7 +19,6 @@ import ( "errors" "fmt" "math/rand" - "os" "runtime" "sync" "sync/atomic" @@ -29,7 +28,6 @@ import ( "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/cpuid" "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/inet" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" @@ -38,7 +36,6 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling" - "gvisor.googlesource.com/gvisor/pkg/sentry/state" slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/time" "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" @@ -77,6 +74,12 @@ type Loader struct { watchdog *watchdog.Watchdog + // ioFDs are the FDs that attach the sandbox to the gofers. + ioFDs []int + + // spec is the base configuration for the root container. + spec *specs.Spec + // stopSignalForwarding disables forwarding of signals to the sandboxed // container. It should be called when a sandbox is destroyed. stopSignalForwarding func() @@ -111,16 +114,7 @@ func init() { // New initializes a new kernel loader configured by spec. // New also handles setting up a kernel for restoring a container. -func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []int, console bool) (*Loader, error) { - var ( - tk *kernel.Timekeeper - creds *auth.Credentials - vdso *loader.VDSO - utsns *kernel.UTSNamespace - ipcns *kernel.IPCNamespace - restoreFile *os.File - procArgs kernel.CreateProcessArgs - ) +func New(spec *specs.Spec, conf *Config, controllerFD int, ioFDs []int, console bool) (*Loader, error) { // Create kernel and platform. p, err := createPlatform(conf) if err != nil { @@ -130,60 +124,47 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in Platform: p, } - if restoreFD == -1 { - // Create VDSO. - // - // Pass k as the platform since it is savable, unlike the actual platform. - vdso, err = loader.PrepareVDSO(k) - if err != nil { - return nil, fmt.Errorf("error creating vdso: %v", err) - } + // Create VDSO. + // + // Pass k as the platform since it is savable, unlike the actual platform. + vdso, err := loader.PrepareVDSO(k) + if err != nil { + return nil, fmt.Errorf("error creating vdso: %v", err) + } - // Create timekeeper. - tk, err = kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) - if err != nil { - return nil, fmt.Errorf("error creating timekeeper: %v", err) - } - tk.SetClocks(time.NewCalibratedClocks()) + // Create timekeeper. + tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) + if err != nil { + return nil, fmt.Errorf("error creating timekeeper: %v", err) + } + tk.SetClocks(time.NewCalibratedClocks()) - // Create capabilities. - caps, err := specutils.Capabilities(spec.Process.Capabilities) - if err != nil { - return nil, fmt.Errorf("error creating capabilities: %v", err) - } + // Create capabilities. + caps, err := specutils.Capabilities(spec.Process.Capabilities) + if err != nil { + return nil, fmt.Errorf("error creating capabilities: %v", err) + } - // Convert the spec's additional GIDs to KGIDs. - extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids)) - for _, GID := range spec.Process.User.AdditionalGids { - extraKGIDs = append(extraKGIDs, auth.KGID(GID)) - } + // Convert the spec's additional GIDs to KGIDs. + extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids)) + for _, GID := range spec.Process.User.AdditionalGids { + extraKGIDs = append(extraKGIDs, auth.KGID(GID)) + } - // Create credentials. - creds = auth.NewUserCredentials( - auth.KUID(spec.Process.User.UID), - auth.KGID(spec.Process.User.GID), - extraKGIDs, - caps, - auth.NewRootUserNamespace()) + // Create credentials. + creds := auth.NewUserCredentials( + auth.KUID(spec.Process.User.UID), + auth.KGID(spec.Process.User.GID), + extraKGIDs, + caps, + auth.NewRootUserNamespace()) - // Create user namespace. - // TODO: Not clear what domain name should be here. It is - // not configurable from runtime spec. - utsns = kernel.NewUTSNamespace(spec.Hostname, "", creds.UserNamespace) + // Create user namespace. + // TODO: Not clear what domain name should be here. It is + // not configurable from runtime spec. + utsns := kernel.NewUTSNamespace(spec.Hostname, "", creds.UserNamespace) - ipcns = kernel.NewIPCNamespace(creds.UserNamespace) - } else { - // Create and set RestoreEnvironment - fds := &fdDispenser{fds: ioFDs} - renv, err := createRestoreEnvironment(spec, conf, fds) - if err != nil { - return nil, fmt.Errorf("error creating RestoreEnvironment: %v", err) - } - fs.SetRestoreEnvironment(*renv) - - restoreFile = os.NewFile(uintptr(restoreFD), "restore_file") - defer restoreFile.Close() - } + ipcns := kernel.NewIPCNamespace(creds.UserNamespace) if err := enableStrace(conf); err != nil { return nil, fmt.Errorf("failed to enable strace: %v", err) @@ -195,33 +176,20 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in // Run(). networkStack := newEmptyNetworkStack(conf, k) - if restoreFile == nil { - // Initiate the Kernel object, which is required by the Context passed - // to createVFS in order to mount (among other things) procfs. - if err = k.Init(kernel.InitKernelArgs{ - FeatureSet: cpuid.HostFeatureSet(), - Timekeeper: tk, - RootUserNamespace: creds.UserNamespace, - NetworkStack: networkStack, - // TODO: use number of logical processors from cgroups. - ApplicationCores: uint(runtime.NumCPU()), - Vdso: vdso, - RootUTSNamespace: utsns, - RootIPCNamespace: ipcns, - }); err != nil { - return nil, fmt.Errorf("error initializing kernel: %v", err) - } - } else { - // Load the state. - loadOpts := state.LoadOpts{ - Source: restoreFile, - } - if err := loadOpts.Load(k, p, networkStack); err != nil { - return nil, err - } - - // Set timekeeper. - k.Timekeeper().SetClocks(time.NewCalibratedClocks()) + // Initiate the Kernel object, which is required by the Context passed + // to createVFS in order to mount (among other things) procfs. + if err = k.Init(kernel.InitKernelArgs{ + FeatureSet: cpuid.HostFeatureSet(), + Timekeeper: tk, + RootUserNamespace: creds.UserNamespace, + NetworkStack: networkStack, + // TODO: use number of logical processors from cgroups. + ApplicationCores: uint(runtime.NumCPU()), + Vdso: vdso, + RootUTSNamespace: utsns, + RootIPCNamespace: ipcns, + }); err != nil { + return nil, fmt.Errorf("error initializing kernel: %v", err) } // Turn on packet logging if enabled. @@ -258,11 +226,9 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in // Ensure that signals received are forwarded to the emulated kernel. stopSignalForwarding := sighandling.PrepareForwarding(k, false)() - if restoreFile == nil { - procArgs, err = newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k) - if err != nil { - return nil, fmt.Errorf("failed to create root process: %v", err) - } + procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k) + if err != nil { + return nil, fmt.Errorf("failed to create root process: %v", err) } l := &Loader{ @@ -271,9 +237,10 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in conf: conf, console: console, watchdog: watchdog, + ioFDs: ioFDs, + spec: spec, stopSignalForwarding: stopSignalForwarding, rootProcArgs: procArgs, - restore: restoreFile != nil, } ctrl.manager.l = l return l, nil @@ -307,41 +274,6 @@ func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds UTSNamespace: utsns, IPCNamespace: ipcns, } - ctx := procArgs.NewContext(k) - - // Create the FD map, which will set stdin, stdout, and stderr. If - // console is true, then ioctl calls will be passed through to the host - // fd. - fdm, err := createFDMap(ctx, k, ls, console) - if err != nil { - return kernel.CreateProcessArgs{}, fmt.Errorf("error importing fds: %v", err) - } - - // CreateProcess takes a reference on FDMap if successful. We - // won't need ours either way. - procArgs.FDMap = fdm - - // If this is the root container, we also need to setup the root mount - // namespace. - if k.RootMountNamespace() == nil { - // Use root user to configure mounts. The current user might not have - // permission to do so. - rootProcArgs := kernel.CreateProcessArgs{ - WorkingDirectory: "/", - Credentials: auth.NewRootCredentials(creds.UserNamespace), - Umask: 0022, - MaxSymlinkTraversals: linux.MaxSymlinkTraversals, - } - rootCtx := rootProcArgs.NewContext(k) - - // Create the virtual filesystem. - mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs) - if err != nil { - return kernel.CreateProcessArgs{}, fmt.Errorf("error creating mounts: %v", err) - } - - k.SetRootMountNamespace(mns) - } return procArgs, nil } @@ -411,7 +343,20 @@ func (l *Loader) run() error { } // If we are restoring, we do not want to create a process. + // l.restore is set by the container manager when a restore call is made. if !l.restore { + err := setFileSystemForProcess( + &l.rootProcArgs, + l.spec, + l.conf, + l.ioFDs, + l.console, + l.rootProcArgs.Credentials, + l.rootProcArgs.Limits, + l.k) + if err != nil { + return err + } // Create the root container init task. if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil { return fmt.Errorf("failed to create init process: %v", err) @@ -421,6 +366,7 @@ func (l *Loader) run() error { l.rootProcArgs.FDMap.DecRef() } + log.Infof("Process should have started...") l.watchdog.Start() return l.k.Start() } @@ -468,6 +414,18 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa if err != nil { return 0, fmt.Errorf("failed to create new process: %v", err) } + err = setFileSystemForProcess( + &procArgs, + args.Spec, + args.Conf, + nil, + false, + creds, + procArgs.Limits, + k) + if err != nil { + return 0, fmt.Errorf("failed to create new process: %v", err) + } tg, err := l.k.CreateProcess(procArgs) if err != nil { @@ -553,6 +511,12 @@ func (l *Loader) WaitForStartSignal() { <-l.ctrl.manager.startChan } +// NotifyLoaderCreated sends a signal to the container manager that this +// loader has been created. +func (l *Loader) NotifyLoaderCreated() { + l.ctrl.manager.loaderCreatedChan <- struct{}{} +} + // WaitExit waits for the root container to exit, and returns its exit status. func (l *Loader) WaitExit() kernel.ExitStatus { // Wait for container. diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 30ec236e4..7ea2e1ee5 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -61,7 +61,8 @@ func createLoader() (*Loader, error) { FileAccess: FileAccessDirect, DisableSeccomp: true, } - return New(testSpec(), conf, fd, -1, nil, false) + spec := testSpec() + return New(spec, conf, fd, nil, false) } // TestRun runs a simple application in a sandbox and checks that it succeeds. |