Diffstat (limited to 'runsc/boot')
-rw-r--r--  runsc/boot/fs.go          | 103
-rw-r--r--  runsc/boot/loader.go      | 146
-rw-r--r--  runsc/boot/loader_test.go |  74
3 files changed, 221 insertions, 102 deletions
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index e0d7fc769..a9b2f225a 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -38,6 +38,14 @@ import (
 	"gvisor.googlesource.com/gvisor/runsc/specutils"
 )
 
+const (
+	// Filesystem name for 9p gofer mounts.
+	rootFsName = "9p"
+
+	// Device name for root mount.
+	rootDevice = "9pfs-/"
+)
+
 type fdDispenser struct {
 	fds []int
 }
@@ -64,7 +72,8 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec
 	if err != nil {
 		return nil, fmt.Errorf("failed to create root mount namespace: %v", err)
 	}
-	if err := configureMounts(rootCtx, spec, conf, mns, fds); err != nil {
+	mounts := compileMounts(spec)
+	if err := setMounts(rootCtx, conf, mns, fds, mounts); err != nil {
 		return nil, fmt.Errorf("failed to configure mounts: %v", err)
 	}
 	if !fds.empty() {
@@ -73,27 +82,23 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec
 	return mns, nil
 }
 
-// configureMounts iterates over Spec.Mounts and mounts them in the specified
-// mount namespace.
-func configureMounts(ctx context.Context, spec *specs.Spec, conf *Config, mns *fs.MountNamespace, fds *fdDispenser) error {
+// compileMounts returns the supported mounts from the mount spec, adding any
+// additional mounts that are required by the OCI specification.
+func compileMounts(spec *specs.Spec) []specs.Mount {
 	// Keep track of whether proc, sys, and tmp were mounted.
 	var procMounted, sysMounted, tmpMounted bool
+	var mounts []specs.Mount
 
 	// Always mount /dev.
-	if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+	mounts = append(mounts, specs.Mount{
 		Type:        "devtmpfs",
 		Destination: "/dev",
-	}); err != nil {
-		return err
-	}
+	})
 
-	// Always mount /dev/pts.
-	if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+	mounts = append(mounts, specs.Mount{
 		Type:        "devpts",
 		Destination: "/dev/pts",
-	}); err != nil {
-		return err
-	}
+	})
 
 	// Mount all submounts from the spec.
 	for _, m := range spec.Mounts {
@@ -101,6 +106,7 @@ func configureMounts(ctx context.Context, spec *specs.Spec, conf *Config, mns *f
 			log.Warningf("ignoring dev mount at %q", m.Destination)
 			continue
 		}
+		mounts = append(mounts, m)
 		switch filepath.Clean(m.Destination) {
 		case "/proc":
 			procMounted = true
@@ -109,43 +115,45 @@ func configureMounts(ctx context.Context, spec *specs.Spec, conf *Config, mns *f
 		case "/tmp":
 			tmpMounted = true
 		}
-
-		if err := mountSubmount(ctx, spec, conf, mns, fds, m); err != nil {
-			return err
-		}
 	}
 
 	// Mount proc and sys even if the user did not ask for it, as the spec
 	// says we SHOULD.
 	if !procMounted {
-		if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+		mounts = append(mounts, specs.Mount{
 			Type:        "proc",
 			Destination: "/proc",
-		}); err != nil {
-			return err
-		}
+		})
 	}
 	if !sysMounted {
-		if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+		mounts = append(mounts, specs.Mount{
 			Type:        "sysfs",
 			Destination: "/sys",
-		}); err != nil {
-			return err
-		}
+		})
 	}
 
 	// Technically we don't have to mount tmpfs at /tmp, as we could just
 	// rely on the host /tmp, but this is a nice optimization, and fixes
 	// some apps that call mknod in /tmp.
 	if !tmpMounted {
-		if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+		mounts = append(mounts, specs.Mount{
 			Type:        "tmpfs",
 			Destination: "/tmp",
-		}); err != nil {
+		})
+	}
+	return mounts
+}
+
+// setMounts iterates over mounts and mounts them in the specified
+// mount namespace.
+func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, mounts []specs.Mount) error {
+
+	// Mount all submounts from mounts.
+	for _, m := range mounts {
+		if err := mountSubmount(ctx, conf, mns, fds, m, mounts); err != nil {
 			return err
 		}
 	}
-
 	return nil
 }
 
@@ -158,19 +166,20 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f
 		rootInode *fs.Inode
 		err       error
 	)
+
 	switch conf.FileAccess {
 	case FileAccessProxy:
 		fd := fds.remove()
 		log.Infof("Mounting root over 9P, ioFD: %d", fd)
 		hostFS := mustFindFilesystem("9p")
-		rootInode, err = hostFS.Mount(ctx, "root", mf, fmt.Sprintf("trans=fd,rfdno=%d,wfdno=%d,privateunixsocket=true", fd, fd))
+		rootInode, err = hostFS.Mount(ctx, rootDevice, mf, fmt.Sprintf("trans=fd,rfdno=%d,wfdno=%d,privateunixsocket=true", fd, fd))
 		if err != nil {
 			return nil, fmt.Errorf("failed to generate root mount point: %v", err)
 		}
 
 	case FileAccessDirect:
 		hostFS := mustFindFilesystem("whitelistfs")
-		rootInode, err = hostFS.Mount(ctx, "root", mf, "root="+spec.Root.Path+",dont_translate_ownership=true")
+		rootInode, err = hostFS.Mount(ctx, rootDevice, mf, "root="+spec.Root.Path+",dont_translate_ownership=true")
 		if err != nil {
 			return nil, fmt.Errorf("failed to generate root mount point: %v", err)
 		}
@@ -263,7 +272,7 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri
 	return fsName, data, useOverlay, err
 }
 
-func mountSubmount(ctx context.Context, spec *specs.Spec, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount) error {
+func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount) error {
 	// Map mount type to filesystem name, and parse out the options that we are
 	// capable of dealing with.
 	fsName, data, useOverlay, err := getMountNameAndOptions(conf, m, fds)
@@ -285,14 +294,13 @@ func mountSubmount(ctx context.Context, spec *specs.Spec, conf *Config, mns *fs.
 		mf.ReadOnly = true
 	}
 
-	inode, err := filesystem.Mount(ctx, m.Type, mf, strings.Join(data, ","))
+	inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(data, ","))
 	if err != nil {
 		return fmt.Errorf("failed to create mount with source %q: %v", m.Source, err)
 	}
 
 	// If there are submounts, we need to overlay the mount on top of a
 	// ramfs with stub directories for submount paths.
-	mounts := specutils.SupportedMounts(spec.Mounts)
 	submounts := subtargets(m.Destination, mounts)
 	if len(submounts) > 0 {
 		log.Infof("Adding submount overlay over %q", m.Destination)
@@ -406,7 +414,7 @@ func mountDevice(m specs.Mount) string {
 	if m.Type == "bind" {
 		// Make a device string that includes the target, which is consistent across
 		// S/R and uniquely identifies the connection.
-		return "p9fs-" + m.Destination
+		return "9pfs-" + m.Destination
 	}
 	// All other fs types use device "none".
 	return "none"
@@ -417,14 +425,24 @@ func mountDevice(m specs.Mount) string {
 func addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount, fds *fdDispenser) error {
 	fsName, data, _, err := getMountNameAndOptions(conf, m, fds)
 	dataString := strings.Join(data, ",")
+
+	// Return the error or nil that corresponds to the default case in getMountNameAndOptions.
 	if err != nil {
 		return err
 	}
-	renv.MountSources[fsName] = append(renv.MountSources[fsName], fs.MountArgs{
+	// TODO: Fix this when we support all the mount types and make this a
+	// fatal error.
+	if fsName == "" {
+		return nil
+	}
+
+	newMount := fs.MountArgs{
 		Dev:   mountDevice(m),
 		Flags: mountFlags(m.Options),
 		Data:  dataString,
-	})
+	}
+	renv.MountSources[fsName] = append(renv.MountSources[fsName], newMount)
+	log.Infof("Added mount at %q: %+v", fsName, newMount)
 	return nil
 }
 
@@ -438,6 +456,8 @@ func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser)
 		MountSources: make(map[string][]fs.MountArgs),
 	}
 
+	mounts := compileMounts(spec)
+
 	// Add root mount.
 	fd := fds.remove()
 	dataString := strings.Join([]string{"trans=fd", fmt.Sprintf("rfdno=%d", fd), fmt.Sprintf("wfdno=%d", fd), "privateunixsocket=true"}, ",")
@@ -445,15 +465,16 @@ func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser)
 	if spec.Root.Readonly {
 		mf.ReadOnly = true
 	}
-	const rootFSName = "9p"
-	renv.MountSources[rootFSName] = append(renv.MountSources[rootFSName], fs.MountArgs{
-		Dev:   "p9fs-/",
+
+	rootMount := fs.MountArgs{
+		Dev:   rootDevice,
 		Flags: mf,
 		Data:  dataString,
-	})
+	}
+	renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount)
 
 	// Add submounts
-	for _, m := range spec.Mounts {
+	for _, m := range mounts {
 		if err := addRestoreMount(conf, renv, m, fds); err != nil {
 			return nil, err
 		}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 014908179..6fcfba5cb 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -29,6 +29,7 @@ import (
 	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
 	"gvisor.googlesource.com/gvisor/pkg/cpuid"
 	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/inet"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
@@ -80,6 +81,9 @@ type Loader struct {
 	// container. It should be called when a sandbox is destroyed.
 	stopSignalForwarding func()
 
+	// restore is set to true if we are restoring a container.
+	restore bool
+
 	// rootProcArgs refers to the root sandbox init task.
 	rootProcArgs kernel.CreateProcessArgs
 
@@ -106,7 +110,17 @@ func init() {
 }
 
 // New initializes a new kernel loader configured by spec.
+// New also handles setting up a kernel for restoring a container.
 func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []int, console bool) (*Loader, error) {
+	var (
+		tk          *kernel.Timekeeper
+		creds       *auth.Credentials
+		vdso        *loader.VDSO
+		utsns       *kernel.UTSNamespace
+		ipcns       *kernel.IPCNamespace
+		restoreFile *os.File
+		procArgs    kernel.CreateProcessArgs
+	)
 	// Create kernel and platform.
 	p, err := createPlatform(conf)
 	if err != nil {
@@ -116,47 +130,60 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
 		Platform: p,
 	}
 
-	// Create VDSO.
-	//
-	// Pass k as the platform since it is savable, unlike the actual platform.
-	vdso, err := loader.PrepareVDSO(k)
-	if err != nil {
-		return nil, fmt.Errorf("error creating vdso: %v", err)
-	}
+	if restoreFD == -1 {
+		// Create VDSO.
+		//
+		// Pass k as the platform since it is savable, unlike the actual platform.
+		vdso, err := loader.PrepareVDSO(k)
+		if err != nil {
+			return nil, fmt.Errorf("error creating vdso: %v", err)
+		}
 
-	// Create timekeeper.
-	tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange())
-	if err != nil {
-		return nil, fmt.Errorf("error creating timekeeper: %v", err)
-	}
-	tk.SetClocks(time.NewCalibratedClocks())
+		// Create timekeeper.
+		tk, err = kernel.NewTimekeeper(k, vdso.ParamPage.FileRange())
+		if err != nil {
+			return nil, fmt.Errorf("error creating timekeeper: %v", err)
+		}
+		tk.SetClocks(time.NewCalibratedClocks())
 
-	// Create capabilities.
-	caps, err := specutils.Capabilities(spec.Process.Capabilities)
-	if err != nil {
-		return nil, fmt.Errorf("error creating capabilities: %v", err)
-	}
+		// Create capabilities.
+		caps, err := specutils.Capabilities(spec.Process.Capabilities)
+		if err != nil {
+			return nil, fmt.Errorf("error creating capabilities: %v", err)
+		}
 
-	// Convert the spec's additional GIDs to KGIDs.
-	extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids))
-	for _, GID := range spec.Process.User.AdditionalGids {
-		extraKGIDs = append(extraKGIDs, auth.KGID(GID))
-	}
+		// Convert the spec's additional GIDs to KGIDs.
+		extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids))
+		for _, GID := range spec.Process.User.AdditionalGids {
+			extraKGIDs = append(extraKGIDs, auth.KGID(GID))
+		}
 
-	// Create credentials.
-	creds := auth.NewUserCredentials(
-		auth.KUID(spec.Process.User.UID),
-		auth.KGID(spec.Process.User.GID),
-		extraKGIDs,
-		caps,
-		auth.NewRootUserNamespace())
+		// Create credentials.
+		creds = auth.NewUserCredentials(
+			auth.KUID(spec.Process.User.UID),
+			auth.KGID(spec.Process.User.GID),
+			extraKGIDs,
+			caps,
+			auth.NewRootUserNamespace())
 
-	// Create user namespace.
-	// TODO: Not clear what domain name should be here. It is
-	// not configurable from runtime spec.
-	utsns := kernel.NewUTSNamespace(spec.Hostname, "", creds.UserNamespace)
+		// Create user namespace.
+		// TODO: Not clear what domain name should be here. It is
+		// not configurable from runtime spec.
+		utsns = kernel.NewUTSNamespace(spec.Hostname, "", creds.UserNamespace)
 
-	ipcns := kernel.NewIPCNamespace(creds.UserNamespace)
+		ipcns = kernel.NewIPCNamespace(creds.UserNamespace)
+	} else {
+		// Create and set RestoreEnvironment
+		fds := &fdDispenser{fds: ioFDs}
+		renv, err := createRestoreEnvironment(spec, conf, fds)
+		if err != nil {
+			return nil, fmt.Errorf("error creating RestoreEnvironment: %v", err)
+		}
+		fs.SetRestoreEnvironment(*renv)
+
+		restoreFile = os.NewFile(uintptr(restoreFD), "restore_file")
+		defer restoreFile.Close()
+	}
 
 	if err := enableStrace(conf); err != nil {
 		return nil, fmt.Errorf("failed to enable strace: %v", err)
@@ -168,19 +195,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
 	// Run().
 	networkStack := newEmptyNetworkStack(conf, k)
 
-	// Check if we need to restore the kernel
-	if restoreFD != -1 {
-		restoreFile := os.NewFile(uintptr(restoreFD), "restore_file")
-		defer restoreFile.Close()
-
-		// Load the state.
-		loadOpts := state.LoadOpts{
-			Source: restoreFile,
-		}
-		if err := loadOpts.Load(k, p, networkStack); err != nil {
-			return nil, err
-		}
-	} else {
+	if restoreFile == nil {
 		// Initiate the Kernel object, which is required by the Context passed
 		// to createVFS in order to mount (among other things) procfs.
 		if err = k.Init(kernel.InitKernelArgs{
@@ -196,6 +211,17 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
 		}); err != nil {
 			return nil, fmt.Errorf("error initializing kernel: %v", err)
 		}
+	} else {
+		// Load the state.
+		loadOpts := state.LoadOpts{
+			Source: restoreFile,
+		}
+		if err := loadOpts.Load(k, p, networkStack); err != nil {
+			return nil, err
+		}
+
+		// Set timekeeper.
+		k.Timekeeper().SetClocks(time.NewCalibratedClocks())
 	}
 
 	// Turn on packet logging if enabled.
@@ -232,9 +258,11 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
 	// Ensure that signals received are forwarded to the emulated kernel.
 	stopSignalForwarding := sighandling.PrepareForwarding(k, false)()
 
-	procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k)
-	if err != nil {
-		return nil, fmt.Errorf("failed to create root process: %v", err)
+	if restoreFile == nil {
+		procArgs, err = newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create root process: %v", err)
+		}
 	}
 
 	l := &Loader{
@@ -245,6 +273,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
 		watchdog:             watchdog,
 		stopSignalForwarding: stopSignalForwarding,
 		rootProcArgs:         procArgs,
+		restore:              restoreFile != nil,
 	}
 	ctrl.manager.l = l
 	return l, nil
@@ -378,13 +407,16 @@ func (l *Loader) run() error {
 		}
 	}
 
-	// Create the root container init task.
-	if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
-		return fmt.Errorf("failed to create init process: %v", err)
-	}
+	// If we are restoring, we do not want to create a process.
+	if !l.restore {
+		// Create the root container init task.
+		if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
+			return fmt.Errorf("failed to create init process: %v", err)
+		}
 
-	// CreateProcess takes a reference on FDMap if successful.
-	l.rootProcArgs.FDMap.DecRef()
+		// CreateProcess takes a reference on FDMap if successful.
+		l.rootProcArgs.FDMap.DecRef()
+	}
 
 	l.watchdog.Start()
 	return l.k.Start()
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 15ced0601..28d45b54b 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -364,7 +364,7 @@ func TestRestoreEnvironment(t *testing.T) {
 				MountSources: map[string][]fs.MountArgs{
 					"9p": {
 						{
-							Dev:   "p9fs-/",
+							Dev:   "9pfs-/",
 							Flags: fs.MountSourceFlags{ReadOnly: true},
 							Data:  "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
 						},
@@ -376,6 +376,24 @@ func TestRestoreEnvironment(t *testing.T) {
 						{
 							Dev: "none",
 						},
+						{
+							Dev: "none",
+						},
+					},
+					"devtmpfs": {
+						{
+							Dev: "none",
+						},
+					},
+					"devpts": {
+						{
+							Dev: "none",
+						},
+					},
+					"sysfs": {
+						{
+							Dev: "none",
+						},
 					},
 				},
 			},
@@ -406,15 +424,40 @@ func TestRestoreEnvironment(t *testing.T) {
 				MountSources: map[string][]fs.MountArgs{
 					"9p": {
 						{
-							Dev:   "p9fs-/",
+							Dev:   "9pfs-/",
 							Flags: fs.MountSourceFlags{ReadOnly: true},
 							Data:  "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
 						},
 						{
-							Dev:  "p9fs-/dev/fd-foo",
+							Dev:  "9pfs-/dev/fd-foo",
 							Data: "trans=fd,rfdno=1,wfdno=1,privateunixsocket=true",
 						},
 					},
+					"tmpfs": {
+						{
+							Dev: "none",
+						},
+					},
+					"devtmpfs": {
+						{
+							Dev: "none",
+						},
+					},
+					"devpts": {
+						{
+							Dev: "none",
+						},
+					},
+					"proc": {
+						{
+							Dev: "none",
+						},
+					},
+					"sysfs": {
+						{
+							Dev: "none",
+						},
+					},
 				},
 			},
 		},
@@ -445,7 +488,7 @@ func TestRestoreEnvironment(t *testing.T) {
 				MountSources: map[string][]fs.MountArgs{
 					"9p": {
 						{
-							Dev:   "p9fs-/",
+							Dev:   "9pfs-/",
 							Flags: fs.MountSourceFlags{ReadOnly: true},
 							Data:  "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
 						},
@@ -456,6 +499,29 @@ func TestRestoreEnvironment(t *testing.T) {
 							Flags: fs.MountSourceFlags{NoAtime: true},
 							Data:  "uid=1022",
 						},
+						{
+							Dev: "none",
+						},
+					},
+					"devtmpfs": {
+						{
+							Dev: "none",
+						},
+					},
+					"devpts": {
+						{
+							Dev: "none",
+						},
+					},
+					"proc": {
+						{
+							Dev: "none",
+						},
+					},
+					"sysfs": {
+						{
+							Dev: "none",
+						},
 					},
 				},
 			},