summary refs log tree commit diff homepage
path: root/runsc/boot
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot')
-rw-r--r-- runsc/boot/fs.go 103
-rw-r--r-- runsc/boot/loader.go 146
-rw-r--r-- runsc/boot/loader_test.go 74
3 files changed, 221 insertions, 102 deletions
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index e0d7fc769..a9b2f225a 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -38,6 +38,14 @@ import (
"gvisor.googlesource.com/gvisor/runsc/specutils"
)
+const (
+ // Filesystem name for 9p gofer mounts.
+ rootFsName = "9p"
+
+ // Device name for root mount.
+ rootDevice = "9pfs-/"
+)
+
type fdDispenser struct {
fds []int
}
@@ -64,7 +72,8 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec
if err != nil {
return nil, fmt.Errorf("failed to create root mount namespace: %v", err)
}
- if err := configureMounts(rootCtx, spec, conf, mns, fds); err != nil {
+ mounts := compileMounts(spec)
+ if err := setMounts(rootCtx, conf, mns, fds, mounts); err != nil {
return nil, fmt.Errorf("failed to configure mounts: %v", err)
}
if !fds.empty() {
@@ -73,27 +82,23 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec
return mns, nil
}
-// configureMounts iterates over Spec.Mounts and mounts them in the specified
-// mount namespace.
-func configureMounts(ctx context.Context, spec *specs.Spec, conf *Config, mns *fs.MountNamespace, fds *fdDispenser) error {
+// compileMounts returns the supported mounts from the mount spec, adding any
+// additional mounts that are required by the OCI specification.
+func compileMounts(spec *specs.Spec) []specs.Mount {
// Keep track of whether proc, sys, and tmp were mounted.
var procMounted, sysMounted, tmpMounted bool
+ var mounts []specs.Mount
// Always mount /dev.
- if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+ mounts = append(mounts, specs.Mount{
Type: "devtmpfs",
Destination: "/dev",
- }); err != nil {
- return err
- }
+ })
- // Always mount /dev/pts.
- if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+ mounts = append(mounts, specs.Mount{
Type: "devpts",
Destination: "/dev/pts",
- }); err != nil {
- return err
- }
+ })
// Mount all submounts from the spec.
for _, m := range spec.Mounts {
@@ -101,6 +106,7 @@ func configureMounts(ctx context.Context, spec *specs.Spec, conf *Config, mns *f
log.Warningf("ignoring dev mount at %q", m.Destination)
continue
}
+ mounts = append(mounts, m)
switch filepath.Clean(m.Destination) {
case "/proc":
procMounted = true
@@ -109,43 +115,45 @@ func configureMounts(ctx context.Context, spec *specs.Spec, conf *Config, mns *f
case "/tmp":
tmpMounted = true
}
-
- if err := mountSubmount(ctx, spec, conf, mns, fds, m); err != nil {
- return err
- }
}
// Mount proc and sys even if the user did not ask for it, as the spec
// says we SHOULD.
if !procMounted {
- if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+ mounts = append(mounts, specs.Mount{
Type: "proc",
Destination: "/proc",
- }); err != nil {
- return err
- }
+ })
}
if !sysMounted {
- if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+ mounts = append(mounts, specs.Mount{
Type: "sysfs",
Destination: "/sys",
- }); err != nil {
- return err
- }
+ })
}
// Technically we don't have to mount tmpfs at /tmp, as we could just
// rely on the host /tmp, but this is a nice optimization, and fixes
// some apps that call mknod in /tmp.
if !tmpMounted {
- if err := mountSubmount(ctx, spec, conf, mns, nil, specs.Mount{
+ mounts = append(mounts, specs.Mount{
Type: "tmpfs",
Destination: "/tmp",
- }); err != nil {
+ })
+ }
+ return mounts
+}
+
+// setMounts iterates over mounts and mounts them in the specified
+// mount namespace.
+func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, mounts []specs.Mount) error {
+
+ // Mount all submounts from mounts.
+ for _, m := range mounts {
+ if err := mountSubmount(ctx, conf, mns, fds, m, mounts); err != nil {
return err
}
}
-
return nil
}
@@ -158,19 +166,20 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f
rootInode *fs.Inode
err error
)
+
switch conf.FileAccess {
case FileAccessProxy:
fd := fds.remove()
log.Infof("Mounting root over 9P, ioFD: %d", fd)
hostFS := mustFindFilesystem("9p")
- rootInode, err = hostFS.Mount(ctx, "root", mf, fmt.Sprintf("trans=fd,rfdno=%d,wfdno=%d,privateunixsocket=true", fd, fd))
+ rootInode, err = hostFS.Mount(ctx, rootDevice, mf, fmt.Sprintf("trans=fd,rfdno=%d,wfdno=%d,privateunixsocket=true", fd, fd))
if err != nil {
return nil, fmt.Errorf("failed to generate root mount point: %v", err)
}
case FileAccessDirect:
hostFS := mustFindFilesystem("whitelistfs")
- rootInode, err = hostFS.Mount(ctx, "root", mf, "root="+spec.Root.Path+",dont_translate_ownership=true")
+ rootInode, err = hostFS.Mount(ctx, rootDevice, mf, "root="+spec.Root.Path+",dont_translate_ownership=true")
if err != nil {
return nil, fmt.Errorf("failed to generate root mount point: %v", err)
}
@@ -263,7 +272,7 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri
return fsName, data, useOverlay, err
}
-func mountSubmount(ctx context.Context, spec *specs.Spec, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount) error {
+func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount) error {
// Map mount type to filesystem name, and parse out the options that we are
// capable of dealing with.
fsName, data, useOverlay, err := getMountNameAndOptions(conf, m, fds)
@@ -285,14 +294,13 @@ func mountSubmount(ctx context.Context, spec *specs.Spec, conf *Config, mns *fs.
mf.ReadOnly = true
}
- inode, err := filesystem.Mount(ctx, m.Type, mf, strings.Join(data, ","))
+ inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(data, ","))
if err != nil {
return fmt.Errorf("failed to create mount with source %q: %v", m.Source, err)
}
// If there are submounts, we need to overlay the mount on top of a
// ramfs with stub directories for submount paths.
- mounts := specutils.SupportedMounts(spec.Mounts)
submounts := subtargets(m.Destination, mounts)
if len(submounts) > 0 {
log.Infof("Adding submount overlay over %q", m.Destination)
@@ -406,7 +414,7 @@ func mountDevice(m specs.Mount) string {
if m.Type == "bind" {
// Make a device string that includes the target, which is consistent across
// S/R and uniquely identifies the connection.
- return "p9fs-" + m.Destination
+ return "9pfs-" + m.Destination
}
// All other fs types use device "none".
return "none"
@@ -417,14 +425,24 @@ func mountDevice(m specs.Mount) string {
func addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount, fds *fdDispenser) error {
fsName, data, _, err := getMountNameAndOptions(conf, m, fds)
dataString := strings.Join(data, ",")
+
+ // Return the error or nil that corresponds to the default case in getMountNameAndOptions.
if err != nil {
return err
}
- renv.MountSources[fsName] = append(renv.MountSources[fsName], fs.MountArgs{
+ // TODO: Fix this when we support all the mount types and make this a
+ // fatal error.
+ if fsName == "" {
+ return nil
+ }
+
+ newMount := fs.MountArgs{
Dev: mountDevice(m),
Flags: mountFlags(m.Options),
Data: dataString,
- })
+ }
+ renv.MountSources[fsName] = append(renv.MountSources[fsName], newMount)
+ log.Infof("Added mount at %q: %+v", fsName, newMount)
return nil
}
@@ -438,6 +456,8 @@ func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser)
MountSources: make(map[string][]fs.MountArgs),
}
+ mounts := compileMounts(spec)
+
// Add root mount.
fd := fds.remove()
dataString := strings.Join([]string{"trans=fd", fmt.Sprintf("rfdno=%d", fd), fmt.Sprintf("wfdno=%d", fd), "privateunixsocket=true"}, ",")
@@ -445,15 +465,16 @@ func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser)
if spec.Root.Readonly {
mf.ReadOnly = true
}
- const rootFSName = "9p"
- renv.MountSources[rootFSName] = append(renv.MountSources[rootFSName], fs.MountArgs{
- Dev: "p9fs-/",
+
+ rootMount := fs.MountArgs{
+ Dev: rootDevice,
Flags: mf,
Data: dataString,
- })
+ }
+ renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount)
// Add submounts
- for _, m := range spec.Mounts {
+ for _, m := range mounts {
if err := addRestoreMount(conf, renv, m, fds); err != nil {
return nil, err
}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 014908179..6fcfba5cb 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -29,6 +29,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/cpuid"
"gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/inet"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
@@ -80,6 +81,9 @@ type Loader struct {
// container. It should be called when a sandbox is destroyed.
stopSignalForwarding func()
+ // restore is set to true if we are restoring a container.
+ restore bool
+
// rootProcArgs refers to the root sandbox init task.
rootProcArgs kernel.CreateProcessArgs
@@ -106,7 +110,17 @@ func init() {
}
// New initializes a new kernel loader configured by spec.
+// New also handles setting up a kernel for restoring a container.
func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []int, console bool) (*Loader, error) {
+ var (
+ tk *kernel.Timekeeper
+ creds *auth.Credentials
+ vdso *loader.VDSO
+ utsns *kernel.UTSNamespace
+ ipcns *kernel.IPCNamespace
+ restoreFile *os.File
+ procArgs kernel.CreateProcessArgs
+ )
// Create kernel and platform.
p, err := createPlatform(conf)
if err != nil {
@@ -116,47 +130,60 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
Platform: p,
}
- // Create VDSO.
- //
- // Pass k as the platform since it is savable, unlike the actual platform.
- vdso, err := loader.PrepareVDSO(k)
- if err != nil {
- return nil, fmt.Errorf("error creating vdso: %v", err)
- }
+ if restoreFD == -1 {
+ // Create VDSO.
+ //
+ // Pass k as the platform since it is savable, unlike the actual platform.
+ vdso, err := loader.PrepareVDSO(k)
+ if err != nil {
+ return nil, fmt.Errorf("error creating vdso: %v", err)
+ }
- // Create timekeeper.
- tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange())
- if err != nil {
- return nil, fmt.Errorf("error creating timekeeper: %v", err)
- }
- tk.SetClocks(time.NewCalibratedClocks())
+ // Create timekeeper.
+ tk, err = kernel.NewTimekeeper(k, vdso.ParamPage.FileRange())
+ if err != nil {
+ return nil, fmt.Errorf("error creating timekeeper: %v", err)
+ }
+ tk.SetClocks(time.NewCalibratedClocks())
- // Create capabilities.
- caps, err := specutils.Capabilities(spec.Process.Capabilities)
- if err != nil {
- return nil, fmt.Errorf("error creating capabilities: %v", err)
- }
+ // Create capabilities.
+ caps, err := specutils.Capabilities(spec.Process.Capabilities)
+ if err != nil {
+ return nil, fmt.Errorf("error creating capabilities: %v", err)
+ }
- // Convert the spec's additional GIDs to KGIDs.
- extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids))
- for _, GID := range spec.Process.User.AdditionalGids {
- extraKGIDs = append(extraKGIDs, auth.KGID(GID))
- }
+ // Convert the spec's additional GIDs to KGIDs.
+ extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids))
+ for _, GID := range spec.Process.User.AdditionalGids {
+ extraKGIDs = append(extraKGIDs, auth.KGID(GID))
+ }
- // Create credentials.
- creds := auth.NewUserCredentials(
- auth.KUID(spec.Process.User.UID),
- auth.KGID(spec.Process.User.GID),
- extraKGIDs,
- caps,
- auth.NewRootUserNamespace())
+ // Create credentials.
+ creds = auth.NewUserCredentials(
+ auth.KUID(spec.Process.User.UID),
+ auth.KGID(spec.Process.User.GID),
+ extraKGIDs,
+ caps,
+ auth.NewRootUserNamespace())
- // Create user namespace.
- // TODO: Not clear what domain name should be here. It is
- // not configurable from runtime spec.
- utsns := kernel.NewUTSNamespace(spec.Hostname, "", creds.UserNamespace)
+ // Create UTS namespace.
+ // TODO: Not clear what domain name should be here. It is
+ // not configurable from runtime spec.
+ utsns = kernel.NewUTSNamespace(spec.Hostname, "", creds.UserNamespace)
- ipcns := kernel.NewIPCNamespace(creds.UserNamespace)
+ ipcns = kernel.NewIPCNamespace(creds.UserNamespace)
+ } else {
+ // Create and set RestoreEnvironment
+ fds := &fdDispenser{fds: ioFDs}
+ renv, err := createRestoreEnvironment(spec, conf, fds)
+ if err != nil {
+ return nil, fmt.Errorf("error creating RestoreEnvironment: %v", err)
+ }
+ fs.SetRestoreEnvironment(*renv)
+
+ restoreFile = os.NewFile(uintptr(restoreFD), "restore_file")
+ defer restoreFile.Close()
+ }
if err := enableStrace(conf); err != nil {
return nil, fmt.Errorf("failed to enable strace: %v", err)
@@ -168,19 +195,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
// Run().
networkStack := newEmptyNetworkStack(conf, k)
- // Check if we need to restore the kernel
- if restoreFD != -1 {
- restoreFile := os.NewFile(uintptr(restoreFD), "restore_file")
- defer restoreFile.Close()
-
- // Load the state.
- loadOpts := state.LoadOpts{
- Source: restoreFile,
- }
- if err := loadOpts.Load(k, p, networkStack); err != nil {
- return nil, err
- }
- } else {
+ if restoreFile == nil {
// Initiate the Kernel object, which is required by the Context passed
// to createVFS in order to mount (among other things) procfs.
if err = k.Init(kernel.InitKernelArgs{
@@ -196,6 +211,17 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
}); err != nil {
return nil, fmt.Errorf("error initializing kernel: %v", err)
}
+ } else {
+ // Load the state.
+ loadOpts := state.LoadOpts{
+ Source: restoreFile,
+ }
+ if err := loadOpts.Load(k, p, networkStack); err != nil {
+ return nil, err
+ }
+
+ // Set calibrated clocks on the loaded kernel's timekeeper.
+ k.Timekeeper().SetClocks(time.NewCalibratedClocks())
}
// Turn on packet logging if enabled.
@@ -232,9 +258,11 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
// Ensure that signals received are forwarded to the emulated kernel.
stopSignalForwarding := sighandling.PrepareForwarding(k, false)()
- procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k)
- if err != nil {
- return nil, fmt.Errorf("failed to create root process: %v", err)
+ if restoreFile == nil {
+ procArgs, err = newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create root process: %v", err)
+ }
}
l := &Loader{
@@ -245,6 +273,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
watchdog: watchdog,
stopSignalForwarding: stopSignalForwarding,
rootProcArgs: procArgs,
+ restore: restoreFile != nil,
}
ctrl.manager.l = l
return l, nil
@@ -378,13 +407,16 @@ func (l *Loader) run() error {
}
}
- // Create the root container init task.
- if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
- return fmt.Errorf("failed to create init process: %v", err)
- }
+ // If we are restoring, we do not want to create a process.
+ if !l.restore {
+ // Create the root container init task.
+ if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
+ return fmt.Errorf("failed to create init process: %v", err)
+ }
- // CreateProcess takes a reference on FDMap if successful.
- l.rootProcArgs.FDMap.DecRef()
+ // CreateProcess takes a reference on FDMap if successful.
+ l.rootProcArgs.FDMap.DecRef()
+ }
l.watchdog.Start()
return l.k.Start()
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 15ced0601..28d45b54b 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -364,7 +364,7 @@ func TestRestoreEnvironment(t *testing.T) {
MountSources: map[string][]fs.MountArgs{
"9p": {
{
- Dev: "p9fs-/",
+ Dev: "9pfs-/",
Flags: fs.MountSourceFlags{ReadOnly: true},
Data: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
},
@@ -376,6 +376,24 @@ func TestRestoreEnvironment(t *testing.T) {
{
Dev: "none",
},
+ {
+ Dev: "none",
+ },
+ },
+ "devtmpfs": {
+ {
+ Dev: "none",
+ },
+ },
+ "devpts": {
+ {
+ Dev: "none",
+ },
+ },
+ "sysfs": {
+ {
+ Dev: "none",
+ },
},
},
},
@@ -406,15 +424,40 @@ func TestRestoreEnvironment(t *testing.T) {
MountSources: map[string][]fs.MountArgs{
"9p": {
{
- Dev: "p9fs-/",
+ Dev: "9pfs-/",
Flags: fs.MountSourceFlags{ReadOnly: true},
Data: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
},
{
- Dev: "p9fs-/dev/fd-foo",
+ Dev: "9pfs-/dev/fd-foo",
Data: "trans=fd,rfdno=1,wfdno=1,privateunixsocket=true",
},
},
+ "tmpfs": {
+ {
+ Dev: "none",
+ },
+ },
+ "devtmpfs": {
+ {
+ Dev: "none",
+ },
+ },
+ "devpts": {
+ {
+ Dev: "none",
+ },
+ },
+ "proc": {
+ {
+ Dev: "none",
+ },
+ },
+ "sysfs": {
+ {
+ Dev: "none",
+ },
+ },
},
},
},
@@ -445,7 +488,7 @@ func TestRestoreEnvironment(t *testing.T) {
MountSources: map[string][]fs.MountArgs{
"9p": {
{
- Dev: "p9fs-/",
+ Dev: "9pfs-/",
Flags: fs.MountSourceFlags{ReadOnly: true},
Data: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
},
@@ -456,6 +499,29 @@ func TestRestoreEnvironment(t *testing.T) {
Flags: fs.MountSourceFlags{NoAtime: true},
Data: "uid=1022",
},
+ {
+ Dev: "none",
+ },
+ },
+ "devtmpfs": {
+ {
+ Dev: "none",
+ },
+ },
+ "devpts": {
+ {
+ Dev: "none",
+ },
+ },
+ "proc": {
+ {
+ Dev: "none",
+ },
+ },
+ "sysfs": {
+ {
+ Dev: "none",
+ },
},
},
},