diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/BUILD | 2 | ||||
-rw-r--r-- | runsc/boot/controller.go | 50 | ||||
-rw-r--r-- | runsc/boot/fs.go | 33 | ||||
-rw-r--r-- | runsc/boot/loader.go | 11 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 57 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 28 |
6 files changed, 117 insertions, 64 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 248f77c34..8c73dc5dc 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -38,6 +38,7 @@ go_library( "//pkg/memutil", "//pkg/rand", "//pkg/refs", + "//pkg/refsvfs2", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/control", @@ -74,6 +75,7 @@ go_library( "//pkg/sentry/platform", "//pkg/sentry/sighandling", "//pkg/sentry/socket/hostinet", + "//pkg/sentry/socket/netfilter", "//pkg/sentry/socket/netlink", "//pkg/sentry/socket/netlink/route", "//pkg/sentry/socket/netlink/uevent", diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 894651519..fdf13c8e1 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -30,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/state" "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/urpc" @@ -195,7 +196,7 @@ type containerManager struct { // StartRoot will start the root container process. func (cm *containerManager) StartRoot(cid *string, _ *struct{}) error { - log.Debugf("containerManager.StartRoot %q", *cid) + log.Debugf("containerManager.StartRoot, cid: %s", *cid) // Tell the root container to start and wait for the result. cm.startChan <- struct{}{} if err := <-cm.startResultChan; err != nil { @@ -206,13 +207,13 @@ func (cm *containerManager) StartRoot(cid *string, _ *struct{}) error { // Processes retrieves information about processes running in the sandbox. func (cm *containerManager) Processes(cid *string, out *[]*control.Process) error { - log.Debugf("containerManager.Processes: %q", *cid) + log.Debugf("containerManager.Processes, cid: %s", *cid) return control.Processes(cm.l.k, *cid, out) } // Create creates a container within a sandbox. func (cm *containerManager) Create(cid *string, _ *struct{}) error { - log.Debugf("containerManager.Create: %q", *cid) + log.Debugf("containerManager.Create, cid: %s", *cid) return cm.l.createContainer(*cid) } @@ -236,12 +237,11 @@ type StartArgs struct { // Start runs a created container within a sandbox. func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { - log.Debugf("containerManager.Start: %+v", args) - // Validate arguments. if args == nil { return errors.New("start missing arguments") } + log.Debugf("containerManager.Start, cid: %s, args: %+v", args.CID, args) if args.Spec == nil { return errors.New("start arguments missing spec") } @@ -268,27 +268,27 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { } }() if err := cm.l.startContainer(args.Spec, args.Conf, args.CID, fds); err != nil { - log.Debugf("containerManager.Start failed %q: %+v: %v", args.CID, args, err) + log.Debugf("containerManager.Start failed, cid: %s, args: %+v, err: %v", args.CID, args, err) return err } - log.Debugf("Container %q started", args.CID) + log.Debugf("Container started, cid: %s", args.CID) return nil } // Destroy stops a container if it is still running and cleans up its // filesystem. func (cm *containerManager) Destroy(cid *string, _ *struct{}) error { - log.Debugf("containerManager.destroy %q", *cid) + log.Debugf("containerManager.destroy, cid: %s", *cid) return cm.l.destroyContainer(*cid) } // ExecuteAsync starts running a command on a created or running sandbox. It // returns the PID of the new process. func (cm *containerManager) ExecuteAsync(args *control.ExecArgs, pid *int32) error { - log.Debugf("containerManager.ExecuteAsync: %+v", args) + log.Debugf("containerManager.ExecuteAsync, cid: %s, args: %+v", args.ContainerID, args) tgid, err := cm.l.executeAsync(args) if err != nil { - log.Debugf("containerManager.ExecuteAsync failed: %+v: %v", args, err) + log.Debugf("containerManager.ExecuteAsync failed, cid: %s, args: %+v, err: %v", args.ContainerID, args, err) return err } *pid = int32(tgid) @@ -367,12 +367,20 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { cm.l.k = k // Set up the restore environment. + ctx := k.SupervisorContext() mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints) - renv, err := mntr.createRestoreEnvironment(cm.l.root.conf) - if err != nil { - return fmt.Errorf("creating RestoreEnvironment: %v", err) + if kernel.VFS2Enabled { + ctx, err = mntr.configureRestore(ctx, cm.l.root.conf) + if err != nil { + return fmt.Errorf("configuring filesystem restore: %v", err) + } + } else { + renv, err := mntr.createRestoreEnvironment(cm.l.root.conf) + if err != nil { + return fmt.Errorf("creating RestoreEnvironment: %v", err) + } + fs.SetRestoreEnvironment(*renv) } - fs.SetRestoreEnvironment(*renv) // Prepare to load from the state file. if eps, ok := networkStack.(*netstack.Stack); ok { @@ -399,7 +407,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Load the state. loadOpts := state.LoadOpts{Source: specFile} - if err := loadOpts.Load(k, networkStack, time.NewCalibratedClocks()); err != nil { + if err := loadOpts.Load(ctx, k, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { return err } @@ -444,9 +452,9 @@ func (cm *containerManager) Resume(_, _ *struct{}) error { // Wait waits for the init process in the given container. func (cm *containerManager) Wait(cid *string, waitStatus *uint32) error { - log.Debugf("containerManager.Wait") + log.Debugf("containerManager.Wait, cid: %s", *cid) err := cm.l.waitContainer(*cid, waitStatus) - log.Debugf("containerManager.Wait returned, waitStatus: %v: %v", waitStatus, err) + log.Debugf("containerManager.Wait returned, cid: %s, waitStatus: %#x, err: %v", *cid, *waitStatus, err) return err } @@ -461,8 +469,10 @@ type WaitPIDArgs struct { // WaitPID waits for the process with PID 'pid' in the sandbox. func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error { - log.Debugf("containerManager.Wait") - return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus) + log.Debugf("containerManager.Wait, cid: %s, pid: %d", args.CID, args.PID) + err := cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus) + log.Debugf("containerManager.Wait, cid: %s, pid: %d, waitStatus: %#x, err: %v", args.CID, args.PID, *waitStatus, err) + return err } // SignalDeliveryMode enumerates different signal delivery modes. @@ -519,6 +529,6 @@ type SignalArgs struct { // indicated process, to all processes in the container, or to the foreground // process group. func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error { - log.Debugf("containerManager.Signal %+v", args) + log.Debugf("containerManager.Signal: cid: %s, PID: %d, signal: %d, mode: %v", args.CID, args.PID, args.Signo, args.Mode) return cm.l.signal(args.CID, args.PID, args.Signo, args.Mode) } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index ddf288456..6b6ae98d7 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -105,33 +105,28 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name // mandatory mounts that are required by the OCI specification. func compileMounts(spec *specs.Spec) []specs.Mount { // Keep track of whether proc and sys were mounted. - var procMounted, sysMounted bool + var procMounted, sysMounted, devMounted, devptsMounted bool var mounts []specs.Mount - // Always mount /dev. - mounts = append(mounts, specs.Mount{ - Type: devtmpfs.Name, - Destination: "/dev", - }) - - mounts = append(mounts, specs.Mount{ - Type: devpts.Name, - Destination: "/dev/pts", - }) - // Mount all submounts from the spec. for _, m := range spec.Mounts { if !specutils.IsSupportedDevMount(m) { log.Warningf("ignoring dev mount at %q", m.Destination) continue } - mounts = append(mounts, m) switch filepath.Clean(m.Destination) { case "/proc": procMounted = true case "/sys": sysMounted = true + case "/dev": + m.Type = devtmpfs.Name + devMounted = true + case "/dev/pts": + m.Type = devpts.Name + devptsMounted = true } + mounts = append(mounts, m) } // Mount proc and sys even if the user did not ask for it, as the spec @@ -149,6 +144,18 @@ func compileMounts(spec *specs.Spec) []specs.Mount { Destination: "/sys", }) } + if !devMounted { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: devtmpfs.Name, + Destination: "/dev", + }) + } + if !devptsMounted { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: devpts.Name, + Destination: "/dev/pts", + }) + } // The mandatory mounts should be ordered right after the root, in case // there are submounts of these mandatory mounts already in the spec. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 8ad000497..ebdd518d0 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -35,6 +35,7 @@ import ( "gvisor.dev/gvisor/pkg/memutil" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/fdimport" @@ -49,6 +50,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/sighandling" + "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/syscalls/linux/vfs2" "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sentry/usage" @@ -476,6 +478,10 @@ func (l *Loader) Destroy() { // save/restore. l.k.Release() + // All sentry-created resources should have been released at this point; + // check for reference leaks. + refsvfs2.DoLeakCheck() + // In the success case, stdioFDs and goferFDs will only contain // released/closed FDs that ownership has been passed over to host FDs and // gofer sessions. Close them here in case of failure. @@ -737,7 +743,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn return nil, err } - // Add the HOME enviroment variable if it is not already set. + // Add the HOME environment variable if it is not already set. var envv []string if kernel.VFS2Enabled { envv, err = user.MaybeAddExecUserHomeVFS2(ctx, info.procArgs.MountNamespaceVFS2, @@ -882,7 +888,7 @@ func (l *Loader) destroyContainer(cid string) error { } } - log.Debugf("Container destroyed %q", cid) + log.Debugf("Container destroyed, cid: %s", cid) return nil } @@ -1079,6 +1085,7 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in // privileges. RawFactory: raw.EndpointFactory{}, UniqueID: uniqueID, + IPTables: netfilter.DefaultLinuxTables(), })} // Enable SACK Recovery. diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index e376f944b..b77b4762e 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -266,7 +266,7 @@ type CreateMountTestcase struct { func createMountTestcases() []*CreateMountTestcase { testCases := []*CreateMountTestcase{ - &CreateMountTestcase{ + { // Only proc. name: "only proc mount", spec: specs.Spec{ @@ -304,11 +304,10 @@ func createMountTestcases() []*CreateMountTestcase { }, }, }, - // /some/deep/path should be mounted, along with /proc, - // /dev, and /sys. + // /some/deep/path should be mounted, along with /proc, /dev, and /sys. expectedPaths: []string{"/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - &CreateMountTestcase{ + { // Mounts are nested inside each other. name: "nested mounts", spec: specs.Spec{ @@ -352,7 +351,7 @@ func createMountTestcases() []*CreateMountTestcase { expectedPaths: []string{"/foo", "/foo/bar", "/foo/bar/baz", "/foo/qux", "/foo/qux-quz", "/foo/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - &CreateMountTestcase{ + { name: "mount inside /dev", spec: specs.Spec{ Root: &specs.Root{ @@ -395,35 +394,37 @@ func createMountTestcases() []*CreateMountTestcase { }, expectedPaths: []string{"/proc", "/dev", "/dev/fd-foo", "/dev/foo", "/dev/bar", "/sys"}, }, - } - - vfsCase := &CreateMountTestcase{ - name: "mounts inside mandatory mounts", - spec: specs.Spec{ - Root: &specs.Root{ - Path: os.TempDir(), - Readonly: true, - }, - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "tmpfs", - }, - { - Destination: "/sys/bar", - Type: "tmpfs", + { + name: "mounts inside mandatory mounts", + spec: specs.Spec{ + Root: &specs.Root{ + Path: os.TempDir(), + Readonly: true, }, - - { - Destination: "/tmp/baz", - Type: "tmpfs", + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "tmpfs", + }, + { + Destination: "/sys/bar", + Type: "tmpfs", + }, + { + Destination: "/tmp/baz", + Type: "tmpfs", + }, + { + Destination: "/dev/goo", + Type: "tmpfs", + }, }, }, + expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz", "/dev/goo"}, }, - expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"}, } - return append(testCases, vfsCase) + return testCases } // Test that MountNamespace can be created with various specs. diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 004da5b40..b157387ef 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -210,6 +210,9 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c ReadOnly: c.root.Readonly, GetFilesystemOptions: vfs.GetFilesystemOptions{ Data: strings.Join(data, ","), + InternalData: gofer.InternalFilesystemOptions{ + UniqueID: "/", + }, }, InternalMount: true, } @@ -427,6 +430,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo fsName := m.Type useOverlay := false var data []string + var iopts interface{} // Find filesystem name and FS specific data field. switch m.Type { @@ -451,6 +455,9 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo return "", nil, false, fmt.Errorf("9P mount requires a connection FD") } data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) + iopts = gofer.InternalFilesystemOptions{ + UniqueID: m.Destination, + } // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -462,7 +469,8 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo opts := &vfs.MountOptions{ GetFilesystemOptions: vfs.GetFilesystemOptions{ - Data: strings.Join(data, ","), + Data: strings.Join(data, ","), + InternalData: iopts, }, InternalMount: true, } @@ -667,3 +675,21 @@ func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Crede } return c.k.VFS().MakeSyntheticMountpoint(ctx, dest, root, creds) } + +// configureRestore returns an updated context.Context including filesystem +// state used by restore defined by conf. +func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Config) (context.Context, error) { + fdmap := make(map[string]int) + fdmap["/"] = c.fds.remove() + mounts, err := c.prepareMountsVFS2() + if err != nil { + return ctx, err + } + for i := range c.mounts { + submount := &mounts[i] + if submount.fd >= 0 { + fdmap[submount.Destination] = submount.fd + } + } + return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil +} |