diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/BUILD | 1 | ||||
-rw-r--r-- | runsc/boot/controller.go | 19 | ||||
-rw-r--r-- | runsc/boot/fs.go | 33 | ||||
-rw-r--r-- | runsc/boot/loader.go | 9 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 28 | ||||
-rw-r--r-- | runsc/cgroup/cgroup.go | 11 | ||||
-rw-r--r-- | runsc/cgroup/cgroup_test.go | 80 | ||||
-rw-r--r-- | runsc/cmd/start.go | 7 | ||||
-rw-r--r-- | runsc/container/container.go | 16 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 18 |
10 files changed, 188 insertions, 34 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 248f77c34..b97dc3c47 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -38,6 +38,7 @@ go_library( "//pkg/memutil", "//pkg/rand", "//pkg/refs", + "//pkg/refsvfs2", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/control", diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 894651519..4e0f0d57a 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -30,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/state" "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/urpc" @@ -367,12 +368,20 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { cm.l.k = k // Set up the restore environment. + ctx := k.SupervisorContext() mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints) - renv, err := mntr.createRestoreEnvironment(cm.l.root.conf) - if err != nil { - return fmt.Errorf("creating RestoreEnvironment: %v", err) + if kernel.VFS2Enabled { + ctx, err = mntr.configureRestore(ctx, cm.l.root.conf) + if err != nil { + return fmt.Errorf("configuring filesystem restore: %v", err) + } + } else { + renv, err := mntr.createRestoreEnvironment(cm.l.root.conf) + if err != nil { + return fmt.Errorf("creating RestoreEnvironment: %v", err) + } + fs.SetRestoreEnvironment(*renv) } - fs.SetRestoreEnvironment(*renv) // Prepare to load from the state file. if eps, ok := networkStack.(*netstack.Stack); ok { @@ -399,7 +408,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Load the state. loadOpts := state.LoadOpts{Source: specFile} - if err := loadOpts.Load(k, networkStack, time.NewCalibratedClocks()); err != nil { + if err := loadOpts.Load(ctx, k, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { return err } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index ddf288456..6b6ae98d7 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -105,33 +105,28 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name // mandatory mounts that are required by the OCI specification. func compileMounts(spec *specs.Spec) []specs.Mount { // Keep track of whether proc and sys were mounted. - var procMounted, sysMounted bool + var procMounted, sysMounted, devMounted, devptsMounted bool var mounts []specs.Mount - // Always mount /dev. - mounts = append(mounts, specs.Mount{ - Type: devtmpfs.Name, - Destination: "/dev", - }) - - mounts = append(mounts, specs.Mount{ - Type: devpts.Name, - Destination: "/dev/pts", - }) - // Mount all submounts from the spec. for _, m := range spec.Mounts { if !specutils.IsSupportedDevMount(m) { log.Warningf("ignoring dev mount at %q", m.Destination) continue } - mounts = append(mounts, m) switch filepath.Clean(m.Destination) { case "/proc": procMounted = true case "/sys": sysMounted = true + case "/dev": + m.Type = devtmpfs.Name + devMounted = true + case "/dev/pts": + m.Type = devpts.Name + devptsMounted = true } + mounts = append(mounts, m) } // Mount proc and sys even if the user did not ask for it, as the spec @@ -149,6 +144,18 @@ func compileMounts(spec *specs.Spec) []specs.Mount { Destination: "/sys", }) } + if !devMounted { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: devtmpfs.Name, + Destination: "/dev", + }) + } + if !devptsMounted { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: devpts.Name, + Destination: "/dev/pts", + }) + } // The mandatory mounts should be ordered right after the root, in case // there are submounts of these mandatory mounts already in the spec. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 8ad000497..8c6ab213d 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -35,6 +35,7 @@ import ( "gvisor.dev/gvisor/pkg/memutil" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/fdimport" @@ -476,6 +477,12 @@ func (l *Loader) Destroy() { // save/restore. l.k.Release() + // All sentry-created resources should have been released at this point; + // check for reference leaks. + if refsvfs2.LeakCheckEnabled() { + refsvfs2.DoLeakCheck() + } + // In the success case, stdioFDs and goferFDs will only contain // released/closed FDs that ownership has been passed over to host FDs and // gofer sessions. Close them here in case of failure. @@ -737,7 +744,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn return nil, err } - // Add the HOME enviroment variable if it is not already set. + // Add the HOME environment variable if it is not already set. var envv []string if kernel.VFS2Enabled { envv, err = user.MaybeAddExecUserHomeVFS2(ctx, info.procArgs.MountNamespaceVFS2, diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 004da5b40..b157387ef 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -210,6 +210,9 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c ReadOnly: c.root.Readonly, GetFilesystemOptions: vfs.GetFilesystemOptions{ Data: strings.Join(data, ","), + InternalData: gofer.InternalFilesystemOptions{ + UniqueID: "/", + }, }, InternalMount: true, } @@ -427,6 +430,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo fsName := m.Type useOverlay := false var data []string + var iopts interface{} // Find filesystem name and FS specific data field. switch m.Type { @@ -451,6 +455,9 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo return "", nil, false, fmt.Errorf("9P mount requires a connection FD") } data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) + iopts = gofer.InternalFilesystemOptions{ + UniqueID: m.Destination, + } // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -462,7 +469,8 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo opts := &vfs.MountOptions{ GetFilesystemOptions: vfs.GetFilesystemOptions{ - Data: strings.Join(data, ","), + Data: strings.Join(data, ","), + InternalData: iopts, }, InternalMount: true, } @@ -667,3 +675,21 @@ func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Crede } return c.k.VFS().MakeSyntheticMountpoint(ctx, dest, root, creds) } + +// configureRestore returns an updated context.Context including filesystem +// state used by restore defined by conf. +func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Config) (context.Context, error) { + fdmap := make(map[string]int) + fdmap["/"] = c.fds.remove() + mounts, err := c.prepareMountsVFS2() + if err != nil { + return ctx, err + } + for i := range c.mounts { + submount := &mounts[i] + if submount.fd >= 0 { + fdmap[submount.Destination] = submount.fd + } + } + return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil +} diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 56da21584..5bd0afc52 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -21,6 +21,7 @@ import ( "context" "errors" "fmt" + "io" "io/ioutil" "os" "path/filepath" @@ -198,8 +199,13 @@ func LoadPaths(pid string) (map[string]string, error) { } defer f.Close() + return loadPathsHelper(f) +} + +func loadPathsHelper(cgroup io.Reader) (map[string]string, error) { paths := make(map[string]string) - scanner := bufio.NewScanner(f) + + scanner := bufio.NewScanner(cgroup) for scanner.Scan() { // Format: ID:[name=]controller1,controller2:path // Example: 2:cpu,cpuacct:/user.slice @@ -207,6 +213,9 @@ func LoadPaths(pid string) (map[string]string, error) { if len(tokens) != 3 { return nil, fmt.Errorf("invalid cgroups file, line: %q", scanner.Text()) } + if len(tokens[1]) == 0 { + continue + } for _, ctrlr := range strings.Split(tokens[1], ",") { // Remove prefix for cgroups with no controller, eg. systemd. ctrlr = strings.TrimPrefix(ctrlr, "name=") diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go index 4db5ee5c3..9794517a7 100644 --- a/runsc/cgroup/cgroup_test.go +++ b/runsc/cgroup/cgroup_test.go @@ -647,3 +647,83 @@ func TestPids(t *testing.T) { }) } } + +func TestLoadPaths(t *testing.T) { + for _, tc := range []struct { + name string + cgroups string + want map[string]string + err string + }{ + { + name: "abs-path", + cgroups: "0:ctr:/path", + want: map[string]string{"ctr": "/path"}, + }, + { + name: "rel-path", + cgroups: "0:ctr:rel-path", + want: map[string]string{"ctr": "rel-path"}, + }, + { + name: "non-controller", + cgroups: "0:name=systemd:/path", + want: map[string]string{"systemd": "/path"}, + }, + { + name: "empty", + }, + { + name: "multiple", + cgroups: "0:ctr0:/path0\n" + + "1:ctr1:/path1\n" + + "2::/empty\n", + want: map[string]string{ + "ctr0": "/path0", + "ctr1": "/path1", + }, + }, + { + name: "missing-field", + cgroups: "0:nopath\n", + err: "invalid cgroups file", + }, + { + name: "too-many-fields", + cgroups: "0:ctr:/path:extra\n", + err: "invalid cgroups file", + }, + { + name: "multiple-malformed", + cgroups: "0:ctr0:/path0\n" + + "1:ctr1:/path1\n" + + "2:\n", + err: "invalid cgroups file", + }, + } { + t.Run(tc.name, func(t *testing.T) { + r := strings.NewReader(tc.cgroups) + got, err := loadPathsHelper(r) + if len(tc.err) == 0 { + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + } else if !strings.Contains(err.Error(), tc.err) { + t.Fatalf("Wrong error message, want: *%s*, got: %v", tc.err, err) + } + for key, vWant := range tc.want { + vGot, ok := got[key] + if !ok { + t.Errorf("Missing controller %q", key) + } + if vWant != vGot { + t.Errorf("Wrong controller %q value, want: %q, got: %q", key, vWant, vGot) + } + delete(got, key) + } + for k, v := range got { + t.Errorf("Unexpected controller %q: %q", k, v) + } + }) + } +} diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go index 88991b521..139edbd49 100644 --- a/runsc/cmd/start.go +++ b/runsc/cmd/start.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/flag" + "gvisor.dev/gvisor/runsc/specutils" ) // Start implements subcommands.Command for the "start" command. @@ -58,6 +59,12 @@ func (*Start) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s if err != nil { Fatalf("loading container: %v", err) } + // Read the spec again here to ensure flag annotations from the spec are + // applied to "conf". + if _, err := specutils.ReadSpec(c.BundleDir, conf); err != nil { + Fatalf("reading spec: %v", err) + } + if err := c.Start(conf); err != nil { Fatalf("starting container: %v", err) } diff --git a/runsc/container/container.go b/runsc/container/container.go index 63f64ce6e..52e1755ce 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -312,6 +312,14 @@ func New(conf *config.Config, args Args) (*Container, error) { if isRoot(args.Spec) { log.Debugf("Creating new sandbox for container %q", args.ID) + if args.Spec.Linux == nil { + args.Spec.Linux = &specs.Linux{} + } + // Don't force the use of cgroups in tests because they lack permission to do so. + if args.Spec.Linux.CgroupsPath == "" && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + args.Spec.Linux.CgroupsPath = "/" + args.ID + } + // Create and join cgroup before processes are created to ensure they are // part of the cgroup from the start (and all their children processes). cg, err := cgroup.New(args.Spec) @@ -321,7 +329,13 @@ func New(conf *config.Config, args Args) (*Container, error) { if cg != nil { // If there is cgroup config, install it before creating sandbox process. if err := cg.Install(args.Spec.Linux.Resources); err != nil { - return nil, fmt.Errorf("configuring cgroup: %v", err) + switch { + case errors.Is(err, syscall.EACCES) && conf.Rootless: + log.Warningf("Skipping cgroup configuration in rootless mode: %v", err) + cg = nil + default: + return nil, fmt.Errorf("configuring cgroup: %v", err) + } } } if err := runInCgroup(cg, func() error { diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 0392e3e83..45abc1425 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -344,15 +344,9 @@ func IsSupportedDevMount(m specs.Mount) bool { var existingDevices = []string{ "/dev/fd", "/dev/stdin", "/dev/stdout", "/dev/stderr", "/dev/null", "/dev/zero", "/dev/full", "/dev/random", - "/dev/urandom", "/dev/shm", "/dev/pts", "/dev/ptmx", + "/dev/urandom", "/dev/shm", "/dev/ptmx", } dst := filepath.Clean(m.Destination) - if dst == "/dev" { - // OCI spec uses many different mounts for the things inside of '/dev'. We - // have a single mount at '/dev' that is always mounted, regardless of - // whether it was asked for, as the spec says we SHOULD. - return false - } for _, dev := range existingDevices { if dst == dev || strings.HasPrefix(dst, dev+"/") { return false @@ -425,7 +419,7 @@ func Mount(src, dst, typ string, flags uint32) error { // Special case, as there is no source directory for proc mounts. isDir = true } else if fi, err := os.Stat(src); err != nil { - return fmt.Errorf("Stat(%q) failed: %v", src, err) + return fmt.Errorf("stat(%q) failed: %v", src, err) } else { isDir = fi.IsDir() } @@ -433,25 +427,25 @@ func Mount(src, dst, typ string, flags uint32) error { if isDir { // Create the destination directory. if err := os.MkdirAll(dst, 0777); err != nil { - return fmt.Errorf("Mkdir(%q) failed: %v", dst, err) + return fmt.Errorf("mkdir(%q) failed: %v", dst, err) } } else { // Create the parent destination directory. parent := path.Dir(dst) if err := os.MkdirAll(parent, 0777); err != nil { - return fmt.Errorf("Mkdir(%q) failed: %v", parent, err) + return fmt.Errorf("mkdir(%q) failed: %v", parent, err) } // Create the destination file if it does not exist. f, err := os.OpenFile(dst, syscall.O_CREAT, 0777) if err != nil { - return fmt.Errorf("Open(%q) failed: %v", dst, err) + return fmt.Errorf("open(%q) failed: %v", dst, err) } f.Close() } // Do the mount. if err := syscall.Mount(src, dst, typ, uintptr(flags), ""); err != nil { - return fmt.Errorf("Mount(%q, %q, %d) failed: %v", src, dst, flags, err) + return fmt.Errorf("mount(%q, %q, %d) failed: %v", src, dst, flags, err) } return nil } |