summary | refs | log | tree | commit | diff | homepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r-- runsc/boot/BUILD 1
-rw-r--r-- runsc/boot/controller.go 19
-rw-r--r-- runsc/boot/fs.go 33
-rw-r--r-- runsc/boot/loader.go 9
-rw-r--r-- runsc/boot/vfs.go 28
-rw-r--r-- runsc/cgroup/cgroup.go 11
-rw-r--r-- runsc/cgroup/cgroup_test.go 80
-rw-r--r-- runsc/cmd/start.go 7
-rw-r--r-- runsc/container/container.go 16
-rw-r--r-- runsc/specutils/specutils.go 18
10 files changed, 188 insertions, 34 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 248f77c34..b97dc3c47 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -38,6 +38,7 @@ go_library(
"//pkg/memutil",
"//pkg/rand",
"//pkg/refs",
+ "//pkg/refsvfs2",
"//pkg/sentry/arch",
"//pkg/sentry/arch:registers_go_proto",
"//pkg/sentry/control",
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 894651519..4e0f0d57a 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -30,6 +30,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/netstack"
"gvisor.dev/gvisor/pkg/sentry/state"
"gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sentry/watchdog"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/urpc"
@@ -367,12 +368,20 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
cm.l.k = k
// Set up the restore environment.
+ ctx := k.SupervisorContext()
mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints)
- renv, err := mntr.createRestoreEnvironment(cm.l.root.conf)
- if err != nil {
- return fmt.Errorf("creating RestoreEnvironment: %v", err)
+ if kernel.VFS2Enabled {
+ ctx, err = mntr.configureRestore(ctx, cm.l.root.conf)
+ if err != nil {
+ return fmt.Errorf("configuring filesystem restore: %v", err)
+ }
+ } else {
+ renv, err := mntr.createRestoreEnvironment(cm.l.root.conf)
+ if err != nil {
+ return fmt.Errorf("creating RestoreEnvironment: %v", err)
+ }
+ fs.SetRestoreEnvironment(*renv)
}
- fs.SetRestoreEnvironment(*renv)
// Prepare to load from the state file.
if eps, ok := networkStack.(*netstack.Stack); ok {
@@ -399,7 +408,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
// Load the state.
loadOpts := state.LoadOpts{Source: specFile}
- if err := loadOpts.Load(k, networkStack, time.NewCalibratedClocks()); err != nil {
+ if err := loadOpts.Load(ctx, k, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil {
return err
}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index ddf288456..6b6ae98d7 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -105,33 +105,28 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name
// mandatory mounts that are required by the OCI specification.
func compileMounts(spec *specs.Spec) []specs.Mount {
// Keep track of whether proc and sys were mounted.
- var procMounted, sysMounted bool
+ var procMounted, sysMounted, devMounted, devptsMounted bool
var mounts []specs.Mount
- // Always mount /dev.
- mounts = append(mounts, specs.Mount{
- Type: devtmpfs.Name,
- Destination: "/dev",
- })
-
- mounts = append(mounts, specs.Mount{
- Type: devpts.Name,
- Destination: "/dev/pts",
- })
-
// Mount all submounts from the spec.
for _, m := range spec.Mounts {
if !specutils.IsSupportedDevMount(m) {
log.Warningf("ignoring dev mount at %q", m.Destination)
continue
}
- mounts = append(mounts, m)
switch filepath.Clean(m.Destination) {
case "/proc":
procMounted = true
case "/sys":
sysMounted = true
+ case "/dev":
+ m.Type = devtmpfs.Name
+ devMounted = true
+ case "/dev/pts":
+ m.Type = devpts.Name
+ devptsMounted = true
}
+ mounts = append(mounts, m)
}
// Mount proc and sys even if the user did not ask for it, as the spec
@@ -149,6 +144,18 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
Destination: "/sys",
})
}
+ if !devMounted {
+ mandatoryMounts = append(mandatoryMounts, specs.Mount{
+ Type: devtmpfs.Name,
+ Destination: "/dev",
+ })
+ }
+ if !devptsMounted {
+ mandatoryMounts = append(mandatoryMounts, specs.Mount{
+ Type: devpts.Name,
+ Destination: "/dev/pts",
+ })
+ }
// The mandatory mounts should be ordered right after the root, in case
// there are submounts of these mandatory mounts already in the spec.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 8ad000497..8c6ab213d 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -35,6 +35,7 @@ import (
"gvisor.dev/gvisor/pkg/memutil"
"gvisor.dev/gvisor/pkg/rand"
"gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/refsvfs2"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/fdimport"
@@ -476,6 +477,12 @@ func (l *Loader) Destroy() {
// save/restore.
l.k.Release()
+ // All sentry-created resources should have been released at this point;
+ // check for reference leaks.
+ if refsvfs2.LeakCheckEnabled() {
+ refsvfs2.DoLeakCheck()
+ }
+
// In the success case, stdioFDs and goferFDs will only contain
// released/closed FDs that ownership has been passed over to host FDs and
// gofer sessions. Close them here in case of failure.
@@ -737,7 +744,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
return nil, err
}
- // Add the HOME enviroment variable if it is not already set.
+ // Add the HOME environment variable if it is not already set.
var envv []string
if kernel.VFS2Enabled {
envv, err = user.MaybeAddExecUserHomeVFS2(ctx, info.procArgs.MountNamespaceVFS2,
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 004da5b40..b157387ef 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -210,6 +210,9 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c
ReadOnly: c.root.Readonly,
GetFilesystemOptions: vfs.GetFilesystemOptions{
Data: strings.Join(data, ","),
+ InternalData: gofer.InternalFilesystemOptions{
+ UniqueID: "/",
+ },
},
InternalMount: true,
}
@@ -427,6 +430,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
fsName := m.Type
useOverlay := false
var data []string
+ var iopts interface{}
// Find filesystem name and FS specific data field.
switch m.Type {
@@ -451,6 +455,9 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
return "", nil, false, fmt.Errorf("9P mount requires a connection FD")
}
data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
+ iopts = gofer.InternalFilesystemOptions{
+ UniqueID: m.Destination,
+ }
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
@@ -462,7 +469,8 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
opts := &vfs.MountOptions{
GetFilesystemOptions: vfs.GetFilesystemOptions{
- Data: strings.Join(data, ","),
+ Data: strings.Join(data, ","),
+ InternalData: iopts,
},
InternalMount: true,
}
@@ -667,3 +675,21 @@ func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Crede
}
return c.k.VFS().MakeSyntheticMountpoint(ctx, dest, root, creds)
}
+
+// configureRestore returns ctx extended with a mount-destination -> FD map,
+// stored under gofer.CtxRestoreServerFDMap, for restore. conf is unused here.
+func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Config) (context.Context, error) {
+	fdmap := make(map[string]int)
+	fdmap["/"] = c.fds.remove() // the root mount is keyed by "/" and takes an FD before any submount
+	mounts, err := c.prepareMountsVFS2()
+	if err != nil {
+		return ctx, err
+	}
+	for i := range mounts { // bound the loop by the slice that is actually indexed
+		submount := &mounts[i]
+		if submount.fd >= 0 { // only mounts carrying a non-negative fd are recorded
+			fdmap[submount.Destination] = submount.fd
+		}
+	}
+	return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil
+}
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 56da21584..5bd0afc52 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -21,6 +21,7 @@ import (
"context"
"errors"
"fmt"
+ "io"
"io/ioutil"
"os"
"path/filepath"
@@ -198,8 +199,13 @@ func LoadPaths(pid string) (map[string]string, error) {
}
defer f.Close()
+ return loadPathsHelper(f)
+}
+
+func loadPathsHelper(cgroup io.Reader) (map[string]string, error) {
paths := make(map[string]string)
- scanner := bufio.NewScanner(f)
+
+ scanner := bufio.NewScanner(cgroup)
for scanner.Scan() {
// Format: ID:[name=]controller1,controller2:path
// Example: 2:cpu,cpuacct:/user.slice
@@ -207,6 +213,9 @@ func LoadPaths(pid string) (map[string]string, error) {
if len(tokens) != 3 {
return nil, fmt.Errorf("invalid cgroups file, line: %q", scanner.Text())
}
+ if len(tokens[1]) == 0 {
+ continue
+ }
for _, ctrlr := range strings.Split(tokens[1], ",") {
// Remove prefix for cgroups with no controller, eg. systemd.
ctrlr = strings.TrimPrefix(ctrlr, "name=")
diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go
index 4db5ee5c3..9794517a7 100644
--- a/runsc/cgroup/cgroup_test.go
+++ b/runsc/cgroup/cgroup_test.go
@@ -647,3 +647,83 @@ func TestPids(t *testing.T) {
})
}
}
+
+// TestLoadPaths checks loadPathsHelper against well-formed and malformed
+// cgroup file contents supplied as in-memory strings.
+func TestLoadPaths(t *testing.T) {
+	for _, tc := range []struct {
+		name    string
+		cgroups string
+		want    map[string]string
+		err     string
+	}{
+		{
+			name:    "abs-path",
+			cgroups: "0:ctr:/path",
+			want:    map[string]string{"ctr": "/path"},
+		},
+		{
+			name:    "rel-path",
+			cgroups: "0:ctr:rel-path",
+			want:    map[string]string{"ctr": "rel-path"},
+		},
+		{
+			name:    "non-controller",
+			cgroups: "0:name=systemd:/path",
+			want:    map[string]string{"systemd": "/path"},
+		},
+		{
+			name: "empty",
+		},
+		{
+			name: "multiple",
+			cgroups: "0:ctr0:/path0\n" +
+				"1:ctr1:/path1\n" +
+				"2::/empty\n",
+			want: map[string]string{
+				"ctr0": "/path0",
+				"ctr1": "/path1",
+			},
+		},
+		{
+			name:    "missing-field",
+			cgroups: "0:nopath\n",
+			err:     "invalid cgroups file",
+		},
+		{
+			name:    "too-many-fields",
+			cgroups: "0:ctr:/path:extra\n",
+			err:     "invalid cgroups file",
+		},
+		{
+			name: "multiple-malformed",
+			cgroups: "0:ctr0:/path0\n" +
+				"1:ctr1:/path1\n" +
+				"2:\n",
+			err: "invalid cgroups file",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := loadPathsHelper(strings.NewReader(tc.cgroups))
+			if len(tc.err) == 0 {
+				if err != nil {
+					t.Fatalf("Unexpected error: %v", err)
+				}
+			} else if err == nil || !strings.Contains(err.Error(), tc.err) { // nil err must fail, not panic
+				t.Fatalf("Wrong error message, want: *%s*, got: %v", tc.err, err)
+			}
+			// Matched keys are deleted so that whatever remains in got is unexpected.
+			for key, vWant := range tc.want {
+				if vGot, ok := got[key]; !ok {
+					t.Errorf("Missing controller %q", key)
+				} else if vWant != vGot {
+					t.Errorf("Wrong controller %q value, want: %q, got: %q", key, vWant, vGot)
+				}
+				delete(got, key)
+			}
+			for k, v := range got {
+				t.Errorf("Unexpected controller %q: %q", k, v)
+			}
+		})
+	}
+}
diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go
index 88991b521..139edbd49 100644
--- a/runsc/cmd/start.go
+++ b/runsc/cmd/start.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/runsc/config"
"gvisor.dev/gvisor/runsc/container"
"gvisor.dev/gvisor/runsc/flag"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Start implements subcommands.Command for the "start" command.
@@ -58,6 +59,12 @@ func (*Start) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
if err != nil {
Fatalf("loading container: %v", err)
}
+ // Read the spec again here to ensure flag annotations from the spec are
+ // applied to "conf".
+ if _, err := specutils.ReadSpec(c.BundleDir, conf); err != nil {
+ Fatalf("reading spec: %v", err)
+ }
+
if err := c.Start(conf); err != nil {
Fatalf("starting container: %v", err)
}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 63f64ce6e..52e1755ce 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -312,6 +312,14 @@ func New(conf *config.Config, args Args) (*Container, error) {
if isRoot(args.Spec) {
log.Debugf("Creating new sandbox for container %q", args.ID)
+ if args.Spec.Linux == nil {
+ args.Spec.Linux = &specs.Linux{}
+ }
+ // Don't force the use of cgroups in tests because they lack permission to do so.
+ if args.Spec.Linux.CgroupsPath == "" && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+ args.Spec.Linux.CgroupsPath = "/" + args.ID
+ }
+
// Create and join cgroup before processes are created to ensure they are
// part of the cgroup from the start (and all their children processes).
cg, err := cgroup.New(args.Spec)
@@ -321,7 +329,13 @@ func New(conf *config.Config, args Args) (*Container, error) {
if cg != nil {
// If there is cgroup config, install it before creating sandbox process.
if err := cg.Install(args.Spec.Linux.Resources); err != nil {
- return nil, fmt.Errorf("configuring cgroup: %v", err)
+ switch {
+ case errors.Is(err, syscall.EACCES) && conf.Rootless:
+ log.Warningf("Skipping cgroup configuration in rootless mode: %v", err)
+ cg = nil
+ default:
+ return nil, fmt.Errorf("configuring cgroup: %v", err)
+ }
}
}
if err := runInCgroup(cg, func() error {
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 0392e3e83..45abc1425 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -344,15 +344,9 @@ func IsSupportedDevMount(m specs.Mount) bool {
var existingDevices = []string{
"/dev/fd", "/dev/stdin", "/dev/stdout", "/dev/stderr",
"/dev/null", "/dev/zero", "/dev/full", "/dev/random",
- "/dev/urandom", "/dev/shm", "/dev/pts", "/dev/ptmx",
+ "/dev/urandom", "/dev/shm", "/dev/ptmx",
}
dst := filepath.Clean(m.Destination)
- if dst == "/dev" {
- // OCI spec uses many different mounts for the things inside of '/dev'. We
- // have a single mount at '/dev' that is always mounted, regardless of
- // whether it was asked for, as the spec says we SHOULD.
- return false
- }
for _, dev := range existingDevices {
if dst == dev || strings.HasPrefix(dst, dev+"/") {
return false
@@ -425,7 +419,7 @@ func Mount(src, dst, typ string, flags uint32) error {
// Special case, as there is no source directory for proc mounts.
isDir = true
} else if fi, err := os.Stat(src); err != nil {
- return fmt.Errorf("Stat(%q) failed: %v", src, err)
+ return fmt.Errorf("stat(%q) failed: %v", src, err)
} else {
isDir = fi.IsDir()
}
@@ -433,25 +427,25 @@ func Mount(src, dst, typ string, flags uint32) error {
if isDir {
// Create the destination directory.
if err := os.MkdirAll(dst, 0777); err != nil {
- return fmt.Errorf("Mkdir(%q) failed: %v", dst, err)
+ return fmt.Errorf("mkdir(%q) failed: %v", dst, err)
}
} else {
// Create the parent destination directory.
parent := path.Dir(dst)
if err := os.MkdirAll(parent, 0777); err != nil {
- return fmt.Errorf("Mkdir(%q) failed: %v", parent, err)
+ return fmt.Errorf("mkdir(%q) failed: %v", parent, err)
}
// Create the destination file if it does not exist.
f, err := os.OpenFile(dst, syscall.O_CREAT, 0777)
if err != nil {
- return fmt.Errorf("Open(%q) failed: %v", dst, err)
+ return fmt.Errorf("open(%q) failed: %v", dst, err)
}
f.Close()
}
// Do the mount.
if err := syscall.Mount(src, dst, typ, uintptr(flags), ""); err != nil {
- return fmt.Errorf("Mount(%q, %q, %d) failed: %v", src, dst, flags, err)
+ return fmt.Errorf("mount(%q, %q, %d) failed: %v", src, dst, flags, err)
}
return nil
}