diff options
Diffstat (limited to 'runsc/boot/fs.go')
-rw-r--r-- | runsc/boot/fs.go | 159 |
1 files changed, 116 insertions, 43 deletions
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index eea2ec1f5..8996b1398 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -48,6 +48,19 @@ const ( // Device name for root mount. rootDevice = "9pfs-/" + + // childContainersDir is the directory where child container root + // filesystems are mounted. + childContainersDir = "/__runsc_containers__" + + // Filesystems that runsc supports. + bind = "bind" + devpts = "devpts" + devtmpfs = "devtmpfs" + proc = "proc" + sysfs = "sysfs" + tmpfs = "tmpfs" + nonefs = "none" ) type fdDispenser struct { @@ -70,8 +83,15 @@ func (f *fdDispenser) empty() bool { // createMountNamespace creates a mount namespace containing the root filesystem // and all mounts. 'rootCtx' is used to walk directories to find mount points. func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec *specs.Spec, conf *Config, ioFDs []int) (*fs.MountNamespace, error) { + mounts := compileMounts(spec) + // Create a tmpfs mount where we create and mount a root filesystem for + // each child container. + mounts = append(mounts, specs.Mount{ + Type: tmpfs, + Destination: childContainersDir, + }) fds := &fdDispenser{fds: ioFDs} - rootInode, err := createRootMount(rootCtx, spec, conf, fds) + rootInode, err := createRootMount(rootCtx, spec, conf, fds, mounts) if err != nil { return nil, fmt.Errorf("failed to create root mount: %v", err) } @@ -79,7 +99,7 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec if err != nil { return nil, fmt.Errorf("failed to create root mount namespace: %v", err) } - mounts := compileMounts(spec) + if err := setMounts(rootCtx, conf, mns, fds, mounts); err != nil { return nil, fmt.Errorf("failed to configure mounts: %v", err) } @@ -98,12 +118,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // Always mount /dev. mounts = append(mounts, specs.Mount{ - Type: "devtmpfs", + Type: devtmpfs, Destination: "/dev", }) mounts = append(mounts, specs.Mount{ - Type: "devpts", + Type: devpts, Destination: "/dev/pts", }) @@ -129,13 +149,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount { var mandatoryMounts []specs.Mount if !procMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: "proc", + Type: proc, Destination: "/proc", }) } if !sysMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: "sysfs", + Type: sysfs, Destination: "/sys", }) } @@ -149,7 +169,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // that. Until then, the /tmp mount will always appear empty at // container creation. mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: "tmpfs", + Type: tmpfs, Destination: "/tmp", }) } @@ -165,7 +185,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // mount namespace. func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, mounts []specs.Mount) error { for _, m := range mounts { - if err := mountSubmount(ctx, conf, mns, fds, m, mounts); err != nil { + if err := mountSubmount(ctx, conf, mns, fds, m, mounts, m.Destination); err != nil { return err } } @@ -173,7 +193,7 @@ func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *f } // createRootMount creates the root filesystem. -func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser) (*fs.Inode, error) { +func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser, mounts []specs.Mount) (*fs.Inode, error) { // First construct the filesystem from the spec.Root. mf := fs.MountSourceFlags{ReadOnly: spec.Root.Readonly} @@ -207,7 +227,7 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f // We need to overlay the root on top of a ramfs with stub directories // for submount paths. "/dev" "/sys" "/proc" and "/tmp" are always // mounted even if they are not in the spec. - submounts := append(subtargets("/", spec.Mounts), "/dev", "/sys", "/proc", "/tmp") + submounts := append(subtargets("/", mounts), "/dev", "/sys", "/proc", "/tmp") rootInode, err = addSubmountOverlay(ctx, rootInode, submounts) if err != nil { return nil, fmt.Errorf("error adding submount overlay: %v", err) @@ -256,17 +276,17 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri ) switch m.Type { - case "devpts", "devtmpfs", "proc", "sysfs": + case devpts, devtmpfs, proc, sysfs: fsName = m.Type - case "none": - fsName = "sysfs" - case "tmpfs": + case nonefs: + fsName = sysfs + case tmpfs: fsName = m.Type // tmpfs has some extra supported options that we must pass through. opts, err = parseAndFilterOptions(m.Options, "mode", "uid", "gid") - case "bind": + case bind: switch conf.FileAccess { case FileAccessProxy, FileAccessProxyExclusive: fd := fds.remove() @@ -291,7 +311,7 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri return fsName, opts, useOverlay, err } -func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount) error { +func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount, dest string) error { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. fsName, opts, useOverlay, err := getMountNameAndOptions(conf, m, fds) @@ -342,51 +362,52 @@ func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fd // in the right location, e.g. // mount: /var/run/secrets, may be created in '/run/secrets' if // '/var/run' => '/var'. - if err := mkdirAll(ctx, mns, m.Destination); err != nil { + if err := mkdirAll(ctx, mns, dest); err != nil { return err } root := mns.Root() defer root.DecRef() - dirent, err := mns.FindInode(ctx, root, nil, m.Destination, linux.MaxSymlinkTraversals) + dirent, err := mns.FindInode(ctx, root, nil, dest, linux.MaxSymlinkTraversals) if err != nil { - return fmt.Errorf("failed to find mount destination %q: %v", m.Destination, err) + return fmt.Errorf("failed to find mount destination %q: %v", dest, err) } defer dirent.DecRef() if err := mns.Mount(ctx, dirent, inode); err != nil { - return fmt.Errorf("failed to mount at destination %q: %v", m.Destination, err) + return fmt.Errorf("failed to mount at destination %q: %v", dest, err) } - log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type) + log.Infof("Mounted %q to %q type %s", m.Source, dest, m.Type) return nil } func mkdirAll(ctx context.Context, mns *fs.MountNamespace, path string) error { + log.Infof("mkdirAll called with path %s", path) root := mns.Root() defer root.DecRef() // Starting at the root, walk the path. parent := root ps := strings.Split(filepath.Clean(path), string(filepath.Separator)) - for i := 0; i < len(ps); i++ { - if ps[i] == "" { + for _, pathElem := range ps { + if pathElem == "" { // This will be case for the first and last element, if the path // begins or ends with '/'. Note that we always treat the path as // absolute, regardless of what the first character contains. continue } - d, err := mns.FindInode(ctx, root, parent, ps[i], fs.DefaultTraversalLimit) + d, err := mns.FindInode(ctx, root, parent, pathElem, fs.DefaultTraversalLimit) if err == syserror.ENOENT { // If we encounter a path that does not exist, then // create it. - if err := parent.CreateDirectory(ctx, root, ps[i], fs.FilePermsFromMode(0755)); err != nil { - return fmt.Errorf("failed to create directory %q: %v", ps[i], err) + if err := parent.CreateDirectory(ctx, root, pathElem, fs.FilePermsFromMode(0755)); err != nil { + return fmt.Errorf("failed to create directory %q: %v", pathElem, err) } - if d, err = parent.Walk(ctx, root, ps[i]); err != nil { - return fmt.Errorf("walk to %q failed: %v", ps[i], err) + if d, err = parent.Walk(ctx, root, pathElem); err != nil { + return fmt.Errorf("walk to %q failed: %v", pathElem, err) } } else if err != nil { - return fmt.Errorf("failed to find inode %q: %v", ps[i], err) + return fmt.Errorf("failed to find inode %q: %v", pathElem, err) } parent = d } @@ -444,7 +465,7 @@ func destinations(mounts []specs.Mount, extra ...string) []string { // mountDevice returns a device string based on the fs type and target // of the mount. func mountDevice(m specs.Mount) string { - if m.Type == "bind" { + if m.Type == bind { // Make a device string that includes the target, which is consistent across // S/R and uniquely identifies the connection. return "9pfs-" + m.Destination @@ -589,7 +610,7 @@ func subtargets(root string, mnts []specs.Mount) []string { // setFileSystemForProcess is used to set up the file system and amend the procArgs accordingly. // procArgs are passed by reference and the FDMap field is modified. -func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel) error { +func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel, cid string) error { ctx := procArgs.NewContext(k) // Create the FD map, which will set stdin, stdout, and stderr. If @@ -604,27 +625,79 @@ func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spe // won't need ours either way. procArgs.FDMap = fdm + // Use root user to configure mounts. The current user might not have + // permission to do so. + rootProcArgs := kernel.CreateProcessArgs{ + WorkingDirectory: "/", + Credentials: auth.NewRootCredentials(creds.UserNamespace), + Umask: 0022, + MaxSymlinkTraversals: linux.MaxSymlinkTraversals, + } + rootCtx := rootProcArgs.NewContext(k) + // If this is the root container, we also need to setup the root mount // namespace. - if k.RootMountNamespace() == nil { - // Use root user to configure mounts. The current user might not have - // permission to do so. - rootProcArgs := kernel.CreateProcessArgs{ - WorkingDirectory: "/", - Credentials: auth.NewRootCredentials(creds.UserNamespace), - Umask: 0022, - MaxSymlinkTraversals: linux.MaxSymlinkTraversals, - } - rootCtx := rootProcArgs.NewContext(k) - + mns := k.RootMountNamespace() + if mns == nil { // Create the virtual filesystem. mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs) if err != nil { return fmt.Errorf("error creating mounts: %v", err) } - k.SetRootMountNamespace(mns) + return nil + } + + // Setup a child container. + + // Create the container's root filesystem mount. + log.Infof("Creating new process in child container.") + fds := &fdDispenser{fds: append([]int{}, ioFDs...)} + rootInode, err := createRootMount(rootCtx, spec, conf, fds, nil) + if err != nil { + return fmt.Errorf("error creating filesystem for container: %v", err) + } + + // Make directories for submounts within the container. + rootDir := mns.Root() + defer rootDir.DecRef() + containerRoot := filepath.Join(childContainersDir, cid) + mkdirAll(ctx, mns, containerRoot) + + // Mount the container's root filesystem to the newly created + // mount point. + containerRootDirent, err := mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals) + if err != nil { + return fmt.Errorf("failed to find mount destination: %q: %v", containerRoot, err) + } + if err := mns.Mount(ctx, containerRootDirent, rootInode); err != nil { + return fmt.Errorf("failed to mount at destination %q: %v", containerRoot, err) + } + containerRootDirent.DecRef() + + // We have to re-walk to the dirent to find the mounted + // directory. The old dirent is invalid at this point. + containerRootDirent, err = mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals) + if err != nil { + return fmt.Errorf("failed to find mount destination2: %q: %v", containerRoot, err) + } + log.Infof("Mounted child's root fs to %q", containerRoot) + + // Mount all submounts. + mounts := compileMounts(spec) + for _, m := range mounts { + // TODO: Enable bind mounts in child containers. + if m.Type == bind { + log.Infof("Bind mounts in child containers are not yet supported: %+v", m) + continue + } + dest := filepath.Join(containerRoot, m.Destination) + if err := mountSubmount(rootCtx, conf, k.RootMountNamespace(), fds, m, mounts, dest); err != nil { + return fmt.Errorf("error mounting filesystem for container: %v", err) + } } + // Set the procArgs root directory. + procArgs.Root = containerRootDirent return nil } |