diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/controller.go | 19 | ||||
-rw-r--r-- | runsc/boot/fs.go | 159 | ||||
-rw-r--r-- | runsc/boot/loader.go | 36 |
3 files changed, 150 insertions, 64 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index fc6ea326a..69e88d8e0 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -17,6 +17,7 @@ package boot import ( "errors" "fmt" + "path" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.googlesource.com/gvisor/pkg/control/server" @@ -181,11 +182,15 @@ type StartArgs struct { // CID is the ID of the container to start. CID string + + // FilePayload contains the file descriptor over which the sandbox will + // request files from its root filesystem. + urpc.FilePayload } // Start runs a created container within a sandbox. func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { - log.Debugf("containerManager.Start") + log.Debugf("containerManager.Start: %+v", args) // Validate arguments. if args == nil { @@ -200,8 +205,18 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { if args.CID == "" { return errors.New("start argument missing container ID") } + // Prevent CIDs containing ".." from confusing the sentry when creating + // /containers/<cid> directory. + // TODO: Once we have multiple independent roots, this + // check won't be necessary. + if path.Clean(args.CID) != args.CID { + return fmt.Errorf("container ID shouldn't contain directory traversals such as \"..\": %q", args.CID) + } + if len(args.FilePayload.Files) != 1 { + return fmt.Errorf("start arguments must contain one file for the container root") + } - tgid, err := cm.l.startContainer(args, cm.l.k) + tgid, err := cm.l.startContainer(cm.l.k, args.Spec, args.Conf, args.CID, args.FilePayload.Files[0]) if err != nil { return err } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index eea2ec1f5..8996b1398 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -48,6 +48,19 @@ const ( // Device name for root mount. rootDevice = "9pfs-/" + + // childContainersDir is the directory where child container root + // filesystems are mounted. 
+ childContainersDir = "/__runsc_containers__" + + // Filesystems that runsc supports. + bind = "bind" + devpts = "devpts" + devtmpfs = "devtmpfs" + proc = "proc" + sysfs = "sysfs" + tmpfs = "tmpfs" + nonefs = "none" ) type fdDispenser struct { @@ -70,8 +83,15 @@ func (f *fdDispenser) empty() bool { // createMountNamespace creates a mount namespace containing the root filesystem // and all mounts. 'rootCtx' is used to walk directories to find mount points. func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec *specs.Spec, conf *Config, ioFDs []int) (*fs.MountNamespace, error) { + mounts := compileMounts(spec) + // Create a tmpfs mount where we create and mount a root filesystem for + // each child container. + mounts = append(mounts, specs.Mount{ + Type: tmpfs, + Destination: childContainersDir, + }) fds := &fdDispenser{fds: ioFDs} - rootInode, err := createRootMount(rootCtx, spec, conf, fds) + rootInode, err := createRootMount(rootCtx, spec, conf, fds, mounts) if err != nil { return nil, fmt.Errorf("failed to create root mount: %v", err) } @@ -79,7 +99,7 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec if err != nil { return nil, fmt.Errorf("failed to create root mount namespace: %v", err) } - mounts := compileMounts(spec) + if err := setMounts(rootCtx, conf, mns, fds, mounts); err != nil { return nil, fmt.Errorf("failed to configure mounts: %v", err) } @@ -98,12 +118,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // Always mount /dev. 
mounts = append(mounts, specs.Mount{ - Type: "devtmpfs", + Type: devtmpfs, Destination: "/dev", }) mounts = append(mounts, specs.Mount{ - Type: "devpts", + Type: devpts, Destination: "/dev/pts", }) @@ -129,13 +149,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount { var mandatoryMounts []specs.Mount if !procMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: "proc", + Type: proc, Destination: "/proc", }) } if !sysMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: "sysfs", + Type: sysfs, Destination: "/sys", }) } @@ -149,7 +169,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // that. Until then, the /tmp mount will always appear empty at // container creation. mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: "tmpfs", + Type: tmpfs, Destination: "/tmp", }) } @@ -165,7 +185,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // mount namespace. func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, mounts []specs.Mount) error { for _, m := range mounts { - if err := mountSubmount(ctx, conf, mns, fds, m, mounts); err != nil { + if err := mountSubmount(ctx, conf, mns, fds, m, mounts, m.Destination); err != nil { return err } } @@ -173,7 +193,7 @@ func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *f } // createRootMount creates the root filesystem. -func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser) (*fs.Inode, error) { +func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser, mounts []specs.Mount) (*fs.Inode, error) { // First construct the filesystem from the spec.Root. mf := fs.MountSourceFlags{ReadOnly: spec.Root.Readonly} @@ -207,7 +227,7 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f // We need to overlay the root on top of a ramfs with stub directories // for submount paths. 
"/dev" "/sys" "/proc" and "/tmp" are always // mounted even if they are not in the spec. - submounts := append(subtargets("/", spec.Mounts), "/dev", "/sys", "/proc", "/tmp") + submounts := append(subtargets("/", mounts), "/dev", "/sys", "/proc", "/tmp") rootInode, err = addSubmountOverlay(ctx, rootInode, submounts) if err != nil { return nil, fmt.Errorf("error adding submount overlay: %v", err) @@ -256,17 +276,17 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri ) switch m.Type { - case "devpts", "devtmpfs", "proc", "sysfs": + case devpts, devtmpfs, proc, sysfs: fsName = m.Type - case "none": - fsName = "sysfs" - case "tmpfs": + case nonefs: + fsName = sysfs + case tmpfs: fsName = m.Type // tmpfs has some extra supported options that we must pass through. opts, err = parseAndFilterOptions(m.Options, "mode", "uid", "gid") - case "bind": + case bind: switch conf.FileAccess { case FileAccessProxy, FileAccessProxyExclusive: fd := fds.remove() @@ -291,7 +311,7 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri return fsName, opts, useOverlay, err } -func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount) error { +func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount, dest string) error { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. fsName, opts, useOverlay, err := getMountNameAndOptions(conf, m, fds) @@ -342,51 +362,52 @@ func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fd // in the right location, e.g. // mount: /var/run/secrets, may be created in '/run/secrets' if // '/var/run' => '/var'. 
- if err := mkdirAll(ctx, mns, m.Destination); err != nil { + if err := mkdirAll(ctx, mns, dest); err != nil { return err } root := mns.Root() defer root.DecRef() - dirent, err := mns.FindInode(ctx, root, nil, m.Destination, linux.MaxSymlinkTraversals) + dirent, err := mns.FindInode(ctx, root, nil, dest, linux.MaxSymlinkTraversals) if err != nil { - return fmt.Errorf("failed to find mount destination %q: %v", m.Destination, err) + return fmt.Errorf("failed to find mount destination %q: %v", dest, err) } defer dirent.DecRef() if err := mns.Mount(ctx, dirent, inode); err != nil { - return fmt.Errorf("failed to mount at destination %q: %v", m.Destination, err) + return fmt.Errorf("failed to mount at destination %q: %v", dest, err) } - log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type) + log.Infof("Mounted %q to %q type %s", m.Source, dest, m.Type) return nil } func mkdirAll(ctx context.Context, mns *fs.MountNamespace, path string) error { + log.Infof("mkdirAll called with path %s", path) root := mns.Root() defer root.DecRef() // Starting at the root, walk the path. parent := root ps := strings.Split(filepath.Clean(path), string(filepath.Separator)) - for i := 0; i < len(ps); i++ { - if ps[i] == "" { + for _, pathElem := range ps { + if pathElem == "" { // This will be case for the first and last element, if the path // begins or ends with '/'. Note that we always treat the path as // absolute, regardless of what the first character contains. continue } - d, err := mns.FindInode(ctx, root, parent, ps[i], fs.DefaultTraversalLimit) + d, err := mns.FindInode(ctx, root, parent, pathElem, fs.DefaultTraversalLimit) if err == syserror.ENOENT { // If we encounter a path that does not exist, then // create it. 
- if err := parent.CreateDirectory(ctx, root, ps[i], fs.FilePermsFromMode(0755)); err != nil { - return fmt.Errorf("failed to create directory %q: %v", ps[i], err) + if err := parent.CreateDirectory(ctx, root, pathElem, fs.FilePermsFromMode(0755)); err != nil { + return fmt.Errorf("failed to create directory %q: %v", pathElem, err) } - if d, err = parent.Walk(ctx, root, ps[i]); err != nil { - return fmt.Errorf("walk to %q failed: %v", ps[i], err) + if d, err = parent.Walk(ctx, root, pathElem); err != nil { + return fmt.Errorf("walk to %q failed: %v", pathElem, err) } } else if err != nil { - return fmt.Errorf("failed to find inode %q: %v", ps[i], err) + return fmt.Errorf("failed to find inode %q: %v", pathElem, err) } parent = d } @@ -444,7 +465,7 @@ func destinations(mounts []specs.Mount, extra ...string) []string { // mountDevice returns a device string based on the fs type and target // of the mount. func mountDevice(m specs.Mount) string { - if m.Type == "bind" { + if m.Type == bind { // Make a device string that includes the target, which is consistent across // S/R and uniquely identifies the connection. return "9pfs-" + m.Destination @@ -589,7 +610,7 @@ func subtargets(root string, mnts []specs.Mount) []string { // setFileSystemForProcess is used to set up the file system and amend the procArgs accordingly. // procArgs are passed by reference and the FDMap field is modified. -func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel) error { +func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel, cid string) error { ctx := procArgs.NewContext(k) // Create the FD map, which will set stdin, stdout, and stderr. 
If @@ -604,27 +625,79 @@ func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spe // won't need ours either way. procArgs.FDMap = fdm + // Use root user to configure mounts. The current user might not have + // permission to do so. + rootProcArgs := kernel.CreateProcessArgs{ + WorkingDirectory: "/", + Credentials: auth.NewRootCredentials(creds.UserNamespace), + Umask: 0022, + MaxSymlinkTraversals: linux.MaxSymlinkTraversals, + } + rootCtx := rootProcArgs.NewContext(k) + // If this is the root container, we also need to setup the root mount // namespace. - if k.RootMountNamespace() == nil { - // Use root user to configure mounts. The current user might not have - // permission to do so. - rootProcArgs := kernel.CreateProcessArgs{ - WorkingDirectory: "/", - Credentials: auth.NewRootCredentials(creds.UserNamespace), - Umask: 0022, - MaxSymlinkTraversals: linux.MaxSymlinkTraversals, - } - rootCtx := rootProcArgs.NewContext(k) - + mns := k.RootMountNamespace() + if mns == nil { // Create the virtual filesystem. mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs) if err != nil { return fmt.Errorf("error creating mounts: %v", err) } - k.SetRootMountNamespace(mns) + return nil + } + + // Setup a child container. + + // Create the container's root filesystem mount. + log.Infof("Creating new process in child container.") + fds := &fdDispenser{fds: append([]int{}, ioFDs...)} + rootInode, err := createRootMount(rootCtx, spec, conf, fds, nil) + if err != nil { + return fmt.Errorf("error creating filesystem for container: %v", err) + } + + // Make directories for submounts within the container. + rootDir := mns.Root() + defer rootDir.DecRef() + containerRoot := filepath.Join(childContainersDir, cid) + mkdirAll(ctx, mns, containerRoot) + + // Mount the container's root filesystem to the newly created + // mount point. 
+ containerRootDirent, err := mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals) + if err != nil { + return fmt.Errorf("failed to find mount destination: %q: %v", containerRoot, err) + } + if err := mns.Mount(ctx, containerRootDirent, rootInode); err != nil { + return fmt.Errorf("failed to mount at destination %q: %v", containerRoot, err) + } + containerRootDirent.DecRef() + + // We have to re-walk to the dirent to find the mounted + // directory. The old dirent is invalid at this point. + containerRootDirent, err = mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals) + if err != nil { + return fmt.Errorf("failed to find mount destination2: %q: %v", containerRoot, err) + } + log.Infof("Mounted child's root fs to %q", containerRoot) + + // Mount all submounts. + mounts := compileMounts(spec) + for _, m := range mounts { + // TODO: Enable bind mounts in child containers. + if m.Type == bind { + log.Infof("Bind mounts in child containers are not yet supported: %+v", m) + continue + } + dest := filepath.Join(containerRoot, m.Destination) + if err := mountSubmount(rootCtx, conf, k.RootMountNamespace(), fds, m, mounts, dest); err != nil { + return fmt.Errorf("error mounting filesystem for container: %v", err) + } } + // Set the procArgs root directory. + procArgs.Root = containerRootDirent return nil } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index f6c7bf223..7debf0ac2 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "math/rand" + "os" "runtime" "sync" "sync/atomic" @@ -229,7 +230,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD int, ioFDs []int, console // Ensure that signals received are forwarded to the emulated kernel. 
stopSignalForwarding := sighandling.PrepareForwarding(k, false)() - procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k) + procArgs, err := newProcess(spec, creds, utsns, ipcns, k) if err != nil { return nil, fmt.Errorf("failed to create root process: %v", err) } @@ -250,7 +251,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD int, ioFDs []int, console } // newProcess creates a process that can be run with kernel.CreateProcess. -func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) { +func newProcess(spec *specs.Spec, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) { // Create initial limits. ls, err := createLimitSet(spec) if err != nil { @@ -277,7 +278,6 @@ func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds UTSNamespace: utsns, IPCNamespace: ipcns, } - return procArgs, nil } @@ -356,7 +356,8 @@ func (l *Loader) run() error { l.console, l.rootProcArgs.Credentials, l.rootProcArgs.Limits, - l.k) + l.k, + "" /* CID, which isn't needed for the root container */) if err != nil { return err } @@ -376,8 +377,7 @@ func (l *Loader) run() error { // startContainer starts a child container. It returns the thread group ID of // the newly created process. -func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.ThreadID, error) { - spec := args.Spec +func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config, cid string, file *os.File) (kernel.ThreadID, error) { // Create capabilities. caps, err := specutils.Capabilities(spec.Process.Capabilities) if err != nil { @@ -406,26 +406,24 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa // when indicated by the spec. 
procArgs, err := newProcess( - args.Spec, - args.Conf, - nil, // ioFDs - false, // console + spec, creds, - k.RootUTSNamespace(), - k.RootIPCNamespace(), - k) + l.k.RootUTSNamespace(), + l.k.RootIPCNamespace(), + l.k) if err != nil { return 0, fmt.Errorf("failed to create new process: %v", err) } err = setFileSystemForProcess( &procArgs, - args.Spec, - args.Conf, - nil, + spec, + conf, + []int{int(file.Fd())}, // ioFDs false, creds, procArgs.Limits, - k) + k, + cid) if err != nil { return 0, fmt.Errorf("failed to create new process: %v", err) } @@ -435,7 +433,7 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa return 0, fmt.Errorf("failed to create process in sentry: %v", err) } - ts := k.TaskSet() + ts := l.k.TaskSet() tgid := ts.Root.IDOfThreadGroup(tg) if tgid == 0 { return 0, errors.New("failed to get thread group ID of new process") @@ -446,7 +444,7 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa l.mu.Lock() defer l.mu.Unlock() - l.containerRootTGIDs[args.CID] = tgid + l.containerRootTGIDs[cid] = tgid return tgid, nil } |