diff options
author | Fabricio Voznika <fvoznika@google.com> | 2018-10-18 12:41:07 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-10-18 12:42:24 -0700 |
commit | f3ffa4db525ea1a1d36307ea9593ed7b5e014ca7 (patch) | |
tree | e490c99350392544e21abb8953fe3c656a676221 /runsc | |
parent | 2a697791d1a473c76973f135f3af9240a32ad668 (diff) |
Resolve mount paths while setting up root fs mount
It's hard to resolve symlinks inside the sandbox because rootfs and mounts
may be read-only, forcing us to create mount points inside lower layer of an
overlay, **before** the volumes are mounted.
Since the destination must already be resolved outside the sandbox when creating
mounts, take this opportunity to rewrite the spec with paths resolved.
"runsc boot" will use the "resolved" spec to load mounts. In addition, symlink
traversals were disabled while mounting containers inside the sandbox.
It haven't been able to write a good test for it. So I'm relying on manual tests
for now.
PiperOrigin-RevId: 217749904
Change-Id: I7ac434d5befd230db1488446cda03300cc0751a9
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/config.go | 3 | ||||
-rw-r--r-- | runsc/boot/fs.go | 143 | ||||
-rw-r--r-- | runsc/cmd/create.go | 3 | ||||
-rw-r--r-- | runsc/cmd/run.go | 2 | ||||
-rw-r--r-- | runsc/container/container.go | 26 | ||||
-rw-r--r-- | runsc/container/container_test.go | 2 | ||||
-rw-r--r-- | runsc/container/fs.go | 30 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 7 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 23 | ||||
-rw-r--r-- | runsc/test/testutil/testutil.go | 1 |
10 files changed, 119 insertions, 121 deletions
diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 41af084b9..51d20d06d 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -192,9 +192,6 @@ type Config struct { // disabled. Pardon the double negation, but default to enabled is important. DisableSeccomp bool - // SpecFile is the file containing the OCI spec. - SpecFile string - // WatchdogAction sets what action the watchdog takes when triggered. WatchdogAction watchdog.Action diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 42e011beb..ea825e571 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -103,9 +103,14 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec return nil, fmt.Errorf("failed to create root mount namespace: %v", err) } - if err := setMounts(rootCtx, conf, mns, fds, mounts); err != nil { - return nil, fmt.Errorf("failed to configure mounts: %v", err) + root := mns.Root() + defer root.DecRef() + for _, m := range mounts { + if err := mountSubmount(rootCtx, conf, mns, root, fds, m, mounts); err != nil { + return nil, fmt.Errorf("mount submount: %v", err) + } } + if !fds.empty() { return nil, fmt.Errorf("not all mount points were consumed, remaining: %v", fds) } @@ -184,17 +189,6 @@ func compileMounts(spec *specs.Spec) []specs.Mount { return mounts } -// setMounts iterates over mounts and mounts them in the specified -// mount namespace. -func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, mounts []specs.Mount) error { - for _, m := range mounts { - if err := mountSubmount(ctx, conf, mns, fds, m, mounts, m.Destination); err != nil { - return err - } - } - return nil -} - // createRootMount creates the root filesystem. func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser, mounts []specs.Mount) (*fs.Inode, error) { // First construct the filesystem from the spec.Root. @@ -207,9 +201,9 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f fd := fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) - hostFS := mustFindFilesystem("9p") + p9FS := mustFindFilesystem("9p") opts := p9MountOptions(fd, conf.FileAccess) - rootInode, err = hostFS.Mount(ctx, rootDevice, mf, strings.Join(opts, ",")) + rootInode, err = p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ",")) if err != nil { return nil, fmt.Errorf("failed to generate root mount point: %v", err) } @@ -294,7 +288,11 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri return fsName, opts, useOverlay, err } -func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount, dest string) error { +// mountSubmount mounts volumes inside the container's root. Because mounts may +// be readonly, a lower ramfs overlay is added to create the mount point dir. +// Another overlay is added with tmpfs on top if Config.Overlay is true. +// 'm.Destination' must be an absolute path with '..' and symlinks resolved. +func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, fds *fdDispenser, m specs.Mount, mounts []specs.Mount) error { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. fsName, opts, useOverlay, err := getMountNameAndOptions(conf, m, fds) @@ -340,60 +338,16 @@ func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fd } } - // Create destination in case it doesn't exist. This is required, in addition - // to 'addSubmountOverlay', in case there are symlinks to create directories - // in the right location, e.g. - // mount: /var/run/secrets, may be created in '/run/secrets' if - // '/var/run' => '/var'. - if err := mkdirAll(ctx, mns, dest); err != nil { - return err - } - - root := mns.Root() - defer root.DecRef() - dirent, err := mns.FindInode(ctx, root, nil, dest, linux.MaxSymlinkTraversals) + dirent, err := mns.FindInode(ctx, root, root, m.Destination, 0 /* maxTraversals */) if err != nil { - return fmt.Errorf("failed to find mount destination %q: %v", dest, err) + return fmt.Errorf("failed to find mount destination %q: %v", m.Destination, err) } defer dirent.DecRef() if err := mns.Mount(ctx, dirent, inode); err != nil { - return fmt.Errorf("failed to mount at destination %q: %v", dest, err) + return fmt.Errorf("failed to mount at destination %q: %v", m.Destination, err) } - log.Infof("Mounted %q to %q type %s", m.Source, dest, m.Type) - return nil -} - -func mkdirAll(ctx context.Context, mns *fs.MountNamespace, path string) error { - log.Infof("mkdirAll called with path %s", path) - root := mns.Root() - defer root.DecRef() - - // Starting at the root, walk the path. - parent := root - ps := strings.Split(filepath.Clean(path), string(filepath.Separator)) - for _, pathElem := range ps { - if pathElem == "" { - // This will be case for the first and last element, if the path - // begins or ends with '/'. Note that we always treat the path as - // absolute, regardless of what the first character contains. - continue - } - d, err := mns.FindInode(ctx, root, parent, pathElem, fs.DefaultTraversalLimit) - if err == syserror.ENOENT { - // If we encounter a path that does not exist, then - // create it. - if err := parent.CreateDirectory(ctx, root, pathElem, fs.FilePermsFromMode(0755)); err != nil { - return fmt.Errorf("failed to create directory %q: %v", pathElem, err) - } - if d, err = parent.Walk(ctx, root, pathElem); err != nil { - return fmt.Errorf("walk to %q failed: %v", pathElem, err) - } - } else if err != nil { - return fmt.Errorf("failed to find inode %q: %v", pathElem, err) - } - parent = d - } + log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type) return nil } @@ -437,14 +391,6 @@ func parseAndFilterOptions(opts []string, allowedKeys ...string) ([]string, erro return out, nil } -func destinations(mounts []specs.Mount, extra ...string) []string { - var ds []string - for _, m := range mounts { - ds = append(ds, m.Destination) - } - return append(ds, extra...) -} - // mountDevice returns a device string based on the fs type and target // of the mount. func mountDevice(m specs.Mount) string { @@ -544,7 +490,8 @@ func mustFindFilesystem(name string) fs.Filesystem { func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) { // There is no real filesystem backing this ramfs tree, so we pass in // "nil" here. - mountTree, err := ramfs.MakeDirectoryTree(ctx, fs.NewNonCachingMountSource(nil, fs.MountSourceFlags{}), submounts) + msrc := fs.NewNonCachingMountSource(nil, fs.MountSourceFlags{}) + mountTree, err := ramfs.MakeDirectoryTree(ctx, msrc, submounts) if err != nil { return nil, fmt.Errorf("error creating mount tree: %v", err) } @@ -608,12 +555,16 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf // namespace. mns := k.RootMountNamespace() if mns == nil { + // Setup the root container. + // Create the virtual filesystem. mns, err := createMountNamespace(ctx, rootCtx, spec, conf, goferFDs) if err != nil { return fmt.Errorf("error creating mounts: %v", err) } k.SetRootMountNamespace(mns) + + // We're done with root container. return nil } @@ -627,42 +578,48 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf return fmt.Errorf("error creating filesystem for container: %v", err) } - // Make directories for submounts within the container. - rootDir := mns.Root() - defer rootDir.DecRef() - containerRoot := filepath.Join(ChildContainersDir, cid) - mkdirAll(ctx, mns, containerRoot) + globalRoot := mns.Root() + defer globalRoot.DecRef() - // Mount the container's root filesystem to the newly created - // mount point. - containerRootDirent, err := mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals) + // Create mount point for the container's rootfs. + contDir, err := mns.FindInode(ctx, globalRoot, nil, ChildContainersDir, 0 /* TraversalLimit */) if err != nil { - return fmt.Errorf("failed to find mount destination: %q: %v", containerRoot, err) + return fmt.Errorf("couldn't find child container dir %q: %v", ChildContainersDir, err) } - if err := mns.Mount(ctx, containerRootDirent, rootInode); err != nil { - return fmt.Errorf("failed to mount at destination %q: %v", containerRoot, err) + if err := contDir.CreateDirectory(ctx, globalRoot, cid, fs.FilePermsFromMode(0755)); err != nil { + return fmt.Errorf("create directory %q: %v", cid, err) + } + containerRoot, err := contDir.Walk(ctx, globalRoot, cid) + if err != nil { + return fmt.Errorf("walk to %q failed: %v", cid, err) + } + defer containerRoot.DecRef() + + // Mount the container's root filesystem to the newly created mount point. + if err := mns.Mount(ctx, containerRoot, rootInode); err != nil { + return fmt.Errorf("mount container root: %v", err) } - containerRootDirent.DecRef() // We have to re-walk to the dirent to find the mounted // directory. The old dirent is invalid at this point. - containerRootDirent, err = mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals) + containerRoot, err = contDir.Walk(ctx, globalRoot, cid) if err != nil { - return fmt.Errorf("failed to find mount destination2: %q: %v", containerRoot, err) + return fmt.Errorf("find container mount point %q: %v", cid, err) } - log.Infof("Mounted child's root fs to %q", containerRoot) + + log.Infof("Mounted child's root fs to %q", filepath.Join(ChildContainersDir, cid)) // Mount all submounts. mounts := compileMounts(spec) for _, m := range mounts { - dest := filepath.Join(containerRoot, m.Destination) - if err := mountSubmount(rootCtx, conf, k.RootMountNamespace(), fds, m, mounts, dest); err != nil { + if err := mountSubmount(rootCtx, conf, k.RootMountNamespace(), containerRoot, fds, m, mounts); err != nil { + containerRoot.DecRef() return fmt.Errorf("error mounting filesystem for container: %v", err) } } // Set the procArgs root directory. - procArgs.Root = containerRootDirent + procArgs.Root = containerRoot return nil } @@ -686,7 +643,7 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error mnsRoot := mns.Root() defer mnsRoot.DecRef() containerRoot := path.Join(ChildContainersDir, cid) - containerRootDirent, err := mns.FindInode(ctx, mnsRoot, nil, containerRoot, linux.MaxSymlinkTraversals) + containerRootDirent, err := mns.FindInode(ctx, mnsRoot, nil, containerRoot, 0 /* maxTraversals */) if err == syserror.ENOENT { // Container must have been destroyed already. That's fine. return nil @@ -720,7 +677,7 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error // Get a reference to the parent directory and remove the root // container directory. - containersDirDirent, err := mns.FindInode(ctx, mnsRoot, nil, ChildContainersDir, linux.MaxSymlinkTraversals) + containersDirDirent, err := mns.FindInode(ctx, mnsRoot, nil, ChildContainersDir, 0 /* maxTraversals */) if err != nil { return fmt.Errorf("error finding containers directory %q: %v", ChildContainersDir, err) } diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go index ecd76ee93..275a96f57 100644 --- a/runsc/cmd/create.go +++ b/runsc/cmd/create.go @@ -15,8 +15,6 @@ package cmd import ( - "path/filepath" - "context" "flag" "github.com/google/subcommands" @@ -93,7 +91,6 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{} Fatalf("error reading spec: %v", err) } specutils.LogSpec(spec) - conf.SpecFile = filepath.Join(bundleDir, "config.json") // Create the container. A new sandbox will be created for the // container unless the metadata specifies that it should be run in an diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go index 826e6e875..9a87cf240 100644 --- a/runsc/cmd/run.go +++ b/runsc/cmd/run.go @@ -15,7 +15,6 @@ package cmd import ( - "path/filepath" "syscall" "context" @@ -73,7 +72,6 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s Fatalf("error reading spec: %v", err) } specutils.LogSpec(spec) - conf.SpecFile = filepath.Join(bundleDir, "config.json") ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog) if err != nil { diff --git a/runsc/container/container.go b/runsc/container/container.go index 0ec4d03c1..f76bad1aa 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -271,6 +271,19 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // init container in the sandbox. if specutils.ShouldCreateSandbox(spec) { log.Debugf("Creating new sandbox for container %q", id) + + // Setup rootfs and mounts. It returns a new mount list with destination + // paths resolved. Since the spec for the root container is read from disk, + // Write the new spec to a new file that will be used by the sandbox. + cleanMounts, err := setupFS(spec, conf, bundleDir) + if err != nil { + return nil, fmt.Errorf("setup mounts: %v", err) + } + spec.Mounts = cleanMounts + if err := specutils.WriteCleanSpec(bundleDir, spec); err != nil { + return nil, fmt.Errorf("writing clean spec: %v", err) + } + ioFiles, err := c.createGoferProcess(spec, conf, bundleDir) if err != nil { return nil, err @@ -351,6 +364,15 @@ func (c *Container) Start(conf *boot.Config) error { return err } } else { + // Setup rootfs and mounts. It returns a new mount list with destination + // paths resolved. Replace the original spec with new mount list and start + // container. + cleanMounts, err := setupFS(c.Spec, conf, c.BundleDir) + if err != nil { + return fmt.Errorf("setup mounts: %v", err) + } + c.Spec.Mounts = cleanMounts + // Create the gofer process. ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) if err != nil { @@ -691,10 +713,6 @@ func (c *Container) waitForStopped() error { } func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, error) { - if err := setupFS(spec, conf, bundleDir); err != nil { - return nil, fmt.Errorf("failed to setup mounts: %v", err) - } - // Start with the general config flags. args := conf.ToFlags() args = append(args, "gofer", "--bundle", bundleDir) diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index e2bb7d8ec..662591b3b 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -458,7 +458,7 @@ func TestAppExitStatus(t *testing.T) { defer os.RemoveAll(rootDir2) defer os.RemoveAll(bundleDir2) - ws, err = Run(testutil.UniqueContainerID(), succSpec, conf, bundleDir2, "", "", "") + ws, err = Run(testutil.UniqueContainerID(), errSpec, conf, bundleDir2, "", "", "") if err != nil { t.Fatalf("error running container: %v", err) } diff --git a/runsc/container/fs.go b/runsc/container/fs.go index 59edd9488..2ed42fd93 100644 --- a/runsc/container/fs.go +++ b/runsc/container/fs.go @@ -73,9 +73,13 @@ var optionsMap = map[string]mapping{ // This allows the gofer serving the containers to be chroot under this // directory to create an extra layer to security in case the gofer gets // compromised. -func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error { +// Returns list of mounts equivalent to 'spec.Mounts' with all destination paths +// cleaned and with symlinks resolved. +func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]specs.Mount, error) { + rv := make([]specs.Mount, 0, len(spec.Mounts)) for _, m := range spec.Mounts { if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + rv = append(rv, m) continue } @@ -83,39 +87,47 @@ func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error { // container. dst, err := resolveSymlinks(spec.Root.Path, m.Destination) if err != nil { - return fmt.Errorf("failed to resolve symlinks: %v", err) + return nil, fmt.Errorf("failed to resolve symlinks: %v", err) } flags := optionsToFlags(m.Options) flags |= syscall.MS_BIND log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil { - return fmt.Errorf("failed to mount %v: %v", m, err) + return nil, fmt.Errorf("failed to mount %v: %v", m, err) } // Make the mount a slave, so that for recursive bind mount, umount won't // propagate to the source. flags = syscall.MS_SLAVE | syscall.MS_REC if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil { - return fmt.Errorf("failed to rslave mount dst: %q, flags: %#x, err: %v", dst, flags, err) + return nil, fmt.Errorf("failed to rslave mount dst: %q, flags: %#x, err: %v", dst, flags, err) } + + cpy := m + relDst, err := filepath.Rel(spec.Root.Path, dst) + if err != nil { + panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, spec.Root.Path, err)) + } + cpy.Destination = filepath.Join("/", relDst) + rv = append(rv, cpy) } // If root is read only, check if it needs to be remounted as readonly. if spec.Root.Readonly { isMountPoint, readonly, err := mountInfo(spec.Root.Path) if err != nil { - return err + return nil, err } if readonly { - return nil + return rv, nil } if !isMountPoint { // Readonly root is not a mount point nor read-only. Can't do much other // than just logging a warning. The gofer will prevent files to be open // in write mode. log.Warningf("Mount where root is located is not read-only and cannot be changed: %q", spec.Root.Path) - return nil + return rv, nil } // If root is a mount point but not read-only, we can change mount options @@ -124,10 +136,10 @@ func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error { flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) src := spec.Root.Path if err := syscall.Mount(src, src, "bind", flags, ""); err != nil { - return fmt.Errorf("failed to remount root as read-only with source: %q, target: %q, flags: %#x, err: %v", spec.Root.Path, spec.Root.Path, flags, err) + return nil, fmt.Errorf("failed to remount root as read-only with source: %q, target: %q, flags: %#x, err: %v", spec.Root.Path, spec.Root.Path, flags, err) } } - return nil + return rv, nil } // mountInfo returns whether the path is a mount point and whether the mount diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 6dc8cf7f0..923a52f7f 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -321,12 +321,9 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund nextFD++ // Open the spec file to donate to the sandbox. - if conf.SpecFile == "" { - return fmt.Errorf("conf.SpecFile must be set") - } - specFile, err := os.Open(conf.SpecFile) + specFile, err := specutils.OpenCleanSpec(bundleDir) if err != nil { - return fmt.Errorf("error opening spec file %q: %v", conf.SpecFile, err) + return fmt.Errorf("opening spec file: %v", err) } defer specFile.Close() cmd.ExtraFiles = append(cmd.ExtraFiles, specFile) diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 6b3e52021..b29802fde 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -170,6 +170,29 @@ func ReadSpecFromFile(bundleDir string, specFile *os.File) (*specs.Spec, error) return &spec, nil } +// OpenCleanSpec opens spec file that has destination mount paths resolved to +// their absolute location. +func OpenCleanSpec(bundleDir string) (*os.File, error) { + f, err := os.Open(filepath.Join(bundleDir, "config.clean.json")) + if err != nil { + return nil, err + } + if _, err := f.Seek(0, os.SEEK_SET); err != nil { + f.Close() + return nil, fmt.Errorf("error seeking to beginning of file %q: %v", f.Name(), err) + } + return f, nil +} + +// WriteCleanSpec writes a spec file that has destination mount paths resolved. +func WriteCleanSpec(bundleDir string, spec *specs.Spec) error { + bytes, err := json.Marshal(spec) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(bundleDir, "config.clean.json"), bytes, 0755) +} + // Capabilities takes in spec and returns a TaskCapabilities corresponding to // the spec. func Capabilities(specCaps *specs.LinuxCapabilities) (*auth.TaskCapabilities, error) { diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index b4664995c..4d7ac3bc9 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -179,7 +179,6 @@ func SetupContainerInRoot(rootDir string, spec *specs.Spec, conf *boot.Config) ( } conf.RootDir = rootDir - conf.SpecFile = filepath.Join(bundleDir, "config.json") return bundleDir, nil } |