diff options
-rw-r--r-- | runsc/cmd/BUILD | 1 | ||||
-rw-r--r-- | runsc/cmd/boot.go | 16 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 279 | ||||
-rw-r--r-- | runsc/cmd/gofer_test.go (renamed from runsc/container/fs_test.go) | 16 | ||||
-rw-r--r-- | runsc/container/BUILD | 2 | ||||
-rw-r--r-- | runsc/container/container.go | 75 | ||||
-rw-r--r-- | runsc/container/container_test.go | 165 | ||||
-rw-r--r-- | runsc/container/fs.go | 287 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 23 | ||||
-rw-r--r-- | runsc/specutils/BUILD | 1 | ||||
-rw-r--r-- | runsc/specutils/fs.go | 139 | ||||
-rw-r--r-- | runsc/specutils/namespace.go | 16 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 52 | ||||
-rw-r--r-- | runsc/specutils/specutils_test.go | 31 |
14 files changed, 681 insertions, 422 deletions
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 9e2be0d37..dabf18c5f 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -60,6 +60,7 @@ go_test( "capability_test.go", "delete_test.go", "exec_test.go", + "gofer_test.go", ], data = [ "//runsc", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 3039b389f..ff2fa2fb9 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -76,6 +76,11 @@ type Boot struct { // startSyncFD is the file descriptor to synchronize runsc and sandbox. startSyncFD int + // mountsFD is the file descriptor to read list of mounts after they have + // been resolved (direct paths, no symlinks). They are resolved outside the + // sandbox (e.g. gofer) and sent through this FD. + mountsFD int + // pidns is set if the sanadbox is in its own pid namespace. pidns bool } @@ -111,6 +116,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) { f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container") f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup") + f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).") } // Execute implements subcommands.Command.Execute. It starts a sandbox in a @@ -191,6 +197,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) panic("setCapsAndCallSelf must never return success") } + // Read resolved mount list and replace the original one from the spec. + mountsFile := os.NewFile(uintptr(b.mountsFD), "mounts file") + cleanMounts, err := specutils.ReadMounts(mountsFile) + if err != nil { + mountsFile.Close() + Fatalf("Error reading mounts file: %v", err) + } + mountsFile.Close() + spec.Mounts = cleanMounts + // Create the loader. bootArgs := boot.Args{ ID: f.Arg(0), diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 6f9711518..e712244ef 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -16,7 +16,11 @@ package cmd import ( "context" + "encoding/json" + "fmt" "os" + "path/filepath" + "strings" "sync" "syscall" @@ -59,6 +63,7 @@ type Gofer struct { panicOnWrite bool specFD int + mountsFD int } // Name implements subcommands.Command. @@ -84,6 +89,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) { f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected") f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process") f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec") + f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).") } // Execute implements subcommands.Command. @@ -100,45 +106,13 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("reading spec: %v", err) } - // Find what path is going to be served by this gofer. - root := spec.Root.Path - conf := args[0].(*boot.Config) - if g.setUpRoot && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - // Convert all shared mounts into slave to be sure that nothing will be - // propagated outside of our namespace. - if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { - Fatalf("error converting mounts: %v", err) - } - - // FIXME: runsc can't be re-executed without - // /proc, so we create a tmpfs mount, mount ./proc and ./root - // there, then move this mount to the root and after - // setCapsAndCallSelf, runsc will chroot into /root. - // - // We need a directory to construct a new root and we know that - // runsc can't start without /proc, so we can use it for this. - flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC) - if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil { - Fatalf("error mounting tmpfs: %v", err) - } - os.Mkdir("/proc/proc", 0755) - os.Mkdir("/proc/root", 0755) - if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil { - Fatalf("error mounting proc: %v", err) - } - if err := syscall.Mount(root, "/proc/root", "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { - Fatalf("error mounting root: %v", err) - } - if err := pivotRoot("/proc"); err != nil { - Fatalf("faild to change the root file system: %v", err) - } - if err := os.Chdir("/"); err != nil { - Fatalf("failed to change working directory") + if g.setUpRoot { + if err := setupRootFS(spec, conf); err != nil { + Fatalf("Error setting up root FS: %v", err) } } - if g.applyCaps { // Disable caps when calling myself again. // Note: minimal argument handling for the default case to keep it simple. @@ -150,15 +124,34 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) panic("unreachable") } + // Find what path is going to be served by this gofer. + root := spec.Root.Path + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + root = "/root" + } + + // Resolve mount points paths, then replace mounts from our spec and send the + // mount list over to the sandbox, so they are both in sync. + // + // Note that all mount points have been mounted in the proper location in + // setupRootFS(). + cleanMounts, err := resolveMounts(spec.Mounts, root) + if err != nil { + Fatalf("Failure to resolve mounts: %v", err) + } + spec.Mounts = cleanMounts + go func() { + if err := g.writeMounts(cleanMounts); err != nil { + panic(fmt.Sprintf("Failed to write mounts: %v", err)) + } + }() + specutils.LogSpec(spec) // fsgofer should run with a umask of 0, because we want to preserve file // modes exactly as sent by the sandbox, which will have applied its own umask. syscall.Umask(0) - if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - root = "/root" - } if err := syscall.Chroot(root); err != nil { Fatalf("failed to chroot to %q: %v", root, err) } @@ -232,6 +225,25 @@ func runServers(ats []p9.Attacher, ioFDs []int) { log.Infof("All 9P servers exited.") } +func (g *Gofer) writeMounts(mounts []specs.Mount) error { + bytes, err := json.Marshal(mounts) + if err != nil { + return err + } + + f := os.NewFile(uintptr(g.mountsFD), "mounts file") + defer f.Close() + + for written := 0; written < len(bytes); { + w, err := f.Write(bytes[written:]) + if err != nil { + return err + } + written += w + } + return nil +} + func isReadonlyMount(opts []string) bool { for _, o := range opts { if o == "ro" { @@ -240,3 +252,194 @@ func isReadonlyMount(opts []string) bool { } return false } + +func setupRootFS(spec *specs.Spec, conf *boot.Config) error { + // Convert all shared mounts into slaves to be sure that nothing will be + // propagated outside of our namespace. + if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + Fatalf("error converting mounts: %v", err) + } + + root := spec.Root.Path + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + // FIXME: runsc can't be re-executed without + // /proc, so we create a tmpfs mount, mount ./proc and ./root + // there, then move this mount to the root and after + // setCapsAndCallSelf, runsc will chroot into /root. + // + // We need a directory to construct a new root and we know that + // runsc can't start without /proc, so we can use it for this. + flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC) + if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil { + Fatalf("error mounting tmpfs: %v", err) + } + + // Prepare tree structure for pivot_root(2). + os.Mkdir("/proc/proc", 0755) + os.Mkdir("/proc/root", 0755) + if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil { + Fatalf("error mounting proc: %v", err) + } + root = "/proc/root" + } + + // Mount root path followed by submounts. + if err := syscall.Mount(spec.Root.Path, root, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("mounting root on root (%q) err: %v", spec.Root.Path, err) + } + flags := uint32(syscall.MS_SLAVE | syscall.MS_REC) + if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { + flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation}) + } + if err := syscall.Mount("", spec.Root.Path, "", uintptr(flags), ""); err != nil { + return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", spec.Root.Path, flags, err) + } + + // Replace the current spec, with the clean spec with symlinks resolved. + if err := setupMounts(spec.Mounts, root); err != nil { + Fatalf("error setting up FS: %v", err) + } + + // Create working directory if needed. + if spec.Process.Cwd != "" { + dst, err := resolveSymlinks(root, spec.Process.Cwd) + if err != nil { + return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err) + } + if err := os.MkdirAll(dst, 0755); err != nil { + return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err) + } + } + + // Check if root needs to be remounted as readonly. + if spec.Root.Readonly { + // If root is a mount point but not read-only, we can change mount options + // to make it read-only for extra safety. + log.Infof("Remounting root as readonly: %q", spec.Root.Path) + flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) + if err := syscall.Mount(spec.Root.Path, spec.Root.Path, "bind", flags, ""); err != nil { + return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", spec.Root.Path, spec.Root.Path, flags, err) + } + } + + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + if err := pivotRoot("/proc"); err != nil { + Fatalf("faild to change the root file system: %v", err) + } + if err := os.Chdir("/"); err != nil { + Fatalf("failed to change working directory") + } + } + return nil +} + +// setupMounts binds mount all mounts specified in the spec in their correct +// location inside root. It will resolve relative paths and symlinks. It also +// creates directories as needed. +func setupMounts(mounts []specs.Mount, root string) error { + for _, m := range mounts { + if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + continue + } + + dst, err := resolveSymlinks(root, m.Destination) + if err != nil { + return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) + } + + flags := specutils.OptionsToFlags(m.Options) | syscall.MS_BIND + log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) + if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil { + return fmt.Errorf("mounting %v: %v", m, err) + } + + // Set propagation options that cannot be set together with other options. + flags = specutils.PropOptionsToFlags(m.Options) + if flags != 0 { + if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil { + return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err) + } + } + } + return nil +} + +// resolveMounts resolved relative paths and symlinks to mount points. +// +// Note: mount points must already be in place for resolution to work. +// Otherwise, it may follow symlinks to locations that would be overwritten +// with another mount point and return the wrong location. In short, make sure +// setupMounts() has been called before. +func resolveMounts(mounts []specs.Mount, root string) ([]specs.Mount, error) { + cleanMounts := make([]specs.Mount, 0, len(mounts)) + for _, m := range mounts { + if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + cleanMounts = append(cleanMounts, m) + continue + } + dst, err := resolveSymlinks(root, m.Destination) + if err != nil { + return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) + } + relDst, err := filepath.Rel(root, dst) + if err != nil { + panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err)) + } + cpy := m + cpy.Destination = filepath.Join("/", relDst) + cleanMounts = append(cleanMounts, cpy) + } + return cleanMounts, nil +} + +// ResolveSymlinks walks 'rel' having 'root' as the root directory. If there are +// symlinks, they are evaluated relative to 'root' to ensure the end result is +// the same as if the process was running inside the container. +func resolveSymlinks(root, rel string) (string, error) { + return resolveSymlinksImpl(root, root, rel, 255) +} + +func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) { + if followCount == 0 { + return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel)) + } + + rel = filepath.Clean(rel) + for _, name := range strings.Split(rel, string(filepath.Separator)) { + if name == "" { + continue + } + // Note that Join() resolves things like ".." and returns a clean path. + path := filepath.Join(base, name) + if !strings.HasPrefix(path, root) { + // One cannot '..' their way out of root. + path = root + continue + } + fi, err := os.Lstat(path) + if err != nil { + if !os.IsNotExist(err) { + return "", err + } + // Not found means there is no symlink to check. Just keep walking dirs. + base = path + continue + } + if fi.Mode()&os.ModeSymlink != 0 { + link, err := os.Readlink(path) + if err != nil { + return "", err + } + if filepath.IsAbs(link) { + base = root + } + base, err = resolveSymlinksImpl(root, base, link, followCount-1) + if err != nil { + return "", err + } + continue + } + base = path + } + return base, nil +} diff --git a/runsc/container/fs_test.go b/runsc/cmd/gofer_test.go index 87cdb078e..8e692feb9 100644 --- a/runsc/container/fs_test.go +++ b/runsc/cmd/gofer_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package container +package cmd import ( "fmt" @@ -21,10 +21,16 @@ import ( "path" "path/filepath" "testing" - - "gvisor.googlesource.com/gvisor/runsc/test/testutil" ) +func tmpDir() string { + dir := os.Getenv("TEST_TMPDIR") + if dir == "" { + dir = "/tmp" + } + return dir +} + type dir struct { rel string link string @@ -50,7 +56,7 @@ func construct(root string, dirs []dir) error { } func TestResolveSymlinks(t *testing.T) { - root, err := ioutil.TempDir(testutil.TmpDir(), "root") + root, err := ioutil.TempDir(tmpDir(), "root") if err != nil { t.Fatal("ioutil.TempDir() failed:", err) } @@ -141,7 +147,7 @@ func TestResolveSymlinks(t *testing.T) { } func TestResolveSymlinksLoop(t *testing.T) { - root, err := ioutil.TempDir(testutil.TmpDir(), "root") + root, err := ioutil.TempDir(tmpDir(), "root") if err != nil { t.Fatal("ioutil.TempDir() failed:", err) } diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 3b25ff79a..2936b7cdf 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -6,7 +6,6 @@ go_library( name = "container", srcs = [ "container.go", - "fs.go", "hook.go", "status.go", ], @@ -34,7 +33,6 @@ go_test( srcs = [ "console_test.go", "container_test.go", - "fs_test.go", "multi_container_test.go", "shared_volume_test.go", ], diff --git a/runsc/container/container.go b/runsc/container/container.go index 6f092a5ce..fdcf8d7b7 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -281,18 +281,6 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo if specutils.ShouldCreateSandbox(spec) { log.Debugf("Creating new sandbox for container %q", id) - // Setup rootfs and mounts. It returns a new mount list with destination - // paths resolved. Since the spec for the root container is read from disk, - // Write the new spec to a new file that will be used by the sandbox. - cleanMounts, err := setupFS(spec, conf, bundleDir) - if err != nil { - return nil, fmt.Errorf("setup mounts: %v", err) - } - spec.Mounts = cleanMounts - if err := specutils.WriteCleanSpec(bundleDir, spec); err != nil { - return nil, fmt.Errorf("writing clean spec: %v", err) - } - // Create and join cgroup before processes are created to ensure they are // part of the cgroup from the start (and all tneir children processes). cg, err := cgroup.New(spec) @@ -306,14 +294,14 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo } } if err := runInCgroup(cg, func() error { - ioFiles, err := c.createGoferProcess(spec, conf, bundleDir) + ioFiles, specFile, err := c.createGoferProcess(spec, conf, bundleDir) if err != nil { return err } // Start a new sandbox for this container. Any errors after this point // must destroy the container. - c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, cg) + c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, cg) return err }); err != nil { return nil, err @@ -387,26 +375,22 @@ func (c *Container) Start(conf *boot.Config) error { return err } } else { - // Setup rootfs and mounts. It returns a new mount list with destination - // paths resolved. Replace the original spec with new mount list and start - // container. - cleanMounts, err := setupFS(c.Spec, conf, c.BundleDir) - if err != nil { - return fmt.Errorf("setup mounts: %v", err) - } - c.Spec.Mounts = cleanMounts - if err := specutils.WriteCleanSpec(c.BundleDir, c.Spec); err != nil { - return fmt.Errorf("writing clean spec: %v", err) - } - // Join cgroup to strt gofer process to ensure it's part of the cgroup from // the start (and all tneir children processes). if err := runInCgroup(c.Sandbox.Cgroup, func() error { // Create the gofer process. - ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) + ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) if err != nil { return err } + defer mountsFile.Close() + + cleanMounts, err := specutils.ReadMounts(mountsFile) + if err != nil { + return fmt.Errorf("reading mounts file: %v", err) + } + c.Spec.Mounts = cleanMounts + return c.Sandbox.StartContainer(c.Spec, conf, c.ID, ioFiles) }); err != nil { return err @@ -665,12 +649,6 @@ func (c *Container) Destroy() error { errs = append(errs, err.Error()) } - if err := destroyFS(c.Spec); err != nil { - err = fmt.Errorf("destroying container fs: %v", err) - log.Warningf("%v", err) - errs = append(errs, err.Error()) - } - if err := os.RemoveAll(c.Root); err != nil && !os.IsNotExist(err) { err = fmt.Errorf("deleting container root directory %q: %v", c.Root, err) log.Warningf("%v", err) @@ -787,7 +765,7 @@ func (c *Container) waitForStopped() error { return backoff.Retry(op, b) } -func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, error) { +func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, *os.File, error) { // Start with the general config flags. args := conf.ToFlags() @@ -800,7 +778,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund if conf.LogFilename != "" { logFile, err := os.OpenFile(conf.LogFilename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { - return nil, fmt.Errorf("opening log file %q: %v", conf.LogFilename, err) + return nil, nil, fmt.Errorf("opening log file %q: %v", conf.LogFilename, err) } defer logFile.Close() goferEnds = append(goferEnds, logFile) @@ -811,7 +789,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund if conf.DebugLog != "" { debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "gofer") if err != nil { - return nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err) + return nil, nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err) } defer debugLogFile.Close() goferEnds = append(goferEnds, debugLogFile) @@ -825,30 +803,39 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund } // Open the spec file to donate to the sandbox. - specFile, err := specutils.OpenCleanSpec(bundleDir) + specFile, err := specutils.OpenSpec(bundleDir) if err != nil { - return nil, fmt.Errorf("opening spec file: %v", err) + return nil, nil, fmt.Errorf("opening spec file: %v", err) } defer specFile.Close() goferEnds = append(goferEnds, specFile) args = append(args, "--spec-fd="+strconv.Itoa(nextFD)) nextFD++ + // Create pipe that allows gofer to send mount list to sandbox after all paths + // have been resolved. + mountsSand, mountsGofer, err := os.Pipe() + if err != nil { + return nil, nil, err + } + defer mountsGofer.Close() + goferEnds = append(goferEnds, mountsGofer) + args = append(args, fmt.Sprintf("--mounts-fd=%d", nextFD)) + nextFD++ + // Add root mount and then add any other additional mounts. mountCount := 1 - - // Add additional mounts. for _, m := range spec.Mounts { if specutils.Is9PMount(m) { mountCount++ } } - sandEnds := make([]*os.File, 0, mountCount) + sandEnds := make([]*os.File, 0, mountCount) for i := 0; i < mountCount; i++ { fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) if err != nil { - return nil, err + return nil, nil, err } sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox IO FD")) @@ -884,12 +871,12 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund // Start the gofer in the given namespace. log.Debugf("Starting gofer: %s %v", binPath, args) if err := specutils.StartInNS(cmd, nss); err != nil { - return nil, err + return nil, nil, err } log.Infof("Gofer started, PID: %d", cmd.Process.Pid) c.GoferPid = cmd.Process.Pid c.goferIsChild = true - return sandEnds, nil + return sandEnds, mountsSand, nil } // changeStatus transitions from one status to another ensuring that the diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 06a25de6d..f17155175 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -1594,6 +1594,171 @@ func TestCreateWorkingDir(t *testing.T) { } } +// TestMountPropagation verifies that mount propagates to slave but not to +// private mounts. +func TestMountPropagation(t *testing.T) { + // Setup dir structure: + // - src: is mounted as shared and is used as source for both private and + // slave mounts + // - dir: will be bind mounted inside src and should propagate to slave + tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "mount") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + src := filepath.Join(tmpDir, "src") + srcMnt := filepath.Join(src, "mnt") + dir := filepath.Join(tmpDir, "dir") + for _, path := range []string{src, srcMnt, dir} { + if err := os.MkdirAll(path, 0777); err != nil { + t.Fatalf("MkdirAll(%q): %v", path, err) + } + } + dirFile := filepath.Join(dir, "file") + f, err := os.Create(dirFile) + if err != nil { + t.Fatalf("os.Create(%q): %v", dirFile, err) + } + f.Close() + + // Setup src as a shared mount. + if err := syscall.Mount(src, src, "bind", syscall.MS_BIND, ""); err != nil { + t.Fatalf("mount(%q, %q, MS_BIND): %v", dir, srcMnt, err) + } + if err := syscall.Mount("", src, "", syscall.MS_SHARED, ""); err != nil { + t.Fatalf("mount(%q, MS_SHARED): %v", srcMnt, err) + } + + spec := testutil.NewSpecWithArgs("sleep", "1000") + + priv := filepath.Join(tmpDir, "priv") + slave := filepath.Join(tmpDir, "slave") + spec.Mounts = []specs.Mount{ + { + Source: src, + Destination: priv, + Type: "bind", + Options: []string{"private"}, + }, + { + Source: src, + Destination: slave, + Type: "bind", + Options: []string{"slave"}, + }, + } + + conf := testutil.TestConfig() + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + if err != nil { + t.Fatalf("creating container: %v", err) + } + defer cont.Destroy() + + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } + + // After the container is started, mount dir inside source and check what + // happens to both destinations. + if err := syscall.Mount(dir, srcMnt, "bind", syscall.MS_BIND, ""); err != nil { + t.Fatalf("mount(%q, %q, MS_BIND): %v", dir, srcMnt, err) + } + + // Check that mount didn't propagate to private mount. + privFile := filepath.Join(priv, "mnt", "file") + args := &control.ExecArgs{ + Filename: "/usr/bin/test", + Argv: []string{"test", "!", "-f", privFile}, + } + if ws, err := cont.executeSync(args); err != nil || ws != 0 { + t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err) + } + + // Check that mount propagated to slave mount. + slaveFile := filepath.Join(slave, "mnt", "file") + args = &control.ExecArgs{ + Filename: "/usr/bin/test", + Argv: []string{"test", "-f", slaveFile}, + } + if ws, err := cont.executeSync(args); err != nil || ws != 0 { + t.Fatalf("exec: test -f %q, ws: %v, err: %v", privFile, ws, err) + } +} + +func TestMountSymlink(t *testing.T) { + for _, conf := range configs(overlay) { + t.Logf("Running test with conf: %+v", conf) + + dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + + source := path.Join(dir, "source") + target := path.Join(dir, "target") + for _, path := range []string{source, target} { + if err := os.MkdirAll(path, 0777); err != nil { + t.Fatalf("os.MkdirAll(): %v", err) + } + } + f, err := os.Create(path.Join(source, "file")) + if err != nil { + t.Fatalf("os.Create(): %v", err) + } + f.Close() + + link := path.Join(dir, "link") + if err := os.Symlink(target, link); err != nil { + t.Fatalf("os.Symlink(%q, %q): %v", target, link, err) + } + + spec := testutil.NewSpecWithArgs("/bin/sleep", "1000") + + // Mount to a symlink to ensure the mount code will follow it and mount + // at the symlink target. + spec.Mounts = append(spec.Mounts, specs.Mount{ + Type: "bind", + Destination: link, + Source: source, + }) + + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + if err != nil { + t.Fatalf("creating container: %v", err) + } + defer cont.Destroy() + + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } + + // Check that symlink was resolved and mount was created where the symlink + // is pointing to. + file := path.Join(target, "file") + args := &control.ExecArgs{ + Filename: "/usr/bin/test", + Argv: []string{"test", "-f", file}, + } + if ws, err := cont.executeSync(args); err != nil || ws != 0 { + t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) + } + } +} + // executeSync synchronously executes a new process. func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) { pid, err := cont.Execute(args) diff --git a/runsc/container/fs.go b/runsc/container/fs.go deleted file mode 100644 index 998160487..000000000 --- a/runsc/container/fs.go +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright 2018 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package container - -import ( - "bufio" - "fmt" - "os" - "path/filepath" - "strings" - "syscall" - - specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" -) - -type mapping struct { - set bool - val uint32 -} - -var optionsMap = map[string]mapping{ - "acl": {set: true, val: syscall.MS_POSIXACL}, - "async": {set: false, val: syscall.MS_SYNCHRONOUS}, - "atime": {set: false, val: syscall.MS_NOATIME}, - "bind": {set: true, val: syscall.MS_BIND}, - "defaults": {set: true, val: 0}, - "dev": {set: false, val: syscall.MS_NODEV}, - "diratime": {set: false, val: syscall.MS_NODIRATIME}, - "dirsync": {set: true, val: syscall.MS_DIRSYNC}, - "exec": {set: false, val: syscall.MS_NOEXEC}, - "iversion": {set: true, val: syscall.MS_I_VERSION}, - "loud": {set: false, val: syscall.MS_SILENT}, - "mand": {set: true, val: syscall.MS_MANDLOCK}, - "noacl": {set: false, val: syscall.MS_POSIXACL}, - "noatime": {set: true, val: syscall.MS_NOATIME}, - "nodev": {set: true, val: syscall.MS_NODEV}, - "nodiratime": {set: true, val: syscall.MS_NODIRATIME}, - "noexec": {set: true, val: syscall.MS_NOEXEC}, - "noiversion": {set: false, val: syscall.MS_I_VERSION}, - "nomand": {set: false, val: syscall.MS_MANDLOCK}, - "norelatime": {set: false, val: syscall.MS_RELATIME}, - "nostrictatime": {set: false, val: syscall.MS_STRICTATIME}, - "nosuid": {set: true, val: syscall.MS_NOSUID}, - "private": {set: true, val: syscall.MS_PRIVATE}, - "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC}, - "relatime": {set: true, val: syscall.MS_RELATIME}, - "remount": {set: true, val: syscall.MS_REMOUNT}, - "ro": {set: true, val: syscall.MS_RDONLY}, - "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC}, - "rw": {set: false, val: syscall.MS_RDONLY}, - "silent": {set: true, val: syscall.MS_SILENT}, - "strictatime": {set: true, val: syscall.MS_STRICTATIME}, - "suid": {set: false, val: syscall.MS_NOSUID}, - "sync": {set: true, val: syscall.MS_SYNCHRONOUS}, -} - -// setupFS creates the container directory structure under 'spec.Root.Path'. -// This allows the gofer serving the containers to be chroot under this -// directory to create an extra layer to security in case the gofer gets -// compromised. -// Returns list of mounts equivalent to 'spec.Mounts' with all destination paths -// cleaned and with symlinks resolved. -func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]specs.Mount, error) { - rv := make([]specs.Mount, 0, len(spec.Mounts)) - for _, m := range spec.Mounts { - if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { - rv = append(rv, m) - continue - } - - // It's possible that 'm.Destination' follows symlinks inside the - // container. - dst, err := resolveSymlinks(spec.Root.Path, m.Destination) - if err != nil { - return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) - } - - flags := optionsToFlags(m.Options) - flags |= syscall.MS_BIND - log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) - if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil { - return nil, fmt.Errorf("mounting %v: %v", m, err) - } - - // Make the mount a slave, so that for recursive bind mount, umount won't - // propagate to the source. - flags = syscall.MS_SLAVE | syscall.MS_REC - if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil { - return nil, fmt.Errorf("mount rslave dst: %q, flags: %#x, err: %v", dst, flags, err) - } - - cpy := m - relDst, err := filepath.Rel(spec.Root.Path, dst) - if err != nil { - panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, spec.Root.Path, err)) - } - cpy.Destination = filepath.Join("/", relDst) - rv = append(rv, cpy) - } - - if spec.Process.Cwd != "" { - dst, err := resolveSymlinks(spec.Root.Path, spec.Process.Cwd) - if err != nil { - return nil, fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err) - } - if err := os.MkdirAll(dst, 0755); err != nil { - return nil, err - } - } - - // If root is read only, check if it needs to be remounted as readonly. - if spec.Root.Readonly { - isMountPoint, readonly, err := mountInfo(spec.Root.Path) - if err != nil { - return nil, err - } - if readonly { - return rv, nil - } - if !isMountPoint { - // Readonly root is not a mount point nor read-only. Can't do much other - // than just logging a warning. The gofer will prevent files to be open - // in write mode. - log.Warningf("Mount where root is located is not read-only and cannot be changed: %q", spec.Root.Path) - return rv, nil - } - - // If root is a mount point but not read-only, we can change mount options - // to make it read-only for extra safety. - log.Infof("Remounting root as readonly: %q", spec.Root.Path) - flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) - src := spec.Root.Path - if err := syscall.Mount(src, src, "bind", flags, ""); err != nil { - return nil, fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", spec.Root.Path, spec.Root.Path, flags, err) - } - } - return rv, nil -} - -// mountInfo returns whether the path is a mount point and whether the mount -// that path belongs to is read-only. -func mountInfo(path string) (bool, bool, error) { - // Mounts are listed by their real paths. - realPath, err := filepath.EvalSymlinks(path) - if err != nil { - return false, false, err - } - f, err := os.Open("/proc/mounts") - if err != nil { - return false, false, err - } - scanner := bufio.NewScanner(f) - - var mountPoint string - var readonly bool - for scanner.Scan() { - line := scanner.Text() - parts := strings.Split(line, " ") - if len(parts) < 4 { - return false, false, fmt.Errorf("invalid /proc/mounts line format %q", line) - } - mp := parts[1] - opts := strings.Split(parts[3], ",") - - // Find the closest submount to the path. - if strings.Contains(realPath, mp) && len(mp) > len(mountPoint) { - mountPoint = mp - readonly = specutils.ContainsStr(opts, "ro") - } - } - if err := scanner.Err(); err != nil { - return false, false, err - } - return mountPoint == realPath, readonly, nil -} - -// destroyFS unmounts mounts done by runsc under `spec.Root.Path`. This -// recovers the container rootfs into the original state. -func destroyFS(spec *specs.Spec) error { - for _, m := range spec.Mounts { - if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { - continue - } - - // It's possible that 'm.Destination' follows symlinks inside the - // container. - dst, err := resolveSymlinks(spec.Root.Path, m.Destination) - if err != nil { - return err - } - - flags := syscall.MNT_DETACH - log.Infof("Unmounting dst: %q, flags: %#x", dst, flags) - // Do not return error if dst is not a mountpoint. - // Based on http://man7.org/linux/man-pages/man2/umount.2.html - // For kernel version 2.6+ and MNT_DETACH flag, EINVAL means - // the dst is not a mount point. - if err := syscall.Unmount(dst, flags); err != nil && - !os.IsNotExist(err) && err != syscall.EINVAL { - return err - } - } - return nil -} - -// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are -// symlinks, they are evaluated relative to 'root' to ensure the end result is -// the same as if the process was running inside the container. -func resolveSymlinks(root, rel string) (string, error) { - return resolveSymlinksImpl(root, root, rel, 255) -} - -func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) { - if followCount == 0 { - return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel)) - } - - rel = filepath.Clean(rel) - for _, name := range strings.Split(rel, string(filepath.Separator)) { - if name == "" { - continue - } - // Note that Join() resolves things like ".." and returns a clean path. - path := filepath.Join(base, name) - if !strings.HasPrefix(path, root) { - // One cannot '..' their way out of root. - path = root - continue - } - fi, err := os.Lstat(path) - if err != nil { - if !os.IsNotExist(err) { - return "", err - } - // Not found means there is no symlink to check. Just keep walking dirs. - base = path - continue - } - if fi.Mode()&os.ModeSymlink != 0 { - link, err := os.Readlink(path) - if err != nil { - return "", err - } - if filepath.IsAbs(link) { - base = root - } - base, err = resolveSymlinksImpl(root, base, link, followCount-1) - if err != nil { - return "", err - } - continue - } - base = path - } - return base, nil -} - -func optionsToFlags(opts []string) uint32 { - var rv uint32 - for _, opt := range opts { - if m, ok := optionsMap[opt]; ok { - if m.set { - rv |= m.val - } else { - rv ^= m.val - } - } else { - log.Warningf("Ignoring mount option %q", opt) - } - } - return rv -} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 2698e3f86..ae6375e13 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -75,7 +75,7 @@ type Sandbox struct { // New creates the sandbox process. The caller must call Destroy() on the // sandbox. -func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, cg *cgroup.Cgroup) (*Sandbox, error) { +func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, specFile *os.File, cg *cgroup.Cgroup) (*Sandbox, error) { s := &Sandbox{ID: id, Cgroup: cg} // The Cleanup object cleans up partially created sandboxes when an error // occurs. Any errors occurring during cleanup itself are ignored. @@ -86,17 +86,14 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke defer c.Clean() // Create pipe to synchronize when sandbox process has been booted. - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { + clientSyncFile, sandboxSyncFile, err := os.Pipe() + if err != nil { return nil, fmt.Errorf("creating pipe for sandbox %q: %v", s.ID, err) } - clientSyncFile := os.NewFile(uintptr(fds[0]), "client sandbox sync") defer clientSyncFile.Close() - sandboxSyncFile := os.NewFile(uintptr(fds[1]), "sandbox sync") - // Create the sandbox process. - err := s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, sandboxSyncFile) + err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, sandboxSyncFile) // sandboxSyncFile has to be closed to be able to detect when the sandbox // process exits unexpectedly. sandboxSyncFile.Close() @@ -294,7 +291,7 @@ func (s *Sandbox) connError(err error) error { // createSandboxProcess starts the sandbox as a subprocess by running the "boot" // command, passing in the bundle dir. -func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, startSyncFile *os.File) error { +func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, mountsFile, startSyncFile *os.File) error { // nextFD is used to get unused FDs that we can pass to the sandbox. It // starts at 3 because 0, 1, and 2 are taken by stdin/out/err. nextFD := 3 @@ -345,10 +342,14 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD)) nextFD++ - // Open the spec file to donate to the sandbox. - specFile, err := specutils.OpenCleanSpec(bundleDir) + defer mountsFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, mountsFile) + cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD)) + nextFD++ + + specFile, err := specutils.OpenSpec(bundleDir) if err != nil { - return fmt.Errorf("opening spec file: %v", err) + return err } defer specFile.Close() cmd.ExtraFiles = append(cmd.ExtraFiles, specFile) diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD index 372799850..15476de6f 100644 --- a/runsc/specutils/BUILD +++ b/runsc/specutils/BUILD @@ -5,6 +5,7 @@ package(licenses = ["notice"]) go_library( name = "specutils", srcs = [ + "fs.go", "namespace.go", "specutils.go", ], diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go new file mode 100644 index 000000000..b812a5fbd --- /dev/null +++ b/runsc/specutils/fs.go @@ -0,0 +1,139 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package specutils + +import ( + "fmt" + "path" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/log" +) + +type mapping struct { + set bool + val uint32 +} + +// optionsMap maps mount propagation-related OCI filesystem options to mount(2) +// syscall flags. +var optionsMap = map[string]mapping{ + "acl": {set: true, val: syscall.MS_POSIXACL}, + "async": {set: false, val: syscall.MS_SYNCHRONOUS}, + "atime": {set: false, val: syscall.MS_NOATIME}, + "bind": {set: true, val: syscall.MS_BIND}, + "defaults": {set: true, val: 0}, + "dev": {set: false, val: syscall.MS_NODEV}, + "diratime": {set: false, val: syscall.MS_NODIRATIME}, + "dirsync": {set: true, val: syscall.MS_DIRSYNC}, + "exec": {set: false, val: syscall.MS_NOEXEC}, + "iversion": {set: true, val: syscall.MS_I_VERSION}, + "loud": {set: false, val: syscall.MS_SILENT}, + "mand": {set: true, val: syscall.MS_MANDLOCK}, + "noacl": {set: false, val: syscall.MS_POSIXACL}, + "noatime": {set: true, val: syscall.MS_NOATIME}, + "nodev": {set: true, val: syscall.MS_NODEV}, + "nodiratime": {set: true, val: syscall.MS_NODIRATIME}, + "noiversion": {set: false, val: syscall.MS_I_VERSION}, + "nomand": {set: false, val: syscall.MS_MANDLOCK}, + "norelatime": {set: false, val: syscall.MS_RELATIME}, + "nostrictatime": {set: false, val: syscall.MS_STRICTATIME}, + "nosuid": {set: true, val: syscall.MS_NOSUID}, + "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC}, + "relatime": {set: true, val: syscall.MS_RELATIME}, + "remount": {set: true, val: syscall.MS_REMOUNT}, + "ro": {set: true, val: syscall.MS_RDONLY}, + "rw": {set: false, val: syscall.MS_RDONLY}, + "silent": {set: true, val: syscall.MS_SILENT}, + "strictatime": {set: true, val: syscall.MS_STRICTATIME}, + "suid": {set: false, val: syscall.MS_NOSUID}, + "sync": {set: true, val: syscall.MS_SYNCHRONOUS}, +} + +// propOptionsMap is similar to optionsMap, but it lists propagation options +// that cannot be used together with other flags. +var propOptionsMap = map[string]mapping{ + "private": {set: true, val: syscall.MS_PRIVATE}, + "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC}, + "slave": {set: true, val: syscall.MS_SLAVE}, + "rslave": {set: true, val: syscall.MS_SLAVE | syscall.MS_REC}, + "unbindable": {set: true, val: syscall.MS_UNBINDABLE}, + "runbindable": {set: true, val: syscall.MS_UNBINDABLE | syscall.MS_REC}, +} + +// invalidOptions list options not allowed. +// - shared: sandbox must be isolated from the host. Propagating mount changes +// from the sandbox to the host breaks the isolation. +// - noexec: not yet supported. Don't ignore it since it could break +// in-sandbox security. +var invalidOptions = []string{"shared", "rshared", "noexec"} + +// OptionsToFlags converts mount options to syscall flags. +func OptionsToFlags(opts []string) uint32 { + return optionsToFlags(opts, optionsMap) +} + +// PropOptionsToFlags converts propagation mount options to syscall flags. +// Propagation options cannot be set other with other options and must be +// handled separatedly. +func PropOptionsToFlags(opts []string) uint32 { + return optionsToFlags(opts, propOptionsMap) +} + +func optionsToFlags(opts []string, source map[string]mapping) uint32 { + var rv uint32 + for _, opt := range opts { + if m, ok := source[opt]; ok { + if m.set { + rv |= m.val + } else { + rv ^= m.val + } + } + } + return rv +} + +// ValidateMount validates that spec mounts are correct. +func validateMount(mnt *specs.Mount) error { + if !path.IsAbs(mnt.Destination) { + return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt) + } + + if mnt.Type == "bind" { + for _, o := range mnt.Options { + if ContainsStr(invalidOptions, o) { + return fmt.Errorf("mount option %q is not supported: %v", o, mnt) + } + _, ok1 := optionsMap[o] + _, ok2 := propOptionsMap[o] + if !ok1 && !ok2 { + log.Warningf("Ignoring unknown mount option %q", o) + } + } + } + return nil +} + +// ValidateRootfsPropagation validates that rootfs propagation options are +// correct. +func validateRootfsPropagation(opt string) error { + flags := PropOptionsToFlags([]string{opt}) + if flags&(syscall.MS_SLAVE|syscall.MS_PRIVATE) == 0 { + return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt) + } + return nil +} diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 73fab13e1..35da789f4 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -105,9 +105,9 @@ func FilterNS(filter []specs.LinuxNamespaceType, s *specs.Spec) []specs.LinuxNam return out } -// SetNS sets the namespace of the given type. It must be called with +// setNS sets the namespace of the given type. It must be called with // OSThreadLocked. -func SetNS(fd, nsType uintptr) error { +func setNS(fd, nsType uintptr) error { if _, _, err := syscall.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 { return err } @@ -119,30 +119,30 @@ func SetNS(fd, nsType uintptr) error { // // Preconditions: Must be called with os thread locked. func ApplyNS(ns specs.LinuxNamespace) (func(), error) { - log.Infof("applying namespace %v at path %q", ns.Type, ns.Path) + log.Infof("Applying namespace %v at path %q", ns.Type, ns.Path) newNS, err := os.Open(ns.Path) if err != nil { return nil, fmt.Errorf("error opening %q: %v", ns.Path, err) } defer newNS.Close() - // Store current netns to restore back after child is started. + // Store current namespace to restore back. curPath := nsPath(ns.Type) oldNS, err := os.Open(curPath) if err != nil { return nil, fmt.Errorf("error opening %q: %v", curPath, err) } - // Set netns to the one requested and setup function to restore it back. + // Set namespace to the one requested and setup function to restore it back. flag := nsCloneFlag(ns.Type) - if err := SetNS(newNS.Fd(), flag); err != nil { + if err := setNS(newNS.Fd(), flag); err != nil { oldNS.Close() return nil, fmt.Errorf("error setting namespace of type %v and path %q: %v", ns.Type, ns.Path, err) } return func() { - log.Infof("restoring namespace %v", ns.Type) + log.Infof("Restoring namespace %v", ns.Type) defer oldNS.Close() - if err := SetNS(oldNS.Fd(), flag); err != nil { + if err := setNS(oldNS.Fd(), flag); err != nil { panic(fmt.Sprintf("error restoring namespace: of type %v: %v", ns.Type, err)) } }, nil diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 4e7893ab4..cbf099c64 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -92,9 +92,14 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("Seccomp spec is being ignored") } - for i, m := range spec.Mounts { - if !path.IsAbs(m.Destination) { - return fmt.Errorf("Spec.Mounts[%d] Mount.Destination must be an absolute path: %v", i, m) + if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { + if err := validateRootfsPropagation(spec.Linux.RootfsPropagation); err != nil { + return err + } + } + for _, m := range spec.Mounts { + if err := validateMount(&m); err != nil { + return err } } @@ -129,15 +134,19 @@ func absPath(base, rel string) string { return filepath.Join(base, rel) } +// OpenSpec opens an OCI runtime spec from the given bundle directory. +func OpenSpec(bundleDir string) (*os.File, error) { + // The spec file must be named "config.json" inside the bundle directory. + return os.Open(filepath.Join(bundleDir, "config.json")) +} + // ReadSpec reads an OCI runtime spec from the given bundle directory. // ReadSpec also normalizes all potential relative paths into absolute // path, e.g. spec.Root.Path, mount.Source. func ReadSpec(bundleDir string) (*specs.Spec, error) { - // The spec file must be in "config.json" inside the bundle directory. - specPath := filepath.Join(bundleDir, "config.json") - specFile, err := os.Open(specPath) + specFile, err := OpenSpec(bundleDir) if err != nil { - return nil, fmt.Errorf("error opening spec file %q: %v", specPath, err) + return nil, fmt.Errorf("error opening spec file %q: %v", specFile.Name(), err) } defer specFile.Close() return ReadSpecFromFile(bundleDir, specFile) @@ -171,27 +180,17 @@ func ReadSpecFromFile(bundleDir string, specFile *os.File) (*specs.Spec, error) return &spec, nil } -// OpenCleanSpec opens spec file that has destination mount paths resolved to -// their absolute location. -func OpenCleanSpec(bundleDir string) (*os.File, error) { - f, err := os.Open(filepath.Join(bundleDir, "config.clean.json")) +// ReadMounts reads mount list from a file. +func ReadMounts(f *os.File) ([]specs.Mount, error) { + bytes, err := ioutil.ReadAll(f) if err != nil { - return nil, err + return nil, fmt.Errorf("error reading mounts: %v", err) } - if _, err := f.Seek(0, os.SEEK_SET); err != nil { - f.Close() - return nil, fmt.Errorf("error seeking to beginning of file %q: %v", f.Name(), err) - } - return f, nil -} - -// WriteCleanSpec writes a spec file that has destination mount paths resolved. -func WriteCleanSpec(bundleDir string, spec *specs.Spec) error { - bytes, err := json.Marshal(spec) - if err != nil { - return err + var mounts []specs.Mount + if err := json.Unmarshal(bytes, &mounts); err != nil { + return nil, fmt.Errorf("error unmarshaling mounts: %v\n %s", err, string(bytes)) } - return ioutil.WriteFile(filepath.Join(bundleDir, "config.clean.json"), bytes, 0755) + return mounts, nil } // Capabilities takes in spec and returns a TaskCapabilities corresponding to @@ -407,8 +406,7 @@ func Mount(src, dst, typ string, flags uint32) error { // source (file or directory). var isDir bool if typ == "proc" { - // Special case, as there is no source directory for proc - // mounts. + // Special case, as there is no source directory for proc mounts. isDir = true } else if fi, err := os.Stat(src); err != nil { return fmt.Errorf("Stat(%q) failed: %v", src, err) diff --git a/runsc/specutils/specutils_test.go b/runsc/specutils/specutils_test.go index b61f1ca62..02af6e6ad 100644 --- a/runsc/specutils/specutils_test.go +++ b/runsc/specutils/specutils_test.go @@ -219,6 +219,37 @@ func TestSpecInvalid(t *testing.T) { }, error: "must be an absolute path", }, + { + name: "invalid mount option", + spec: specs.Spec{ + Root: &specs.Root{Path: "/"}, + Process: &specs.Process{ + Args: []string{"/bin/true"}, + }, + Mounts: []specs.Mount{ + { + Source: "/src", + Destination: "/dst", + Type: "bind", + Options: []string{"shared"}, + }, + }, + }, + error: "is not supported", + }, + { + name: "invalid rootfs propagation", + spec: specs.Spec{ + Root: &specs.Root{Path: "/"}, + Process: &specs.Process{ + Args: []string{"/bin/true"}, + }, + Linux: &specs.Linux{ + RootfsPropagation: "foo", + }, + }, + error: "root mount propagation option must specify private or slave", + }, } { err := ValidateSpec(&test.spec) if len(test.error) == 0 { |