summaryrefslogtreecommitdiffhomepage
path: root/runsc/cmd/gofer.go
diff options
context:
space:
mode:
authorFabricio Voznika <fvoznika@google.com>2019-03-18 12:29:43 -0700
committerShentubot <shentubot@google.com>2019-03-18 12:30:43 -0700
commite420cc3e5d2066674d32d16ad885bee6b30da210 (patch)
tree991b119af7c8816a539318560338b3e5f065a2f8 /runsc/cmd/gofer.go
parenteb69542807a87491fd4e6405bdab1c0f64db536d (diff)
Add support for mount propagation
Properly handle propagation options for root and mounts. Using the mount options shared, rshared, or noexec now causes an error at startup. shared/rshared would break sandbox=>host isolation. slave, however, can be supported because changes propagate only from host to sandbox. Root FS setup was moved inside the gofer. Apart from simplifying the code, this keeps all mounts inside the namespace, so they are torn down when the namespace is destroyed (DestroyFS is no longer needed). PiperOrigin-RevId: 239037661 Change-Id: I8b5ee4d50da33c042ea34fa68e56514ebe20e6e0
Diffstat (limited to 'runsc/cmd/gofer.go')
-rw-r--r--runsc/cmd/gofer.go279
1 files changed, 241 insertions, 38 deletions
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 6f9711518..e712244ef 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -16,7 +16,11 @@ package cmd
import (
"context"
+ "encoding/json"
+ "fmt"
"os"
+ "path/filepath"
+ "strings"
"sync"
"syscall"
@@ -59,6 +63,7 @@ type Gofer struct {
panicOnWrite bool
specFD int
+ mountsFD int
}
// Name implements subcommands.Command.
@@ -84,6 +89,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) {
f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected")
f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process")
f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
+ f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).")
}
// Execute implements subcommands.Command.
@@ -100,45 +106,13 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("reading spec: %v", err)
}
- // Find what path is going to be served by this gofer.
- root := spec.Root.Path
-
conf := args[0].(*boot.Config)
- if g.setUpRoot && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
- // Convert all shared mounts into slave to be sure that nothing will be
- // propagated outside of our namespace.
- if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
- Fatalf("error converting mounts: %v", err)
- }
-
- // FIXME: runsc can't be re-executed without
- // /proc, so we create a tmpfs mount, mount ./proc and ./root
- // there, then move this mount to the root and after
- // setCapsAndCallSelf, runsc will chroot into /root.
- //
- // We need a directory to construct a new root and we know that
- // runsc can't start without /proc, so we can use it for this.
- flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC)
- if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil {
- Fatalf("error mounting tmpfs: %v", err)
- }
- os.Mkdir("/proc/proc", 0755)
- os.Mkdir("/proc/root", 0755)
- if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil {
- Fatalf("error mounting proc: %v", err)
- }
- if err := syscall.Mount(root, "/proc/root", "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
- Fatalf("error mounting root: %v", err)
- }
- if err := pivotRoot("/proc"); err != nil {
- Fatalf("faild to change the root file system: %v", err)
- }
- if err := os.Chdir("/"); err != nil {
- Fatalf("failed to change working directory")
+ if g.setUpRoot {
+ if err := setupRootFS(spec, conf); err != nil {
+ Fatalf("Error setting up root FS: %v", err)
}
}
-
if g.applyCaps {
// Disable caps when calling myself again.
// Note: minimal argument handling for the default case to keep it simple.
@@ -150,15 +124,34 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
panic("unreachable")
}
+ // Find what path is going to be served by this gofer.
+ root := spec.Root.Path
+ if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+ root = "/root"
+ }
+
+ // Resolve mount points paths, then replace mounts from our spec and send the
+ // mount list over to the sandbox, so they are both in sync.
+ //
+ // Note that all mount points have been mounted in the proper location in
+ // setupRootFS().
+ cleanMounts, err := resolveMounts(spec.Mounts, root)
+ if err != nil {
+ Fatalf("Failure to resolve mounts: %v", err)
+ }
+ spec.Mounts = cleanMounts
+ go func() {
+ if err := g.writeMounts(cleanMounts); err != nil {
+ panic(fmt.Sprintf("Failed to write mounts: %v", err))
+ }
+ }()
+
specutils.LogSpec(spec)
// fsgofer should run with a umask of 0, because we want to preserve file
// modes exactly as sent by the sandbox, which will have applied its own umask.
syscall.Umask(0)
- if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
- root = "/root"
- }
if err := syscall.Chroot(root); err != nil {
Fatalf("failed to chroot to %q: %v", root, err)
}
@@ -232,6 +225,25 @@ func runServers(ats []p9.Attacher, ioFDs []int) {
log.Infof("All 9P servers exited.")
}
+// writeMounts serializes mounts as JSON and writes the result to the file
+// descriptor stored in g.mountsFD (the --mounts-fd flag), then closes that
+// descriptor.
+//
+// It returns an error if g.mountsFD is negative (the flag default is -1), if
+// the mounts cannot be marshaled, or if writing or closing the file fails.
+func (g *Gofer) writeMounts(mounts []specs.Mount) error {
+	// Fail with a clear message instead of letting os.NewFile hand back a nil
+	// *os.File for an invalid descriptor.
+	if g.mountsFD < 0 {
+		return fmt.Errorf("invalid mounts FD: %d", g.mountsFD)
+	}
+
+	data, err := json.Marshal(mounts)
+	if err != nil {
+		return err
+	}
+
+	f := os.NewFile(uintptr(g.mountsFD), "mounts file")
+
+	// os.File.Write returns a non-nil error whenever it writes fewer than
+	// len(data) bytes, so a manual short-write loop is unnecessary.
+	if _, err := f.Write(data); err != nil {
+		f.Close()
+		return err
+	}
+
+	// Surface close failures too, since the data is only fully handed off once
+	// the descriptor has been closed successfully.
+	return f.Close()
+}
+
func isReadonlyMount(opts []string) bool {
for _, o := range opts {
if o == "ro" {
@@ -240,3 +252,194 @@ func isReadonlyMount(opts []string) bool {
}
return false
}
+
+// setupRootFS assembles the root filesystem that this gofer will serve.
+//
+// It marks every mount in the current namespace as a slave, bind mounts
+// spec.Root.Path into place, applies the root propagation option, calls
+// setupMounts for the entries in spec.Mounts, creates the process working
+// directory when spec.Process.Cwd is set, and remounts the root read-only
+// when spec.Root.Readonly is set.
+//
+// Unless conf.TestOnlyAllowRunAsCurrentUserWithoutChroot is set, the tree is
+// built on a tmpfs mounted over /proc (with the root at /proc/root) and then
+// made the process root via pivotRoot("/proc"). Otherwise the root is bind
+// mounted onto itself at spec.Root.Path and no pivot happens.
+//
+// All failures are reported through the returned error.
+func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
+	// Convert all shared mounts into slaves to be sure that nothing will be
+	// propagated outside of our namespace.
+	if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
+		return fmt.Errorf("converting mounts: %v", err)
+	}
+
+	root := spec.Root.Path
+	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+		// FIXME: runsc can't be re-executed without
+		// /proc, so we create a tmpfs mount, mount ./proc and ./root
+		// there, then move this mount to the root and after
+		// setCapsAndCallSelf, runsc will chroot into /root.
+		//
+		// We need a directory to construct a new root and we know that
+		// runsc can't start without /proc, so we can use it for this.
+		flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC)
+		if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil {
+			return fmt.Errorf("mounting tmpfs: %v", err)
+		}
+
+		// Prepare tree structure for pivot_root(2).
+		for _, dir := range []string{"/proc/proc", "/proc/root"} {
+			if err := os.Mkdir(dir, 0755); err != nil {
+				return fmt.Errorf("creating directory %q: %v", dir, err)
+			}
+		}
+		if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil {
+			return fmt.Errorf("mounting proc: %v", err)
+		}
+		root = "/proc/root"
+	}
+
+	// Mount root path followed by submounts.
+	if err := syscall.Mount(spec.Root.Path, root, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
+		return fmt.Errorf("mounting root %q on %q: %v", spec.Root.Path, root, err)
+	}
+
+	// Apply propagation to the bind mount that was just created at root. When
+	// root is "/proc/root", spec.Root.Path is only the source of that bind mount,
+	// so targeting it would change (or fail on) the wrong mount.
+	flags := uint32(syscall.MS_SLAVE | syscall.MS_REC)
+	if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
+		flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation})
+	}
+	if err := syscall.Mount("", root, "", uintptr(flags), ""); err != nil {
+		return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err)
+	}
+
+	// Bind mount every supported mount from the spec underneath root.
+	if err := setupMounts(spec.Mounts, root); err != nil {
+		return fmt.Errorf("setting up FS: %v", err)
+	}
+
+	// Create working directory if needed.
+	if spec.Process.Cwd != "" {
+		dst, err := resolveSymlinks(root, spec.Process.Cwd)
+		if err != nil {
+			return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err)
+		}
+		if err := os.MkdirAll(dst, 0755); err != nil {
+			return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err)
+		}
+	}
+
+	// Check if root needs to be remounted as readonly.
+	if spec.Root.Readonly {
+		// Root is a mount point at this stage (it was bind mounted above), so its
+		// mount options can be changed to make it read-only for extra safety. As
+		// with propagation, the remount must target root, not the bind source.
+		log.Infof("Remounting root as readonly: %q", root)
+		flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC)
+		if err := syscall.Mount(root, root, "bind", flags, ""); err != nil {
+			return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err)
+		}
+	}
+
+	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+		if err := pivotRoot("/proc"); err != nil {
+			return fmt.Errorf("changing the root file system: %v", err)
+		}
+		if err := os.Chdir("/"); err != nil {
+			return fmt.Errorf("changing working directory: %v", err)
+		}
+	}
+	return nil
+}
+
+// setupMounts bind mounts the eligible entries of mounts at their destinations
+// underneath root. A destination is first passed through resolveSymlinks so
+// that relative components and symlinks are interpreted relative to root.
+//
+// Entries whose type is not "bind", or for which specutils.IsSupportedDevMount
+// returns false, are skipped. Propagation options are applied with a second
+// mount call because they cannot be combined with the other options.
+//
+// NOTE(review): creation of missing destination directories is presumably
+// done inside specutils.Mount; confirm against that helper.
+func setupMounts(mounts []specs.Mount, root string) error {
+	for _, mnt := range mounts {
+		if !(mnt.Type == "bind" && specutils.IsSupportedDevMount(mnt)) {
+			continue
+		}
+
+		target, err := resolveSymlinks(root, mnt.Destination)
+		if err != nil {
+			return fmt.Errorf("resolving symlinks to %q: %v", mnt.Destination, err)
+		}
+
+		// First pass: the bind mount itself, with the regular mount options.
+		bindFlags := specutils.OptionsToFlags(mnt.Options) | syscall.MS_BIND
+		log.Infof("Mounting src: %q, dst: %q, flags: %#x", mnt.Source, target, bindFlags)
+		if err := specutils.Mount(mnt.Source, target, mnt.Type, bindFlags); err != nil {
+			return fmt.Errorf("mounting %v: %v", mnt, err)
+		}
+
+		// Second pass: propagation options, only when any were requested.
+		propFlags := specutils.PropOptionsToFlags(mnt.Options)
+		if propFlags == 0 {
+			continue
+		}
+		if err := syscall.Mount("", target, "", uintptr(propFlags), ""); err != nil {
+			return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", target, propFlags, err)
+		}
+	}
+	return nil
+}
+
+// resolveMounts returns a copy of mounts in which the destination of every
+// supported bind mount has been replaced by its symlink-free path, expressed
+// as an absolute path relative to root. All other entries are copied
+// unchanged, and the input slice itself is not modified.
+//
+// Note: the mount points must already exist when this is called. Otherwise
+// resolution may follow a symlink into a location that is later covered by
+// another mount and report the wrong destination. In short, make sure
+// setupMounts() has run first.
+func resolveMounts(mounts []specs.Mount, root string) ([]specs.Mount, error) {
+	resolved := make([]specs.Mount, 0, len(mounts))
+	for _, mnt := range mounts {
+		// mnt is a per-iteration copy, so it can be edited in place before being
+		// appended to the result.
+		if mnt.Type == "bind" && specutils.IsSupportedDevMount(mnt) {
+			abs, err := resolveSymlinks(root, mnt.Destination)
+			if err != nil {
+				return nil, fmt.Errorf("resolving symlinks to %q: %v", mnt.Destination, err)
+			}
+			rel, err := filepath.Rel(root, abs)
+			if err != nil {
+				panic(fmt.Sprintf("%q could not be made relative to %q: %v", abs, root, err))
+			}
+			mnt.Destination = filepath.Join("/", rel)
+		}
+		resolved = append(resolved, mnt)
+	}
+	return resolved, nil
+}
+
+// ResolveSymlinks walks 'rel' having 'root' as the root directory. If there are
+// symlinks, they are evaluated relative to 'root' to ensure the end result is
+// the same as if the process was running inside the container.
+func resolveSymlinks(root, rel string) (string, error) {
+ return resolveSymlinksImpl(root, root, rel, 255)
+}
+
+// resolveSymlinksImpl walks 'rel' one path component at a time, starting at
+// 'base', and returns the resulting path. The walk is confined to 'root':
+// a component that would step outside of it resets the walk to 'root'.
+//
+// Components that do not exist are appended as-is, since there is no symlink
+// to evaluate. A symlink is resolved by recursing on its target: absolute
+// targets restart from 'root', relative ones continue from the current base.
+//
+// followCount limits how deeply symlink resolution may nest; each recursive
+// call receives followCount-1 and an error is returned once it reaches zero.
+// Errors from Lstat (other than "not exist") and Readlink are returned as-is.
+func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
+	if followCount == 0 {
+		return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
+	}
+
+	rel = filepath.Clean(rel)
+	for _, name := range strings.Split(rel, string(filepath.Separator)) {
+		if name == "" {
+			// Produced by a leading separator (absolute path); nothing to walk.
+			continue
+		}
+		// Note that Join() resolves things like ".." and returns a clean path.
+		path := filepath.Join(base, name)
+		if !strings.HasPrefix(path, root) {
+			// One cannot '..' their way out of root: clamp the walk to root.
+			base = root
+			continue
+		}
+		fi, err := os.Lstat(path)
+		if err != nil {
+			if !os.IsNotExist(err) {
+				return "", err
+			}
+			// Not found means there is no symlink to check. Just keep walking dirs.
+			base = path
+			continue
+		}
+		if fi.Mode()&os.ModeSymlink != 0 {
+			link, err := os.Readlink(path)
+			if err != nil {
+				return "", err
+			}
+			if filepath.IsAbs(link) {
+				// Absolute targets are interpreted inside the container root.
+				base = root
+			}
+			base, err = resolveSymlinksImpl(root, base, link, followCount-1)
+			if err != nil {
+				return "", err
+			}
+			continue
+		}
+		base = path
+	}
+	return base, nil
+}