diff options
author | Fabricio Voznika <fvoznika@google.com> | 2019-03-18 12:29:43 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-03-18 12:30:43 -0700 |
commit | e420cc3e5d2066674d32d16ad885bee6b30da210 (patch) | |
tree | 991b119af7c8816a539318560338b3e5f065a2f8 /runsc/cmd | |
parent | eb69542807a87491fd4e6405bdab1c0f64db536d (diff) |
Add support for mount propagation
Properly handle propagation options for root and mounts. Now usage of
mount options shared, rshared, and noexec cause error to start. shared/
rshared breaks sandbox=>host isolation. slave however can be supported
because changes propagate from host to sandbox.
Root FS setup moved inside the gofer. Apart from simplifying the code,
it keeps all mounts inside the namespace. And they are torn down when
the namespace is destroyed (DestroyFS is no longer needed).
PiperOrigin-RevId: 239037661
Change-Id: I8b5ee4d50da33c042ea34fa68e56514ebe20e6e0
Diffstat (limited to 'runsc/cmd')
-rw-r--r-- | runsc/cmd/BUILD | 1 | ||||
-rw-r--r-- | runsc/cmd/boot.go | 16 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 279 | ||||
-rw-r--r-- | runsc/cmd/gofer_test.go | 164 |
4 files changed, 422 insertions, 38 deletions
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 9e2be0d37..dabf18c5f 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -60,6 +60,7 @@ go_test( "capability_test.go", "delete_test.go", "exec_test.go", + "gofer_test.go", ], data = [ "//runsc", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 3039b389f..ff2fa2fb9 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -76,6 +76,11 @@ type Boot struct { // startSyncFD is the file descriptor to synchronize runsc and sandbox. startSyncFD int + // mountsFD is the file descriptor to read list of mounts after they have + // been resolved (direct paths, no symlinks). They are resolved outside the + // sandbox (e.g. gofer) and sent through this FD. + mountsFD int + // pidns is set if the sanadbox is in its own pid namespace. pidns bool } @@ -111,6 +116,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) { f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container") f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup") + f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).") } // Execute implements subcommands.Command.Execute. It starts a sandbox in a @@ -191,6 +197,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) panic("setCapsAndCallSelf must never return success") } + // Read resolved mount list and replace the original one from the spec. + mountsFile := os.NewFile(uintptr(b.mountsFD), "mounts file") + cleanMounts, err := specutils.ReadMounts(mountsFile) + if err != nil { + mountsFile.Close() + Fatalf("Error reading mounts file: %v", err) + } + mountsFile.Close() + spec.Mounts = cleanMounts + // Create the loader. bootArgs := boot.Args{ ID: f.Arg(0), diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 6f9711518..e712244ef 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -16,7 +16,11 @@ package cmd import ( "context" + "encoding/json" + "fmt" "os" + "path/filepath" + "strings" "sync" "syscall" @@ -59,6 +63,7 @@ type Gofer struct { panicOnWrite bool specFD int + mountsFD int } // Name implements subcommands.Command. @@ -84,6 +89,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) { f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected") f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process") f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec") + f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).") } // Execute implements subcommands.Command. @@ -100,45 +106,13 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("reading spec: %v", err) } - // Find what path is going to be served by this gofer. - root := spec.Root.Path - conf := args[0].(*boot.Config) - if g.setUpRoot && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - // Convert all shared mounts into slave to be sure that nothing will be - // propagated outside of our namespace. - if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { - Fatalf("error converting mounts: %v", err) - } - - // FIXME: runsc can't be re-executed without - // /proc, so we create a tmpfs mount, mount ./proc and ./root - // there, then move this mount to the root and after - // setCapsAndCallSelf, runsc will chroot into /root. - // - // We need a directory to construct a new root and we know that - // runsc can't start without /proc, so we can use it for this. - flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC) - if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil { - Fatalf("error mounting tmpfs: %v", err) - } - os.Mkdir("/proc/proc", 0755) - os.Mkdir("/proc/root", 0755) - if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil { - Fatalf("error mounting proc: %v", err) - } - if err := syscall.Mount(root, "/proc/root", "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { - Fatalf("error mounting root: %v", err) - } - if err := pivotRoot("/proc"); err != nil { - Fatalf("faild to change the root file system: %v", err) - } - if err := os.Chdir("/"); err != nil { - Fatalf("failed to change working directory") + if g.setUpRoot { + if err := setupRootFS(spec, conf); err != nil { + Fatalf("Error setting up root FS: %v", err) } } - if g.applyCaps { // Disable caps when calling myself again. // Note: minimal argument handling for the default case to keep it simple. @@ -150,15 +124,34 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) panic("unreachable") } + // Find what path is going to be served by this gofer. + root := spec.Root.Path + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + root = "/root" + } + + // Resolve mount points paths, then replace mounts from our spec and send the + // mount list over to the sandbox, so they are both in sync. + // + // Note that all mount points have been mounted in the proper location in + // setupRootFS(). + cleanMounts, err := resolveMounts(spec.Mounts, root) + if err != nil { + Fatalf("Failure to resolve mounts: %v", err) + } + spec.Mounts = cleanMounts + go func() { + if err := g.writeMounts(cleanMounts); err != nil { + panic(fmt.Sprintf("Failed to write mounts: %v", err)) + } + }() + specutils.LogSpec(spec) // fsgofer should run with a umask of 0, because we want to preserve file // modes exactly as sent by the sandbox, which will have applied its own umask. syscall.Umask(0) - if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - root = "/root" - } if err := syscall.Chroot(root); err != nil { Fatalf("failed to chroot to %q: %v", root, err) } @@ -232,6 +225,25 @@ func runServers(ats []p9.Attacher, ioFDs []int) { log.Infof("All 9P servers exited.") } +func (g *Gofer) writeMounts(mounts []specs.Mount) error { + bytes, err := json.Marshal(mounts) + if err != nil { + return err + } + + f := os.NewFile(uintptr(g.mountsFD), "mounts file") + defer f.Close() + + for written := 0; written < len(bytes); { + w, err := f.Write(bytes[written:]) + if err != nil { + return err + } + written += w + } + return nil +} + func isReadonlyMount(opts []string) bool { for _, o := range opts { if o == "ro" { @@ -240,3 +252,194 @@ func isReadonlyMount(opts []string) bool { } return false } + +func setupRootFS(spec *specs.Spec, conf *boot.Config) error { + // Convert all shared mounts into slaves to be sure that nothing will be + // propagated outside of our namespace. + if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + Fatalf("error converting mounts: %v", err) + } + + root := spec.Root.Path + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + // FIXME: runsc can't be re-executed without + // /proc, so we create a tmpfs mount, mount ./proc and ./root + // there, then move this mount to the root and after + // setCapsAndCallSelf, runsc will chroot into /root. + // + // We need a directory to construct a new root and we know that + // runsc can't start without /proc, so we can use it for this. + flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC) + if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil { + Fatalf("error mounting tmpfs: %v", err) + } + + // Prepare tree structure for pivot_root(2). + os.Mkdir("/proc/proc", 0755) + os.Mkdir("/proc/root", 0755) + if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil { + Fatalf("error mounting proc: %v", err) + } + root = "/proc/root" + } + + // Mount root path followed by submounts. + if err := syscall.Mount(spec.Root.Path, root, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("mounting root on root (%q) err: %v", spec.Root.Path, err) + } + flags := uint32(syscall.MS_SLAVE | syscall.MS_REC) + if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { + flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation}) + } + if err := syscall.Mount("", spec.Root.Path, "", uintptr(flags), ""); err != nil { + return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", spec.Root.Path, flags, err) + } + + // Replace the current spec, with the clean spec with symlinks resolved. + if err := setupMounts(spec.Mounts, root); err != nil { + Fatalf("error setting up FS: %v", err) + } + + // Create working directory if needed. + if spec.Process.Cwd != "" { + dst, err := resolveSymlinks(root, spec.Process.Cwd) + if err != nil { + return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err) + } + if err := os.MkdirAll(dst, 0755); err != nil { + return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err) + } + } + + // Check if root needs to be remounted as readonly. + if spec.Root.Readonly { + // If root is a mount point but not read-only, we can change mount options + // to make it read-only for extra safety. + log.Infof("Remounting root as readonly: %q", spec.Root.Path) + flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) + if err := syscall.Mount(spec.Root.Path, spec.Root.Path, "bind", flags, ""); err != nil { + return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", spec.Root.Path, spec.Root.Path, flags, err) + } + } + + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + if err := pivotRoot("/proc"); err != nil { + Fatalf("faild to change the root file system: %v", err) + } + if err := os.Chdir("/"); err != nil { + Fatalf("failed to change working directory") + } + } + return nil +} + +// setupMounts binds mount all mounts specified in the spec in their correct +// location inside root. It will resolve relative paths and symlinks. It also +// creates directories as needed. +func setupMounts(mounts []specs.Mount, root string) error { + for _, m := range mounts { + if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + continue + } + + dst, err := resolveSymlinks(root, m.Destination) + if err != nil { + return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) + } + + flags := specutils.OptionsToFlags(m.Options) | syscall.MS_BIND + log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) + if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil { + return fmt.Errorf("mounting %v: %v", m, err) + } + + // Set propagation options that cannot be set together with other options. + flags = specutils.PropOptionsToFlags(m.Options) + if flags != 0 { + if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil { + return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err) + } + } + } + return nil +} + +// resolveMounts resolved relative paths and symlinks to mount points. +// +// Note: mount points must already be in place for resolution to work. +// Otherwise, it may follow symlinks to locations that would be overwritten +// with another mount point and return the wrong location. In short, make sure +// setupMounts() has been called before. +func resolveMounts(mounts []specs.Mount, root string) ([]specs.Mount, error) { + cleanMounts := make([]specs.Mount, 0, len(mounts)) + for _, m := range mounts { + if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + cleanMounts = append(cleanMounts, m) + continue + } + dst, err := resolveSymlinks(root, m.Destination) + if err != nil { + return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) + } + relDst, err := filepath.Rel(root, dst) + if err != nil { + panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err)) + } + cpy := m + cpy.Destination = filepath.Join("/", relDst) + cleanMounts = append(cleanMounts, cpy) + } + return cleanMounts, nil +} + +// ResolveSymlinks walks 'rel' having 'root' as the root directory. If there are +// symlinks, they are evaluated relative to 'root' to ensure the end result is +// the same as if the process was running inside the container. +func resolveSymlinks(root, rel string) (string, error) { + return resolveSymlinksImpl(root, root, rel, 255) +} + +func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) { + if followCount == 0 { + return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel)) + } + + rel = filepath.Clean(rel) + for _, name := range strings.Split(rel, string(filepath.Separator)) { + if name == "" { + continue + } + // Note that Join() resolves things like ".." and returns a clean path. + path := filepath.Join(base, name) + if !strings.HasPrefix(path, root) { + // One cannot '..' their way out of root. + path = root + continue + } + fi, err := os.Lstat(path) + if err != nil { + if !os.IsNotExist(err) { + return "", err + } + // Not found means there is no symlink to check. Just keep walking dirs. + base = path + continue + } + if fi.Mode()&os.ModeSymlink != 0 { + link, err := os.Readlink(path) + if err != nil { + return "", err + } + if filepath.IsAbs(link) { + base = root + } + base, err = resolveSymlinksImpl(root, base, link, followCount-1) + if err != nil { + return "", err + } + continue + } + base = path + } + return base, nil +} diff --git a/runsc/cmd/gofer_test.go b/runsc/cmd/gofer_test.go new file mode 100644 index 000000000..8e692feb9 --- /dev/null +++ b/runsc/cmd/gofer_test.go @@ -0,0 +1,164 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "path/filepath" + "testing" +) + +func tmpDir() string { + dir := os.Getenv("TEST_TMPDIR") + if dir == "" { + dir = "/tmp" + } + return dir +} + +type dir struct { + rel string + link string +} + +func construct(root string, dirs []dir) error { + for _, d := range dirs { + p := path.Join(root, d.rel) + if d.link == "" { + if err := os.MkdirAll(p, 0755); err != nil { + return fmt.Errorf("error creating dir: %v", err) + } + } else { + if err := os.MkdirAll(path.Dir(p), 0755); err != nil { + return fmt.Errorf("error creating dir: %v", err) + } + if err := os.Symlink(d.link, p); err != nil { + return fmt.Errorf("error creating symlink: %v", err) + } + } + } + return nil +} + +func TestResolveSymlinks(t *testing.T) { + root, err := ioutil.TempDir(tmpDir(), "root") + if err != nil { + t.Fatal("ioutil.TempDir() failed:", err) + } + dirs := []dir{ + {"dir1/dir11/dir111/dir1111", ""}, // Just a boring dir + {"dir1/lnk12", "dir11"}, // Link to sibling + {"dir1/lnk13", "./dir11"}, // Link to sibling through self + {"dir1/lnk14", "../dir1/dir11"}, // Link to sibling through parent + {"dir1/dir15/lnk151", ".."}, // Link to parent + {"dir1/lnk16", "dir11/dir111"}, // Link to child + {"dir1/lnk17", "."}, // Link to self + {"dir1/lnk18", "lnk13"}, // Link to link + {"lnk2", "dir1/lnk13"}, // Link to link to link + {"dir3/dir21/lnk211", "../.."}, // Link to root relative + {"dir3/lnk22", "/"}, // Link to root absolute + {"dir3/lnk23", "/dir1"}, // Link to dir absolute + {"dir3/lnk24", "/dir1/lnk12"}, // Link to link absolute + {"lnk5", "../../.."}, // Link outside root + } + if err := construct(root, dirs); err != nil { + t.Fatal("construct failed:", err) + } + + tests := []struct { + name string + rel string + want string + compareHost bool + }{ + {name: "root", rel: "/", want: "/", compareHost: true}, + {name: "basic dir", rel: "/dir1/dir11/dir111", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "dot 1", rel: "/dir1/dir11/./dir111", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "dot 2", rel: "/dir1/././dir11/./././././dir111/.", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "dotdot 1", rel: "/dir1/dir11/../dir15", want: "/dir1/dir15", compareHost: true}, + {name: "dotdot 2", rel: "/dir1/dir11/dir1111/../..", want: "/dir1", compareHost: true}, + + {name: "link sibling", rel: "/dir1/lnk12", want: "/dir1/dir11", compareHost: true}, + {name: "link sibling + dir", rel: "/dir1/lnk12/dir111", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "link sibling through self", rel: "/dir1/lnk13", want: "/dir1/dir11", compareHost: true}, + {name: "link sibling through parent", rel: "/dir1/lnk14", want: "/dir1/dir11", compareHost: true}, + + {name: "link parent", rel: "/dir1/dir15/lnk151", want: "/dir1", compareHost: true}, + {name: "link parent + dir", rel: "/dir1/dir15/lnk151/dir11", want: "/dir1/dir11", compareHost: true}, + {name: "link child", rel: "/dir1/lnk16", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "link child + dir", rel: "/dir1/lnk16/dir1111", want: "/dir1/dir11/dir111/dir1111", compareHost: true}, + {name: "link self", rel: "/dir1/lnk17", want: "/dir1", compareHost: true}, + {name: "link self + dir", rel: "/dir1/lnk17/dir11", want: "/dir1/dir11", compareHost: true}, + + {name: "link^2", rel: "/dir1/lnk18", want: "/dir1/dir11", compareHost: true}, + {name: "link^2 + dir", rel: "/dir1/lnk18/dir111", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "link^3", rel: "/lnk2", want: "/dir1/dir11", compareHost: true}, + {name: "link^3 + dir", rel: "/lnk2/dir111", want: "/dir1/dir11/dir111", compareHost: true}, + + {name: "link abs", rel: "/dir3/lnk23", want: "/dir1"}, + {name: "link abs + dir", rel: "/dir3/lnk23/dir11", want: "/dir1/dir11"}, + {name: "link^2 abs", rel: "/dir3/lnk24", want: "/dir1/dir11"}, + {name: "link^2 abs + dir", rel: "/dir3/lnk24/dir111", want: "/dir1/dir11/dir111"}, + + {name: "root link rel", rel: "/dir3/dir21/lnk211", want: "/", compareHost: true}, + {name: "root link abs", rel: "/dir3/lnk22", want: "/"}, + {name: "root contain link", rel: "/lnk5/dir1", want: "/dir1"}, + {name: "root contain dotdot", rel: "/dir1/dir11/../../../../../../../..", want: "/"}, + + {name: "crazy", rel: "/dir3/dir21/lnk211/dir3/lnk22/dir1/dir11/../../lnk5/dir3/../dir3/lnk24/dir111/dir1111/..", want: "/dir1/dir11/dir111"}, + } + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + got, err := resolveSymlinks(root, tst.rel) + if err != nil { + t.Errorf("resolveSymlinks(root, %q) failed: %v", tst.rel, err) + } + want := path.Join(root, tst.want) + if got != want { + t.Errorf("resolveSymlinks(root, %q) got: %q, want: %q", tst.rel, got, want) + } + if tst.compareHost { + // Check that host got to the same end result. + host, err := filepath.EvalSymlinks(path.Join(root, tst.rel)) + if err != nil { + t.Errorf("path.EvalSymlinks(root, %q) failed: %v", tst.rel, err) + } + if host != got { + t.Errorf("resolveSymlinks(root, %q) got: %q, want: %q", tst.rel, host, got) + } + } + }) + } +} + +func TestResolveSymlinksLoop(t *testing.T) { + root, err := ioutil.TempDir(tmpDir(), "root") + if err != nil { + t.Fatal("ioutil.TempDir() failed:", err) + } + dirs := []dir{ + {"loop1", "loop2"}, + {"loop2", "loop1"}, + } + if err := construct(root, dirs); err != nil { + t.Fatal("construct failed:", err) + } + if _, err := resolveSymlinks(root, "loop1"); err == nil { + t.Errorf("resolveSymlinks() should have failed") + } +} |