summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/cmd/BUILD1
-rw-r--r--runsc/cmd/boot.go16
-rw-r--r--runsc/cmd/gofer.go279
-rw-r--r--runsc/cmd/gofer_test.go (renamed from runsc/container/fs_test.go)16
-rw-r--r--runsc/container/BUILD2
-rw-r--r--runsc/container/container.go75
-rw-r--r--runsc/container/container_test.go165
-rw-r--r--runsc/container/fs.go287
-rw-r--r--runsc/sandbox/sandbox.go23
-rw-r--r--runsc/specutils/BUILD1
-rw-r--r--runsc/specutils/fs.go139
-rw-r--r--runsc/specutils/namespace.go16
-rw-r--r--runsc/specutils/specutils.go52
-rw-r--r--runsc/specutils/specutils_test.go31
14 files changed, 681 insertions, 422 deletions
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index 9e2be0d37..dabf18c5f 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -60,6 +60,7 @@ go_test(
"capability_test.go",
"delete_test.go",
"exec_test.go",
+ "gofer_test.go",
],
data = [
"//runsc",
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 3039b389f..ff2fa2fb9 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -76,6 +76,11 @@ type Boot struct {
// startSyncFD is the file descriptor to synchronize runsc and sandbox.
startSyncFD int
+ // mountsFD is the file descriptor to read list of mounts after they have
+ // been resolved (direct paths, no symlinks). They are resolved outside the
+ // sandbox (e.g. gofer) and sent through this FD.
+ mountsFD int
+
// pidns is set if the sanadbox is in its own pid namespace.
pidns bool
}
@@ -111,6 +116,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container")
f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
+ f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
}
// Execute implements subcommands.Command.Execute. It starts a sandbox in a
@@ -191,6 +197,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
panic("setCapsAndCallSelf must never return success")
}
+ // Read resolved mount list and replace the original one from the spec.
+ mountsFile := os.NewFile(uintptr(b.mountsFD), "mounts file")
+ cleanMounts, err := specutils.ReadMounts(mountsFile)
+ if err != nil {
+ mountsFile.Close()
+ Fatalf("Error reading mounts file: %v", err)
+ }
+ mountsFile.Close()
+ spec.Mounts = cleanMounts
+
// Create the loader.
bootArgs := boot.Args{
ID: f.Arg(0),
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 6f9711518..e712244ef 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -16,7 +16,11 @@ package cmd
import (
"context"
+ "encoding/json"
+ "fmt"
"os"
+ "path/filepath"
+ "strings"
"sync"
"syscall"
@@ -59,6 +63,7 @@ type Gofer struct {
panicOnWrite bool
specFD int
+ mountsFD int
}
// Name implements subcommands.Command.
@@ -84,6 +89,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) {
f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected")
f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process")
f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
+ f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).")
}
// Execute implements subcommands.Command.
@@ -100,45 +106,13 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("reading spec: %v", err)
}
- // Find what path is going to be served by this gofer.
- root := spec.Root.Path
-
conf := args[0].(*boot.Config)
- if g.setUpRoot && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
- // Convert all shared mounts into slave to be sure that nothing will be
- // propagated outside of our namespace.
- if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
- Fatalf("error converting mounts: %v", err)
- }
-
- // FIXME: runsc can't be re-executed without
- // /proc, so we create a tmpfs mount, mount ./proc and ./root
- // there, then move this mount to the root and after
- // setCapsAndCallSelf, runsc will chroot into /root.
- //
- // We need a directory to construct a new root and we know that
- // runsc can't start without /proc, so we can use it for this.
- flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC)
- if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil {
- Fatalf("error mounting tmpfs: %v", err)
- }
- os.Mkdir("/proc/proc", 0755)
- os.Mkdir("/proc/root", 0755)
- if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil {
- Fatalf("error mounting proc: %v", err)
- }
- if err := syscall.Mount(root, "/proc/root", "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
- Fatalf("error mounting root: %v", err)
- }
- if err := pivotRoot("/proc"); err != nil {
- Fatalf("faild to change the root file system: %v", err)
- }
- if err := os.Chdir("/"); err != nil {
- Fatalf("failed to change working directory")
+ if g.setUpRoot {
+ if err := setupRootFS(spec, conf); err != nil {
+ Fatalf("Error setting up root FS: %v", err)
}
}
-
if g.applyCaps {
// Disable caps when calling myself again.
// Note: minimal argument handling for the default case to keep it simple.
@@ -150,15 +124,34 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
panic("unreachable")
}
+ // Find what path is going to be served by this gofer.
+ root := spec.Root.Path
+ if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+ root = "/root"
+ }
+
+ // Resolve mount points paths, then replace mounts from our spec and send the
+ // mount list over to the sandbox, so they are both in sync.
+ //
+ // Note that all mount points have been mounted in the proper location in
+ // setupRootFS().
+ cleanMounts, err := resolveMounts(spec.Mounts, root)
+ if err != nil {
+ Fatalf("Failure to resolve mounts: %v", err)
+ }
+ spec.Mounts = cleanMounts
+ go func() {
+ if err := g.writeMounts(cleanMounts); err != nil {
+ panic(fmt.Sprintf("Failed to write mounts: %v", err))
+ }
+ }()
+
specutils.LogSpec(spec)
// fsgofer should run with a umask of 0, because we want to preserve file
// modes exactly as sent by the sandbox, which will have applied its own umask.
syscall.Umask(0)
- if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
- root = "/root"
- }
if err := syscall.Chroot(root); err != nil {
Fatalf("failed to chroot to %q: %v", root, err)
}
@@ -232,6 +225,25 @@ func runServers(ats []p9.Attacher, ioFDs []int) {
log.Infof("All 9P servers exited.")
}
+// writeMounts serializes 'mounts' as JSON, writes the result to the file
+// descriptor g.mountsFD and closes it. Marshal, write and close errors are returned.
+func (g *Gofer) writeMounts(mounts []specs.Mount) error {
+	data, err := json.Marshal(mounts)
+	if err != nil {
+		return err
+	}
+
+	f := os.NewFile(uintptr(g.mountsFD), "mounts file")
+	// os.File.Write returns a non-nil error whenever fewer than len(data) bytes
+	// were written, so a manual short-write loop is not needed.
+	if _, err := f.Write(data); err != nil {
+		f.Close()
+		return err
+	}
+	// Surface close failures instead of silently dropping them.
+	return f.Close()
+}
+
func isReadonlyMount(opts []string) bool {
for _, o := range opts {
if o == "ro" {
@@ -240,3 +252,194 @@ func isReadonlyMount(opts []string) bool {
}
return false
}
+
+// setupRootFS bind mounts spec.Root.Path and the spec's bind mounts into place and,
+// unless the test-only no-chroot option is set, pivots the root to a tmpfs on /proc.
+func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
+	// Convert all shared mounts into slaves to be sure that nothing will be
+	// propagated outside of our namespace.
+	if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
+		Fatalf("error converting mounts: %v", err)
+	}
+
+	root := spec.Root.Path
+	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+		// FIXME: runsc can't be re-executed without /proc, so we create a tmpfs
+		// mount, mount ./proc and ./root there, then move this mount to the root
+		// and after setCapsAndCallSelf, runsc will chroot into /root. We need a
+		// directory to construct a new root and we know that runsc can't start
+		// without /proc, so we can use it for this.
+		flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC)
+		if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil {
+			Fatalf("error mounting tmpfs: %v", err)
+		}
+
+		// Prepare tree structure for pivot_root(2).
+		os.Mkdir("/proc/proc", 0755)
+		os.Mkdir("/proc/root", 0755)
+		if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil {
+			Fatalf("error mounting proc: %v", err)
+		}
+		root = "/proc/root"
+	}
+
+	// Mount root path followed by submounts.
+	if err := syscall.Mount(spec.Root.Path, root, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
+		return fmt.Errorf("mounting root on root (%q) err: %v", spec.Root.Path, err)
+	}
+	flags := uint32(syscall.MS_SLAVE | syscall.MS_REC)
+	if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
+		flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation})
+	}
+	if err := syscall.Mount("", root, "", uintptr(flags), ""); err != nil {
+		return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err)
+	}
+
+	// Bind mount the spec's mounts under root, resolving symlinks in destinations.
+	if err := setupMounts(spec.Mounts, root); err != nil {
+		Fatalf("error setting up FS: %v", err)
+	}
+
+	// Create working directory if needed.
+	if spec.Process.Cwd != "" {
+		dst, err := resolveSymlinks(root, spec.Process.Cwd)
+		if err != nil {
+			return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err)
+		}
+		if err := os.MkdirAll(dst, 0755); err != nil {
+			return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err)
+		}
+	}
+
+	// Check if root needs to be remounted as readonly.
+	if spec.Root.Readonly {
+		// Remount the bind mount created above (at 'root'), not its source path,
+		// so the read-only flag applies to the tree the gofer actually serves.
+		log.Infof("Remounting root as readonly: %q", root)
+		flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC)
+		if err := syscall.Mount(root, root, "bind", flags, ""); err != nil {
+			return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err)
+		}
+	}
+
+	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+		if err := pivotRoot("/proc"); err != nil {
+			Fatalf("failed to change the root file system: %v", err)
+		}
+		if err := os.Chdir("/"); err != nil {
+			Fatalf("failed to change working directory: %v", err)
+		}
+	}
+	return nil
+}
+
+// setupMounts bind mounts all supported "bind" mounts from the spec at their
+// destination inside root, resolving relative paths and symlinks first. It is
+// also expected to create directories as needed (via specutils.Mount — TODO confirm).
+func setupMounts(mounts []specs.Mount, root string) error {
+ for _, m := range mounts {
+ if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
+ continue
+ }
+
+ dst, err := resolveSymlinks(root, m.Destination)
+ if err != nil {
+ return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
+ }
+
+ flags := specutils.OptionsToFlags(m.Options) | syscall.MS_BIND
+ log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags)
+ if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil {
+ return fmt.Errorf("mounting %v: %v", m, err)
+ }
+
+ // Propagation options cannot be combined with other options, so apply them in a second mount call.
+ flags = specutils.PropOptionsToFlags(m.Options)
+ if flags != 0 {
+ if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil {
+ return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err)
+ }
+ }
+ }
+ return nil
+}
+
+// resolveMounts resolves relative paths and symlinks to mount points; mounts
+// that setupMounts() skips are returned unchanged.
+//
+// Note: mount points must already be in place for resolution to work. Otherwise,
+// it may follow symlinks to locations that would be overwritten with another mount
+// point and return the wrong location. In short, call setupMounts() first.
+func resolveMounts(mounts []specs.Mount, root string) ([]specs.Mount, error) {
+ cleanMounts := make([]specs.Mount, 0, len(mounts))
+ for _, m := range mounts {
+ if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
+ cleanMounts = append(cleanMounts, m)
+ continue
+ }
+ dst, err := resolveSymlinks(root, m.Destination)
+ if err != nil {
+ return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
+ }
+ relDst, err := filepath.Rel(root, dst)
+ if err != nil {
+ panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err))
+ }
+ cpy := m
+ cpy.Destination = filepath.Join("/", relDst)
+ cleanMounts = append(cleanMounts, cpy)
+ }
+ return cleanMounts, nil
+}
+
+// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are
+// symlinks, they are evaluated relative to 'root' to ensure the end result is the
+// same as inside the container. Symlink chains nested deeper than 255 levels fail.
+func resolveSymlinks(root, rel string) (string, error) {
+ return resolveSymlinksImpl(root, root, rel, 255)
+}
+
+// resolveSymlinksImpl resolves 'rel' starting at 'base', never leaving 'root'; 'followCount' bounds symlink nesting.
+func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
+	if followCount == 0 {
+		return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
+	}
+
+	for _, name := range strings.Split(filepath.Clean(rel), string(filepath.Separator)) {
+		if name == "" {
+			continue
+		}
+		// Note that Join() resolves things like ".." and returns a clean path.
+		path := filepath.Join(base, name)
+		if !strings.HasPrefix(path, root) {
+			// One cannot '..' their way out of root: restart the walk at root.
+			base = root
+			continue
+		}
+		fi, err := os.Lstat(path)
+		if err != nil {
+			if !os.IsNotExist(err) {
+				return "", err
+			}
+			// Not found means there is no symlink to check. Just keep walking dirs.
+			base = path
+			continue
+		}
+		if fi.Mode()&os.ModeSymlink != 0 {
+			link, err := os.Readlink(path)
+			if err != nil {
+				return "", err
+			}
+			if filepath.IsAbs(link) {
+				base = root
+			}
+			base, err = resolveSymlinksImpl(root, base, link, followCount-1)
+			if err != nil {
+				return "", err
+			}
+			continue
+		}
+		base = path
+	}
+	return base, nil
+}
diff --git a/runsc/container/fs_test.go b/runsc/cmd/gofer_test.go
index 87cdb078e..8e692feb9 100644
--- a/runsc/container/fs_test.go
+++ b/runsc/cmd/gofer_test.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package container
+package cmd
import (
"fmt"
@@ -21,10 +21,16 @@ import (
"path"
"path/filepath"
"testing"
-
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
)
+// tmpDir returns $TEST_TMPDIR, falling back to "/tmp" when it is unset or empty.
+func tmpDir() string {
+	if fromEnv := os.Getenv("TEST_TMPDIR"); fromEnv != "" {
+		return fromEnv
+	}
+	return "/tmp"
+}
+
type dir struct {
rel string
link string
@@ -50,7 +56,7 @@ func construct(root string, dirs []dir) error {
}
func TestResolveSymlinks(t *testing.T) {
- root, err := ioutil.TempDir(testutil.TmpDir(), "root")
+ root, err := ioutil.TempDir(tmpDir(), "root")
if err != nil {
t.Fatal("ioutil.TempDir() failed:", err)
}
@@ -141,7 +147,7 @@ func TestResolveSymlinks(t *testing.T) {
}
func TestResolveSymlinksLoop(t *testing.T) {
- root, err := ioutil.TempDir(testutil.TmpDir(), "root")
+ root, err := ioutil.TempDir(tmpDir(), "root")
if err != nil {
t.Fatal("ioutil.TempDir() failed:", err)
}
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 3b25ff79a..2936b7cdf 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -6,7 +6,6 @@ go_library(
name = "container",
srcs = [
"container.go",
- "fs.go",
"hook.go",
"status.go",
],
@@ -34,7 +33,6 @@ go_test(
srcs = [
"console_test.go",
"container_test.go",
- "fs_test.go",
"multi_container_test.go",
"shared_volume_test.go",
],
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 6f092a5ce..fdcf8d7b7 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -281,18 +281,6 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
if specutils.ShouldCreateSandbox(spec) {
log.Debugf("Creating new sandbox for container %q", id)
- // Setup rootfs and mounts. It returns a new mount list with destination
- // paths resolved. Since the spec for the root container is read from disk,
- // Write the new spec to a new file that will be used by the sandbox.
- cleanMounts, err := setupFS(spec, conf, bundleDir)
- if err != nil {
- return nil, fmt.Errorf("setup mounts: %v", err)
- }
- spec.Mounts = cleanMounts
- if err := specutils.WriteCleanSpec(bundleDir, spec); err != nil {
- return nil, fmt.Errorf("writing clean spec: %v", err)
- }
-
// Create and join cgroup before processes are created to ensure they are
// part of the cgroup from the start (and all tneir children processes).
cg, err := cgroup.New(spec)
@@ -306,14 +294,14 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
}
}
if err := runInCgroup(cg, func() error {
- ioFiles, err := c.createGoferProcess(spec, conf, bundleDir)
+ ioFiles, specFile, err := c.createGoferProcess(spec, conf, bundleDir)
if err != nil {
return err
}
// Start a new sandbox for this container. Any errors after this point
// must destroy the container.
- c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, cg)
+ c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, cg)
return err
}); err != nil {
return nil, err
@@ -387,26 +375,22 @@ func (c *Container) Start(conf *boot.Config) error {
return err
}
} else {
- // Setup rootfs and mounts. It returns a new mount list with destination
- // paths resolved. Replace the original spec with new mount list and start
- // container.
- cleanMounts, err := setupFS(c.Spec, conf, c.BundleDir)
- if err != nil {
- return fmt.Errorf("setup mounts: %v", err)
- }
- c.Spec.Mounts = cleanMounts
- if err := specutils.WriteCleanSpec(c.BundleDir, c.Spec); err != nil {
- return fmt.Errorf("writing clean spec: %v", err)
- }
-
// Join cgroup to strt gofer process to ensure it's part of the cgroup from
// the start (and all tneir children processes).
if err := runInCgroup(c.Sandbox.Cgroup, func() error {
// Create the gofer process.
- ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir)
+ ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir)
if err != nil {
return err
}
+ defer mountsFile.Close()
+
+ cleanMounts, err := specutils.ReadMounts(mountsFile)
+ if err != nil {
+ return fmt.Errorf("reading mounts file: %v", err)
+ }
+ c.Spec.Mounts = cleanMounts
+
return c.Sandbox.StartContainer(c.Spec, conf, c.ID, ioFiles)
}); err != nil {
return err
@@ -665,12 +649,6 @@ func (c *Container) Destroy() error {
errs = append(errs, err.Error())
}
- if err := destroyFS(c.Spec); err != nil {
- err = fmt.Errorf("destroying container fs: %v", err)
- log.Warningf("%v", err)
- errs = append(errs, err.Error())
- }
-
if err := os.RemoveAll(c.Root); err != nil && !os.IsNotExist(err) {
err = fmt.Errorf("deleting container root directory %q: %v", c.Root, err)
log.Warningf("%v", err)
@@ -787,7 +765,7 @@ func (c *Container) waitForStopped() error {
return backoff.Retry(op, b)
}
-func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, error) {
+func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, *os.File, error) {
// Start with the general config flags.
args := conf.ToFlags()
@@ -800,7 +778,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
if conf.LogFilename != "" {
logFile, err := os.OpenFile(conf.LogFilename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
- return nil, fmt.Errorf("opening log file %q: %v", conf.LogFilename, err)
+ return nil, nil, fmt.Errorf("opening log file %q: %v", conf.LogFilename, err)
}
defer logFile.Close()
goferEnds = append(goferEnds, logFile)
@@ -811,7 +789,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
if conf.DebugLog != "" {
debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "gofer")
if err != nil {
- return nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err)
+ return nil, nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err)
}
defer debugLogFile.Close()
goferEnds = append(goferEnds, debugLogFile)
@@ -825,30 +803,39 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
}
// Open the spec file to donate to the sandbox.
- specFile, err := specutils.OpenCleanSpec(bundleDir)
+ specFile, err := specutils.OpenSpec(bundleDir)
if err != nil {
- return nil, fmt.Errorf("opening spec file: %v", err)
+ return nil, nil, fmt.Errorf("opening spec file: %v", err)
}
defer specFile.Close()
goferEnds = append(goferEnds, specFile)
args = append(args, "--spec-fd="+strconv.Itoa(nextFD))
nextFD++
+ // Create pipe that allows gofer to send mount list to sandbox after all paths
+ // have been resolved.
+ mountsSand, mountsGofer, err := os.Pipe()
+ if err != nil {
+ return nil, nil, err
+ }
+ defer mountsGofer.Close()
+ goferEnds = append(goferEnds, mountsGofer)
+ args = append(args, fmt.Sprintf("--mounts-fd=%d", nextFD))
+ nextFD++
+
// Add root mount and then add any other additional mounts.
mountCount := 1
-
- // Add additional mounts.
for _, m := range spec.Mounts {
if specutils.Is9PMount(m) {
mountCount++
}
}
- sandEnds := make([]*os.File, 0, mountCount)
+ sandEnds := make([]*os.File, 0, mountCount)
for i := 0; i < mountCount; i++ {
fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
if err != nil {
- return nil, err
+ return nil, nil, err
}
sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox IO FD"))
@@ -884,12 +871,12 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
// Start the gofer in the given namespace.
log.Debugf("Starting gofer: %s %v", binPath, args)
if err := specutils.StartInNS(cmd, nss); err != nil {
- return nil, err
+ return nil, nil, err
}
log.Infof("Gofer started, PID: %d", cmd.Process.Pid)
c.GoferPid = cmd.Process.Pid
c.goferIsChild = true
- return sandEnds, nil
+ return sandEnds, mountsSand, nil
}
// changeStatus transitions from one status to another ensuring that the
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 06a25de6d..f17155175 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -1594,6 +1594,171 @@ func TestCreateWorkingDir(t *testing.T) {
}
}
+// TestMountPropagation verifies that mount propagates to slave but not to
+// private mounts.
+func TestMountPropagation(t *testing.T) {
+	// Setup dir structure:
+	//   - src: is mounted as shared and is used as source for both private and
+	//     slave mounts
+	//   - dir: will be bind mounted inside src and should propagate to slave
+	tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "mount")
+	if err != nil {
+		t.Fatalf("ioutil.TempDir() failed: %v", err)
+	}
+	src := filepath.Join(tmpDir, "src")
+	srcMnt := filepath.Join(src, "mnt")
+	dir := filepath.Join(tmpDir, "dir")
+	for _, path := range []string{src, srcMnt, dir} {
+		if err := os.MkdirAll(path, 0777); err != nil {
+			t.Fatalf("MkdirAll(%q): %v", path, err)
+		}
+	}
+	dirFile := filepath.Join(dir, "file")
+	f, err := os.Create(dirFile)
+	if err != nil {
+		t.Fatalf("os.Create(%q): %v", dirFile, err)
+	}
+	f.Close()
+
+	// Setup src as a shared mount.
+	if err := syscall.Mount(src, src, "bind", syscall.MS_BIND, ""); err != nil {
+		t.Fatalf("mount(%q, %q, MS_BIND): %v", src, src, err)
+	}
+	if err := syscall.Mount("", src, "", syscall.MS_SHARED, ""); err != nil {
+		t.Fatalf("mount(%q, MS_SHARED): %v", src, err)
+	}
+
+	spec := testutil.NewSpecWithArgs("sleep", "1000")
+
+	priv := filepath.Join(tmpDir, "priv")
+	slave := filepath.Join(tmpDir, "slave")
+	spec.Mounts = []specs.Mount{
+		{
+			Source:      src,
+			Destination: priv,
+			Type:        "bind",
+			Options:     []string{"private"},
+		},
+		{
+			Source:      src,
+			Destination: slave,
+			Type:        "bind",
+			Options:     []string{"slave"},
+		},
+	}
+
+	conf := testutil.TestConfig()
+	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+	defer os.RemoveAll(bundleDir)
+
+	cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+	if err != nil {
+		t.Fatalf("creating container: %v", err)
+	}
+	defer cont.Destroy()
+
+	if err := cont.Start(conf); err != nil {
+		t.Fatalf("starting container: %v", err)
+	}
+
+	// After the container is started, mount dir inside source and check what
+	// happens to both destinations.
+	if err := syscall.Mount(dir, srcMnt, "bind", syscall.MS_BIND, ""); err != nil {
+		t.Fatalf("mount(%q, %q, MS_BIND): %v", dir, srcMnt, err)
+	}
+
+	// Check that mount didn't propagate to private mount.
+	privFile := filepath.Join(priv, "mnt", "file")
+	args := &control.ExecArgs{
+		Filename: "/usr/bin/test",
+		Argv:     []string{"test", "!", "-f", privFile},
+	}
+	if ws, err := cont.executeSync(args); err != nil || ws != 0 {
+		t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err)
+	}
+
+	// Check that mount propagated to slave mount.
+	slaveFile := filepath.Join(slave, "mnt", "file")
+	args = &control.ExecArgs{
+		Filename: "/usr/bin/test",
+		Argv:     []string{"test", "-f", slaveFile},
+	}
+	if ws, err := cont.executeSync(args); err != nil || ws != 0 {
+		t.Fatalf("exec: test -f %q, ws: %v, err: %v", slaveFile, ws, err)
+	}
+}
+
+// TestMountSymlink verifies that a bind mount whose destination is a symlink is mounted at the symlink's target.
+func TestMountSymlink(t *testing.T) {
+	for _, conf := range configs(overlay) {
+		t.Logf("Running test with conf: %+v", conf)
+
+		dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
+		if err != nil {
+			t.Fatalf("ioutil.TempDir() failed: %v", err)
+		}
+
+		source := path.Join(dir, "source")
+		target := path.Join(dir, "target")
+		// Named 'p' so the loop variable does not shadow the imported "path" package.
+		for _, p := range []string{source, target} {
+			if err := os.MkdirAll(p, 0777); err != nil {
+				t.Fatalf("os.MkdirAll(): %v", err)
+			}
+		}
+		f, err := os.Create(path.Join(source, "file"))
+		if err != nil {
+			t.Fatalf("os.Create(): %v", err)
+		}
+		f.Close()
+
+		link := path.Join(dir, "link")
+		if err := os.Symlink(target, link); err != nil {
+			t.Fatalf("os.Symlink(%q, %q): %v", target, link, err)
+		}
+
+		spec := testutil.NewSpecWithArgs("/bin/sleep", "1000")
+		// Mount to a symlink to ensure the mount code will follow it and mount
+		// at the symlink target.
+		spec.Mounts = append(spec.Mounts, specs.Mount{
+			Type:        "bind",
+			Destination: link,
+			Source:      source,
+		})
+
+		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+		if err != nil {
+			t.Fatalf("error setting up container: %v", err)
+		}
+		defer os.RemoveAll(rootDir)
+		defer os.RemoveAll(bundleDir)
+
+		cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+		if err != nil {
+			t.Fatalf("creating container: %v", err)
+		}
+		defer cont.Destroy()
+
+		if err := cont.Start(conf); err != nil {
+			t.Fatalf("starting container: %v", err)
+		}
+
+		// Check that symlink was resolved and mount was created where the symlink
+		// is pointing to.
+		file := path.Join(target, "file")
+		args := &control.ExecArgs{
+			Filename: "/usr/bin/test",
+			Argv:     []string{"test", "-f", file},
+		}
+		if ws, err := cont.executeSync(args); err != nil || ws != 0 {
+			t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
+		}
+	}
+}
+
// executeSync synchronously executes a new process.
func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) {
pid, err := cont.Execute(args)
diff --git a/runsc/container/fs.go b/runsc/container/fs.go
deleted file mode 100644
index 998160487..000000000
--- a/runsc/container/fs.go
+++ /dev/null
@@ -1,287 +0,0 @@
-// Copyright 2018 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package container
-
-import (
- "bufio"
- "fmt"
- "os"
- "path/filepath"
- "strings"
- "syscall"
-
- specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
-)
-
-type mapping struct {
- set bool
- val uint32
-}
-
-var optionsMap = map[string]mapping{
- "acl": {set: true, val: syscall.MS_POSIXACL},
- "async": {set: false, val: syscall.MS_SYNCHRONOUS},
- "atime": {set: false, val: syscall.MS_NOATIME},
- "bind": {set: true, val: syscall.MS_BIND},
- "defaults": {set: true, val: 0},
- "dev": {set: false, val: syscall.MS_NODEV},
- "diratime": {set: false, val: syscall.MS_NODIRATIME},
- "dirsync": {set: true, val: syscall.MS_DIRSYNC},
- "exec": {set: false, val: syscall.MS_NOEXEC},
- "iversion": {set: true, val: syscall.MS_I_VERSION},
- "loud": {set: false, val: syscall.MS_SILENT},
- "mand": {set: true, val: syscall.MS_MANDLOCK},
- "noacl": {set: false, val: syscall.MS_POSIXACL},
- "noatime": {set: true, val: syscall.MS_NOATIME},
- "nodev": {set: true, val: syscall.MS_NODEV},
- "nodiratime": {set: true, val: syscall.MS_NODIRATIME},
- "noexec": {set: true, val: syscall.MS_NOEXEC},
- "noiversion": {set: false, val: syscall.MS_I_VERSION},
- "nomand": {set: false, val: syscall.MS_MANDLOCK},
- "norelatime": {set: false, val: syscall.MS_RELATIME},
- "nostrictatime": {set: false, val: syscall.MS_STRICTATIME},
- "nosuid": {set: true, val: syscall.MS_NOSUID},
- "private": {set: true, val: syscall.MS_PRIVATE},
- "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC},
- "relatime": {set: true, val: syscall.MS_RELATIME},
- "remount": {set: true, val: syscall.MS_REMOUNT},
- "ro": {set: true, val: syscall.MS_RDONLY},
- "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC},
- "rw": {set: false, val: syscall.MS_RDONLY},
- "silent": {set: true, val: syscall.MS_SILENT},
- "strictatime": {set: true, val: syscall.MS_STRICTATIME},
- "suid": {set: false, val: syscall.MS_NOSUID},
- "sync": {set: true, val: syscall.MS_SYNCHRONOUS},
-}
-
-// setupFS creates the container directory structure under 'spec.Root.Path'.
-// This allows the gofer serving the containers to be chroot under this
-// directory to create an extra layer to security in case the gofer gets
-// compromised.
-// Returns list of mounts equivalent to 'spec.Mounts' with all destination paths
-// cleaned and with symlinks resolved.
-func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]specs.Mount, error) {
- rv := make([]specs.Mount, 0, len(spec.Mounts))
- for _, m := range spec.Mounts {
- if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
- rv = append(rv, m)
- continue
- }
-
- // It's possible that 'm.Destination' follows symlinks inside the
- // container.
- dst, err := resolveSymlinks(spec.Root.Path, m.Destination)
- if err != nil {
- return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
- }
-
- flags := optionsToFlags(m.Options)
- flags |= syscall.MS_BIND
- log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags)
- if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil {
- return nil, fmt.Errorf("mounting %v: %v", m, err)
- }
-
- // Make the mount a slave, so that for recursive bind mount, umount won't
- // propagate to the source.
- flags = syscall.MS_SLAVE | syscall.MS_REC
- if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil {
- return nil, fmt.Errorf("mount rslave dst: %q, flags: %#x, err: %v", dst, flags, err)
- }
-
- cpy := m
- relDst, err := filepath.Rel(spec.Root.Path, dst)
- if err != nil {
- panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, spec.Root.Path, err))
- }
- cpy.Destination = filepath.Join("/", relDst)
- rv = append(rv, cpy)
- }
-
- if spec.Process.Cwd != "" {
- dst, err := resolveSymlinks(spec.Root.Path, spec.Process.Cwd)
- if err != nil {
- return nil, fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err)
- }
- if err := os.MkdirAll(dst, 0755); err != nil {
- return nil, err
- }
- }
-
- // If root is read only, check if it needs to be remounted as readonly.
- if spec.Root.Readonly {
- isMountPoint, readonly, err := mountInfo(spec.Root.Path)
- if err != nil {
- return nil, err
- }
- if readonly {
- return rv, nil
- }
- if !isMountPoint {
- // Readonly root is not a mount point nor read-only. Can't do much other
- // than just logging a warning. The gofer will prevent files to be open
- // in write mode.
- log.Warningf("Mount where root is located is not read-only and cannot be changed: %q", spec.Root.Path)
- return rv, nil
- }
-
- // If root is a mount point but not read-only, we can change mount options
- // to make it read-only for extra safety.
- log.Infof("Remounting root as readonly: %q", spec.Root.Path)
- flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC)
- src := spec.Root.Path
- if err := syscall.Mount(src, src, "bind", flags, ""); err != nil {
- return nil, fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", spec.Root.Path, spec.Root.Path, flags, err)
- }
- }
- return rv, nil
-}
-
-// mountInfo returns whether the path is a mount point and whether the mount
-// that path belongs to is read-only.
-func mountInfo(path string) (bool, bool, error) {
- // Mounts are listed by their real paths.
- realPath, err := filepath.EvalSymlinks(path)
- if err != nil {
- return false, false, err
- }
- f, err := os.Open("/proc/mounts")
- if err != nil {
- return false, false, err
- }
- scanner := bufio.NewScanner(f)
-
- var mountPoint string
- var readonly bool
- for scanner.Scan() {
- line := scanner.Text()
- parts := strings.Split(line, " ")
- if len(parts) < 4 {
- return false, false, fmt.Errorf("invalid /proc/mounts line format %q", line)
- }
- mp := parts[1]
- opts := strings.Split(parts[3], ",")
-
- // Find the closest submount to the path.
- if strings.Contains(realPath, mp) && len(mp) > len(mountPoint) {
- mountPoint = mp
- readonly = specutils.ContainsStr(opts, "ro")
- }
- }
- if err := scanner.Err(); err != nil {
- return false, false, err
- }
- return mountPoint == realPath, readonly, nil
-}
-
-// destroyFS unmounts mounts done by runsc under `spec.Root.Path`. This
-// recovers the container rootfs into the original state.
-func destroyFS(spec *specs.Spec) error {
- for _, m := range spec.Mounts {
- if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
- continue
- }
-
- // It's possible that 'm.Destination' follows symlinks inside the
- // container.
- dst, err := resolveSymlinks(spec.Root.Path, m.Destination)
- if err != nil {
- return err
- }
-
- flags := syscall.MNT_DETACH
- log.Infof("Unmounting dst: %q, flags: %#x", dst, flags)
- // Do not return error if dst is not a mountpoint.
- // Based on http://man7.org/linux/man-pages/man2/umount.2.html
- // For kernel version 2.6+ and MNT_DETACH flag, EINVAL means
- // the dst is not a mount point.
- if err := syscall.Unmount(dst, flags); err != nil &&
- !os.IsNotExist(err) && err != syscall.EINVAL {
- return err
- }
- }
- return nil
-}
-
-// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are
-// symlinks, they are evaluated relative to 'root' to ensure the end result is
-// the same as if the process was running inside the container.
-func resolveSymlinks(root, rel string) (string, error) {
- return resolveSymlinksImpl(root, root, rel, 255)
-}
-
-func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
- if followCount == 0 {
- return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
- }
-
- rel = filepath.Clean(rel)
- for _, name := range strings.Split(rel, string(filepath.Separator)) {
- if name == "" {
- continue
- }
- // Note that Join() resolves things like ".." and returns a clean path.
- path := filepath.Join(base, name)
- if !strings.HasPrefix(path, root) {
- // One cannot '..' their way out of root.
- path = root
- continue
- }
- fi, err := os.Lstat(path)
- if err != nil {
- if !os.IsNotExist(err) {
- return "", err
- }
- // Not found means there is no symlink to check. Just keep walking dirs.
- base = path
- continue
- }
- if fi.Mode()&os.ModeSymlink != 0 {
- link, err := os.Readlink(path)
- if err != nil {
- return "", err
- }
- if filepath.IsAbs(link) {
- base = root
- }
- base, err = resolveSymlinksImpl(root, base, link, followCount-1)
- if err != nil {
- return "", err
- }
- continue
- }
- base = path
- }
- return base, nil
-}
-
-func optionsToFlags(opts []string) uint32 {
- var rv uint32
- for _, opt := range opts {
- if m, ok := optionsMap[opt]; ok {
- if m.set {
- rv |= m.val
- } else {
- rv ^= m.val
- }
- } else {
- log.Warningf("Ignoring mount option %q", opt)
- }
- }
- return rv
-}
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 2698e3f86..ae6375e13 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -75,7 +75,7 @@ type Sandbox struct {
// New creates the sandbox process. The caller must call Destroy() on the
// sandbox.
-func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, cg *cgroup.Cgroup) (*Sandbox, error) {
+func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, specFile *os.File, cg *cgroup.Cgroup) (*Sandbox, error) {
s := &Sandbox{ID: id, Cgroup: cg}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
@@ -86,17 +86,14 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
defer c.Clean()
// Create pipe to synchronize when sandbox process has been booted.
- fds := make([]int, 2)
- if err := syscall.Pipe(fds); err != nil {
+ clientSyncFile, sandboxSyncFile, err := os.Pipe()
+ if err != nil {
return nil, fmt.Errorf("creating pipe for sandbox %q: %v", s.ID, err)
}
- clientSyncFile := os.NewFile(uintptr(fds[0]), "client sandbox sync")
defer clientSyncFile.Close()
- sandboxSyncFile := os.NewFile(uintptr(fds[1]), "sandbox sync")
-
// Create the sandbox process.
- err := s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, sandboxSyncFile)
+ err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, sandboxSyncFile)
// sandboxSyncFile has to be closed to be able to detect when the sandbox
// process exits unexpectedly.
sandboxSyncFile.Close()
@@ -294,7 +291,7 @@ func (s *Sandbox) connError(err error) error {
// createSandboxProcess starts the sandbox as a subprocess by running the "boot"
// command, passing in the bundle dir.
-func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, startSyncFile *os.File) error {
+func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, mountsFile, startSyncFile *os.File) error {
// nextFD is used to get unused FDs that we can pass to the sandbox. It
// starts at 3 because 0, 1, and 2 are taken by stdin/out/err.
nextFD := 3
@@ -345,10 +342,14 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD))
nextFD++
- // Open the spec file to donate to the sandbox.
- specFile, err := specutils.OpenCleanSpec(bundleDir)
+ defer mountsFile.Close()
+ cmd.ExtraFiles = append(cmd.ExtraFiles, mountsFile)
+ cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD))
+ nextFD++
+
+ specFile, err := specutils.OpenSpec(bundleDir)
if err != nil {
- return fmt.Errorf("opening spec file: %v", err)
+ return err
}
defer specFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, specFile)
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD
index 372799850..15476de6f 100644
--- a/runsc/specutils/BUILD
+++ b/runsc/specutils/BUILD
@@ -5,6 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "specutils",
srcs = [
+ "fs.go",
"namespace.go",
"specutils.go",
],
diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go
new file mode 100644
index 000000000..b812a5fbd
--- /dev/null
+++ b/runsc/specutils/fs.go
@@ -0,0 +1,139 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package specutils
+
+import (
+ "fmt"
+ "path"
+ "syscall"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+type mapping struct { // mapping describes how one mount option string affects a mount(2) flag word.
+ set bool // set is true for options that turn val's bits on; false marks the negating form (e.g. "rw" for MS_RDONLY).
+ val uint32 // val is the MS_* flag bit(s) the option refers to.
+}
+
+// optionsMap maps general (non-propagation) OCI mount options to mount(2)
+// syscall flags; propagation options are kept separately in propOptionsMap.
+var optionsMap = map[string]mapping{
+ "acl": {set: true, val: syscall.MS_POSIXACL},
+ "async": {set: false, val: syscall.MS_SYNCHRONOUS},
+ "atime": {set: false, val: syscall.MS_NOATIME},
+ "bind": {set: true, val: syscall.MS_BIND},
+ "defaults": {set: true, val: 0},
+ "dev": {set: false, val: syscall.MS_NODEV},
+ "diratime": {set: false, val: syscall.MS_NODIRATIME},
+ "dirsync": {set: true, val: syscall.MS_DIRSYNC},
+ "exec": {set: false, val: syscall.MS_NOEXEC},
+ "iversion": {set: true, val: syscall.MS_I_VERSION},
+ "loud": {set: false, val: syscall.MS_SILENT},
+ "mand": {set: true, val: syscall.MS_MANDLOCK},
+ "noacl": {set: false, val: syscall.MS_POSIXACL},
+ "noatime": {set: true, val: syscall.MS_NOATIME},
+ "nodev": {set: true, val: syscall.MS_NODEV},
+ "nodiratime": {set: true, val: syscall.MS_NODIRATIME},
+ "noiversion": {set: false, val: syscall.MS_I_VERSION},
+ "nomand": {set: false, val: syscall.MS_MANDLOCK},
+ "norelatime": {set: false, val: syscall.MS_RELATIME},
+ "nostrictatime": {set: false, val: syscall.MS_STRICTATIME},
+ "nosuid": {set: true, val: syscall.MS_NOSUID},
+ "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC},
+ "relatime": {set: true, val: syscall.MS_RELATIME},
+ "remount": {set: true, val: syscall.MS_REMOUNT},
+ "ro": {set: true, val: syscall.MS_RDONLY},
+ "rw": {set: false, val: syscall.MS_RDONLY},
+ "silent": {set: true, val: syscall.MS_SILENT},
+ "strictatime": {set: true, val: syscall.MS_STRICTATIME},
+ "suid": {set: false, val: syscall.MS_NOSUID},
+ "sync": {set: true, val: syscall.MS_SYNCHRONOUS},
+}
+
+// propOptionsMap maps mount propagation options to mount(2) syscall flags. It
+// is separate from optionsMap because these cannot be combined with other flags.
+var propOptionsMap = map[string]mapping{
+ "private": {set: true, val: syscall.MS_PRIVATE},
+ "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC},
+ "slave": {set: true, val: syscall.MS_SLAVE},
+ "rslave": {set: true, val: syscall.MS_SLAVE | syscall.MS_REC},
+ "unbindable": {set: true, val: syscall.MS_UNBINDABLE},
+ "runbindable": {set: true, val: syscall.MS_UNBINDABLE | syscall.MS_REC},
+}
+
+// invalidOptions lists mount options that are rejected instead of ignored.
+// - shared, rshared: sandbox must be isolated from the host. Propagating mount
+// changes from the sandbox to the host breaks the isolation.
+// - noexec: not yet supported. Don't ignore it since it could break
+// in-sandbox security.
+var invalidOptions = []string{"shared", "rshared", "noexec"}
+
+// OptionsToFlags translates general (non-propagation) mount options into
+// mount(2) flag bits by looking each option up in optionsMap. Options that are
+// not present in optionsMap contribute nothing to the result.
+func OptionsToFlags(opts []string) uint32 {
+	flags := optionsToFlags(opts, optionsMap)
+	return flags
+}
+
+// PropOptionsToFlags translates mount propagation options into mount(2) flag
+// bits by looking each option up in propOptionsMap. Propagation options cannot
+// be combined with other options and must be handled separately.
+func PropOptionsToFlags(opts []string) uint32 {
+	propFlags := optionsToFlags(opts, propOptionsMap)
+	return propFlags
+}
+
+// optionsToFlags folds opts, in order, into a single mount(2) flag word using
+// the lookup table source. An entry with set == true ORs its bits in; an entry
+// with set == false clears them. Options missing from source are skipped.
+func optionsToFlags(opts []string, source map[string]mapping) uint32 {
+	var rv uint32
+	for _, opt := range opts {
+		m, ok := source[opt]
+		if !ok {
+			continue
+		}
+		if m.set {
+			rv |= m.val
+		} else {
+			// Clear with AND NOT. XOR would toggle the bits, so a lone
+			// negating option such as "rw" (set: false, MS_RDONLY) would
+			// switch the read-only bit on instead of leaving it off.
+			rv &^= m.val
+		}
+	}
+	return rv
+}
+
+// validateMount checks a single spec mount. It returns an error when the
+// destination is not an absolute path or, for bind mounts, when one of the
+// options is listed in invalidOptions. Bind-mount options found in neither
+// optionsMap nor propOptionsMap are only logged as ignored.
+func validateMount(mnt *specs.Mount) error {
+	if !path.IsAbs(mnt.Destination) {
+		return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt)
+	}
+
+	// Options are only inspected for bind mounts.
+	if mnt.Type != "bind" {
+		return nil
+	}
+	for _, opt := range mnt.Options {
+		if ContainsStr(invalidOptions, opt) {
+			return fmt.Errorf("mount option %q is not supported: %v", opt, mnt)
+		}
+		if _, known := optionsMap[opt]; known {
+			continue
+		}
+		if _, known := propOptionsMap[opt]; known {
+			continue
+		}
+		log.Warningf("Ignoring unknown mount option %q", opt)
+	}
+	return nil
+}
+
+// validateRootfsPropagation accepts opt only when PropOptionsToFlags maps it
+// to flags that include MS_SLAVE or MS_PRIVATE; any other value is reported as
+// an error.
+func validateRootfsPropagation(opt string) error {
+	const allowed = syscall.MS_SLAVE | syscall.MS_PRIVATE
+	if PropOptionsToFlags([]string{opt})&allowed != 0 {
+		return nil
+	}
+	return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt)
+}
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index 73fab13e1..35da789f4 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -105,9 +105,9 @@ func FilterNS(filter []specs.LinuxNamespaceType, s *specs.Spec) []specs.LinuxNam
return out
}
-// SetNS sets the namespace of the given type. It must be called with
+// setNS sets the namespace of the given type. It must be called with
// OSThreadLocked.
-func SetNS(fd, nsType uintptr) error {
+func setNS(fd, nsType uintptr) error {
if _, _, err := syscall.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 {
return err
}
@@ -119,30 +119,30 @@ func SetNS(fd, nsType uintptr) error {
//
// Preconditions: Must be called with os thread locked.
func ApplyNS(ns specs.LinuxNamespace) (func(), error) {
- log.Infof("applying namespace %v at path %q", ns.Type, ns.Path)
+ log.Infof("Applying namespace %v at path %q", ns.Type, ns.Path)
newNS, err := os.Open(ns.Path)
if err != nil {
return nil, fmt.Errorf("error opening %q: %v", ns.Path, err)
}
defer newNS.Close()
- // Store current netns to restore back after child is started.
+ // Store current namespace to restore back.
curPath := nsPath(ns.Type)
oldNS, err := os.Open(curPath)
if err != nil {
return nil, fmt.Errorf("error opening %q: %v", curPath, err)
}
- // Set netns to the one requested and setup function to restore it back.
+ // Set namespace to the one requested and setup function to restore it back.
flag := nsCloneFlag(ns.Type)
- if err := SetNS(newNS.Fd(), flag); err != nil {
+ if err := setNS(newNS.Fd(), flag); err != nil {
oldNS.Close()
return nil, fmt.Errorf("error setting namespace of type %v and path %q: %v", ns.Type, ns.Path, err)
}
return func() {
- log.Infof("restoring namespace %v", ns.Type)
+ log.Infof("Restoring namespace %v", ns.Type)
defer oldNS.Close()
- if err := SetNS(oldNS.Fd(), flag); err != nil {
+ if err := setNS(oldNS.Fd(), flag); err != nil {
panic(fmt.Sprintf("error restoring namespace: of type %v: %v", ns.Type, err))
}
}, nil
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 4e7893ab4..cbf099c64 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -92,9 +92,14 @@ func ValidateSpec(spec *specs.Spec) error {
log.Warningf("Seccomp spec is being ignored")
}
- for i, m := range spec.Mounts {
- if !path.IsAbs(m.Destination) {
- return fmt.Errorf("Spec.Mounts[%d] Mount.Destination must be an absolute path: %v", i, m)
+ if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
+ if err := validateRootfsPropagation(spec.Linux.RootfsPropagation); err != nil {
+ return err
+ }
+ }
+ for _, m := range spec.Mounts {
+ if err := validateMount(&m); err != nil {
+ return err
}
}
@@ -129,15 +134,19 @@ func absPath(base, rel string) string {
return filepath.Join(base, rel)
}
+// OpenSpec opens, read-only, the OCI runtime spec file that belongs to the
+// given bundle directory and returns the open file or the os.Open error.
+func OpenSpec(bundleDir string) (*os.File, error) {
+	// The spec file must be named "config.json" inside the bundle directory.
+	specPath := filepath.Join(bundleDir, "config.json")
+	return os.Open(specPath)
+}
+
// ReadSpec reads an OCI runtime spec from the given bundle directory.
// ReadSpec also normalizes all potential relative paths into absolute
// path, e.g. spec.Root.Path, mount.Source.
func ReadSpec(bundleDir string) (*specs.Spec, error) {
- // The spec file must be in "config.json" inside the bundle directory.
- specPath := filepath.Join(bundleDir, "config.json")
- specFile, err := os.Open(specPath)
+ specFile, err := OpenSpec(bundleDir)
if err != nil {
- return nil, fmt.Errorf("error opening spec file %q: %v", specPath, err)
+ // specFile is nil when OpenSpec fails, so specFile.Name() must not be
+ // called here; rebuild the path that OpenSpec tried to open instead.
+ return nil, fmt.Errorf("error opening spec file %q: %v", filepath.Join(bundleDir, "config.json"), err)
}
defer specFile.Close()
return ReadSpecFromFile(bundleDir, specFile)
@@ -171,27 +180,17 @@ func ReadSpecFromFile(bundleDir string, specFile *os.File) (*specs.Spec, error)
return &spec, nil
}
-// OpenCleanSpec opens spec file that has destination mount paths resolved to
-// their absolute location.
-func OpenCleanSpec(bundleDir string) (*os.File, error) {
- f, err := os.Open(filepath.Join(bundleDir, "config.clean.json"))
+// ReadMounts reads mount list from a file.
+func ReadMounts(f *os.File) ([]specs.Mount, error) {
+ bytes, err := ioutil.ReadAll(f)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("error reading mounts: %v", err)
}
- if _, err := f.Seek(0, os.SEEK_SET); err != nil {
- f.Close()
- return nil, fmt.Errorf("error seeking to beginning of file %q: %v", f.Name(), err)
- }
- return f, nil
-}
-
-// WriteCleanSpec writes a spec file that has destination mount paths resolved.
-func WriteCleanSpec(bundleDir string, spec *specs.Spec) error {
- bytes, err := json.Marshal(spec)
- if err != nil {
- return err
+ var mounts []specs.Mount
+ if err := json.Unmarshal(bytes, &mounts); err != nil {
+ return nil, fmt.Errorf("error unmarshaling mounts: %v\n %s", err, string(bytes))
}
- return ioutil.WriteFile(filepath.Join(bundleDir, "config.clean.json"), bytes, 0755)
+ return mounts, nil
}
// Capabilities takes in spec and returns a TaskCapabilities corresponding to
@@ -407,8 +406,7 @@ func Mount(src, dst, typ string, flags uint32) error {
// source (file or directory).
var isDir bool
if typ == "proc" {
- // Special case, as there is no source directory for proc
- // mounts.
+ // Special case, as there is no source directory for proc mounts.
isDir = true
} else if fi, err := os.Stat(src); err != nil {
return fmt.Errorf("Stat(%q) failed: %v", src, err)
diff --git a/runsc/specutils/specutils_test.go b/runsc/specutils/specutils_test.go
index b61f1ca62..02af6e6ad 100644
--- a/runsc/specutils/specutils_test.go
+++ b/runsc/specutils/specutils_test.go
@@ -219,6 +219,37 @@ func TestSpecInvalid(t *testing.T) {
},
error: "must be an absolute path",
},
+ {
+ name: "invalid mount option",
+ spec: specs.Spec{
+ Root: &specs.Root{Path: "/"},
+ Process: &specs.Process{
+ Args: []string{"/bin/true"},
+ },
+ Mounts: []specs.Mount{
+ {
+ Source: "/src",
+ Destination: "/dst",
+ Type: "bind",
+ Options: []string{"shared"},
+ },
+ },
+ },
+ error: "is not supported",
+ },
+ {
+ name: "invalid rootfs propagation",
+ spec: specs.Spec{
+ Root: &specs.Root{Path: "/"},
+ Process: &specs.Process{
+ Args: []string{"/bin/true"},
+ },
+ Linux: &specs.Linux{
+ RootfsPropagation: "foo",
+ },
+ },
+ error: "root mount propagation option must specify private or slave",
+ },
} {
err := ValidateSpec(&test.spec)
if len(test.error) == 0 {