Diffstat (limited to 'runsc')
 -rw-r--r--  runsc/boot/controller.go     |  19
 -rw-r--r--  runsc/boot/fs.go             | 159
 -rw-r--r--  runsc/boot/loader.go         |  36
 -rw-r--r--  runsc/cmd/BUILD              |   1
 -rw-r--r--  runsc/cmd/gofer.go           |  36
 -rw-r--r--  runsc/container/container.go |   7
 -rw-r--r--  runsc/fsgofer/BUILD          |   4
 -rw-r--r--  runsc/fsgofer/control.go     | 203
 -rw-r--r--  runsc/fsgofer/fsgofer.go     |   5
 -rw-r--r--  runsc/sandbox/BUILD          |   1
 -rw-r--r--  runsc/sandbox/sandbox.go     | 139
 11 files changed, 492 insertions(+), 118 deletions(-)
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index fc6ea326a..69e88d8e0 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -17,6 +17,7 @@ package boot
import (
"errors"
"fmt"
+ "path"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.googlesource.com/gvisor/pkg/control/server"
@@ -181,11 +182,15 @@ type StartArgs struct {
// CID is the ID of the container to start.
CID string
+
+ // FilePayload contains the file descriptor over which the sandbox will
+ // request files from its root filesystem.
+ urpc.FilePayload
}
// Start runs a created container within a sandbox.
func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
- log.Debugf("containerManager.Start")
+ log.Debugf("containerManager.Start: %+v", args)
// Validate arguments.
if args == nil {
@@ -200,8 +205,18 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
if args.CID == "" {
return errors.New("start argument missing container ID")
}
+ // Prevent CIDs containing ".." from confusing the sentry when creating
+ // /containers/<cid> directory.
+ // TODO: Once we have multiple independent roots, this
+ // check won't be necessary.
+ if path.Clean(args.CID) != args.CID {
+ return fmt.Errorf("container ID shouldn't contain directory traversals such as \"..\": %q", args.CID)
+ }
+ if len(args.FilePayload.Files) != 1 {
+ return fmt.Errorf("start arguments must contain one file for the container root")
+ }
- tgid, err := cm.l.startContainer(args, cm.l.k)
+ tgid, err := cm.l.startContainer(cm.l.k, args.Spec, args.Conf, args.CID, args.FilePayload.Files[0])
if err != nil {
return err
}
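
The path.Clean check in containerManager.Start above rejects any container ID whose cleaned form differs from the original, which catches ".." and "." elements embedded in the ID, doubled slashes, and trailing slashes. A stdlib-only sketch of the same predicate, with made-up IDs:

    package main

    import (
        "fmt"
        "path"
    )

    // validCID mirrors the check in containerManager.Start: an ID is rejected
    // if path.Clean changes it, e.g. because it contains embedded ".." or "."
    // elements, doubled slashes, or a trailing slash.
    func validCID(cid string) bool {
        return path.Clean(cid) == cid
    }

    func main() {
        for _, cid := range []string{"abc123", "abc/../123", "abc/./123", "abc//123", "abc/"} {
            fmt.Printf("%-14q valid=%t\n", cid, validCID(cid))
        }
    }

Only "abc123" prints valid=true; the others are rejected before the sentry ever builds a per-container directory path from them.
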
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index eea2ec1f5..8996b1398 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -48,6 +48,19 @@ const (
// Device name for root mount.
rootDevice = "9pfs-/"
+
+ // childContainersDir is the directory where child container root
+ // filesystems are mounted.
+ childContainersDir = "/__runsc_containers__"
+
+ // Filesystems that runsc supports.
+ bind = "bind"
+ devpts = "devpts"
+ devtmpfs = "devtmpfs"
+ proc = "proc"
+ sysfs = "sysfs"
+ tmpfs = "tmpfs"
+ nonefs = "none"
)
type fdDispenser struct {
@@ -70,8 +83,15 @@ func (f *fdDispenser) empty() bool {
// createMountNamespace creates a mount namespace containing the root filesystem
// and all mounts. 'rootCtx' is used to walk directories to find mount points.
func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec *specs.Spec, conf *Config, ioFDs []int) (*fs.MountNamespace, error) {
+ mounts := compileMounts(spec)
+ // Create a tmpfs mount where we create and mount a root filesystem for
+ // each child container.
+ mounts = append(mounts, specs.Mount{
+ Type: tmpfs,
+ Destination: childContainersDir,
+ })
fds := &fdDispenser{fds: ioFDs}
- rootInode, err := createRootMount(rootCtx, spec, conf, fds)
+ rootInode, err := createRootMount(rootCtx, spec, conf, fds, mounts)
if err != nil {
return nil, fmt.Errorf("failed to create root mount: %v", err)
}
@@ -79,7 +99,7 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec
if err != nil {
return nil, fmt.Errorf("failed to create root mount namespace: %v", err)
}
- mounts := compileMounts(spec)
+
if err := setMounts(rootCtx, conf, mns, fds, mounts); err != nil {
return nil, fmt.Errorf("failed to configure mounts: %v", err)
}
@@ -98,12 +118,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
// Always mount /dev.
mounts = append(mounts, specs.Mount{
- Type: "devtmpfs",
+ Type: devtmpfs,
Destination: "/dev",
})
mounts = append(mounts, specs.Mount{
- Type: "devpts",
+ Type: devpts,
Destination: "/dev/pts",
})
@@ -129,13 +149,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
var mandatoryMounts []specs.Mount
if !procMounted {
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: "proc",
+ Type: proc,
Destination: "/proc",
})
}
if !sysMounted {
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: "sysfs",
+ Type: sysfs,
Destination: "/sys",
})
}
@@ -149,7 +169,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
// that. Until then, the /tmp mount will always appear empty at
// container creation.
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: "tmpfs",
+ Type: tmpfs,
Destination: "/tmp",
})
}
@@ -165,7 +185,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
// mount namespace.
func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, mounts []specs.Mount) error {
for _, m := range mounts {
- if err := mountSubmount(ctx, conf, mns, fds, m, mounts); err != nil {
+ if err := mountSubmount(ctx, conf, mns, fds, m, mounts, m.Destination); err != nil {
return err
}
}
@@ -173,7 +193,7 @@ func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *f
}
// createRootMount creates the root filesystem.
-func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser) (*fs.Inode, error) {
+func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser, mounts []specs.Mount) (*fs.Inode, error) {
// First construct the filesystem from the spec.Root.
mf := fs.MountSourceFlags{ReadOnly: spec.Root.Readonly}
@@ -207,7 +227,7 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f
// We need to overlay the root on top of a ramfs with stub directories
// for submount paths. "/dev" "/sys" "/proc" and "/tmp" are always
// mounted even if they are not in the spec.
- submounts := append(subtargets("/", spec.Mounts), "/dev", "/sys", "/proc", "/tmp")
+ submounts := append(subtargets("/", mounts), "/dev", "/sys", "/proc", "/tmp")
rootInode, err = addSubmountOverlay(ctx, rootInode, submounts)
if err != nil {
return nil, fmt.Errorf("error adding submount overlay: %v", err)
@@ -256,17 +276,17 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri
)
switch m.Type {
- case "devpts", "devtmpfs", "proc", "sysfs":
+ case devpts, devtmpfs, proc, sysfs:
fsName = m.Type
- case "none":
- fsName = "sysfs"
- case "tmpfs":
+ case nonefs:
+ fsName = sysfs
+ case tmpfs:
fsName = m.Type
// tmpfs has some extra supported options that we must pass through.
opts, err = parseAndFilterOptions(m.Options, "mode", "uid", "gid")
- case "bind":
+ case bind:
switch conf.FileAccess {
case FileAccessProxy, FileAccessProxyExclusive:
fd := fds.remove()
@@ -291,7 +311,7 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri
return fsName, opts, useOverlay, err
}
-func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount) error {
+func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount, dest string) error {
// Map mount type to filesystem name, and parse out the options that we are
// capable of dealing with.
fsName, opts, useOverlay, err := getMountNameAndOptions(conf, m, fds)
@@ -342,51 +362,52 @@ func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fd
// in the right location, e.g.
// mount: /var/run/secrets, may be created in '/run/secrets' if
// '/var/run' => '/var'.
- if err := mkdirAll(ctx, mns, m.Destination); err != nil {
+ if err := mkdirAll(ctx, mns, dest); err != nil {
return err
}
root := mns.Root()
defer root.DecRef()
- dirent, err := mns.FindInode(ctx, root, nil, m.Destination, linux.MaxSymlinkTraversals)
+ dirent, err := mns.FindInode(ctx, root, nil, dest, linux.MaxSymlinkTraversals)
if err != nil {
- return fmt.Errorf("failed to find mount destination %q: %v", m.Destination, err)
+ return fmt.Errorf("failed to find mount destination %q: %v", dest, err)
}
defer dirent.DecRef()
if err := mns.Mount(ctx, dirent, inode); err != nil {
- return fmt.Errorf("failed to mount at destination %q: %v", m.Destination, err)
+ return fmt.Errorf("failed to mount at destination %q: %v", dest, err)
}
- log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type)
+ log.Infof("Mounted %q to %q type %s", m.Source, dest, m.Type)
return nil
}
func mkdirAll(ctx context.Context, mns *fs.MountNamespace, path string) error {
+ log.Infof("mkdirAll called with path %s", path)
root := mns.Root()
defer root.DecRef()
// Starting at the root, walk the path.
parent := root
ps := strings.Split(filepath.Clean(path), string(filepath.Separator))
- for i := 0; i < len(ps); i++ {
- if ps[i] == "" {
+ for _, pathElem := range ps {
+ if pathElem == "" {
// This will be case for the first and last element, if the path
// begins or ends with '/'. Note that we always treat the path as
// absolute, regardless of what the first character contains.
continue
}
- d, err := mns.FindInode(ctx, root, parent, ps[i], fs.DefaultTraversalLimit)
+ d, err := mns.FindInode(ctx, root, parent, pathElem, fs.DefaultTraversalLimit)
if err == syserror.ENOENT {
// If we encounter a path that does not exist, then
// create it.
- if err := parent.CreateDirectory(ctx, root, ps[i], fs.FilePermsFromMode(0755)); err != nil {
- return fmt.Errorf("failed to create directory %q: %v", ps[i], err)
+ if err := parent.CreateDirectory(ctx, root, pathElem, fs.FilePermsFromMode(0755)); err != nil {
+ return fmt.Errorf("failed to create directory %q: %v", pathElem, err)
}
- if d, err = parent.Walk(ctx, root, ps[i]); err != nil {
- return fmt.Errorf("walk to %q failed: %v", ps[i], err)
+ if d, err = parent.Walk(ctx, root, pathElem); err != nil {
+ return fmt.Errorf("walk to %q failed: %v", pathElem, err)
}
} else if err != nil {
- return fmt.Errorf("failed to find inode %q: %v", ps[i], err)
+ return fmt.Errorf("failed to find inode %q: %v", pathElem, err)
}
parent = d
}
@@ -444,7 +465,7 @@ func destinations(mounts []specs.Mount, extra ...string) []string {
// mountDevice returns a device string based on the fs type and target
// of the mount.
func mountDevice(m specs.Mount) string {
- if m.Type == "bind" {
+ if m.Type == bind {
// Make a device string that includes the target, which is consistent across
// S/R and uniquely identifies the connection.
return "9pfs-" + m.Destination
@@ -589,7 +610,7 @@ func subtargets(root string, mnts []specs.Mount) []string {
// setFileSystemForProcess is used to set up the file system and amend the procArgs accordingly.
// procArgs are passed by reference and the FDMap field is modified.
-func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel) error {
+func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel, cid string) error {
ctx := procArgs.NewContext(k)
// Create the FD map, which will set stdin, stdout, and stderr. If
@@ -604,27 +625,79 @@ func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spe
// won't need ours either way.
procArgs.FDMap = fdm
+ // Use root user to configure mounts. The current user might not have
+ // permission to do so.
+ rootProcArgs := kernel.CreateProcessArgs{
+ WorkingDirectory: "/",
+ Credentials: auth.NewRootCredentials(creds.UserNamespace),
+ Umask: 0022,
+ MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
+ }
+ rootCtx := rootProcArgs.NewContext(k)
+
// If this is the root container, we also need to setup the root mount
// namespace.
- if k.RootMountNamespace() == nil {
- // Use root user to configure mounts. The current user might not have
- // permission to do so.
- rootProcArgs := kernel.CreateProcessArgs{
- WorkingDirectory: "/",
- Credentials: auth.NewRootCredentials(creds.UserNamespace),
- Umask: 0022,
- MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
- }
- rootCtx := rootProcArgs.NewContext(k)
-
+ mns := k.RootMountNamespace()
+ if mns == nil {
// Create the virtual filesystem.
mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs)
if err != nil {
return fmt.Errorf("error creating mounts: %v", err)
}
-
k.SetRootMountNamespace(mns)
+ return nil
+ }
+
+ // Setup a child container.
+
+ // Create the container's root filesystem mount.
+ log.Infof("Creating new process in child container.")
+ fds := &fdDispenser{fds: append([]int{}, ioFDs...)}
+ rootInode, err := createRootMount(rootCtx, spec, conf, fds, nil)
+ if err != nil {
+ return fmt.Errorf("error creating filesystem for container: %v", err)
+ }
+
+ // Make directories for submounts within the container.
+ rootDir := mns.Root()
+ defer rootDir.DecRef()
+ containerRoot := filepath.Join(childContainersDir, cid)
+ if err := mkdirAll(ctx, mns, containerRoot); err != nil {
+ return fmt.Errorf("error creating directory %q: %v", containerRoot, err)
+ }
+
+ // Mount the container's root filesystem to the newly created
+ // mount point.
+ containerRootDirent, err := mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals)
+ if err != nil {
+ return fmt.Errorf("failed to find mount destination: %q: %v", containerRoot, err)
+ }
+ if err := mns.Mount(ctx, containerRootDirent, rootInode); err != nil {
+ return fmt.Errorf("failed to mount at destination %q: %v", containerRoot, err)
+ }
+ containerRootDirent.DecRef()
+
+ // We have to re-walk to the dirent to find the mounted
+ // directory. The old dirent is invalid at this point.
+ containerRootDirent, err = mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals)
+ if err != nil {
+ return fmt.Errorf("failed to find mount destination2: %q: %v", containerRoot, err)
+ }
+ log.Infof("Mounted child's root fs to %q", containerRoot)
+
+ // Mount all submounts.
+ mounts := compileMounts(spec)
+ for _, m := range mounts {
+ // TODO: Enable bind mounts in child containers.
+ if m.Type == bind {
+ log.Infof("Bind mounts in child containers are not yet supported: %+v", m)
+ continue
+ }
+ dest := filepath.Join(containerRoot, m.Destination)
+ if err := mountSubmount(rootCtx, conf, k.RootMountNamespace(), fds, m, mounts, dest); err != nil {
+ return fmt.Errorf("error mounting filesystem for container: %v", err)
+ }
}
+ // Set the procArgs root directory.
+ procArgs.Root = containerRootDirent
return nil
}
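
The mkdirAll helper above walks the mount destination one path element at a time, creating each missing directory as it goes, and setFileSystemForProcess uses it to create the child's mount point under childContainersDir before mounting the child's root there. A stdlib-only sketch of the element-splitting part (the sentry Dirent walking and CreateDirectory calls are elided; "abc123" is a made-up container ID):

    package main

    import (
        "fmt"
        "path/filepath"
        "strings"
    )

    // pathElements mirrors how mkdirAll in boot/fs.go breaks a destination into
    // the directories it must create: clean the path, split on the separator,
    // and skip the empty elements produced by a leading or trailing separator.
    func pathElements(p string) []string {
        var elems []string
        for _, e := range strings.Split(filepath.Clean(p), string(filepath.Separator)) {
            if e == "" {
                continue
            }
            elems = append(elems, e)
        }
        return elems
    }

    func main() {
        containerRoot := filepath.Join("/__runsc_containers__", "abc123")
        fmt.Println(pathElements(containerRoot)) // [__runsc_containers__ abc123]
    }
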
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index f6c7bf223..7debf0ac2 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -19,6 +19,7 @@ import (
"errors"
"fmt"
"math/rand"
+ "os"
"runtime"
"sync"
"sync/atomic"
@@ -229,7 +230,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD int, ioFDs []int, console
// Ensure that signals received are forwarded to the emulated kernel.
stopSignalForwarding := sighandling.PrepareForwarding(k, false)()
- procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k)
+ procArgs, err := newProcess(spec, creds, utsns, ipcns, k)
if err != nil {
return nil, fmt.Errorf("failed to create root process: %v", err)
}
@@ -250,7 +251,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD int, ioFDs []int, console
}
// newProcess creates a process that can be run with kernel.CreateProcess.
-func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) {
+func newProcess(spec *specs.Spec, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) {
// Create initial limits.
ls, err := createLimitSet(spec)
if err != nil {
@@ -277,7 +278,6 @@ func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds
UTSNamespace: utsns,
IPCNamespace: ipcns,
}
-
return procArgs, nil
}
@@ -356,7 +356,8 @@ func (l *Loader) run() error {
l.console,
l.rootProcArgs.Credentials,
l.rootProcArgs.Limits,
- l.k)
+ l.k,
+ "" /* CID, which isn't needed for the root container */)
if err != nil {
return err
}
@@ -376,8 +377,7 @@ func (l *Loader) run() error {
// startContainer starts a child container. It returns the thread group ID of
// the newly created process.
-func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.ThreadID, error) {
- spec := args.Spec
+func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config, cid string, file *os.File) (kernel.ThreadID, error) {
// Create capabilities.
caps, err := specutils.Capabilities(spec.Process.Capabilities)
if err != nil {
@@ -406,26 +406,24 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa
// when indicated by the spec.
procArgs, err := newProcess(
- args.Spec,
- args.Conf,
- nil, // ioFDs
- false, // console
+ spec,
creds,
- k.RootUTSNamespace(),
- k.RootIPCNamespace(),
- k)
+ l.k.RootUTSNamespace(),
+ l.k.RootIPCNamespace(),
+ l.k)
if err != nil {
return 0, fmt.Errorf("failed to create new process: %v", err)
}
err = setFileSystemForProcess(
&procArgs,
- args.Spec,
- args.Conf,
- nil,
+ spec,
+ conf,
+ []int{int(file.Fd())}, // ioFDs
false,
creds,
procArgs.Limits,
- k)
+ k,
+ cid)
if err != nil {
return 0, fmt.Errorf("failed to create new process: %v", err)
}
@@ -435,7 +433,7 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa
return 0, fmt.Errorf("failed to create process in sentry: %v", err)
}
- ts := k.TaskSet()
+ ts := l.k.TaskSet()
tgid := ts.Root.IDOfThreadGroup(tg)
if tgid == 0 {
return 0, errors.New("failed to get thread group ID of new process")
@@ -446,7 +444,7 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa
l.mu.Lock()
defer l.mu.Unlock()
- l.containerRootTGIDs[args.CID] = tgid
+ l.containerRootTGIDs[cid] = tgid
return tgid, nil
}
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index 18e95284b..c45784749 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -36,7 +36,6 @@ go_library(
"//pkg/p9",
"//pkg/sentry/control",
"//pkg/sentry/kernel/auth",
- "//pkg/unet",
"//pkg/urpc",
"//runsc/boot",
"//runsc/container",
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 55315c0e8..ed4b1d29c 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -16,7 +16,6 @@ package cmd
import (
"os"
- "sync"
"syscall"
"context"
@@ -25,7 +24,6 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/unet"
"gvisor.googlesource.com/gvisor/runsc/fsgofer"
"gvisor.googlesource.com/gvisor/runsc/specutils"
)
@@ -36,6 +34,10 @@ type Gofer struct {
bundleDir string
ioFDs intFlags
applyCaps bool
+
+ // controllerFD is the file descriptor of a stream socket for the
+ // control server that is donated to this process.
+ controllerFD int
}
// Name implements subcommands.Command.
@@ -58,11 +60,12 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) {
f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory")
f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. They must follow this order: root first, then mounts as defined in the spec")
f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do")
+ f.IntVar(&g.controllerFD, "controller-fd", -1, "required FD of a stream socket for the control server that must be donated to this process")
}
// Execute implements subcommands.Command.
func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- if g.bundleDir == "" || len(g.ioFDs) < 1 {
+ if g.bundleDir == "" || len(g.ioFDs) < 1 || g.controllerFD == -1 {
f.Usage()
return subcommands.ExitUsageError
}
@@ -134,29 +137,14 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("Too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs))
}
- runServers(ats, g.ioFDs)
- return subcommands.ExitSuccess
-}
+ ctrl, err := fsgofer.NewController(g.controllerFD, g.bundleDir)
+ if err != nil {
+ Fatalf("error creating control server: %v", err)
+ }
-func runServers(ats []p9.Attacher, ioFDs []int) {
- // Run the loops and wait for all to exit.
- var wg sync.WaitGroup
- for i, ioFD := range ioFDs {
- wg.Add(1)
- go func(ioFD int, at p9.Attacher) {
- socket, err := unet.NewSocket(ioFD)
- if err != nil {
- Fatalf("err creating server on FD %d: %v", ioFD, err)
- }
- s := p9.NewServer(at)
- if err := s.Handle(socket); err != nil {
- Fatalf("P9 server returned error. Gofer is shutting down. FD: %d, err: %v", ioFD, err)
- }
- wg.Done()
- }(ioFD, ats[i])
+ if err := ctrl.Serve(ats, g.ioFDs); err != nil {
+ Fatalf("Failed to serve via P9: %v", err)
}
- wg.Wait()
- log.Infof("All 9P servers exited.")
+ ctrl.Wait()
+
+ return subcommands.ExitSuccess
}
func isReadonlyMount(opts []string) bool {
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 574075b00..da2ce0d25 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -249,6 +249,13 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
return nil, err
}
c.Sandbox = sb.Sandbox
+
+ // Prepare the gofer to serve the container's filesystem.
+ err = sb.Sandbox.CreateChild(c.ID, bundleDir)
+ if err != nil {
+ c.Destroy()
+ return nil, err
+ }
}
c.Status = Created
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 24e172f48..0bc682b5f 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -5,6 +5,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "fsgofer",
srcs = [
+ "control.go",
"fsgofer.go",
"fsgofer_unsafe.go",
],
@@ -14,9 +15,12 @@ go_library(
],
deps = [
"//pkg/abi/linux",
+ "//pkg/control/server",
"//pkg/fd",
"//pkg/log",
"//pkg/p9",
+ "//pkg/unet",
+ "//pkg/urpc",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/fsgofer/control.go b/runsc/fsgofer/control.go
new file mode 100644
index 000000000..8ce8ee8a0
--- /dev/null
+++ b/runsc/fsgofer/control.go
@@ -0,0 +1,203 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsgofer
+
+import (
+ "fmt"
+ "path/filepath"
+ "sync"
+
+ "gvisor.googlesource.com/gvisor/pkg/control/server"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/p9"
+ "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.googlesource.com/gvisor/pkg/urpc"
+)
+
+// Controller manages the fsgofer's control server.
+type Controller struct {
+ // api holds the control server's URPC endpoints.
+ api api
+
+ // srv is the control server.
+ srv *server.Server
+}
+
+// NewController creates a new Controller and starts it listening.
+func NewController(fd int, rootBundleDir string) (*Controller, error) {
+ if !filepath.IsAbs(rootBundleDir) {
+ return nil, fmt.Errorf("NewController should receive an absolute bundle dir path, but got %q", rootBundleDir)
+ }
+
+ srv, err := server.CreateFromFD(fd)
+ if err != nil {
+ return nil, err
+ }
+
+ cr := &Controller{srv: srv}
+ cr.api.rootBundleDir = rootBundleDir
+ cr.api.bundleDirs = make(map[string]string)
+ srv.Register(&cr.api)
+
+ if err := srv.StartServing(); err != nil {
+ return nil, err
+ }
+
+ return cr, nil
+}
+
+// Wait waits for all the p9 servers to finish, then shuts down the control
+// server.
+func (cr *Controller) Wait() {
+ cr.api.p9wg.Wait()
+ cr.srv.Stop()
+ log.Infof("All 9P servers exited.")
+}
+
+// Serve starts serving each Attacher in ats via its corresponding file
+// descriptor in ioFDs.
+func (cr *Controller) Serve(ats []p9.Attacher, ioFDs []int) error {
+ if len(ats) != len(ioFDs) {
+ return fmt.Errorf("number of attach points does not match the number of IO FDs (%d and %d)", len(ats), len(ioFDs))
+ }
+ for i := range ats {
+ cr.api.serve(ats[i], ioFDs[i])
+ }
+ return nil
+}
+
+// api URPC methods.
+const (
+ // AddBundleDirs readies the gofer to serve from a new bundle
+ // directory. It should be called during runsc create.
+ AddBundleDirs = "api.AddBundleDirs"
+
+ // ServeDirectory serves a new directory via the fsgofer. It should be
+ // called during runsc start.
+ ServeDirectory = "api.ServeDirectory"
+)
+
+// api defines and implements the URPC endpoints for the gofer.
+type api struct {
+ // p9wg waits for all the goroutines serving the sentry via p9. When its
+ // counter is 0, the gofer is out of work and exits.
+ p9wg sync.WaitGroup
+
+ // bundleDirs maps from container ID to bundle directory for each
+ // container.
+ bundleDirs map[string]string
+
+ // rootBundleDir is the bundle directory of the root container.
+ rootBundleDir string
+}
+
+// AddBundleDirsRequest is the URPC argument to AddBundleDirs.
+type AddBundleDirsRequest struct {
+ // BundleDirs is a map of container IDs to bundle directories to add to
+ // the gofer.
+ BundleDirs map[string]string
+}
+
+// AddBundleDirs adds bundle directories for the gofer to serve.
+func (api *api) AddBundleDirs(req *AddBundleDirsRequest, _ *struct{}) error {
+ log.Debugf("fsgofer.AddBundleDirs")
+ for cid, bd := range req.BundleDirs {
+ if _, ok := api.bundleDirs[cid]; ok {
+ return fmt.Errorf("fsgofer already has a bundleDir for container %q", cid)
+ }
+ api.bundleDirs[cid] = bd
+ }
+ return nil
+}
+
+// ServeDirectoryRequest is the URPC argument to ServeDirectory.
+type ServeDirectoryRequest struct {
+ // Dir is the absolute path to a directory to be served to the sentry.
+ Dir string
+
+ // IsReadOnly specifies whether the directory should be served in
+ // read-only mode.
+ IsReadOnly bool
+
+ // CID is the container ID of the container that needs to serve a
+ // directory.
+ CID string
+
+ // FilePayload contains the socket over which the sentry will request
+ // files from Dir.
+ urpc.FilePayload
+}
+
+// ServeDirectory begins serving a directory via a file descriptor for the
+// sentry. Directories must be added via AddBundleDirsRequest before
+// ServeDirectory is called.
+func (api *api) ServeDirectory(req *ServeDirectoryRequest, _ *struct{}) error {
+ log.Debugf("fsgofer.ServeDirectory: %+v", req)
+
+ if req.Dir == "" {
+ return fmt.Errorf("ServeDirectory should receive a directory argument, but was empty")
+ }
+ if req.CID == "" {
+ return fmt.Errorf("ServeDirectory should receive a CID argument, but was empty")
+ }
+ // Prevent CIDs containing ".." from confusing the sentry when creating
+ // /containers/<cid> directory.
+ // TODO: Once we have multiple independent roots, this
+ // check won't be necessary.
+ if filepath.Clean(req.CID) != req.CID {
+ return fmt.Errorf("container ID shouldn't contain directory traversals such as \"..\": %q", req.CID)
+ }
+ if nFiles := len(req.FilePayload.Files); nFiles != 1 {
+ return fmt.Errorf("ServeDirectory should receive 1 file descriptor, but got %d", nFiles)
+ }
+
+ bd, ok := api.bundleDirs[req.CID]
+ if !ok {
+ // If there's no entry in bundleDirs for the container ID, this
+ // is the root container.
+ bd = api.rootBundleDir
+ }
+
+ // Relative paths are served relative to the bundle directory.
+ absDir := req.Dir
+ if !filepath.IsAbs(absDir) {
+ absDir = filepath.Join(bd, req.Dir)
+ }
+
+ // Create the attach point and start serving.
+ at := NewAttachPoint(absDir, Config{
+ ROMount: req.IsReadOnly,
+ LazyOpenForWrite: true,
+ })
+ api.serve(at, int(req.FilePayload.Files[0].Fd()))
+
+ return nil
+}
+
+// serve begins serving a directory via a file descriptor.
+func (api *api) serve(at p9.Attacher, ioFD int) {
+ api.p9wg.Add(1)
+ go func(ioFD int, at p9.Attacher) {
+ socket, err := unet.NewSocket(ioFD)
+ if err != nil {
+ panic(fmt.Sprintf("err creating server on FD %d: %v", ioFD, err))
+ }
+ s := p9.NewServer(at)
+ if err := s.Handle(socket); err != nil {
+ panic(fmt.Sprintf("P9 server returned error. Gofer is shutting down. FD: %d, err: %v", ioFD, err))
+ }
+ api.p9wg.Done()
+ }(ioFD, at)
+}
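
Together with the sandbox changes below, the intended sequence is: runsc create calls AddBundleDirs to register the child's bundle directory, and runsc start calls ServeDirectory with one end of a socket pair so the gofer starts a 9P server for the container's root filesystem on it. A hedged sketch of a caller driving this API over an existing urpc connection (prepareChildFS is an illustrative helper, not part of this change; connection setup and socket-pair creation are elided):

    package example // illustrative only

    import (
        "fmt"
        "os"

        "gvisor.googlesource.com/gvisor/pkg/urpc"
        "gvisor.googlesource.com/gvisor/runsc/fsgofer"
    )

    // prepareChildFS registers a child container's bundle directory with the
    // gofer, then asks the gofer to serve the container's root filesystem over
    // goferEnd. conn is an already-connected urpc client for the gofer's
    // control socket (see Sandbox.goferConnect in sandbox.go).
    func prepareChildFS(conn *urpc.Client, cid, bundleDir, rootPath string, readonly bool, goferEnd *os.File) error {
        add := fsgofer.AddBundleDirsRequest{
            BundleDirs: map[string]string{cid: bundleDir},
        }
        if err := conn.Call(fsgofer.AddBundleDirs, &add, nil); err != nil {
            return fmt.Errorf("registering bundle dir for container %q: %v", cid, err)
        }
        serve := fsgofer.ServeDirectoryRequest{
            Dir:         rootPath,
            IsReadOnly:  readonly,
            CID:         cid,
            FilePayload: urpc.FilePayload{Files: []*os.File{goferEnd}},
        }
        return conn.Call(fsgofer.ServeDirectory, &serve, nil)
    }
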
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 52cdc91a2..38263896a 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -66,6 +66,11 @@ func (f fileType) String() string {
return "unknown"
}
+// ControlSocketAddr generates an abstract unix socket name for the given id.
+func ControlSocketAddr(id string) string {
+ return fmt.Sprintf("\x00runsc-gofer.%s", id)
+}
+
// Config sets configuration options for each attach point.
type Config struct {
// ROMount is set to true if this is a readonly mount.
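
ControlSocketAddr in the hunk above names the gofer's control endpoint as a Linux abstract unix socket: the leading NUL byte means no node is created on the filesystem and the name is released automatically when the gofer exits. A tiny sketch that reproduces the naming scheme (the sandbox ID is made up):

    package main

    import "fmt"

    // controlSocketAddr mirrors fsgofer.ControlSocketAddr: the leading "\x00"
    // marks the name as a Linux abstract unix socket address.
    func controlSocketAddr(id string) string {
        return fmt.Sprintf("\x00runsc-gofer.%s", id)
    }

    func main() {
        fmt.Printf("%q\n", controlSocketAddr("abc123")) // "\x00runsc-gofer.abc123"
    }
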
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index a961c3cc7..cdacc5e22 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -21,6 +21,7 @@ go_library(
"//pkg/sentry/control",
"//pkg/urpc",
"//runsc/boot",
+ "//runsc/fsgofer",
"//runsc/specutils",
"@com_github_kr_pty//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 2b043d412..83cc94dc4 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -31,6 +31,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/control"
"gvisor.googlesource.com/gvisor/pkg/urpc"
"gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/fsgofer"
"gvisor.googlesource.com/gvisor/runsc/specutils"
)
@@ -84,7 +85,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// StartRoot starts running the root container process inside the sandbox.
func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error {
log.Debugf("Start root sandbox %q, pid: %d", s.ID, s.Pid)
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return err
}
@@ -104,21 +105,67 @@ func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error {
return nil
}
+// CreateChild creates a non-root container inside the sandbox.
+func (s *Sandbox) CreateChild(cid, bundleDir string) error {
+ log.Debugf("Create non-root container sandbox %q, pid: %d for container %q with bundle directory %q", s.ID, s.Pid, cid, bundleDir)
+
+ // Connect to the gofer and prepare it to serve from bundleDir for this
+ // container.
+ goferConn, err := s.goferConnect()
+ if err != nil {
+ return fmt.Errorf("couldn't connect to gofer: %v", err)
+ }
+ defer goferConn.Close()
+ goferReq := fsgofer.AddBundleDirsRequest{BundleDirs: map[string]string{cid: bundleDir}}
+ if err := goferConn.Call(fsgofer.AddBundleDirs, &goferReq, nil); err != nil {
+ return fmt.Errorf("error serving new filesystem for non-root container %v: %v", goferReq, err)
+ }
+
+ return nil
+}
+
// Start starts running a non-root container inside the sandbox.
func (s *Sandbox) Start(spec *specs.Spec, conf *boot.Config, cid string) error {
log.Debugf("Start non-root container sandbox %q, pid: %d", s.ID, s.Pid)
- conn, err := s.connect()
+
+ sandboxConn, err := s.sandboxConnect()
+ if err != nil {
+ return fmt.Errorf("couldn't connect to sandbox: %v", err)
+ }
+ defer sandboxConn.Close()
+ goferConn, err := s.goferConnect()
+ if err != nil {
+ return fmt.Errorf("couldn't connect to gofer: %v", err)
+ }
+ defer goferConn.Close()
+
+ // Create socket that connects the sandbox and gofer.
+ sandEnd, goferEnd, err := createSocketPair()
if err != nil {
return err
}
- defer conn.Close()
+ defer sandEnd.Close()
+ defer goferEnd.Close()
+
+ // Tell the Gofer about the new filesystem it needs to serve.
+ goferReq := fsgofer.ServeDirectoryRequest{
+ Dir: spec.Root.Path,
+ IsReadOnly: spec.Root.Readonly,
+ CID: cid,
+ FilePayload: urpc.FilePayload{Files: []*os.File{goferEnd}},
+ }
+ if err := goferConn.Call(fsgofer.ServeDirectory, &goferReq, nil); err != nil {
+ return fmt.Errorf("error serving new filesystem for non-root container %v: %v", goferReq, err)
+ }
+ // Start running the container.
args := boot.StartArgs{
- Spec: spec,
- Conf: conf,
- CID: cid,
+ Spec: spec,
+ Conf: conf,
+ CID: cid,
+ FilePayload: urpc.FilePayload{Files: []*os.File{sandEnd}},
}
- if err := conn.Call(boot.ContainerStart, args, nil); err != nil {
+ if err := sandboxConn.Call(boot.ContainerStart, &args, nil); err != nil {
return fmt.Errorf("error starting non-root container %v: %v", spec.Process.Args, err)
}
@@ -142,7 +189,7 @@ func (s *Sandbox) Restore(cid string, spec *specs.Spec, conf *boot.Config, f str
SandboxID: s.ID,
}
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return err
}
@@ -165,7 +212,7 @@ func (s *Sandbox) Restore(cid string, spec *specs.Spec, conf *boot.Config, f str
// given container in this sandbox.
func (s *Sandbox) Processes(cid string) ([]*control.Process, error) {
log.Debugf("Getting processes for container %q in sandbox %q", cid, s.ID)
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return nil, err
}
@@ -183,7 +230,7 @@ func (s *Sandbox) Processes(cid string) ([]*control.Process, error) {
// Execute runs the specified command in the container.
func (s *Sandbox) Execute(cid string, e *control.ExecArgs) (syscall.WaitStatus, error) {
log.Debugf("Executing new process in container %q in sandbox %q", cid, s.ID)
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return 0, s.connError(err)
}
@@ -203,7 +250,7 @@ func (s *Sandbox) Execute(cid string, e *control.ExecArgs) (syscall.WaitStatus,
// Event retrieves stats about the sandbox such as memory and CPU utilization.
func (s *Sandbox) Event(cid string) (*boot.Event, error) {
log.Debugf("Getting events for container %q in sandbox %q", cid, s.ID)
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return nil, err
}
@@ -219,7 +266,7 @@ func (s *Sandbox) Event(cid string) (*boot.Event, error) {
return &e, nil
}
-func (s *Sandbox) connect() (*urpc.Client, error) {
+func (s *Sandbox) sandboxConnect() (*urpc.Client, error) {
log.Debugf("Connecting to sandbox %q", s.ID)
conn, err := client.ConnectTo(boot.ControlSocketAddr(s.ID))
if err != nil {
@@ -228,6 +275,15 @@ func (s *Sandbox) connect() (*urpc.Client, error) {
return conn, nil
}
+func (s *Sandbox) goferConnect() (*urpc.Client, error) {
+ log.Debugf("Connecting to gofer for sandbox %q", s.ID)
+ conn, err := client.ConnectTo(fsgofer.ControlSocketAddr(s.ID))
+ if err != nil {
+ return nil, s.connError(err)
+ }
+ return conn, nil
+}
+
func (s *Sandbox) connError(err error) error {
return fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err)
}
@@ -244,31 +300,45 @@ func (s *Sandbox) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundle
// Add root mount and then add any other additional mounts.
mountCount := 1
+
+ // Add additional mounts.
for _, m := range spec.Mounts {
if specutils.Is9PMount(m) {
mountCount++
}
}
-
sandEnds := make([]*os.File, 0, mountCount)
goferEnds := make([]*os.File, 0, mountCount)
- for i := 0; i < mountCount; i++ {
- // Create socket that connects the sandbox and gofer.
- fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
+ // nextFD is the next available file descriptor for the gofer process.
+ // It starts at 3 because 0-2 are used by stdin/stdout/stderr.
+ var nextFD int
+ for nextFD = 3; nextFD-3 < mountCount; nextFD++ {
+ sandEnd, goferEnd, err := createSocketPair()
if err != nil {
return nil, err
}
- sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox io fd"))
-
- goferEnd := os.NewFile(uintptr(fds[1]), "gofer io fd")
defer goferEnd.Close()
+ sandEnds = append(sandEnds, sandEnd)
goferEnds = append(goferEnds, goferEnd)
+ args = append(args, fmt.Sprintf("--io-fds=%d", nextFD))
+ }
- args = append(args, fmt.Sprintf("--io-fds=%d", 3+i))
+ // Create and donate a file descriptor for the control server.
+ addr := fsgofer.ControlSocketAddr(s.ID)
+ serverFD, err := server.CreateSocket(addr)
+ if err != nil {
+ return nil, fmt.Errorf("error creating control server socket for sandbox %q: %v", s.ID, err)
}
+ // Add the control server fd.
+ args = append(args, "--controller-fd="+strconv.Itoa(nextFD))
+ nextFD++
+ controllerFile := os.NewFile(uintptr(serverFD), "gofer_control_socket_server")
+ defer controllerFile.Close()
+
cmd := exec.Command(binPath, args...)
cmd.ExtraFiles = goferEnds
+ cmd.ExtraFiles = append(cmd.ExtraFiles, controllerFile)
// Setup any uid/gid mappings, and create or join the configured user
// namespace so the gofer's view of the filesystem aligns with the
@@ -286,6 +356,15 @@ func (s *Sandbox) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundle
return sandEnds, nil
}
+// createSocketPair creates a pair of files wrapping a socket pair.
+func createSocketPair() (*os.File, *os.File, error) {
+ fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
+ if err != nil {
+ return nil, nil, err
+ }
+ return os.NewFile(uintptr(fds[0]), "sandbox io fd"), os.NewFile(uintptr(fds[1]), "gofer io fd"), nil
+}
+
// createSandboxProcess starts the sandbox as a subprocess by running the "boot"
// command, passing in the bundle dir.
func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, binPath string, ioFiles []*os.File) error {
@@ -296,7 +375,9 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// Create control server socket here and donate FD to child process because
// it may be in a different network namespace and won't be reachable from
// outside.
- fd, err := server.CreateSocket(boot.ControlSocketAddr(s.ID))
+ addr := boot.ControlSocketAddr(s.ID)
+ fd, err := server.CreateSocket(addr)
+ log.Infof("creating sandbox process with addr: %s", addr)
if err != nil {
return fmt.Errorf("error creating control server socket for sandbox %q: %v", s.ID, err)
}
@@ -438,7 +519,7 @@ func (s *Sandbox) waitForCreated(timeout time.Duration) error {
if err := specutils.WaitForReady(s.Pid, timeout, ready); err != nil {
return fmt.Errorf("unexpected error waiting for sandbox %q, err: %v", s.ID, err)
}
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return err
}
@@ -454,7 +535,7 @@ func (s *Sandbox) waitForCreated(timeout time.Duration) error {
func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID)
var ws syscall.WaitStatus
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return ws, err
}
@@ -471,7 +552,7 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
func (s *Sandbox) WaitPID(pid int32, cid string) (syscall.WaitStatus, error) {
log.Debugf("Waiting for PID %d in sandbox %q", pid, s.ID)
var ws syscall.WaitStatus
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return ws, err
}
@@ -536,7 +617,7 @@ func (s *Sandbox) Destroy() error {
// Signal sends the signal to a container in the sandbox.
func (s *Sandbox) Signal(cid string, sig syscall.Signal) error {
log.Debugf("Signal sandbox %q", s.ID)
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return err
}
@@ -556,7 +637,7 @@ func (s *Sandbox) Signal(cid string, sig syscall.Signal) error {
// The statefile will be written to f.
func (s *Sandbox) Checkpoint(cid string, f *os.File) error {
log.Debugf("Checkpoint sandbox %q", s.ID)
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return err
}
@@ -577,7 +658,7 @@ func (s *Sandbox) Checkpoint(cid string, f *os.File) error {
// Pause sends the pause call for a container in the sandbox.
func (s *Sandbox) Pause(cid string) error {
log.Debugf("Pause sandbox %q", s.ID)
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return err
}
@@ -592,7 +673,7 @@ func (s *Sandbox) Pause(cid string) error {
// Resume sends the resume call for a container in the sandbox.
func (s *Sandbox) Resume(cid string) error {
log.Debugf("Resume sandbox %q", s.ID)
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return err
}
@@ -630,7 +711,7 @@ func (s *Sandbox) IsRunning() bool {
// Stacks collects and returns all stacks for the sandbox.
func (s *Sandbox) Stacks() (string, error) {
log.Debugf("Stacks sandbox %q", s.ID)
- conn, err := s.connect()
+ conn, err := s.sandboxConnect()
if err != nil {
return "", err
}