diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/controller.go | 51 | ||||
-rw-r--r-- | runsc/boot/fs.go | 12 | ||||
-rw-r--r-- | runsc/boot/loader.go | 53 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 8 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 39 |
5 files changed, 98 insertions, 65 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 3e5e4c22f..626a3816e 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -101,14 +101,13 @@ const ( // Profiling related commands (see pprof.go for more details). const ( - StartCPUProfile = "Profile.StartCPUProfile" - StopCPUProfile = "Profile.StopCPUProfile" - HeapProfile = "Profile.HeapProfile" - GoroutineProfile = "Profile.GoroutineProfile" - BlockProfile = "Profile.BlockProfile" - MutexProfile = "Profile.MutexProfile" - StartTrace = "Profile.StartTrace" - StopTrace = "Profile.StopTrace" + StartCPUProfile = "Profile.StartCPUProfile" + StopCPUProfile = "Profile.StopCPUProfile" + HeapProfile = "Profile.HeapProfile" + BlockProfile = "Profile.BlockProfile" + MutexProfile = "Profile.MutexProfile" + StartTrace = "Profile.StartTrace" + StopTrace = "Profile.StopTrace" ) // Logging related commands (see logging.go for more details). @@ -129,42 +128,52 @@ type controller struct { // manager holds the containerManager methods. manager *containerManager + + // pprop holds the profile instance if enabled. It may be nil. + pprof *control.Profile } // newController creates a new controller. The caller must call // controller.srv.StartServing() to start the controller. func newController(fd int, l *Loader) (*controller, error) { - srv, err := server.CreateFromFD(fd) + ctrl := &controller{} + var err error + ctrl.srv, err = server.CreateFromFD(fd) if err != nil { return nil, err } - manager := &containerManager{ + ctrl.manager = &containerManager{ startChan: make(chan struct{}), startResultChan: make(chan error), l: l, } - srv.Register(manager) + ctrl.srv.Register(ctrl.manager) if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok { net := &Network{ Stack: eps.Stack, } - srv.Register(net) + ctrl.srv.Register(net) } - srv.Register(&debug{}) - srv.Register(&control.Logging{}) + ctrl.srv.Register(&debug{}) + ctrl.srv.Register(&control.Logging{}) + if l.root.conf.ProfileEnable { - srv.Register(&control.Profile{ - Kernel: l.k, - }) + ctrl.pprof = &control.Profile{Kernel: l.k} + ctrl.srv.Register(ctrl.pprof) } - return &controller{ - srv: srv, - manager: manager, - }, nil + return ctrl, nil +} + +func (c *controller) stop() { + if c.pprof != nil { + // These are noop if there is nothing being profiled. + _ = c.pprof.StopCPUProfile(nil, nil) + _ = c.pprof.StopTrace(nil, nil) + } } // containerManager manages sandbox containers. diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 59639ba19..9dd5b0184 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -640,7 +640,7 @@ func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Confi func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace) error { root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) for _, m := range c.mounts { log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options) @@ -868,7 +868,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns if err != nil { return fmt.Errorf("can't find mount destination %q: %v", m.Destination, err) } - defer dirent.DecRef() + defer dirent.DecRef(ctx) if err := mns.Mount(ctx, dirent, inode); err != nil { return fmt.Errorf("mount %q error: %v", m.Destination, err) } @@ -889,12 +889,12 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun if err != nil { return fmt.Errorf("can't find mount destination %q: %v", mount.Destination, err) } - defer target.DecRef() + defer target.DecRef(ctx) // Take a ref on the inode that is about to be (re)-mounted. source.root.IncRef() if err := mns.Mount(ctx, target, source.root); err != nil { - source.root.DecRef() + source.root.DecRef(ctx) return fmt.Errorf("bind mount %q error: %v", mount.Destination, err) } @@ -997,12 +997,12 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M switch err { case nil: // Found '/tmp' in filesystem, check if it's empty. - defer tmp.DecRef() + defer tmp.DecRef(ctx) f, err := tmp.Inode.GetFile(ctx, tmp, fs.FileFlags{Read: true, Directory: true}) if err != nil { return err } - defer f.DecRef() + defer f.DecRef(ctx) serializer := &fs.CollectEntriesSerializer{} if err := f.Readdir(ctx, serializer); err != nil { return err diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 9cd9c5909..533b9c5e7 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -16,12 +16,12 @@ package boot import ( + "errors" "fmt" mrand "math/rand" "os" "runtime" "sync/atomic" - "syscall" gtime "time" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -187,7 +187,7 @@ type Args struct { } // make sure stdioFDs are always the same on initial start and on restore -const startingStdioFD = 64 +const startingStdioFD = 256 // New initializes a new kernel loader configured by spec. // New also handles setting up a kernel for restoring a container. @@ -346,7 +346,7 @@ func New(args Args) (*Loader, error) { if err != nil { return nil, fmt.Errorf("failed to create hostfs filesystem: %v", err) } - defer hostFilesystem.DecRef() + defer hostFilesystem.DecRef(k.SupervisorContext()) hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{}) if err != nil { return nil, fmt.Errorf("failed to create hostfs mount: %v", err) @@ -360,15 +360,20 @@ func New(args Args) (*Loader, error) { var stdioFDs []int newfd := startingStdioFD for _, fd := range args.StdioFDs { - err := syscall.Dup3(fd, newfd, syscall.O_CLOEXEC) + // Check that newfd is unused to avoid clobbering over it. + if _, err := unix.FcntlInt(uintptr(newfd), unix.F_GETFD, 0); !errors.Is(err, unix.EBADF) { + if err != nil { + return nil, fmt.Errorf("error checking for FD (%d) conflict: %w", newfd, err) + } + return nil, fmt.Errorf("unable to remap stdios, FD %d is already in use", newfd) + } + + err := unix.Dup3(fd, newfd, unix.O_CLOEXEC) if err != nil { return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err) } stdioFDs = append(stdioFDs, newfd) - err = syscall.Close(fd) - if err != nil { - return nil, fmt.Errorf("close original stdioFDs failed: %v", err) - } + _ = unix.Close(fd) newfd++ } @@ -458,6 +463,11 @@ func (l *Loader) Destroy() { l.stopSignalForwarding() } l.watchdog.Stop() + + for i, fd := range l.root.stdioFDs { + _ = unix.Close(fd) + l.root.stdioFDs[i] = -1 + } } func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) { @@ -591,11 +601,9 @@ func (l *Loader) run() error { // during restore, we can release l.stdioFDs now. VFS2 takes ownership of the // passed FDs, so only close for VFS1. if !kernel.VFS2Enabled { - for _, fd := range l.root.stdioFDs { - err := syscall.Close(fd) - if err != nil { - return fmt.Errorf("close dup()ed stdioFDs: %v", err) - } + for i, fd := range l.root.stdioFDs { + _ = unix.Close(fd) + l.root.stdioFDs[i] = -1 } } @@ -686,7 +694,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file // Can't take ownership away from os.File. dup them to get a new FDs. for _, f := range files[3:] { - fd, err := syscall.Dup(int(f.Fd())) + fd, err := unix.Dup(int(f.Fd())) if err != nil { return fmt.Errorf("failed to dup file: %v", err) } @@ -755,7 +763,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn return nil, fmt.Errorf("creating process: %v", err) } // CreateProcess takes a reference on FDTable if successful. - info.procArgs.FDTable.DecRef() + info.procArgs.FDTable.DecRef(ctx) // Set the foreground process group on the TTY to the global init process // group, since that is what we are about to start running. @@ -890,22 +898,20 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { // Add the HOME environment variable if it is not already set. if kernel.VFS2Enabled { - defer args.MountNamespaceVFS2.DecRef() - root := args.MountNamespaceVFS2.Root() - defer root.DecRef() ctx := vfs.WithRoot(l.k.SupervisorContext(), root) + defer args.MountNamespaceVFS2.DecRef(ctx) + defer root.DecRef(ctx) envv, err := user.MaybeAddExecUserHomeVFS2(ctx, args.MountNamespaceVFS2, args.KUID, args.Envv) if err != nil { return 0, err } args.Envv = envv } else { - defer args.MountNamespace.DecRef() - root := args.MountNamespace.Root() - defer root.DecRef() ctx := fs.WithRoot(l.k.SupervisorContext(), root) + defer args.MountNamespace.DecRef(ctx) + defer root.DecRef(ctx) envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) if err != nil { return 0, err @@ -1002,6 +1008,9 @@ func (l *Loader) WaitExit() kernel.ExitStatus { // Wait for container. l.k.WaitExited() + // Cleanup + l.ctrl.stop() + return l.k.GlobalInit().ExitStatus() } @@ -1263,7 +1272,7 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F fdTable := k.NewFDTable() ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs) if err != nil { - fdTable.DecRef() + fdTable.DecRef(ctx) return nil, nil, nil, err } return fdTable, ttyFile, ttyFileVFS2, nil diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 8e6fe57e1..aa3fdf96c 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -450,13 +450,13 @@ func TestCreateMountNamespace(t *testing.T) { } root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) for _, p := range tc.expectedPaths { maxTraversals := uint(0) if d, err := mns.FindInode(ctx, root, root, p, &maxTraversals); err != nil { t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err) } else { - d.DecRef() + d.DecRef(ctx) } } }) @@ -491,7 +491,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) { } root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) for _, p := range tc.expectedPaths { target := &vfs.PathOperation{ Root: root, @@ -502,7 +502,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) { if d, err := l.k.VFS().GetDentryAt(ctx, l.root.procArgs.Credentials, target, &vfs.GetDentryOptions{}); err != nil { t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err) } else { - d.DecRef() + d.DecRef(ctx) } } }) diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 9a1ed8e9e..e7d6035bb 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -37,6 +37,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -89,6 +90,12 @@ func registerFilesystems(k *kernel.Kernel) error { if err := ttydev.Register(vfsObj); err != nil { return fmt.Errorf("registering ttydev: %w", err) } + tunSupported := tundev.IsNetTunSupported(inet.StackFromContext(ctx)) + if tunSupported { + if err := tundev.Register(vfsObj); err != nil { + return fmt.Errorf("registering tundev: %v", err) + } + } if kernel.FUSEEnabled { if err := fuse.Register(vfsObj); err != nil { @@ -96,14 +103,11 @@ func registerFilesystems(k *kernel.Kernel) error { } } - if err := tundev.Register(vfsObj); err != nil { - return fmt.Errorf("registering tundev: %v", err) - } a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name) if err != nil { return fmt.Errorf("creating devtmpfs accessor: %w", err) } - defer a.Release() + defer a.Release(ctx) if err := a.UserspaceInit(ctx); err != nil { return fmt.Errorf("initializing userspace: %w", err) @@ -114,8 +118,10 @@ func registerFilesystems(k *kernel.Kernel) error { if err := ttydev.CreateDevtmpfsFiles(ctx, a); err != nil { return fmt.Errorf("creating ttydev devtmpfs files: %w", err) } - if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil { - return fmt.Errorf("creating tundev devtmpfs files: %v", err) + if tunSupported { + if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil { + return fmt.Errorf("creating tundev devtmpfs files: %v", err) + } } if kernel.FUSEEnabled { @@ -171,10 +177,19 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { fd := c.fds.remove() - opts := strings.Join(p9MountData(fd, conf.FileAccess, true /* vfs2 */), ",") + opts := p9MountData(fd, conf.FileAccess, true /* vfs2 */) + + if conf.OverlayfsStaleRead { + // We can't check for overlayfs here because sandbox is chroot'ed and gofer + // can only send mount options for specs.Mounts (specs.Root is missing + // Options field). So assume root is always on top of overlayfs. + opts = append(opts, "overlayfs_stale_read") + } log.Infof("Mounting root over 9P, ioFD: %d", fd) - mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts}) + mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{ + Data: strings.Join(opts, ","), + }) if err != nil { return nil, fmt.Errorf("setting up mount namespace: %w", err) } @@ -243,7 +258,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error { root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) target := &vfs.PathOperation{ Root: root, Start: root, @@ -378,7 +393,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds } root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) pop := vfs.PathOperation{ Root: root, Start: root, @@ -472,10 +487,10 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *Co if err != nil { return err } - defer newMnt.DecRef() + defer newMnt.DecRef(ctx) root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) if err := c.makeSyntheticMount(ctx, mount.Destination, root, creds); err != nil { return err } |