diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/filter/config.go | 6 | ||||
-rw-r--r-- | runsc/boot/loader.go | 44 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 25 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 5 |
4 files changed, 47 insertions, 33 deletions
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 6ac19668f..a7c4ebb0c 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -162,6 +162,12 @@ var allowedSyscalls = seccomp.SyscallRules{ }, syscall.SYS_LSEEK: {}, syscall.SYS_MADVISE: {}, + unix.SYS_MEMBARRIER: []seccomp.Rule{ + { + seccomp.EqualTo(linux.MEMBARRIER_CMD_GLOBAL), + seccomp.EqualTo(0), + }, + }, syscall.SYS_MINCORE: {}, // Used by the Go runtime as a temporarily workaround for a Linux // 5.2-5.4 bug. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 2e652ddad..8ad000497 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -282,6 +282,7 @@ func New(args Args) (*Loader, error) { args.NumCPU = runtime.NumCPU() } log.Infof("CPUs: %d", args.NumCPU) + runtime.GOMAXPROCS(args.NumCPU) if args.TotalMem > 0 { // Adjust the total memory returned by the Sentry so that applications that @@ -471,9 +472,13 @@ func (l *Loader) Destroy() { } l.watchdog.Stop() + // Release all kernel resources. This is only safe after we can no longer + // save/restore. + l.k.Release() + // In the success case, stdioFDs and goferFDs will only contain // released/closed FDs that ownership has been passed over to host FDs and - // gofer sessions. Close them here in case on failure. + // gofer sessions. Close them here in case of failure. for _, fd := range l.root.stdioFDs { _ = fd.Close() } @@ -797,7 +802,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn } // startGoferMonitor runs a goroutine to monitor gofer's health. It polls on -// the gofer FDs looking for disconnects, and destroys the container if a +// the gofer FDs looking for disconnects, and kills the container processes if a // disconnect occurs in any of the gofer FDs. func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) { go func() { @@ -818,18 +823,15 @@ func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) { panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err)) } - // Check if the gofer has stopped as part of normal container destruction. - // This is done just to avoid sending an annoying error message to the log. - // Note that there is a small race window in between mu.Unlock() and the - // lock being reacquired in destroyContainer(), but it's harmless to call - // destroyContainer() multiple times. l.mu.Lock() - _, ok := l.processes[execID{cid: cid}] - l.mu.Unlock() - if ok { - log.Infof("Gofer socket disconnected, destroying container %q", cid) - if err := l.destroyContainer(cid); err != nil { - log.Warningf("Error destroying container %q after gofer stopped: %v", cid, err) + defer l.mu.Unlock() + + // The gofer could have been stopped due to a normal container shutdown. + // Check if the container has not stopped yet. + if tg, _ := l.tryThreadGroupFromIDLocked(execID{cid: cid}); tg != nil { + log.Infof("Gofer socket disconnected, killing container %q", cid) + if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil { + log.Warningf("Error killing container %q after gofer stopped: %v", cid, err) } } }() @@ -898,17 +900,24 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { return 0, fmt.Errorf("container %q not started", args.ContainerID) } - // Get the container MountNamespace from the Task. + // Get the container MountNamespace from the Task. Try to acquire ref may fail + // in case it raced with task exit. if kernel.VFS2Enabled { - // task.MountNamespace() does not take a ref, so we must do so ourselves. + // task.MountNamespaceVFS2() does not take a ref, so we must do so ourselves. args.MountNamespaceVFS2 = tg.Leader().MountNamespaceVFS2() - args.MountNamespaceVFS2.IncRef() + if !args.MountNamespaceVFS2.TryIncRef() { + return 0, fmt.Errorf("container %q has stopped", args.ContainerID) + } } else { + var reffed bool tg.Leader().WithMuLocked(func(t *kernel.Task) { // task.MountNamespace() does not take a ref, so we must do so ourselves. args.MountNamespace = t.MountNamespace() - args.MountNamespace.IncRef() + reffed = args.MountNamespace.TryIncRef() }) + if !reffed { + return 0, fmt.Errorf("container %q has stopped", args.ContainerID) + } } // Add the HOME environment variable if it is not already set. @@ -916,7 +925,6 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { root := args.MountNamespaceVFS2.Root() ctx := vfs.WithRoot(l.k.SupervisorContext(), root) defer args.MountNamespaceVFS2.DecRef(ctx) - defer root.DecRef(ctx) envv, err := user.MaybeAddExecUserHomeVFS2(ctx, args.MountNamespaceVFS2, args.KUID, args.Envv) if err != nil { return 0, err diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index bf9ec5d38..e376f944b 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -264,7 +264,7 @@ type CreateMountTestcase struct { expectedPaths []string } -func createMountTestcases(vfs2 bool) []*CreateMountTestcase { +func createMountTestcases() []*CreateMountTestcase { testCases := []*CreateMountTestcase{ &CreateMountTestcase{ // Only proc. @@ -409,32 +409,26 @@ func createMountTestcases(vfs2 bool) []*CreateMountTestcase { Destination: "/proc", Type: "tmpfs", }, - // TODO (gvisor.dev/issue/1487): Re-add this case when sysfs supports - // MkDirAt in VFS2 (and remove the reduntant append). - // { - // Destination: "/sys/bar", - // Type: "tmpfs", - // }, - // + { + Destination: "/sys/bar", + Type: "tmpfs", + }, + { Destination: "/tmp/baz", Type: "tmpfs", }, }, }, - expectedPaths: []string{"/proc", "/sys" /* "/sys/bar" ,*/, "/tmp", "/tmp/baz"}, + expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"}, } - if !vfs2 { - vfsCase.spec.Mounts = append(vfsCase.spec.Mounts, specs.Mount{Destination: "/sys/bar", Type: "tmpfs"}) - vfsCase.expectedPaths = append(vfsCase.expectedPaths, "/sys/bar") - } return append(testCases, vfsCase) } // Test that MountNamespace can be created with various specs. func TestCreateMountNamespace(t *testing.T) { - for _, tc := range createMountTestcases(false /* vfs2 */) { + for _, tc := range createMountTestcases() { t.Run(tc.name, func(t *testing.T) { conf := testConfig() ctx := contexttest.Context(t) @@ -471,7 +465,7 @@ func TestCreateMountNamespace(t *testing.T) { // Test that MountNamespace can be created with various specs. func TestCreateMountNamespaceVFS2(t *testing.T) { - for _, tc := range createMountTestcases(true /* vfs2 */) { + for _, tc := range createMountTestcases() { t.Run(tc.name, func(t *testing.T) { spec := testSpec() spec.Mounts = tc.spec.Mounts @@ -497,6 +491,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) { } root := mns.Root() + root.IncRef() defer root.DecRef(ctx) for _, p := range tc.expectedPaths { target := &vfs.PathOperation{ diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index e36664938..82e459f46 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -170,6 +170,7 @@ func (c *containerMounter) mountAll(conf *config.Config, procArgs *kernel.Create rootProcArgs.MountNamespaceVFS2 = mns root := mns.Root() + root.IncRef() defer root.DecRef(rootCtx) if root.Mount().ReadOnly() { // Switch to ReadWrite while we setup submounts. @@ -377,6 +378,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C } root := mns.Root() + root.IncRef() defer root.DecRef(ctx) target := &vfs.PathOperation{ Root: root, @@ -474,6 +476,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config } root := mns.Root() + root.IncRef() defer root.DecRef(ctx) pop := vfs.PathOperation{ Root: root, @@ -597,6 +600,7 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co defer newMnt.DecRef(ctx) root := mns.Root() + root.IncRef() defer root.DecRef(ctx) target := &vfs.PathOperation{ Root: root, @@ -617,6 +621,7 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, dest string) error { root := mns.Root() + root.IncRef() defer root.DecRef(ctx) target := &vfs.PathOperation{ Root: root, |