summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot')
-rw-r--r--runsc/boot/filter/config.go6
-rw-r--r--runsc/boot/loader.go44
-rw-r--r--runsc/boot/loader_test.go25
-rw-r--r--runsc/boot/vfs.go5
4 files changed, 47 insertions, 33 deletions
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 6ac19668f..a7c4ebb0c 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -162,6 +162,12 @@ var allowedSyscalls = seccomp.SyscallRules{
},
syscall.SYS_LSEEK: {},
syscall.SYS_MADVISE: {},
+ unix.SYS_MEMBARRIER: []seccomp.Rule{
+ {
+ seccomp.EqualTo(linux.MEMBARRIER_CMD_GLOBAL),
+ seccomp.EqualTo(0),
+ },
+ },
syscall.SYS_MINCORE: {},
// Used by the Go runtime as a temporarily workaround for a Linux
// 5.2-5.4 bug.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 2e652ddad..8ad000497 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -282,6 +282,7 @@ func New(args Args) (*Loader, error) {
args.NumCPU = runtime.NumCPU()
}
log.Infof("CPUs: %d", args.NumCPU)
+ runtime.GOMAXPROCS(args.NumCPU)
if args.TotalMem > 0 {
// Adjust the total memory returned by the Sentry so that applications that
@@ -471,9 +472,13 @@ func (l *Loader) Destroy() {
}
l.watchdog.Stop()
+ // Release all kernel resources. This is only safe after we can no longer
+ // save/restore.
+ l.k.Release()
+
// In the success case, stdioFDs and goferFDs will only contain
// released/closed FDs that ownership has been passed over to host FDs and
- // gofer sessions. Close them here in case on failure.
+ // gofer sessions. Close them here in case of failure.
for _, fd := range l.root.stdioFDs {
_ = fd.Close()
}
@@ -797,7 +802,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
}
// startGoferMonitor runs a goroutine to monitor gofer's health. It polls on
-// the gofer FDs looking for disconnects, and destroys the container if a
+// the gofer FDs looking for disconnects, and kills the container processes if a
// disconnect occurs in any of the gofer FDs.
func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) {
go func() {
@@ -818,18 +823,15 @@ func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) {
panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err))
}
- // Check if the gofer has stopped as part of normal container destruction.
- // This is done just to avoid sending an annoying error message to the log.
- // Note that there is a small race window in between mu.Unlock() and the
- // lock being reacquired in destroyContainer(), but it's harmless to call
- // destroyContainer() multiple times.
l.mu.Lock()
- _, ok := l.processes[execID{cid: cid}]
- l.mu.Unlock()
- if ok {
- log.Infof("Gofer socket disconnected, destroying container %q", cid)
- if err := l.destroyContainer(cid); err != nil {
- log.Warningf("Error destroying container %q after gofer stopped: %v", cid, err)
+ defer l.mu.Unlock()
+
+ // The gofer could have been stopped due to a normal container shutdown.
+ // Check if the container has not stopped yet.
+ if tg, _ := l.tryThreadGroupFromIDLocked(execID{cid: cid}); tg != nil {
+ log.Infof("Gofer socket disconnected, killing container %q", cid)
+ if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
+ log.Warningf("Error killing container %q after gofer stopped: %v", cid, err)
}
}
}()
@@ -898,17 +900,24 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
return 0, fmt.Errorf("container %q not started", args.ContainerID)
}
- // Get the container MountNamespace from the Task.
+ // Get the container MountNamespace from the Task. Try to acquire ref may fail
+ // in case it raced with task exit.
if kernel.VFS2Enabled {
- // task.MountNamespace() does not take a ref, so we must do so ourselves.
+ // task.MountNamespaceVFS2() does not take a ref, so we must do so ourselves.
args.MountNamespaceVFS2 = tg.Leader().MountNamespaceVFS2()
- args.MountNamespaceVFS2.IncRef()
+ if !args.MountNamespaceVFS2.TryIncRef() {
+ return 0, fmt.Errorf("container %q has stopped", args.ContainerID)
+ }
} else {
+ var reffed bool
tg.Leader().WithMuLocked(func(t *kernel.Task) {
// task.MountNamespace() does not take a ref, so we must do so ourselves.
args.MountNamespace = t.MountNamespace()
- args.MountNamespace.IncRef()
+ reffed = args.MountNamespace.TryIncRef()
})
+ if !reffed {
+ return 0, fmt.Errorf("container %q has stopped", args.ContainerID)
+ }
}
// Add the HOME environment variable if it is not already set.
@@ -916,7 +925,6 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
root := args.MountNamespaceVFS2.Root()
ctx := vfs.WithRoot(l.k.SupervisorContext(), root)
defer args.MountNamespaceVFS2.DecRef(ctx)
- defer root.DecRef(ctx)
envv, err := user.MaybeAddExecUserHomeVFS2(ctx, args.MountNamespaceVFS2, args.KUID, args.Envv)
if err != nil {
return 0, err
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index bf9ec5d38..e376f944b 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -264,7 +264,7 @@ type CreateMountTestcase struct {
expectedPaths []string
}
-func createMountTestcases(vfs2 bool) []*CreateMountTestcase {
+func createMountTestcases() []*CreateMountTestcase {
testCases := []*CreateMountTestcase{
&CreateMountTestcase{
// Only proc.
@@ -409,32 +409,26 @@ func createMountTestcases(vfs2 bool) []*CreateMountTestcase {
Destination: "/proc",
Type: "tmpfs",
},
- // TODO (gvisor.dev/issue/1487): Re-add this case when sysfs supports
- // MkDirAt in VFS2 (and remove the reduntant append).
- // {
- // Destination: "/sys/bar",
- // Type: "tmpfs",
- // },
- //
+ {
+ Destination: "/sys/bar",
+ Type: "tmpfs",
+ },
+
{
Destination: "/tmp/baz",
Type: "tmpfs",
},
},
},
- expectedPaths: []string{"/proc", "/sys" /* "/sys/bar" ,*/, "/tmp", "/tmp/baz"},
+ expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"},
}
- if !vfs2 {
- vfsCase.spec.Mounts = append(vfsCase.spec.Mounts, specs.Mount{Destination: "/sys/bar", Type: "tmpfs"})
- vfsCase.expectedPaths = append(vfsCase.expectedPaths, "/sys/bar")
- }
return append(testCases, vfsCase)
}
// Test that MountNamespace can be created with various specs.
func TestCreateMountNamespace(t *testing.T) {
- for _, tc := range createMountTestcases(false /* vfs2 */) {
+ for _, tc := range createMountTestcases() {
t.Run(tc.name, func(t *testing.T) {
conf := testConfig()
ctx := contexttest.Context(t)
@@ -471,7 +465,7 @@ func TestCreateMountNamespace(t *testing.T) {
// Test that MountNamespace can be created with various specs.
func TestCreateMountNamespaceVFS2(t *testing.T) {
- for _, tc := range createMountTestcases(true /* vfs2 */) {
+ for _, tc := range createMountTestcases() {
t.Run(tc.name, func(t *testing.T) {
spec := testSpec()
spec.Mounts = tc.spec.Mounts
@@ -497,6 +491,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) {
}
root := mns.Root()
+ root.IncRef()
defer root.DecRef(ctx)
for _, p := range tc.expectedPaths {
target := &vfs.PathOperation{
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index e36664938..82e459f46 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -170,6 +170,7 @@ func (c *containerMounter) mountAll(conf *config.Config, procArgs *kernel.Create
rootProcArgs.MountNamespaceVFS2 = mns
root := mns.Root()
+ root.IncRef()
defer root.DecRef(rootCtx)
if root.Mount().ReadOnly() {
// Switch to ReadWrite while we setup submounts.
@@ -377,6 +378,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C
}
root := mns.Root()
+ root.IncRef()
defer root.DecRef(ctx)
target := &vfs.PathOperation{
Root: root,
@@ -474,6 +476,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config
}
root := mns.Root()
+ root.IncRef()
defer root.DecRef(ctx)
pop := vfs.PathOperation{
Root: root,
@@ -597,6 +600,7 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co
defer newMnt.DecRef(ctx)
root := mns.Root()
+ root.IncRef()
defer root.DecRef(ctx)
target := &vfs.PathOperation{
Root: root,
@@ -617,6 +621,7 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co
func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, dest string) error {
root := mns.Root()
+ root.IncRef()
defer root.DecRef(ctx)
target := &vfs.PathOperation{
Root: root,