diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/BUILD | 3 | ||||
-rw-r--r-- | runsc/boot/controller.go | 17 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 11 | ||||
-rw-r--r-- | runsc/boot/fs.go | 18 | ||||
-rw-r--r-- | runsc/boot/loader.go | 30 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 2 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 28 | ||||
-rw-r--r-- | runsc/cmd/boot.go | 12 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 18 | ||||
-rw-r--r-- | runsc/config/config.go | 24 | ||||
-rw-r--r-- | runsc/config/flags.go | 1 | ||||
-rw-r--r-- | runsc/container/container_test.go | 141 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 6 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 2 |
14 files changed, 192 insertions, 121 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index d51347fe1..c9d2b3eff 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -32,13 +32,13 @@ go_library( "//pkg/control/server", "//pkg/coverage", "//pkg/cpuid", + "//pkg/errors/linuxerr", "//pkg/eventchannel", "//pkg/fd", "//pkg/flipcall", "//pkg/fspath", "//pkg/log", "//pkg/memutil", - "//pkg/metric", "//pkg/rand", "//pkg/refs", "//pkg/refsvfs2", @@ -95,7 +95,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sentry/watchdog", "//pkg/sync", - "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/link/fdbased", "//pkg/tcpip/link/loopback", diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 9b270cbf2..132973e6b 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "os" + gtime "time" specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" @@ -165,8 +166,11 @@ func newController(fd int, l *Loader) (*controller, error) { return ctrl, nil } +// stopRPCTimeout is the time for clients to complete ongoing RPCs. +const stopRPCTimeout = 15 * gtime.Second + func (c *controller) stop() { - c.srv.Stop() + c.srv.Stop(stopRPCTimeout) } // containerManager manages sandbox containers. @@ -330,6 +334,11 @@ func (cm *containerManager) ExecuteAsync(args *control.ExecArgs, pid *int32) err // Checkpoint pauses a sandbox and saves its state. func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error { log.Debugf("containerManager.Checkpoint") + // TODO(gvisor.dev/issues/6243): save/restore not supported w/ hostinet + if cm.l.root.conf.Network == config.NetworkHost { + return errors.New("checkpoint not supported when using hostinet") + } + state := control.State{ Kernel: cm.l.k, Watchdog: cm.l.watchdog, @@ -340,6 +349,10 @@ func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error { // Pause suspends a container. func (cm *containerManager) Pause(_, _ *struct{}) error { log.Debugf("containerManager.Pause") + // TODO(gvisor.dev/issues/6243): save/restore not supported w/ hostinet + if cm.l.root.conf.Network == config.NetworkHost { + return errors.New("pause not supported when using hostinet") + } cm.l.k.Pause() return nil } @@ -439,7 +452,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Load the state. loadOpts := state.LoadOpts{Source: specFile} - if err := loadOpts.Load(ctx, k, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { + if err := loadOpts.Load(ctx, k, nil, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { return err } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 49b503f99..33e738efc 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -200,6 +200,12 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, + seccomp.EqualTo(unix.MAP_SHARED | unix.MAP_FIXED), + }, + { + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, seccomp.EqualTo(unix.MAP_PRIVATE), }, { @@ -265,7 +271,6 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.EqualTo(unix.MSG_DONTWAIT), - seccomp.EqualTo(0), }, }, unix.SYS_RESTART_SYSCALL: {}, @@ -454,6 +459,10 @@ func hostInetFilters() seccomp.SyscallRules { seccomp.MatchAny{}, seccomp.EqualTo(unix.TIOCINQ), }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(unix.SIOCGIFFLAGS), + }, }, unix.SYS_LISTEN: {}, unix.SYS_READV: {}, diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index bf4a41f77..7fce2b708 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -25,6 +25,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -41,7 +42,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/specutils" @@ -763,12 +763,10 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con p9FS := mustFindFilesystem("9p") opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) - if conf.OverlayfsStaleRead { - // We can't check for overlayfs here because sandbox is chroot'ed and gofer - // can only send mount options for specs.Mounts (specs.Root is missing - // Options field). So assume root is always on top of overlayfs. - opts = append(opts, "overlayfs_stale_read") - } + // We can't check for overlayfs here because sandbox is chroot'ed and gofer + // can only send mount options for specs.Mounts (specs.Root is missing + // Options field). So assume root is always on top of overlayfs. + opts = append(opts, "overlayfs_stale_read") rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil) if err != nil { @@ -1041,8 +1039,8 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mn maxTraversals := uint(0) tmp, err := mns.FindInode(ctx, root, root, "tmp", &maxTraversals) - switch err { - case nil: + switch { + case err == nil: // Found '/tmp' in filesystem, check if it's empty. defer tmp.DecRef(ctx) f, err := tmp.Inode.GetFile(ctx, tmp, fs.FileFlags{Read: true, Directory: true}) @@ -1063,7 +1061,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mn log.Infof("Mounting internal tmpfs on top of empty %q", "/tmp") fallthrough - case syserror.ENOENT: + case linuxerr.Equals(linuxerr.ENOENT, err): // No '/tmp' found (or fallthrough from above). Safe to mount internal // tmpfs. tmpMount := specs.Mount{ diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index b73ac101f..8d71d7447 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -34,11 +34,9 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/refsvfs2" - "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/fdimport" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -218,8 +216,6 @@ func New(args Args) (*Loader, error) { return nil, fmt.Errorf("setting up memory usage: %w", err) } - metric.CreateSentryMetrics() - // Is this a VFSv2 kernel? if args.Conf.VFS2 { kernel.VFS2Enabled = true @@ -282,19 +278,15 @@ func New(args Args) (*Loader, error) { } // Create timekeeper. - tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) - if err != nil { - return nil, fmt.Errorf("creating timekeeper: %w", err) - } + tk := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) tk.SetClocks(time.NewCalibratedClocks()) - k.SetTimekeeper(tk) if err := enableStrace(args.Conf); err != nil { return nil, fmt.Errorf("enabling strace: %w", err) } // Create root network namespace/stack. - netns, err := newRootNetworkNamespace(args.Conf, k, k) + netns, err := newRootNetworkNamespace(args.Conf, tk, k) if err != nil { return nil, fmt.Errorf("creating network: %w", err) } @@ -336,6 +328,7 @@ func New(args Args) (*Loader, error) { // to createVFS in order to mount (among other things) procfs. if err = k.Init(kernel.InitKernelArgs{ FeatureSet: cpuid.HostFeatureSet(), + Timekeeper: tk, RootUserNamespace: creds.UserNamespace, RootNetworkNamespace: netns, ApplicationCores: uint(args.NumCPU), @@ -967,10 +960,15 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { } args.Envv = envv } + args.PIDNamespace = tg.PIDNamespace() + + args.Limits, err = createLimitSet(l.root.spec) + if err != nil { + return 0, fmt.Errorf("creating limits: %w", err) + } // Start the process. proc := control.Proc{Kernel: l.k} - args.PIDNamespace = tg.PIDNamespace() newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args) if err != nil { return 0, err @@ -1224,7 +1222,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) if err == nil { // Send signal directly to the identified process. - return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(execTG, &linux.SignalInfo{Signo: signo}) } // The caller may be signaling a process not started directly via exec. @@ -1237,7 +1235,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er if tg.Leader().ContainerID() != cid { return fmt.Errorf("process %d belongs to a different container: %q", tgid, tg.Leader().ContainerID()) } - return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}) } // signalForegrondProcessGroup looks up foreground process group from the TTY @@ -1273,7 +1271,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s // No foreground process group has been set. Signal the // original thread group. log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid) - return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}) } // Send the signal to all processes in the process group. var lastErr error @@ -1281,7 +1279,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s if tg.ProcessGroup() != pg { continue } - if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil { + if err := l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}); err != nil { lastErr = err } } @@ -1296,7 +1294,7 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error { // sent to the entire container. l.k.Pause() defer l.k.Unpause() - return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo}) + return l.k.SendContainerSignal(cid, &linux.SignalInfo{Signo: signo}) } // threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 93c476971..b5e8d08a5 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -214,7 +214,7 @@ func doStartSignal(t *testing.T, vfsEnabled bool) { // We aren't going to wait on this application, so the control server // needs to be shut down manually. - defer l.ctrl.srv.Stop() + defer l.ctrl.srv.Stop(time.Hour) // Start a goroutine that calls WaitForStartSignal and writes to a // channel when it returns. diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 7be5176b0..346796d9c 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/devices/memdev" @@ -44,7 +45,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/specutils" ) @@ -210,12 +210,10 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c fd := c.fds.remove() data := p9MountData(fd, conf.FileAccess, true /* vfs2 */) - if conf.OverlayfsStaleRead { - // We can't check for overlayfs here because sandbox is chroot'ed and gofer - // can only send mount options for specs.Mounts (specs.Root is missing - // Options field). So assume root is always on top of overlayfs. - data = append(data, "overlayfs_stale_read") - } + // We can't check for overlayfs here because sandbox is chroot'ed and gofer + // can only send mount options for specs.Mounts (specs.Root is missing + // Options field). So assume root is always on top of overlayfs. + data = append(data, "overlayfs_stale_read") log.Infof("Mounting root over 9P, ioFD: %d", fd) opts := &vfs.MountOptions{ @@ -658,20 +656,20 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config Path: fspath.Parse("/tmp"), } fd, err := c.k.VFS().OpenAt(ctx, creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY}) - switch err { - case nil: + switch { + case err == nil: defer fd.DecRef(ctx) err := fd.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error { if dirent.Name != "." && dirent.Name != ".." { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } return nil })) - switch err { - case nil: + switch { + case err == nil: log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`) - case syserror.ENOTEMPTY: + case linuxerr.Equals(linuxerr.ENOTEMPTY, err): // If more than "." and ".." is found, skip internal tmpfs to prevent // hiding existing files. log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`) @@ -681,7 +679,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config } fallthrough - case syserror.ENOENT: + case linuxerr.Equals(linuxerr.ENOENT, err): // No '/tmp' found (or fallthrough from above). It's safe to mount internal // tmpfs. tmpMount := specs.Mount{ @@ -694,7 +692,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{mount: &tmpMount}) return err - case syserror.ENOTDIR: + case linuxerr.Equals(linuxerr.ENOTDIR, err): // Not a dir?! Let it be. return nil diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index a14249641..42c66fbcf 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -157,10 +157,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // we will read it again after the exec call. This works // because the ReadSpecFromFile function seeks to the beginning // of the file before reading. - if err := callSelfAsNobody(args); err != nil { - Fatalf("%v", err) - } - panic("callSelfAsNobody must never return success") + Fatalf("callSelfAsNobody(%v): %v", args, callSelfAsNobody(args)) + panic("unreachable") } } @@ -199,10 +197,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // we will read it again after the exec call. This works // because the ReadSpecFromFile function seeks to the beginning // of the file before reading. - if err := setCapsAndCallSelf(args, caps); err != nil { - Fatalf("%v", err) - } - panic("setCapsAndCallSelf must never return success") + Fatalf("setCapsAndCallSelf(%v, %v): %v", args, caps, setCapsAndCallSelf(args, caps)) + panic("unreachable") } // Read resolved mount list and replace the original one from the spec. diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 6a755ecb6..80da9c9a2 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -116,9 +116,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Note: minimal argument handling for the default case to keep it simple. args := os.Args args = append(args, "--apply-caps=false", "--setup-root=false") - if err := setCapsAndCallSelf(args, goferCaps); err != nil { - Fatalf("Unable to apply caps: %v", err) - } + Fatalf("setCapsAndCallSelf(%v, %v): %v", args, goferCaps, setCapsAndCallSelf(args, goferCaps)) panic("unreachable") } @@ -473,14 +471,12 @@ func adjustMountOptions(conf *config.Config, path string, opts []string) ([]stri rv := make([]string, len(opts)) copy(rv, opts) - if conf.OverlayfsStaleRead { - statfs := unix.Statfs_t{} - if err := unix.Statfs(path, &statfs); err != nil { - return nil, err - } - if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC { - rv = append(rv, "overlayfs_stale_read") - } + statfs := unix.Statfs_t{} + if err := unix.Statfs(path, &statfs); err != nil { + return nil, err + } + if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC { + rv = append(rv, "overlayfs_stale_read") } return rv, nil } diff --git a/runsc/config/config.go b/runsc/config/config.go index fa550ebf7..3d8c7a0ab 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -151,12 +151,6 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"` - // OverlayfsStaleRead instructs the sandbox to assume that the root mount - // is on a Linux overlayfs mount, which does not necessarily preserve - // coherence between read-only and subsequent writable file descriptors - // representing the "same" file. - OverlayfsStaleRead bool `flag:"overlayfs-stale-read"` - // CPUNumFromQuota sets CPU number count to available CPU quota, using // least integer value greater than or equal to quota. // @@ -245,14 +239,14 @@ func (f *FileAccessType) Get() interface{} { } // String implements flag.Value. -func (f *FileAccessType) String() string { - switch *f { +func (f FileAccessType) String() string { + switch f { case FileAccessShared: return "shared" case FileAccessExclusive: return "exclusive" } - panic(fmt.Sprintf("Invalid file access type %v", *f)) + panic(fmt.Sprintf("Invalid file access type %d", f)) } // NetworkType tells which network stack to use. @@ -294,8 +288,8 @@ func (n *NetworkType) Get() interface{} { } // String implements flag.Value. -func (n *NetworkType) String() string { - switch *n { +func (n NetworkType) String() string { + switch n { case NetworkSandbox: return "sandbox" case NetworkHost: @@ -303,7 +297,7 @@ func (n *NetworkType) String() string { case NetworkNone: return "none" } - panic(fmt.Sprintf("Invalid network type %v", *n)) + panic(fmt.Sprintf("Invalid network type %d", n)) } // QueueingDiscipline is used to specify the kind of Queueing Discipline to @@ -341,14 +335,14 @@ func (q *QueueingDiscipline) Get() interface{} { } // String implements flag.Value. -func (q *QueueingDiscipline) String() string { - switch *q { +func (q QueueingDiscipline) String() string { + switch q { case QDiscNone: return "none" case QDiscFIFO: return "fifo" } - panic(fmt.Sprintf("Invalid qdisc %v", *q)) + panic(fmt.Sprintf("Invalid qdisc %d", q)) } func leakModePtr(v refs.LeakMode) *refs.LeakMode { diff --git a/runsc/config/flags.go b/runsc/config/flags.go index c3dca2352..6f1b5927a 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -72,7 +72,6 @@ func RegisterFlags() { flag.Var(fileAccessTypePtr(FileAccessShared), "file-access-mounts", "specifies which filesystem validation to use for volumes other than the root mount: shared (default), exclusive.") flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") flag.Bool("verity", false, "specifies whether a verity file system will be mounted.") - flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.") flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.") diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 0e79877b7..249324c5a 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -47,6 +47,62 @@ import ( "gvisor.dev/gvisor/runsc/specutils" ) +func TestMain(m *testing.M) { + log.SetLevel(log.Debug) + flag.Parse() + if err := testutil.ConfigureExePath(); err != nil { + panic(err.Error()) + } + specutils.MaybeRunAsRoot() + os.Exit(m.Run()) +} + +func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { + args := &control.ExecArgs{ + Filename: name, + Argv: append([]string{name}, arg...), + } + return cont.executeSync(args) +} + +func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) { + r, w, err := os.Pipe() + if err != nil { + return nil, err + } + defer r.Close() + + args := &control.ExecArgs{ + Filename: name, + Argv: append([]string{name}, arg...), + FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}}, + } + ws, err := cont.executeSync(args) + w.Close() + if err != nil { + return nil, err + } + if ws != 0 { + return nil, fmt.Errorf("exec failed, status: %v", ws) + } + + out, err := ioutil.ReadAll(r) + return out, err +} + +// executeSync synchronously executes a new process. +func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) { + pid, err := c.Execute(args) + if err != nil { + return 0, fmt.Errorf("error executing: %v", err) + } + ws, err := c.WaitPID(pid) + if err != nil { + return 0, fmt.Errorf("error waiting: %v", err) + } + return ws, nil +} + // waitForProcessList waits for the given process list to show up in the container. func waitForProcessList(cont *Container, want []*control.Process) error { cb := func() error { @@ -2470,58 +2526,67 @@ func TestBindMountByOption(t *testing.T) { } } -func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { - args := &control.ExecArgs{ - Filename: name, - Argv: append([]string{name}, arg...), +// TestRlimits sets limit to number of open files and checks that the limit +// is propagated to the container. +func TestRlimits(t *testing.T) { + file, err := ioutil.TempFile(testutil.TmpDir(), "ulimit") + if err != nil { + t.Fatal(err) } - return cont.executeSync(args) -} + cmd := fmt.Sprintf("ulimit -n > %q", file.Name()) -func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) { - r, w, err := os.Pipe() - if err != nil { - return nil, err + spec := testutil.NewSpecWithArgs("sh", "-c", cmd) + spec.Process.Rlimits = []specs.POSIXRlimit{ + {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100}, } - defer r.Close() - args := &control.ExecArgs{ - Filename: name, - Argv: append([]string{name}, arg...), - FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}}, + conf := testutil.TestConfig(t) + if err := run(spec, conf); err != nil { + t.Fatalf("Error running container: %v", err) } - ws, err := cont.executeSync(args) - w.Close() + got, err := ioutil.ReadFile(file.Name()) if err != nil { - return nil, err + t.Fatal(err) } - if ws != 0 { - return nil, fmt.Errorf("exec failed, status: %v", ws) + if want := "100\n"; string(got) != want { + t.Errorf("ulimit result, got: %q, want: %q", got, want) } - - out, err := ioutil.ReadAll(r) - return out, err } -// executeSync synchronously executes a new process. -func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) { - pid, err := c.Execute(args) +// TestRlimitsExec sets limit to number of open files and checks that the limit +// is propagated to exec'd processes. +func TestRlimitsExec(t *testing.T) { + spec := testutil.NewSpecWithArgs("sleep", "100") + spec.Process.Rlimits = []specs.POSIXRlimit{ + {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100}, + } + + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { - return 0, fmt.Errorf("error executing: %v", err) + t.Fatalf("error setting up container: %v", err) } - ws, err := c.WaitPID(pid) + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { - return 0, fmt.Errorf("error waiting: %v", err) + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) } - return ws, nil -} -func TestMain(m *testing.M) { - log.SetLevel(log.Debug) - flag.Parse() - if err := testutil.ConfigureExePath(); err != nil { - panic(err.Error()) + got, err := executeCombinedOutput(cont, "/bin/sh", "-c", "ulimit -n") + if err != nil { + t.Fatal(err) + } + if want := "100\n"; string(got) != want { + t.Errorf("ulimit result, got: %q, want: %q", got, want) } - specutils.MaybeRunAsRoot() - os.Exit(m.Run()) } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 29e202b7d..f14cc7229 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -797,7 +797,13 @@ func (s *Sandbox) Wait(cid string) (unix.WaitStatus, error) { // Try the Wait RPC to the sandbox. var ws unix.WaitStatus err = conn.Call(boot.ContainerWait, &cid, &ws) + conn.Close() if err == nil { + if s.IsRootContainer(cid) { + if err := s.waitForStopped(); err != nil { + return unix.WaitStatus(0), err + } + } // It worked! return ws, nil } diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 11b476690..c228d6299 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -246,7 +246,7 @@ func Capabilities(enableRaw bool, specCaps *specs.LinuxCapabilities) (*auth.Task if caps.PermittedCaps, err = capsFromNames(specCaps.Permitted, skipSet); err != nil { return nil, err } - // TODO(nlacasse): Support ambient capabilities. + // TODO(gvisor.dev/issue/3166): Support ambient capabilities. } return &caps, nil } |