diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/BUILD | 1 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 7 | ||||
-rw-r--r-- | runsc/boot/fs.go | 10 | ||||
-rw-r--r-- | runsc/boot/loader.go | 143 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 11 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 14 | ||||
-rw-r--r-- | runsc/config/config.go | 24 | ||||
-rw-r--r-- | runsc/config/flags.go | 1 | ||||
-rw-r--r-- | runsc/container/container_test.go | 141 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 5 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 2 |
11 files changed, 208 insertions, 151 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index d51347fe1..a79afbdc4 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -38,7 +38,6 @@ go_library( "//pkg/fspath", "//pkg/log", "//pkg/memutil", - "//pkg/metric", "//pkg/rand", "//pkg/refs", "//pkg/refsvfs2", diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 49b503f99..752fea0e1 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -200,6 +200,12 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, + seccomp.EqualTo(unix.MAP_SHARED | unix.MAP_FIXED), + }, + { + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, seccomp.EqualTo(unix.MAP_PRIVATE), }, { @@ -265,7 +271,6 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.EqualTo(unix.MSG_DONTWAIT), - seccomp.EqualTo(0), }, }, unix.SYS_RESTART_SYSCALL: {}, diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index bf4a41f77..c4590aab1 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -763,12 +763,10 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con p9FS := mustFindFilesystem("9p") opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) - if conf.OverlayfsStaleRead { - // We can't check for overlayfs here because sandbox is chroot'ed and gofer - // can only send mount options for specs.Mounts (specs.Root is missing - // Options field). So assume root is always on top of overlayfs. - opts = append(opts, "overlayfs_stale_read") - } + // We can't check for overlayfs here because sandbox is chroot'ed and gofer + // can only send mount options for specs.Mounts (specs.Root is missing + // Options field). So assume root is always on top of overlayfs. + opts = append(opts, "overlayfs_stale_read") rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil) if err != nil { diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 10f2d3d35..ad4d50008 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -34,11 +34,9 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/refsvfs2" - "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/fdimport" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -211,15 +209,13 @@ func New(args Args) (*Loader, error) { // We initialize the rand package now to make sure /dev/urandom is pre-opened // on kernels that do not support getrandom(2). if err := rand.Init(); err != nil { - return nil, fmt.Errorf("setting up rand: %v", err) + return nil, fmt.Errorf("setting up rand: %w", err) } if err := usage.Init(); err != nil { - return nil, fmt.Errorf("setting up memory usage: %v", err) + return nil, fmt.Errorf("setting up memory usage: %w", err) } - metric.CreateSentryMetrics() - // Is this a VFSv2 kernel? if args.Conf.VFS2 { kernel.VFS2Enabled = true @@ -260,7 +256,7 @@ func New(args Args) (*Loader, error) { // Create kernel and platform. p, err := createPlatform(args.Conf, args.Device) if err != nil { - return nil, fmt.Errorf("creating platform: %v", err) + return nil, fmt.Errorf("creating platform: %w", err) } k := &kernel.Kernel{ Platform: p, @@ -269,7 +265,7 @@ func New(args Args) (*Loader, error) { // Create memory file. mf, err := createMemoryFile() if err != nil { - return nil, fmt.Errorf("creating memory file: %v", err) + return nil, fmt.Errorf("creating memory file: %w", err) } k.SetMemoryFile(mf) @@ -278,30 +274,31 @@ func New(args Args) (*Loader, error) { // Pass k as the platform since it is savable, unlike the actual platform. vdso, err := loader.PrepareVDSO(k) if err != nil { - return nil, fmt.Errorf("creating vdso: %v", err) + return nil, fmt.Errorf("creating vdso: %w", err) } // Create timekeeper. tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) if err != nil { - return nil, fmt.Errorf("creating timekeeper: %v", err) + return nil, fmt.Errorf("creating timekeeper: %w", err) } tk.SetClocks(time.NewCalibratedClocks()) + k.SetTimekeeper(tk) if err := enableStrace(args.Conf); err != nil { - return nil, fmt.Errorf("enabling strace: %v", err) + return nil, fmt.Errorf("enabling strace: %w", err) } // Create root network namespace/stack. netns, err := newRootNetworkNamespace(args.Conf, k, k) if err != nil { - return nil, fmt.Errorf("creating network: %v", err) + return nil, fmt.Errorf("creating network: %w", err) } // Create capabilities. caps, err := specutils.Capabilities(args.Conf.EnableRaw, args.Spec.Process.Capabilities) if err != nil { - return nil, fmt.Errorf("converting capabilities: %v", err) + return nil, fmt.Errorf("converting capabilities: %w", err) } // Convert the spec's additional GIDs to KGIDs. @@ -335,7 +332,6 @@ func New(args Args) (*Loader, error) { // to createVFS in order to mount (among other things) procfs. if err = k.Init(kernel.InitKernelArgs{ FeatureSet: cpuid.HostFeatureSet(), - Timekeeper: tk, RootUserNamespace: creds.UserNamespace, RootNetworkNamespace: netns, ApplicationCores: uint(args.NumCPU), @@ -345,7 +341,7 @@ func New(args Args) (*Loader, error) { RootAbstractSocketNamespace: kernel.NewAbstractSocketNamespace(), PIDNamespace: kernel.NewRootPIDNamespace(creds.UserNamespace), }); err != nil { - return nil, fmt.Errorf("initializing kernel: %v", err) + return nil, fmt.Errorf("initializing kernel: %w", err) } if kernel.VFS2Enabled { @@ -374,17 +370,17 @@ func New(args Args) (*Loader, error) { procArgs, err := createProcessArgs(args.ID, args.Spec, creds, k, k.RootPIDNamespace()) if err != nil { - return nil, fmt.Errorf("creating init process for root container: %v", err) + return nil, fmt.Errorf("creating init process for root container: %w", err) } info.procArgs = procArgs if err := initCompatLogs(args.UserLogFD); err != nil { - return nil, fmt.Errorf("initializing compat logs: %v", err) + return nil, fmt.Errorf("initializing compat logs: %w", err) } mountHints, err := newPodMountHints(args.Spec) if err != nil { - return nil, fmt.Errorf("creating pod mount hints: %v", err) + return nil, fmt.Errorf("creating pod mount hints: %w", err) } info.conf = args.Conf @@ -394,12 +390,12 @@ func New(args Args) (*Loader, error) { // Set up host mount that will be used for imported fds. hostFilesystem, err := hostvfs2.NewFilesystem(k.VFS()) if err != nil { - return nil, fmt.Errorf("failed to create hostfs filesystem: %v", err) + return nil, fmt.Errorf("failed to create hostfs filesystem: %w", err) } defer hostFilesystem.DecRef(k.SupervisorContext()) hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{}) if err != nil { - return nil, fmt.Errorf("failed to create hostfs mount: %v", err) + return nil, fmt.Errorf("failed to create hostfs mount: %w", err) } k.SetHostMount(hostMount) } @@ -417,7 +413,7 @@ func New(args Args) (*Loader, error) { // We don't care about child signals; some platforms can generate a // tremendous number of useless ones (I'm looking at you, ptrace). if err := sighandling.IgnoreChildStop(); err != nil { - return nil, fmt.Errorf("ignore child stop signals failed: %v", err) + return nil, fmt.Errorf("ignore child stop signals failed: %w", err) } // Create the control server using the provided FD. @@ -426,14 +422,14 @@ func New(args Args) (*Loader, error) { // controller is used to configure the kernel's network stack. ctrl, err := newController(args.ControllerFD, l) if err != nil { - return nil, fmt.Errorf("creating control server: %v", err) + return nil, fmt.Errorf("creating control server: %w", err) } l.ctrl = ctrl // Only start serving after Loader is set to controller and controller is set // to Loader, because they are both used in the urpc methods. if err := ctrl.srv.StartServing(); err != nil { - return nil, fmt.Errorf("starting control server: %v", err) + return nil, fmt.Errorf("starting control server: %w", err) } return l, nil @@ -444,7 +440,7 @@ func createProcessArgs(id string, spec *specs.Spec, creds *auth.Credentials, k * // Create initial limits. ls, err := createLimitSet(spec) if err != nil { - return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %v", err) + return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %w", err) } env, err := specutils.ResolveEnvs(spec.Process.Env) if err != nil { @@ -498,18 +494,18 @@ func (l *Loader) Destroy() { // In the success case, stdioFDs and goferFDs will only contain // released/closed FDs that ownership has been passed over to host FDs and // gofer sessions. Close them here in case of failure. - for _, fd := range l.root.stdioFDs { - _ = fd.Close() + for _, f := range l.root.stdioFDs { + _ = f.Close() } - for _, fd := range l.root.goferFDs { - _ = fd.Close() + for _, f := range l.root.goferFDs { + _ = f.Close() } } func createPlatform(conf *config.Config, deviceFile *os.File) (platform.Platform, error) { p, err := platform.Lookup(conf.Platform) if err != nil { - panic(fmt.Sprintf("invalid platform %v: %v", conf.Platform, err)) + panic(fmt.Sprintf("invalid platform %s: %s", conf.Platform, err)) } log.Infof("Platform: %s", conf.Platform) return p.New(deviceFile) @@ -519,7 +515,7 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { const memfileName = "runsc-memory" memfd, err := memutil.CreateMemFD(memfileName, 0) if err != nil { - return nil, fmt.Errorf("error creating memfd: %v", err) + return nil, fmt.Errorf("error creating memfd: %w", err) } memfile := os.NewFile(uintptr(memfd), memfileName) // We can't enable pgalloc.MemoryFileOpts.UseHostMemcgPressure even if @@ -527,8 +523,8 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { // in a mount namespace in which the relevant cgroupfs is not visible. mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{}) if err != nil { - memfile.Close() - return nil, fmt.Errorf("error creating pgalloc.MemoryFile: %v", err) + _ = memfile.Close() + return nil, fmt.Errorf("error creating pgalloc.MemoryFile: %w", err) } return mf, nil } @@ -545,7 +541,7 @@ func (l *Loader) installSeccompFilters() error { ControllerFD: l.ctrl.srv.FD(), } if err := filter.Install(opts); err != nil { - return fmt.Errorf("installing seccomp filters: %v", err) + return fmt.Errorf("installing seccomp filters: %w", err) } } return nil @@ -571,8 +567,8 @@ func (l *Loader) run() error { // Delay host network configuration to this point because network namespace // is configured after the loader is created and before Run() is called. log.Debugf("Configuring host network") - stack := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack) - if err := stack.Configure(); err != nil { + s := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack) + if err := s.Configure(); err != nil { return err } } @@ -629,9 +625,9 @@ func (l *Loader) run() error { // be handled properly. deliveryMode = DeliverToForegroundProcessGroup } - log.Infof("Received external signal %d, mode: %v", sig, deliveryMode) + log.Infof("Received external signal %d, mode: %s", sig, deliveryMode) if err := l.signal(l.sandboxID, 0, int32(sig), deliveryMode); err != nil { - log.Warningf("error sending signal %v to container %q: %v", sig, l.sandboxID, err) + log.Warningf("error sending signal %s to container %q: %s", sig, l.sandboxID, err) } }) @@ -660,7 +656,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin // Create capabilities. caps, err := specutils.Capabilities(conf.EnableRaw, spec.Process.Capabilities) if err != nil { - return fmt.Errorf("creating capabilities: %v", err) + return fmt.Errorf("creating capabilities: %w", err) } l.mu.Lock() @@ -713,16 +709,16 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin } info.procArgs, err = createProcessArgs(cid, spec, creds, l.k, pidns) if err != nil { - return fmt.Errorf("creating new process: %v", err) + return fmt.Errorf("creating new process: %w", err) } // Use stdios or TTY depending on the spec configuration. if spec.Process.Terminal { - if len(stdioFDs) > 0 { - return fmt.Errorf("using TTY, stdios not expected: %v", stdioFDs) + if l := len(stdioFDs); l != 0 { + return fmt.Errorf("using TTY, stdios not expected: %d", l) } if ep.hostTTY == nil { - return fmt.Errorf("terminal enabled but no TTY provided. Did you set --console-socket on create?") + return fmt.Errorf("terminal enabled but no TTY provided (--console-socket possibly passed)") } info.stdioFDs = []*fd.FD{ep.hostTTY, ep.hostTTY, ep.hostTTY} ep.hostTTY = nil @@ -743,7 +739,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn ctx := info.procArgs.NewContext(l.k) fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs) if err != nil { - return nil, nil, nil, fmt.Errorf("importing fds: %v", err) + return nil, nil, nil, fmt.Errorf("importing fds: %w", err) } // CreateProcess takes a reference on fdTable if successful. We won't need // ours either way. @@ -780,7 +776,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn // Create and start the new process. tg, _, err := l.k.CreateProcess(info.procArgs) if err != nil { - return nil, nil, nil, fmt.Errorf("creating process: %v", err) + return nil, nil, nil, fmt.Errorf("creating process: %w", err) } // CreateProcess takes a reference on FDTable if successful. info.procArgs.FDTable.DecRef(ctx) @@ -799,7 +795,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn if info.spec.Linux != nil && info.spec.Linux.Seccomp != nil { program, err := seccomp.BuildProgram(info.spec.Linux.Seccomp) if err != nil { - return nil, nil, nil, fmt.Errorf("building seccomp program: %v", err) + return nil, nil, nil, fmt.Errorf("building seccomp program: %w", err) } if log.IsLogging(log.Debug) { @@ -810,7 +806,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn task := tg.Leader() // NOTE: It seems Flags are ignored by runc so we ignore them too. if err := task.AppendSyscallFilter(program, true); err != nil { - return nil, nil, nil, fmt.Errorf("appending seccomp filters: %v", err) + return nil, nil, nil, fmt.Errorf("appending seccomp filters: %w", err) } } } else { @@ -841,7 +837,7 @@ func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) { return uintptr(n), 0, err }) if err != nil { - panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err)) + panic(fmt.Sprintf("Error monitoring gofer FDs: %s", err)) } l.mu.Lock() @@ -852,7 +848,7 @@ func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) { if tg, _ := l.tryThreadGroupFromIDLocked(execID{cid: cid}); tg != nil { log.Infof("Gofer socket disconnected, killing container %q", cid) if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil { - log.Warningf("Error killing container %q after gofer stopped: %v", cid, err) + log.Warningf("Error killing container %q after gofer stopped: %s", cid, err) } } }() @@ -873,7 +869,7 @@ func (l *Loader) destroyContainer(cid string) error { // The container exists, but has it been started? if tg != nil { if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil { - return fmt.Errorf("sending SIGKILL to all container processes: %v", err) + return fmt.Errorf("sending SIGKILL to all container processes: %w", err) } // Wait for all processes that belong to the container to exit (including // exec'd processes). @@ -967,10 +963,15 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { } args.Envv = envv } + args.PIDNamespace = tg.PIDNamespace() + + args.Limits, err = createLimitSet(l.root.spec) + if err != nil { + return 0, fmt.Errorf("creating limits: %w", err) + } // Start the process. proc := control.Proc{Kernel: l.k} - args.PIDNamespace = tg.PIDNamespace() newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args) if err != nil { return 0, err @@ -982,7 +983,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { tty: ttyFile, ttyVFS2: ttyFileVFS2, } - log.Debugf("updated processes: %v", l.processes) + log.Debugf("updated processes: %s", l.processes) return tgid, nil } @@ -993,7 +994,7 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // multiple clients to wait on the same container. tg, err := l.threadGroupFromID(execID{cid: cid}) if err != nil { - return fmt.Errorf("can't wait for container %q: %v", cid, err) + return fmt.Errorf("can't wait for container %q: %w", cid, err) } // If the thread either has already exited or exits during waiting, @@ -1007,7 +1008,7 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { if l.root.procArgs.ContainerID == cid { // All sentry-created resources should have been released at this point. refsvfs2.DoLeakCheck() - coverage.Report() + _ = coverage.Report() } return nil } @@ -1026,7 +1027,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e l.mu.Lock() delete(l.processes, eid) - log.Debugf("updated processes (removal): %v", l.processes) + log.Debugf("updated processes (removal): %s", l.processes) l.mu.Unlock() return nil } @@ -1035,7 +1036,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e // In this case, find the process in the container's PID namespace. initTG, err := l.threadGroupFromID(execID{cid: cid}) if err != nil { - return fmt.Errorf("waiting for PID %d: %v", tgid, err) + return fmt.Errorf("waiting for PID %d: %w", tgid, err) } tg := initTG.PIDNamespace().ThreadGroupWithID(tgid) if tg == nil { @@ -1094,7 +1095,7 @@ func newRootNetworkNamespace(conf *config.Config, clock tcpip.Clock, uniqueID st return inet.NewRootNamespace(s, creator), nil default: - panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + panic(fmt.Sprintf("invalid network configuration: %d", conf.Network)) } } @@ -1107,7 +1108,7 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in icmp.NewProtocol4, icmp.NewProtocol6, } - s := netstack.Stack{stack.New(stack.Options{ + s := netstack.Stack{Stack: stack.New(stack.Options{ NetworkProtocols: netProtos, TransportProtocols: transProtos, Clock: clock, @@ -1115,9 +1116,9 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in HandleLocal: true, // Enable raw sockets for users with sufficient // privileges. - RawFactory: raw.EndpointFactory{}, - UniqueID: uniqueID, - IPTables: netfilter.DefaultLinuxTables(), + RawFactory: raw.EndpointFactory{}, + UniqueID: uniqueID, + DefaultIPTables: netfilter.DefaultLinuxTables, })} // Enable SACK Recovery. @@ -1190,13 +1191,13 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e switch mode { case DeliverToProcess: if err := l.signalProcess(cid, kernel.ThreadID(pid), signo); err != nil { - return fmt.Errorf("signaling process in container %q PID %d: %v", cid, pid, err) + return fmt.Errorf("signaling process in container %q PID %d: %w", cid, pid, err) } return nil case DeliverToForegroundProcessGroup: if err := l.signalForegrondProcessGroup(cid, kernel.ThreadID(pid), signo); err != nil { - return fmt.Errorf("signaling foreground process group in container %q PID %d: %v", cid, pid, err) + return fmt.Errorf("signaling foreground process group in container %q PID %d: %w", cid, pid, err) } return nil @@ -1209,12 +1210,12 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e return err } if err := l.signalAllProcesses(cid, signo); err != nil { - return fmt.Errorf("signaling all processes in container %q: %v", cid, err) + return fmt.Errorf("signaling all processes in container %q: %w", cid, err) } return nil default: - panic(fmt.Sprintf("unknown signal delivery mode %v", mode)) + panic(fmt.Sprintf("unknown signal delivery mode %s", mode)) } } @@ -1224,7 +1225,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) if err == nil { // Send signal directly to the identified process. - return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(execTG, &linux.SignalInfo{Signo: signo}) } // The caller may be signaling a process not started directly via exec. @@ -1237,7 +1238,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er if tg.Leader().ContainerID() != cid { return fmt.Errorf("process %d belongs to a different container: %q", tgid, tg.Leader().ContainerID()) } - return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}) } // signalForegrondProcessGroup looks up foreground process group from the TTY @@ -1247,7 +1248,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid, pid: tgid}) if err != nil { l.mu.Unlock() - return fmt.Errorf("no thread group found: %v", err) + return fmt.Errorf("no thread group found: %w", err) } if tg == nil { l.mu.Unlock() @@ -1257,7 +1258,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s tty, ttyVFS2, err := l.ttyFromIDLocked(execID{cid: cid, pid: tgid}) l.mu.Unlock() if err != nil { - return fmt.Errorf("no thread group found: %v", err) + return fmt.Errorf("no thread group found: %w", err) } var pg *kernel.ProcessGroup @@ -1273,7 +1274,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s // No foreground process group has been set. Signal the // original thread group. log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid) - return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}) } // Send the signal to all processes in the process group. var lastErr error @@ -1281,7 +1282,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s if tg.ProcessGroup() != pg { continue } - if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil { + if err := l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}); err != nil { lastErr = err } } @@ -1296,7 +1297,7 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error { // sent to the entire container. l.k.Pause() defer l.k.Unpause() - return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo}) + return l.k.SendContainerSignal(cid, &linux.SignalInfo{Signo: signo}) } // threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index c1828bd3d..52aa33529 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -210,12 +210,10 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c fd := c.fds.remove() data := p9MountData(fd, conf.FileAccess, true /* vfs2 */) - if conf.OverlayfsStaleRead { - // We can't check for overlayfs here because sandbox is chroot'ed and gofer - // can only send mount options for specs.Mounts (specs.Root is missing - // Options field). So assume root is always on top of overlayfs. - data = append(data, "overlayfs_stale_read") - } + // We can't check for overlayfs here because sandbox is chroot'ed and gofer + // can only send mount options for specs.Mounts (specs.Root is missing + // Options field). So assume root is always on top of overlayfs. + data = append(data, "overlayfs_stale_read") log.Infof("Mounting root over 9P, ioFD: %d", fd) opts := &vfs.MountOptions{ @@ -657,7 +655,6 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config Start: root, Path: fspath.Parse("/tmp"), } - // TODO(gvisor.dev/issue/2782): Use O_PATH when available. fd, err := c.k.VFS().OpenAt(ctx, creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY}) switch err { case nil: diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 6a755ecb6..5ded7b946 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -473,14 +473,12 @@ func adjustMountOptions(conf *config.Config, path string, opts []string) ([]stri rv := make([]string, len(opts)) copy(rv, opts) - if conf.OverlayfsStaleRead { - statfs := unix.Statfs_t{} - if err := unix.Statfs(path, &statfs); err != nil { - return nil, err - } - if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC { - rv = append(rv, "overlayfs_stale_read") - } + statfs := unix.Statfs_t{} + if err := unix.Statfs(path, &statfs); err != nil { + return nil, err + } + if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC { + rv = append(rv, "overlayfs_stale_read") } return rv, nil } diff --git a/runsc/config/config.go b/runsc/config/config.go index fa550ebf7..3d8c7a0ab 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -151,12 +151,6 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"` - // OverlayfsStaleRead instructs the sandbox to assume that the root mount - // is on a Linux overlayfs mount, which does not necessarily preserve - // coherence between read-only and subsequent writable file descriptors - // representing the "same" file. - OverlayfsStaleRead bool `flag:"overlayfs-stale-read"` - // CPUNumFromQuota sets CPU number count to available CPU quota, using // least integer value greater than or equal to quota. // @@ -245,14 +239,14 @@ func (f *FileAccessType) Get() interface{} { } // String implements flag.Value. -func (f *FileAccessType) String() string { - switch *f { +func (f FileAccessType) String() string { + switch f { case FileAccessShared: return "shared" case FileAccessExclusive: return "exclusive" } - panic(fmt.Sprintf("Invalid file access type %v", *f)) + panic(fmt.Sprintf("Invalid file access type %d", f)) } // NetworkType tells which network stack to use. @@ -294,8 +288,8 @@ func (n *NetworkType) Get() interface{} { } // String implements flag.Value. -func (n *NetworkType) String() string { - switch *n { +func (n NetworkType) String() string { + switch n { case NetworkSandbox: return "sandbox" case NetworkHost: @@ -303,7 +297,7 @@ func (n *NetworkType) String() string { case NetworkNone: return "none" } - panic(fmt.Sprintf("Invalid network type %v", *n)) + panic(fmt.Sprintf("Invalid network type %d", n)) } // QueueingDiscipline is used to specify the kind of Queueing Discipline to @@ -341,14 +335,14 @@ func (q *QueueingDiscipline) Get() interface{} { } // String implements flag.Value. -func (q *QueueingDiscipline) String() string { - switch *q { +func (q QueueingDiscipline) String() string { + switch q { case QDiscNone: return "none" case QDiscFIFO: return "fifo" } - panic(fmt.Sprintf("Invalid qdisc %v", *q)) + panic(fmt.Sprintf("Invalid qdisc %d", q)) } func leakModePtr(v refs.LeakMode) *refs.LeakMode { diff --git a/runsc/config/flags.go b/runsc/config/flags.go index c3dca2352..6f1b5927a 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -72,7 +72,6 @@ func RegisterFlags() { flag.Var(fileAccessTypePtr(FileAccessShared), "file-access-mounts", "specifies which filesystem validation to use for volumes other than the root mount: shared (default), exclusive.") flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") flag.Bool("verity", false, "specifies whether a verity file system will be mounted.") - flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.") flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.") diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 0e79877b7..249324c5a 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -47,6 +47,62 @@ import ( "gvisor.dev/gvisor/runsc/specutils" ) +func TestMain(m *testing.M) { + log.SetLevel(log.Debug) + flag.Parse() + if err := testutil.ConfigureExePath(); err != nil { + panic(err.Error()) + } + specutils.MaybeRunAsRoot() + os.Exit(m.Run()) +} + +func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { + args := &control.ExecArgs{ + Filename: name, + Argv: append([]string{name}, arg...), + } + return cont.executeSync(args) +} + +func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) { + r, w, err := os.Pipe() + if err != nil { + return nil, err + } + defer r.Close() + + args := &control.ExecArgs{ + Filename: name, + Argv: append([]string{name}, arg...), + FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}}, + } + ws, err := cont.executeSync(args) + w.Close() + if err != nil { + return nil, err + } + if ws != 0 { + return nil, fmt.Errorf("exec failed, status: %v", ws) + } + + out, err := ioutil.ReadAll(r) + return out, err +} + +// executeSync synchronously executes a new process. +func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) { + pid, err := c.Execute(args) + if err != nil { + return 0, fmt.Errorf("error executing: %v", err) + } + ws, err := c.WaitPID(pid) + if err != nil { + return 0, fmt.Errorf("error waiting: %v", err) + } + return ws, nil +} + // waitForProcessList waits for the given process list to show up in the container. func waitForProcessList(cont *Container, want []*control.Process) error { cb := func() error { @@ -2470,58 +2526,67 @@ func TestBindMountByOption(t *testing.T) { } } -func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { - args := &control.ExecArgs{ - Filename: name, - Argv: append([]string{name}, arg...), +// TestRlimits sets limit to number of open files and checks that the limit +// is propagated to the container. +func TestRlimits(t *testing.T) { + file, err := ioutil.TempFile(testutil.TmpDir(), "ulimit") + if err != nil { + t.Fatal(err) } - return cont.executeSync(args) -} + cmd := fmt.Sprintf("ulimit -n > %q", file.Name()) -func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) { - r, w, err := os.Pipe() - if err != nil { - return nil, err + spec := testutil.NewSpecWithArgs("sh", "-c", cmd) + spec.Process.Rlimits = []specs.POSIXRlimit{ + {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100}, } - defer r.Close() - args := &control.ExecArgs{ - Filename: name, - Argv: append([]string{name}, arg...), - FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}}, + conf := testutil.TestConfig(t) + if err := run(spec, conf); err != nil { + t.Fatalf("Error running container: %v", err) } - ws, err := cont.executeSync(args) - w.Close() + got, err := ioutil.ReadFile(file.Name()) if err != nil { - return nil, err + t.Fatal(err) } - if ws != 0 { - return nil, fmt.Errorf("exec failed, status: %v", ws) + if want := "100\n"; string(got) != want { + t.Errorf("ulimit result, got: %q, want: %q", got, want) } - - out, err := ioutil.ReadAll(r) - return out, err } -// executeSync synchronously executes a new process. -func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) { - pid, err := c.Execute(args) +// TestRlimitsExec sets limit to number of open files and checks that the limit +// is propagated to exec'd processes. +func TestRlimitsExec(t *testing.T) { + spec := testutil.NewSpecWithArgs("sleep", "100") + spec.Process.Rlimits = []specs.POSIXRlimit{ + {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100}, + } + + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { - return 0, fmt.Errorf("error executing: %v", err) + t.Fatalf("error setting up container: %v", err) } - ws, err := c.WaitPID(pid) + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { - return 0, fmt.Errorf("error waiting: %v", err) + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) } - return ws, nil -} -func TestMain(m *testing.M) { - log.SetLevel(log.Debug) - flag.Parse() - if err := testutil.ConfigureExePath(); err != nil { - panic(err.Error()) + got, err := executeCombinedOutput(cont, "/bin/sh", "-c", "ulimit -n") + if err != nil { + t.Fatal(err) + } + if want := "100\n"; string(got) != want { + t.Errorf("ulimit result, got: %q, want: %q", got, want) } - specutils.MaybeRunAsRoot() - os.Exit(m.Run()) } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 8d31e33b2..29e202b7d 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -131,8 +131,9 @@ func New(conf *config.Config, args *Args) (*Sandbox, error) { // The Cleanup object cleans up partially created sandboxes when an error // occurs. Any errors occurring during cleanup itself are ignored. c := cleanup.Make(func() { - err := s.destroy() - log.Warningf("error destroying sandbox: %v", err) + if err := s.destroy(); err != nil { + log.Warningf("error destroying sandbox: %v", err) + } }) defer c.Clean() diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 11b476690..c228d6299 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -246,7 +246,7 @@ func Capabilities(enableRaw bool, specCaps *specs.LinuxCapabilities) (*auth.Task if caps.PermittedCaps, err = capsFromNames(specCaps.Permitted, skipSet); err != nil { return nil, err } - // TODO(nlacasse): Support ambient capabilities. + // TODO(gvisor.dev/issue/3166): Support ambient capabilities. } return &caps, nil } |