Diffstat (limited to 'runsc')
-rw-r--r--  runsc/boot/BUILD                     1
-rw-r--r--  runsc/boot/filter/config.go          7
-rw-r--r--  runsc/boot/fs.go                    10
-rw-r--r--  runsc/boot/loader.go               143
-rw-r--r--  runsc/boot/vfs.go                   11
-rw-r--r--  runsc/cmd/gofer.go                  14
-rw-r--r--  runsc/config/config.go              24
-rw-r--r--  runsc/config/flags.go                1
-rw-r--r--  runsc/container/container_test.go  141
-rw-r--r--  runsc/sandbox/sandbox.go             5
-rw-r--r--  runsc/specutils/specutils.go         2
11 files changed, 208 insertions, 151 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index d51347fe1..a79afbdc4 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -38,7 +38,6 @@ go_library(
"//pkg/fspath",
"//pkg/log",
"//pkg/memutil",
- "//pkg/metric",
"//pkg/rand",
"//pkg/refs",
"//pkg/refsvfs2",
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 49b503f99..752fea0e1 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -200,6 +200,12 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
+ seccomp.EqualTo(unix.MAP_SHARED | unix.MAP_FIXED),
+ },
+ {
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
seccomp.EqualTo(unix.MAP_PRIVATE),
},
{
@@ -265,7 +271,6 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.EqualTo(unix.MSG_DONTWAIT),
- seccomp.EqualTo(0),
},
},
unix.SYS_RESTART_SYSCALL: {},
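
The rule tables above pair a syscall number with per-argument matchers: a call is allowed when any rule for its number matches, and a rule matches only when every argument matcher does. A self-contained sketch of that matching model (hypothetical types, not the actual pkg/seccomp implementation):

package main

import "fmt"

// matcher checks a single syscall argument.
type matcher interface{ matches(arg uintptr) bool }

// matchAny accepts any argument value.
type matchAny struct{}

func (matchAny) matches(uintptr) bool { return true }

// equalTo accepts exactly one argument value.
type equalTo uintptr

func (e equalTo) matches(arg uintptr) bool { return uintptr(e) == arg }

// rules maps a syscall number to alternative argument-matcher tuples.
type rules map[int][][]matcher

func (r rules) allowed(sysno int, args []uintptr) bool {
	for _, rule := range r[sysno] {
		ok := true
		for i, m := range rule {
			if !m.matches(args[i]) {
				ok = false
				break
			}
		}
		if ok {
			return true
		}
	}
	return false
}

func main() {
	const sysMmap = 9 // mmap on linux/amd64
	const mapShared, mapPrivate, mapFixed = 0x01, 0x02, 0x10
	r := rules{
		// Mirrors the new hunk: the mmap flags argument (index 3) must be
		// exactly MAP_SHARED|MAP_FIXED or exactly MAP_PRIVATE.
		sysMmap: {
			{matchAny{}, matchAny{}, matchAny{}, equalTo(mapShared | mapFixed)},
			{matchAny{}, matchAny{}, matchAny{}, equalTo(mapPrivate)},
		},
	}
	fmt.Println(r.allowed(sysMmap, []uintptr{0, 0, 0, mapShared | mapFixed})) // true
	fmt.Println(r.allowed(sysMmap, []uintptr{0, 0, 0, mapShared}))            // false
}
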
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index bf4a41f77..c4590aab1 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -763,12 +763,10 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con
p9FS := mustFindFilesystem("9p")
opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
- if conf.OverlayfsStaleRead {
- // We can't check for overlayfs here because sandbox is chroot'ed and gofer
- // can only send mount options for specs.Mounts (specs.Root is missing
- // Options field). So assume root is always on top of overlayfs.
- opts = append(opts, "overlayfs_stale_read")
- }
+ // We can't check for overlayfs here because sandbox is chroot'ed and gofer
+ // can only send mount options for specs.Mounts (specs.Root is missing
+ // Options field). So assume root is always on top of overlayfs.
+ opts = append(opts, "overlayfs_stale_read")
rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
if err != nil {
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 10f2d3d35..ad4d50008 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -34,11 +34,9 @@ import (
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/memutil"
- "gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/rand"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/refsvfs2"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/fdimport"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -211,15 +209,13 @@ func New(args Args) (*Loader, error) {
// We initialize the rand package now to make sure /dev/urandom is pre-opened
// on kernels that do not support getrandom(2).
if err := rand.Init(); err != nil {
- return nil, fmt.Errorf("setting up rand: %v", err)
+ return nil, fmt.Errorf("setting up rand: %w", err)
}
if err := usage.Init(); err != nil {
- return nil, fmt.Errorf("setting up memory usage: %v", err)
+ return nil, fmt.Errorf("setting up memory usage: %w", err)
}
- metric.CreateSentryMetrics()
-
// Is this a VFSv2 kernel?
if args.Conf.VFS2 {
kernel.VFS2Enabled = true
@@ -260,7 +256,7 @@ func New(args Args) (*Loader, error) {
// Create kernel and platform.
p, err := createPlatform(args.Conf, args.Device)
if err != nil {
- return nil, fmt.Errorf("creating platform: %v", err)
+ return nil, fmt.Errorf("creating platform: %w", err)
}
k := &kernel.Kernel{
Platform: p,
@@ -269,7 +265,7 @@ func New(args Args) (*Loader, error) {
// Create memory file.
mf, err := createMemoryFile()
if err != nil {
- return nil, fmt.Errorf("creating memory file: %v", err)
+ return nil, fmt.Errorf("creating memory file: %w", err)
}
k.SetMemoryFile(mf)
@@ -278,30 +274,31 @@ func New(args Args) (*Loader, error) {
// Pass k as the platform since it is savable, unlike the actual platform.
vdso, err := loader.PrepareVDSO(k)
if err != nil {
- return nil, fmt.Errorf("creating vdso: %v", err)
+ return nil, fmt.Errorf("creating vdso: %w", err)
}
// Create timekeeper.
tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange())
if err != nil {
- return nil, fmt.Errorf("creating timekeeper: %v", err)
+ return nil, fmt.Errorf("creating timekeeper: %w", err)
}
tk.SetClocks(time.NewCalibratedClocks())
+ k.SetTimekeeper(tk)
if err := enableStrace(args.Conf); err != nil {
- return nil, fmt.Errorf("enabling strace: %v", err)
+ return nil, fmt.Errorf("enabling strace: %w", err)
}
// Create root network namespace/stack.
netns, err := newRootNetworkNamespace(args.Conf, k, k)
if err != nil {
- return nil, fmt.Errorf("creating network: %v", err)
+ return nil, fmt.Errorf("creating network: %w", err)
}
// Create capabilities.
caps, err := specutils.Capabilities(args.Conf.EnableRaw, args.Spec.Process.Capabilities)
if err != nil {
- return nil, fmt.Errorf("converting capabilities: %v", err)
+ return nil, fmt.Errorf("converting capabilities: %w", err)
}
// Convert the spec's additional GIDs to KGIDs.
@@ -335,7 +332,6 @@ func New(args Args) (*Loader, error) {
// to createVFS in order to mount (among other things) procfs.
if err = k.Init(kernel.InitKernelArgs{
FeatureSet: cpuid.HostFeatureSet(),
- Timekeeper: tk,
RootUserNamespace: creds.UserNamespace,
RootNetworkNamespace: netns,
ApplicationCores: uint(args.NumCPU),
@@ -345,7 +341,7 @@ func New(args Args) (*Loader, error) {
RootAbstractSocketNamespace: kernel.NewAbstractSocketNamespace(),
PIDNamespace: kernel.NewRootPIDNamespace(creds.UserNamespace),
}); err != nil {
- return nil, fmt.Errorf("initializing kernel: %v", err)
+ return nil, fmt.Errorf("initializing kernel: %w", err)
}
if kernel.VFS2Enabled {
@@ -374,17 +370,17 @@ func New(args Args) (*Loader, error) {
procArgs, err := createProcessArgs(args.ID, args.Spec, creds, k, k.RootPIDNamespace())
if err != nil {
- return nil, fmt.Errorf("creating init process for root container: %v", err)
+ return nil, fmt.Errorf("creating init process for root container: %w", err)
}
info.procArgs = procArgs
if err := initCompatLogs(args.UserLogFD); err != nil {
- return nil, fmt.Errorf("initializing compat logs: %v", err)
+ return nil, fmt.Errorf("initializing compat logs: %w", err)
}
mountHints, err := newPodMountHints(args.Spec)
if err != nil {
- return nil, fmt.Errorf("creating pod mount hints: %v", err)
+ return nil, fmt.Errorf("creating pod mount hints: %w", err)
}
info.conf = args.Conf
@@ -394,12 +390,12 @@ func New(args Args) (*Loader, error) {
// Set up host mount that will be used for imported fds.
hostFilesystem, err := hostvfs2.NewFilesystem(k.VFS())
if err != nil {
- return nil, fmt.Errorf("failed to create hostfs filesystem: %v", err)
+ return nil, fmt.Errorf("failed to create hostfs filesystem: %w", err)
}
defer hostFilesystem.DecRef(k.SupervisorContext())
hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{})
if err != nil {
- return nil, fmt.Errorf("failed to create hostfs mount: %v", err)
+ return nil, fmt.Errorf("failed to create hostfs mount: %w", err)
}
k.SetHostMount(hostMount)
}
@@ -417,7 +413,7 @@ func New(args Args) (*Loader, error) {
// We don't care about child signals; some platforms can generate a
// tremendous number of useless ones (I'm looking at you, ptrace).
if err := sighandling.IgnoreChildStop(); err != nil {
- return nil, fmt.Errorf("ignore child stop signals failed: %v", err)
+ return nil, fmt.Errorf("ignore child stop signals failed: %w", err)
}
// Create the control server using the provided FD.
@@ -426,14 +422,14 @@ func New(args Args) (*Loader, error) {
// controller is used to configure the kernel's network stack.
ctrl, err := newController(args.ControllerFD, l)
if err != nil {
- return nil, fmt.Errorf("creating control server: %v", err)
+ return nil, fmt.Errorf("creating control server: %w", err)
}
l.ctrl = ctrl
// Only start serving after Loader is set to controller and controller is set
// to Loader, because they are both used in the urpc methods.
if err := ctrl.srv.StartServing(); err != nil {
- return nil, fmt.Errorf("starting control server: %v", err)
+ return nil, fmt.Errorf("starting control server: %w", err)
}
return l, nil
@@ -444,7 +440,7 @@ func createProcessArgs(id string, spec *specs.Spec, creds *auth.Credentials, k *
// Create initial limits.
ls, err := createLimitSet(spec)
if err != nil {
- return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %v", err)
+ return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %w", err)
}
env, err := specutils.ResolveEnvs(spec.Process.Env)
if err != nil {
@@ -498,18 +494,18 @@ func (l *Loader) Destroy() {
// In the success case, stdioFDs and goferFDs will only contain
// released/closed FDs that ownership has been passed over to host FDs and
// gofer sessions. Close them here in case of failure.
- for _, fd := range l.root.stdioFDs {
- _ = fd.Close()
+ for _, f := range l.root.stdioFDs {
+ _ = f.Close()
}
- for _, fd := range l.root.goferFDs {
- _ = fd.Close()
+ for _, f := range l.root.goferFDs {
+ _ = f.Close()
}
}
func createPlatform(conf *config.Config, deviceFile *os.File) (platform.Platform, error) {
p, err := platform.Lookup(conf.Platform)
if err != nil {
- panic(fmt.Sprintf("invalid platform %v: %v", conf.Platform, err))
+ panic(fmt.Sprintf("invalid platform %s: %s", conf.Platform, err))
}
log.Infof("Platform: %s", conf.Platform)
return p.New(deviceFile)
@@ -519,7 +515,7 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
const memfileName = "runsc-memory"
memfd, err := memutil.CreateMemFD(memfileName, 0)
if err != nil {
- return nil, fmt.Errorf("error creating memfd: %v", err)
+ return nil, fmt.Errorf("error creating memfd: %w", err)
}
memfile := os.NewFile(uintptr(memfd), memfileName)
// We can't enable pgalloc.MemoryFileOpts.UseHostMemcgPressure even if
@@ -527,8 +523,8 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
// in a mount namespace in which the relevant cgroupfs is not visible.
mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{})
if err != nil {
- memfile.Close()
- return nil, fmt.Errorf("error creating pgalloc.MemoryFile: %v", err)
+ _ = memfile.Close()
+ return nil, fmt.Errorf("error creating pgalloc.MemoryFile: %w", err)
}
return mf, nil
}
@@ -545,7 +541,7 @@ func (l *Loader) installSeccompFilters() error {
ControllerFD: l.ctrl.srv.FD(),
}
if err := filter.Install(opts); err != nil {
- return fmt.Errorf("installing seccomp filters: %v", err)
+ return fmt.Errorf("installing seccomp filters: %w", err)
}
}
return nil
@@ -571,8 +567,8 @@ func (l *Loader) run() error {
// Delay host network configuration to this point because network namespace
// is configured after the loader is created and before Run() is called.
log.Debugf("Configuring host network")
- stack := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack)
- if err := stack.Configure(); err != nil {
+ s := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack)
+ if err := s.Configure(); err != nil {
return err
}
}
@@ -629,9 +625,9 @@ func (l *Loader) run() error {
// be handled properly.
deliveryMode = DeliverToForegroundProcessGroup
}
- log.Infof("Received external signal %d, mode: %v", sig, deliveryMode)
+ log.Infof("Received external signal %d, mode: %s", sig, deliveryMode)
if err := l.signal(l.sandboxID, 0, int32(sig), deliveryMode); err != nil {
- log.Warningf("error sending signal %v to container %q: %v", sig, l.sandboxID, err)
+ log.Warningf("error sending signal %s to container %q: %s", sig, l.sandboxID, err)
}
})
@@ -660,7 +656,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin
// Create capabilities.
caps, err := specutils.Capabilities(conf.EnableRaw, spec.Process.Capabilities)
if err != nil {
- return fmt.Errorf("creating capabilities: %v", err)
+ return fmt.Errorf("creating capabilities: %w", err)
}
l.mu.Lock()
@@ -713,16 +709,16 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin
}
info.procArgs, err = createProcessArgs(cid, spec, creds, l.k, pidns)
if err != nil {
- return fmt.Errorf("creating new process: %v", err)
+ return fmt.Errorf("creating new process: %w", err)
}
// Use stdios or TTY depending on the spec configuration.
if spec.Process.Terminal {
- if len(stdioFDs) > 0 {
- return fmt.Errorf("using TTY, stdios not expected: %v", stdioFDs)
+ if n := len(stdioFDs); n != 0 {
+ return fmt.Errorf("using TTY, stdios not expected: %d", n)
}
if ep.hostTTY == nil {
- return fmt.Errorf("terminal enabled but no TTY provided. Did you set --console-socket on create?")
+ return fmt.Errorf("terminal enabled but no TTY provided (--console-socket possibly passed)")
}
info.stdioFDs = []*fd.FD{ep.hostTTY, ep.hostTTY, ep.hostTTY}
ep.hostTTY = nil
@@ -743,7 +739,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
ctx := info.procArgs.NewContext(l.k)
fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs)
if err != nil {
- return nil, nil, nil, fmt.Errorf("importing fds: %v", err)
+ return nil, nil, nil, fmt.Errorf("importing fds: %w", err)
}
// CreateProcess takes a reference on fdTable if successful. We won't need
// ours either way.
@@ -780,7 +776,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
// Create and start the new process.
tg, _, err := l.k.CreateProcess(info.procArgs)
if err != nil {
- return nil, nil, nil, fmt.Errorf("creating process: %v", err)
+ return nil, nil, nil, fmt.Errorf("creating process: %w", err)
}
// CreateProcess takes a reference on FDTable if successful.
info.procArgs.FDTable.DecRef(ctx)
@@ -799,7 +795,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
if info.spec.Linux != nil && info.spec.Linux.Seccomp != nil {
program, err := seccomp.BuildProgram(info.spec.Linux.Seccomp)
if err != nil {
- return nil, nil, nil, fmt.Errorf("building seccomp program: %v", err)
+ return nil, nil, nil, fmt.Errorf("building seccomp program: %w", err)
}
if log.IsLogging(log.Debug) {
@@ -810,7 +806,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
task := tg.Leader()
// NOTE: It seems Flags are ignored by runc so we ignore them too.
if err := task.AppendSyscallFilter(program, true); err != nil {
- return nil, nil, nil, fmt.Errorf("appending seccomp filters: %v", err)
+ return nil, nil, nil, fmt.Errorf("appending seccomp filters: %w", err)
}
}
} else {
@@ -841,7 +837,7 @@ func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) {
return uintptr(n), 0, err
})
if err != nil {
- panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err))
+ panic(fmt.Sprintf("Error monitoring gofer FDs: %s", err))
}
l.mu.Lock()
@@ -852,7 +848,7 @@ func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) {
if tg, _ := l.tryThreadGroupFromIDLocked(execID{cid: cid}); tg != nil {
log.Infof("Gofer socket disconnected, killing container %q", cid)
if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
- log.Warningf("Error killing container %q after gofer stopped: %v", cid, err)
+ log.Warningf("Error killing container %q after gofer stopped: %s", cid, err)
}
}
}()
@@ -873,7 +869,7 @@ func (l *Loader) destroyContainer(cid string) error {
// The container exists, but has it been started?
if tg != nil {
if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
- return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
+ return fmt.Errorf("sending SIGKILL to all container processes: %w", err)
}
// Wait for all processes that belong to the container to exit (including
// exec'd processes).
@@ -967,10 +963,15 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
}
args.Envv = envv
}
+ args.PIDNamespace = tg.PIDNamespace()
+
+ args.Limits, err = createLimitSet(l.root.spec)
+ if err != nil {
+ return 0, fmt.Errorf("creating limits: %w", err)
+ }
// Start the process.
proc := control.Proc{Kernel: l.k}
- args.PIDNamespace = tg.PIDNamespace()
newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args)
if err != nil {
return 0, err
@@ -982,7 +983,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
tty: ttyFile,
ttyVFS2: ttyFileVFS2,
}
- log.Debugf("updated processes: %v", l.processes)
+ log.Debugf("updated processes: %s", l.processes)
return tgid, nil
}
@@ -993,7 +994,7 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
// multiple clients to wait on the same container.
tg, err := l.threadGroupFromID(execID{cid: cid})
if err != nil {
- return fmt.Errorf("can't wait for container %q: %v", cid, err)
+ return fmt.Errorf("can't wait for container %q: %w", cid, err)
}
// If the thread either has already exited or exits during waiting,
@@ -1007,7 +1008,7 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
if l.root.procArgs.ContainerID == cid {
// All sentry-created resources should have been released at this point.
refsvfs2.DoLeakCheck()
- coverage.Report()
+ _ = coverage.Report()
}
return nil
}
@@ -1026,7 +1027,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
l.mu.Lock()
delete(l.processes, eid)
- log.Debugf("updated processes (removal): %v", l.processes)
+ log.Debugf("updated processes (removal): %s", l.processes)
l.mu.Unlock()
return nil
}
@@ -1035,7 +1036,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
// In this case, find the process in the container's PID namespace.
initTG, err := l.threadGroupFromID(execID{cid: cid})
if err != nil {
- return fmt.Errorf("waiting for PID %d: %v", tgid, err)
+ return fmt.Errorf("waiting for PID %d: %w", tgid, err)
}
tg := initTG.PIDNamespace().ThreadGroupWithID(tgid)
if tg == nil {
@@ -1094,7 +1095,7 @@ func newRootNetworkNamespace(conf *config.Config, clock tcpip.Clock, uniqueID st
return inet.NewRootNamespace(s, creator), nil
default:
- panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
+ panic(fmt.Sprintf("invalid network configuration: %d", conf.Network))
}
}
@@ -1107,7 +1108,7 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
icmp.NewProtocol4,
icmp.NewProtocol6,
}
- s := netstack.Stack{stack.New(stack.Options{
+ s := netstack.Stack{Stack: stack.New(stack.Options{
NetworkProtocols: netProtos,
TransportProtocols: transProtos,
Clock: clock,
@@ -1115,9 +1116,9 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
HandleLocal: true,
// Enable raw sockets for users with sufficient
// privileges.
- RawFactory: raw.EndpointFactory{},
- UniqueID: uniqueID,
- IPTables: netfilter.DefaultLinuxTables(),
+ RawFactory: raw.EndpointFactory{},
+ UniqueID: uniqueID,
+ DefaultIPTables: netfilter.DefaultLinuxTables,
})}
// Enable SACK Recovery.
@@ -1190,13 +1191,13 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
switch mode {
case DeliverToProcess:
if err := l.signalProcess(cid, kernel.ThreadID(pid), signo); err != nil {
- return fmt.Errorf("signaling process in container %q PID %d: %v", cid, pid, err)
+ return fmt.Errorf("signaling process in container %q PID %d: %w", cid, pid, err)
}
return nil
case DeliverToForegroundProcessGroup:
if err := l.signalForegrondProcessGroup(cid, kernel.ThreadID(pid), signo); err != nil {
- return fmt.Errorf("signaling foreground process group in container %q PID %d: %v", cid, pid, err)
+ return fmt.Errorf("signaling foreground process group in container %q PID %d: %w", cid, pid, err)
}
return nil
@@ -1209,12 +1210,12 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
return err
}
if err := l.signalAllProcesses(cid, signo); err != nil {
- return fmt.Errorf("signaling all processes in container %q: %v", cid, err)
+ return fmt.Errorf("signaling all processes in container %q: %w", cid, err)
}
return nil
default:
- panic(fmt.Sprintf("unknown signal delivery mode %v", mode))
+ panic(fmt.Sprintf("unknown signal delivery mode %s", mode))
}
}
@@ -1224,7 +1225,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
if err == nil {
// Send signal directly to the identified process.
- return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(execTG, &linux.SignalInfo{Signo: signo})
}
// The caller may be signaling a process not started directly via exec.
@@ -1237,7 +1238,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
if tg.Leader().ContainerID() != cid {
return fmt.Errorf("process %d belongs to a different container: %q", tgid, tg.Leader().ContainerID())
}
- return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo})
}
// signalForegrondProcessGroup looks up foreground process group from the TTY
@@ -1247,7 +1248,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid, pid: tgid})
if err != nil {
l.mu.Unlock()
- return fmt.Errorf("no thread group found: %v", err)
+ return fmt.Errorf("no thread group found: %w", err)
}
if tg == nil {
l.mu.Unlock()
@@ -1257,7 +1258,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
tty, ttyVFS2, err := l.ttyFromIDLocked(execID{cid: cid, pid: tgid})
l.mu.Unlock()
if err != nil {
- return fmt.Errorf("no thread group found: %v", err)
+ return fmt.Errorf("no thread group found: %w", err)
}
var pg *kernel.ProcessGroup
@@ -1273,7 +1274,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
// No foreground process group has been set. Signal the
// original thread group.
log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid)
- return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo})
}
// Send the signal to all processes in the process group.
var lastErr error
@@ -1281,7 +1282,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
if tg.ProcessGroup() != pg {
continue
}
- if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil {
+ if err := l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}); err != nil {
lastErr = err
}
}
@@ -1296,7 +1297,7 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
// sent to the entire container.
l.k.Pause()
defer l.k.Unpause()
- return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo})
+ return l.k.SendContainerSignal(cid, &linux.SignalInfo{Signo: signo})
}
// threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it
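
Most of the churn in loader.go swaps fmt.Errorf's %v verb for %w, which keeps the wrapped error in the chain so callers can test it with errors.Is/errors.As instead of matching message strings. A minimal illustration (hypothetical example, not code from this change):

package main

import (
	"errors"
	"fmt"
	"os"
)

func loadSpec(path string) error {
	if _, err := os.Open(path); err != nil {
		// %w records err in the chain; %v would flatten it into a string.
		return fmt.Errorf("loading spec: %w", err)
	}
	return nil
}

func main() {
	err := loadSpec("/nonexistent/config.json")
	fmt.Println(err)
	fmt.Println(errors.Is(err, os.ErrNotExist)) // true only because of %w
}
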
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index c1828bd3d..52aa33529 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -210,12 +210,10 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c
fd := c.fds.remove()
data := p9MountData(fd, conf.FileAccess, true /* vfs2 */)
- if conf.OverlayfsStaleRead {
- // We can't check for overlayfs here because sandbox is chroot'ed and gofer
- // can only send mount options for specs.Mounts (specs.Root is missing
- // Options field). So assume root is always on top of overlayfs.
- data = append(data, "overlayfs_stale_read")
- }
+ // We can't check for overlayfs here because sandbox is chroot'ed and gofer
+ // can only send mount options for specs.Mounts (specs.Root is missing
+ // Options field). So assume root is always on top of overlayfs.
+ data = append(data, "overlayfs_stale_read")
log.Infof("Mounting root over 9P, ioFD: %d", fd)
opts := &vfs.MountOptions{
@@ -657,7 +655,6 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config
Start: root,
Path: fspath.Parse("/tmp"),
}
- // TODO(gvisor.dev/issue/2782): Use O_PATH when available.
fd, err := c.k.VFS().OpenAt(ctx, creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY})
switch err {
case nil:
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 6a755ecb6..5ded7b946 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -473,14 +473,12 @@ func adjustMountOptions(conf *config.Config, path string, opts []string) ([]stri
rv := make([]string, len(opts))
copy(rv, opts)
- if conf.OverlayfsStaleRead {
- statfs := unix.Statfs_t{}
- if err := unix.Statfs(path, &statfs); err != nil {
- return nil, err
- }
- if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC {
- rv = append(rv, "overlayfs_stale_read")
- }
+ statfs := unix.Statfs_t{}
+ if err := unix.Statfs(path, &statfs); err != nil {
+ return nil, err
+ }
+ if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC {
+ rv = append(rv, "overlayfs_stale_read")
}
return rv, nil
}
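
With the flag gone, the gofer always probes the mount with statfs(2) and appends overlayfs_stale_read whenever the path is backed by overlayfs. The probe reduces to a few lines; a standalone, Linux-only sketch using the same golang.org/x/sys/unix calls as the hunk above:

package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

// isOverlayfs reports whether path lives on an overlayfs mount, using the
// same Statfs check adjustMountOptions performs above.
func isOverlayfs(path string) (bool, error) {
	var statfs unix.Statfs_t
	if err := unix.Statfs(path, &statfs); err != nil {
		return false, err
	}
	return statfs.Type == unix.OVERLAYFS_SUPER_MAGIC, nil
}

func main() {
	ok, err := isOverlayfs("/")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("overlayfs:", ok)
}
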
diff --git a/runsc/config/config.go b/runsc/config/config.go
index fa550ebf7..3d8c7a0ab 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -151,12 +151,6 @@ type Config struct {
// ReferenceLeakMode sets reference leak check mode
ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"`
- // OverlayfsStaleRead instructs the sandbox to assume that the root mount
- // is on a Linux overlayfs mount, which does not necessarily preserve
- // coherence between read-only and subsequent writable file descriptors
- // representing the "same" file.
- OverlayfsStaleRead bool `flag:"overlayfs-stale-read"`
-
// CPUNumFromQuota sets CPU number count to available CPU quota, using
// least integer value greater than or equal to quota.
//
@@ -245,14 +239,14 @@ func (f *FileAccessType) Get() interface{} {
}
// String implements flag.Value.
-func (f *FileAccessType) String() string {
- switch *f {
+func (f FileAccessType) String() string {
+ switch f {
case FileAccessShared:
return "shared"
case FileAccessExclusive:
return "exclusive"
}
- panic(fmt.Sprintf("Invalid file access type %v", *f))
+ panic(fmt.Sprintf("Invalid file access type %d", f))
}
// NetworkType tells which network stack to use.
@@ -294,8 +288,8 @@ func (n *NetworkType) Get() interface{} {
}
// String implements flag.Value.
-func (n *NetworkType) String() string {
- switch *n {
+func (n NetworkType) String() string {
+ switch n {
case NetworkSandbox:
return "sandbox"
case NetworkHost:
@@ -303,7 +297,7 @@ func (n *NetworkType) String() string {
case NetworkNone:
return "none"
}
- panic(fmt.Sprintf("Invalid network type %v", *n))
+ panic(fmt.Sprintf("Invalid network type %d", n))
}
// QueueingDiscipline is used to specify the kind of Queueing Discipline to
@@ -341,14 +335,14 @@ func (q *QueueingDiscipline) Get() interface{} {
}
// String implements flag.Value.
-func (q *QueueingDiscipline) String() string {
- switch *q {
+func (q QueueingDiscipline) String() string {
+ switch q {
case QDiscNone:
return "none"
case QDiscFIFO:
return "fifo"
}
- panic(fmt.Sprintf("Invalid qdisc %v", *q))
+ panic(fmt.Sprintf("Invalid qdisc %d", q))
}
func leakModePtr(v refs.LeakMode) *refs.LeakMode {
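
The receiver change matters for printing: Go promotes value-receiver methods into the pointer's method set but not the other way around, so a value-receiver String makes both the type and a pointer to it satisfy fmt.Stringer. A sketch with a hypothetical enum-like flag type:

package main

import "fmt"

// fileMode is a hypothetical enum-like flag type.
type fileMode int

const (
	modeShared fileMode = iota
	modeExclusive
)

// String uses a value receiver, so fileMode and *fileMode both satisfy
// fmt.Stringer; with a pointer receiver, only *fileMode would.
func (f fileMode) String() string {
	switch f {
	case modeShared:
		return "shared"
	case modeExclusive:
		return "exclusive"
	}
	panic(fmt.Sprintf("invalid file mode %d", int(f)))
}

func main() {
	m := modeExclusive
	fmt.Println(m)  // exclusive
	fmt.Println(&m) // exclusive (value-receiver method is promoted)
}
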
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index c3dca2352..6f1b5927a 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -72,7 +72,6 @@ func RegisterFlags() {
flag.Var(fileAccessTypePtr(FileAccessShared), "file-access-mounts", "specifies which filesystem validation to use for volumes other than the root mount: shared (default), exclusive.")
flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
flag.Bool("verity", false, "specifies whether a verity file system will be mounted.")
- flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem")
flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.")
flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.")
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 0e79877b7..249324c5a 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -47,6 +47,62 @@ import (
"gvisor.dev/gvisor/runsc/specutils"
)
+func TestMain(m *testing.M) {
+ log.SetLevel(log.Debug)
+ flag.Parse()
+ if err := testutil.ConfigureExePath(); err != nil {
+ panic(err.Error())
+ }
+ specutils.MaybeRunAsRoot()
+ os.Exit(m.Run())
+}
+
+func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
+ args := &control.ExecArgs{
+ Filename: name,
+ Argv: append([]string{name}, arg...),
+ }
+ return cont.executeSync(args)
+}
+
+func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) {
+ r, w, err := os.Pipe()
+ if err != nil {
+ return nil, err
+ }
+ defer r.Close()
+
+ args := &control.ExecArgs{
+ Filename: name,
+ Argv: append([]string{name}, arg...),
+ FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}},
+ }
+ ws, err := cont.executeSync(args)
+ w.Close()
+ if err != nil {
+ return nil, err
+ }
+ if ws != 0 {
+ return nil, fmt.Errorf("exec failed, status: %v", ws)
+ }
+
+ out, err := ioutil.ReadAll(r)
+ return out, err
+}
+
+// executeSync synchronously executes a new process.
+func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) {
+ pid, err := c.Execute(args)
+ if err != nil {
+ return 0, fmt.Errorf("error executing: %v", err)
+ }
+ ws, err := c.WaitPID(pid)
+ if err != nil {
+ return 0, fmt.Errorf("error waiting: %v", err)
+ }
+ return ws, nil
+}
+
// waitForProcessList waits for the given process list to show up in the container.
func waitForProcessList(cont *Container, want []*control.Process) error {
cb := func() error {
@@ -2470,58 +2526,67 @@ func TestBindMountByOption(t *testing.T) {
}
}
-func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
- args := &control.ExecArgs{
- Filename: name,
- Argv: append([]string{name}, arg...),
+// TestRlimits sets limit to number of open files and checks that the limit
+// is propagated to the container.
+func TestRlimits(t *testing.T) {
+ file, err := ioutil.TempFile(testutil.TmpDir(), "ulimit")
+ if err != nil {
+ t.Fatal(err)
}
- return cont.executeSync(args)
-}
+ cmd := fmt.Sprintf("ulimit -n > %q", file.Name())
-func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) {
- r, w, err := os.Pipe()
- if err != nil {
- return nil, err
+ spec := testutil.NewSpecWithArgs("sh", "-c", cmd)
+ spec.Process.Rlimits = []specs.POSIXRlimit{
+ {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100},
}
- defer r.Close()
- args := &control.ExecArgs{
- Filename: name,
- Argv: append([]string{name}, arg...),
- FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}},
+ conf := testutil.TestConfig(t)
+ if err := run(spec, conf); err != nil {
+ t.Fatalf("Error running container: %v", err)
}
- ws, err := cont.executeSync(args)
- w.Close()
+ got, err := ioutil.ReadFile(file.Name())
if err != nil {
- return nil, err
+ t.Fatal(err)
}
- if ws != 0 {
- return nil, fmt.Errorf("exec failed, status: %v", ws)
+ if want := "100\n"; string(got) != want {
+ t.Errorf("ulimit result, got: %q, want: %q", got, want)
}
-
- out, err := ioutil.ReadAll(r)
- return out, err
}
-// executeSync synchronously executes a new process.
-func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) {
- pid, err := c.Execute(args)
+// TestRlimitsExec sets limit to number of open files and checks that the limit
+// is propagated to exec'd processes.
+func TestRlimitsExec(t *testing.T) {
+ spec := testutil.NewSpecWithArgs("sleep", "100")
+ spec.Process.Rlimits = []specs.POSIXRlimit{
+ {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100},
+ }
+
+ conf := testutil.TestConfig(t)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
- return 0, fmt.Errorf("error executing: %v", err)
+ t.Fatalf("error setting up container: %v", err)
}
- ws, err := c.WaitPID(pid)
+ defer cleanup()
+
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
- return 0, fmt.Errorf("error waiting: %v", err)
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
}
- return ws, nil
-}
-func TestMain(m *testing.M) {
- log.SetLevel(log.Debug)
- flag.Parse()
- if err := testutil.ConfigureExePath(); err != nil {
- panic(err.Error())
+ got, err := executeCombinedOutput(cont, "/bin/sh", "-c", "ulimit -n")
+ if err != nil {
+ t.Fatal(err)
+ }
+ if want := "100\n"; string(got) != want {
+ t.Errorf("ulimit result, got: %q, want: %q", got, want)
}
- specutils.MaybeRunAsRoot()
- os.Exit(m.Run())
}
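
executeCombinedOutput above captures a container process's stdout and stderr by handing the pipe's write end to the process and reading the other end after it exits. The same pattern works for ordinary child processes; a minimal sketch with os/exec (hypothetical helper, not part of this change):

package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"os/exec"
)

// combinedOutput mirrors executeCombinedOutput above: the child writes into
// the pipe's write end, and we read everything back once it exits.
func combinedOutput(name string, arg ...string) ([]byte, error) {
	r, w, err := os.Pipe()
	if err != nil {
		return nil, err
	}
	defer r.Close()

	cmd := exec.Command(name, arg...)
	cmd.Stdout = w
	cmd.Stderr = w
	runErr := cmd.Run()
	// Close our copy of the write end so ReadAll below can see EOF.
	w.Close()
	if runErr != nil {
		return nil, runErr
	}
	return ioutil.ReadAll(r)
}

func main() {
	out, err := combinedOutput("sh", "-c", "ulimit -n")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Printf("%s", out)
}
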
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 8d31e33b2..29e202b7d 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -131,8 +131,9 @@ func New(conf *config.Config, args *Args) (*Sandbox, error) {
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
c := cleanup.Make(func() {
- err := s.destroy()
- log.Warningf("error destroying sandbox: %v", err)
+ if err := s.destroy(); err != nil {
+ log.Warningf("error destroying sandbox: %v", err)
+ }
})
defer c.Clean()
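
cleanup.Make arms a teardown that the deferred Clean runs unless the success path releases it first; the fix above also stops logging a spurious warning when destroy succeeds. A sketch of that arm/release shape (hypothetical types, not the actual pkg/cleanup API):

package main

import "fmt"

// cleanup runs a teardown unless Release disarms it first — the same shape
// as the cleanup helper used by sandbox.New above.
type cleanup struct{ f func() }

func (c *cleanup) Clean() {
	if c.f != nil {
		c.f()
	}
}

// Release disarms the teardown; call it once creation has succeeded.
func (c *cleanup) Release() { c.f = nil }

func create(fail bool) error {
	c := cleanup{f: func() { fmt.Println("destroying partially created sandbox") }}
	defer c.Clean()

	if fail {
		return fmt.Errorf("creation failed") // deferred Clean tears down
	}
	c.Release() // success: keep the sandbox
	return nil
}

func main() {
	_ = create(true)  // prints the teardown message
	_ = create(false) // prints nothing
}
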
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 11b476690..c228d6299 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -246,7 +246,7 @@ func Capabilities(enableRaw bool, specCaps *specs.LinuxCapabilities) (*auth.Task
if caps.PermittedCaps, err = capsFromNames(specCaps.Permitted, skipSet); err != nil {
return nil, err
}
- // TODO(nlacasse): Support ambient capabilities.
+ // TODO(gvisor.dev/issue/3166): Support ambient capabilities.
}
return &caps, nil
}