Diffstat (limited to 'runsc')
-rw-r--r--  runsc/boot/BUILD                     1
-rw-r--r--  runsc/boot/controller.go             2
-rw-r--r--  runsc/boot/filter/config.go          7
-rw-r--r--  runsc/boot/fs.go                    10
-rw-r--r--  runsc/boot/loader.go                30
-rw-r--r--  runsc/boot/vfs.go                   10
-rw-r--r--  runsc/cmd/gofer.go                  14
-rw-r--r--  runsc/config/config.go              24
-rw-r--r--  runsc/config/flags.go                1
-rw-r--r--  runsc/container/container_test.go  141
10 files changed, 147 insertions(+), 93 deletions(-)
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index d51347fe1..a79afbdc4 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -38,7 +38,6 @@ go_library(
"//pkg/fspath",
"//pkg/log",
"//pkg/memutil",
- "//pkg/metric",
"//pkg/rand",
"//pkg/refs",
"//pkg/refsvfs2",
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 9b270cbf2..d52cf5a00 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -439,7 +439,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
// Load the state.
loadOpts := state.LoadOpts{Source: specFile}
- if err := loadOpts.Load(ctx, k, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil {
+ if err := loadOpts.Load(ctx, k, nil, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil {
return err
}
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 49b503f99..752fea0e1 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -200,6 +200,12 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.MatchAny{},
+ seccomp.EqualTo(unix.MAP_SHARED | unix.MAP_FIXED),
+ },
+ {
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
+ seccomp.MatchAny{},
seccomp.EqualTo(unix.MAP_PRIVATE),
},
{
@@ -265,7 +271,6 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.MatchAny{},
seccomp.MatchAny{},
seccomp.EqualTo(unix.MSG_DONTWAIT),
- seccomp.EqualTo(0),
},
},
unix.SYS_RESTART_SYSCALL: {},
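A note on the filter hunk above: in gVisor's seccomp package, each rule row is a tuple of per-argument matchers, so the new entry admits calls whose flags argument is exactly MAP_SHARED|MAP_FIXED while the existing row continues to admit MAP_PRIVATE. A minimal sketch of how the rows compose, assuming this hunk sits inside the mmap entry (the MAP_* values suggest so; imports of pkg/seccomp and golang.org/x/sys/unix are implied):

var mmapRules = seccomp.SyscallRules{
	unix.SYS_MMAP: {
		{
			seccomp.MatchAny{}, // addr: unconstrained
			seccomp.MatchAny{}, // length: unconstrained
			seccomp.MatchAny{}, // prot: unconstrained
			seccomp.EqualTo(unix.MAP_SHARED | unix.MAP_FIXED), // flags: newly allowed combination
		},
		{
			seccomp.MatchAny{},
			seccomp.MatchAny{},
			seccomp.MatchAny{},
			seccomp.EqualTo(unix.MAP_PRIVATE),
		},
	},
}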
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index bf4a41f77..c4590aab1 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -763,12 +763,10 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con
p9FS := mustFindFilesystem("9p")
opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
- if conf.OverlayfsStaleRead {
- // We can't check for overlayfs here because sandbox is chroot'ed and gofer
- // can only send mount options for specs.Mounts (specs.Root is missing
- // Options field). So assume root is always on top of overlayfs.
- opts = append(opts, "overlayfs_stale_read")
- }
+ // We can't check for overlayfs here because sandbox is chroot'ed and gofer
+ // can only send mount options for specs.Mounts (specs.Root is missing
+ // Options field). So assume root is always on top of overlayfs.
+ opts = append(opts, "overlayfs_stale_read")
rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
if err != nil {
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index b73ac101f..8d71d7447 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -34,11 +34,9 @@ import (
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/memutil"
- "gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/rand"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/refsvfs2"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/fdimport"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -218,8 +216,6 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("setting up memory usage: %w", err)
}
- metric.CreateSentryMetrics()
-
// Is this a VFSv2 kernel?
if args.Conf.VFS2 {
kernel.VFS2Enabled = true
@@ -282,19 +278,15 @@ func New(args Args) (*Loader, error) {
}
// Create timekeeper.
- tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange())
- if err != nil {
- return nil, fmt.Errorf("creating timekeeper: %w", err)
- }
+ tk := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange())
tk.SetClocks(time.NewCalibratedClocks())
- k.SetTimekeeper(tk)
if err := enableStrace(args.Conf); err != nil {
return nil, fmt.Errorf("enabling strace: %w", err)
}
// Create root network namespace/stack.
- netns, err := newRootNetworkNamespace(args.Conf, k, k)
+ netns, err := newRootNetworkNamespace(args.Conf, tk, k)
if err != nil {
return nil, fmt.Errorf("creating network: %w", err)
}
@@ -336,6 +328,7 @@ func New(args Args) (*Loader, error) {
// to createVFS in order to mount (among other things) procfs.
if err = k.Init(kernel.InitKernelArgs{
FeatureSet: cpuid.HostFeatureSet(),
+ Timekeeper: tk,
RootUserNamespace: creds.UserNamespace,
RootNetworkNamespace: netns,
ApplicationCores: uint(args.NumCPU),
@@ -967,10 +960,15 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
}
args.Envv = envv
}
+ args.PIDNamespace = tg.PIDNamespace()
+
+ args.Limits, err = createLimitSet(l.root.spec)
+ if err != nil {
+ return 0, fmt.Errorf("creating limits: %w", err)
+ }
// Start the process.
proc := control.Proc{Kernel: l.k}
- args.PIDNamespace = tg.PIDNamespace()
newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args)
if err != nil {
return 0, err
@@ -1224,7 +1222,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
if err == nil {
// Send signal directly to the identified process.
- return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(execTG, &linux.SignalInfo{Signo: signo})
}
// The caller may be signaling a process not started directly via exec.
@@ -1237,7 +1235,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
if tg.Leader().ContainerID() != cid {
return fmt.Errorf("process %d belongs to a different container: %q", tgid, tg.Leader().ContainerID())
}
- return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo})
}
// signalForegrondProcessGroup looks up foreground process group from the TTY
@@ -1273,7 +1271,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
// No foreground process group has been set. Signal the
// original thread group.
log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid)
- return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo})
}
// Send the signal to all processes in the process group.
var lastErr error
@@ -1281,7 +1279,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
if tg.ProcessGroup() != pg {
continue
}
- if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil {
+ if err := l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}); err != nil {
lastErr = err
}
}
@@ -1296,7 +1294,7 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
// sent to the entire container.
l.k.Pause()
defer l.k.Unpause()
- return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo})
+ return l.k.SendContainerSignal(cid, &linux.SignalInfo{Signo: signo})
}
// threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it
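Two behavioral changes in the loader hunks above are worth a sketch. First, executeAsync now fills args.Limits from the container spec, so processes started via exec inherit the same rlimits as the init process (exercised by TestRlimitsExec in container_test.go below). A hedged sketch of what a helper like createLimitSet plausibly does; the package name, import paths, and error handling here are assumptions, not the actual runsc implementation:

package boot

import (
	"fmt"

	specs "github.com/opencontainers/runtime-spec/specs-go"
	"gvisor.dev/gvisor/pkg/sentry/limits"
)

// createLimitSet translates the OCI spec's POSIXRlimit entries into a
// sentry limits.LimitSet. FromLinuxResourceName maps names such as
// "RLIMIT_NOFILE" to limit types; unknown names are rejected.
func createLimitSet(spec *specs.Spec) (*limits.LimitSet, error) {
	ls, err := limits.NewLinuxLimitSet()
	if err != nil {
		return nil, err
	}
	for _, rl := range spec.Process.Rlimits {
		lt, ok := limits.FromLinuxResourceName[rl.Type]
		if !ok {
			return nil, fmt.Errorf("unknown resource %q", rl.Type)
		}
		ls.SetUnchecked(lt, limits.Limit{Cur: rl.Soft, Max: rl.Hard})
	}
	return ls, nil
}

Second, the signal paths switch from arch.SignalInfo to linux.SignalInfo, a type move rather than a behavior change, which is why every SendExternalSignalThreadGroup call site updates identically.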
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 7be5176b0..52aa33529 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -210,12 +210,10 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c
fd := c.fds.remove()
data := p9MountData(fd, conf.FileAccess, true /* vfs2 */)
- if conf.OverlayfsStaleRead {
- // We can't check for overlayfs here because sandbox is chroot'ed and gofer
- // can only send mount options for specs.Mounts (specs.Root is missing
- // Options field). So assume root is always on top of overlayfs.
- data = append(data, "overlayfs_stale_read")
- }
+ // We can't check for overlayfs here because sandbox is chroot'ed and gofer
+ // can only send mount options for specs.Mounts (specs.Root is missing
+ // Options field). So assume root is always on top of overlayfs.
+ data = append(data, "overlayfs_stale_read")
log.Infof("Mounting root over 9P, ioFD: %d", fd)
opts := &vfs.MountOptions{
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 6a755ecb6..5ded7b946 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -473,14 +473,12 @@ func adjustMountOptions(conf *config.Config, path string, opts []string) ([]stri
rv := make([]string, len(opts))
copy(rv, opts)
- if conf.OverlayfsStaleRead {
- statfs := unix.Statfs_t{}
- if err := unix.Statfs(path, &statfs); err != nil {
- return nil, err
- }
- if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC {
- rv = append(rv, "overlayfs_stale_read")
- }
+ statfs := unix.Statfs_t{}
+ if err := unix.Statfs(path, &statfs); err != nil {
+ return nil, err
+ }
+ if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC {
+ rv = append(rv, "overlayfs_stale_read")
}
return rv, nil
}
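With the overlayfs-stale-read flag removed, the gofer performs this statfs probe unconditionally. A self-contained sketch of the same check, runnable outside runsc (the main wrapper and probed path are illustrative):

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// isOverlayfs reports whether path sits on an overlayfs mount by
// comparing the filesystem magic returned by statfs(2), mirroring the
// check adjustMountOptions now always performs.
func isOverlayfs(path string) (bool, error) {
	var statfs unix.Statfs_t
	if err := unix.Statfs(path, &statfs); err != nil {
		return false, err
	}
	return statfs.Type == unix.OVERLAYFS_SUPER_MAGIC, nil
}

func main() {
	onOverlay, err := isOverlayfs("/")
	if err != nil {
		panic(err)
	}
	fmt.Println("root on overlayfs:", onOverlay)
}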
diff --git a/runsc/config/config.go b/runsc/config/config.go
index fa550ebf7..3d8c7a0ab 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -151,12 +151,6 @@ type Config struct {
// ReferenceLeakMode sets reference leak check mode
ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"`
- // OverlayfsStaleRead instructs the sandbox to assume that the root mount
- // is on a Linux overlayfs mount, which does not necessarily preserve
- // coherence between read-only and subsequent writable file descriptors
- // representing the "same" file.
- OverlayfsStaleRead bool `flag:"overlayfs-stale-read"`
-
// CPUNumFromQuota sets CPU number count to available CPU quota, using
// least integer value greater than or equal to quota.
//
@@ -245,14 +239,14 @@ func (f *FileAccessType) Get() interface{} {
}
// String implements flag.Value.
-func (f *FileAccessType) String() string {
- switch *f {
+func (f FileAccessType) String() string {
+ switch f {
case FileAccessShared:
return "shared"
case FileAccessExclusive:
return "exclusive"
}
- panic(fmt.Sprintf("Invalid file access type %v", *f))
+ panic(fmt.Sprintf("Invalid file access type %d", f))
}
// NetworkType tells which network stack to use.
@@ -294,8 +288,8 @@ func (n *NetworkType) Get() interface{} {
}
// String implements flag.Value.
-func (n *NetworkType) String() string {
- switch *n {
+func (n NetworkType) String() string {
+ switch n {
case NetworkSandbox:
return "sandbox"
case NetworkHost:
@@ -303,7 +297,7 @@ func (n *NetworkType) String() string {
case NetworkNone:
return "none"
}
- panic(fmt.Sprintf("Invalid network type %v", *n))
+ panic(fmt.Sprintf("Invalid network type %d", n))
}
// QueueingDiscipline is used to specify the kind of Queueing Discipline to
@@ -341,14 +335,14 @@ func (q *QueueingDiscipline) Get() interface{} {
}
// String implements flag.Value.
-func (q *QueueingDiscipline) String() string {
- switch *q {
+func (q QueueingDiscipline) String() string {
+ switch q {
case QDiscNone:
return "none"
case QDiscFIFO:
return "fifo"
}
- panic(fmt.Sprintf("Invalid qdisc %v", *q))
+ panic(fmt.Sprintf("Invalid qdisc %d", q))
}
func leakModePtr(v refs.LeakMode) *refs.LeakMode {
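The receiver changes above follow a general Go pattern: with a value receiver, both the named type and its pointer satisfy fmt.Stringer, and the panic verbs switch from %v to %d because formatting the value with %v inside its own String method would now recurse. A standalone illustration with a local type (names are specific to this example):

package main

import "fmt"

type NetworkType int

const (
	NetworkSandbox NetworkType = iota
	NetworkHost
	NetworkNone
)

// Value receiver: the method sets of both NetworkType and *NetworkType
// now include String, so either prints symbolically with %v.
func (n NetworkType) String() string {
	switch n {
	case NetworkSandbox:
		return "sandbox"
	case NetworkHost:
		return "host"
	case NetworkNone:
		return "none"
	}
	// %d, not %v: %v would invoke String again and recurse forever.
	panic(fmt.Sprintf("Invalid network type %d", n))
}

func main() {
	n := NetworkSandbox
	fmt.Println(n)  // "sandbox" via the value receiver
	fmt.Println(&n) // "sandbox" too; pointers see value-receiver methods
}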
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index c3dca2352..6f1b5927a 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -72,7 +72,6 @@ func RegisterFlags() {
flag.Var(fileAccessTypePtr(FileAccessShared), "file-access-mounts", "specifies which filesystem validation to use for volumes other than the root mount: shared (default), exclusive.")
flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
flag.Bool("verity", false, "specifies whether a verity file system will be mounted.")
- flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem")
flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.")
flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.")
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 0e79877b7..249324c5a 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -47,6 +47,62 @@ import (
"gvisor.dev/gvisor/runsc/specutils"
)
+func TestMain(m *testing.M) {
+ log.SetLevel(log.Debug)
+ flag.Parse()
+ if err := testutil.ConfigureExePath(); err != nil {
+ panic(err.Error())
+ }
+ specutils.MaybeRunAsRoot()
+ os.Exit(m.Run())
+}
+
+func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
+ args := &control.ExecArgs{
+ Filename: name,
+ Argv: append([]string{name}, arg...),
+ }
+ return cont.executeSync(args)
+}
+
+func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) {
+ r, w, err := os.Pipe()
+ if err != nil {
+ return nil, err
+ }
+ defer r.Close()
+
+ args := &control.ExecArgs{
+ Filename: name,
+ Argv: append([]string{name}, arg...),
+ FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}},
+ }
+ ws, err := cont.executeSync(args)
+ w.Close()
+ if err != nil {
+ return nil, err
+ }
+ if ws != 0 {
+ return nil, fmt.Errorf("exec failed, status: %v", ws)
+ }
+
+ out, err := ioutil.ReadAll(r)
+ return out, err
+}
+
+// executeSync synchronously executes a new process.
+func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) {
+ pid, err := c.Execute(args)
+ if err != nil {
+ return 0, fmt.Errorf("error executing: %v", err)
+ }
+ ws, err := c.WaitPID(pid)
+ if err != nil {
+ return 0, fmt.Errorf("error waiting: %v", err)
+ }
+ return ws, nil
+}
+
// waitForProcessList waits for the given process list to show up in the container.
func waitForProcessList(cont *Container, want []*control.Process) error {
cb := func() error {
@@ -2470,58 +2526,67 @@ func TestBindMountByOption(t *testing.T) {
}
}
-func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
- args := &control.ExecArgs{
- Filename: name,
- Argv: append([]string{name}, arg...),
+// TestRlimits sets limit to number of open files and checks that the limit
+// is propagated to the container.
+func TestRlimits(t *testing.T) {
+ file, err := ioutil.TempFile(testutil.TmpDir(), "ulimit")
+ if err != nil {
+ t.Fatal(err)
}
- return cont.executeSync(args)
-}
+ cmd := fmt.Sprintf("ulimit -n > %q", file.Name())
-func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) {
- r, w, err := os.Pipe()
- if err != nil {
- return nil, err
+ spec := testutil.NewSpecWithArgs("sh", "-c", cmd)
+ spec.Process.Rlimits = []specs.POSIXRlimit{
+ {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100},
}
- defer r.Close()
- args := &control.ExecArgs{
- Filename: name,
- Argv: append([]string{name}, arg...),
- FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}},
+ conf := testutil.TestConfig(t)
+ if err := run(spec, conf); err != nil {
+ t.Fatalf("Error running container: %v", err)
}
- ws, err := cont.executeSync(args)
- w.Close()
+ got, err := ioutil.ReadFile(file.Name())
if err != nil {
- return nil, err
+ t.Fatal(err)
}
- if ws != 0 {
- return nil, fmt.Errorf("exec failed, status: %v", ws)
+ if want := "100\n"; string(got) != want {
+ t.Errorf("ulimit result, got: %q, want: %q", got, want)
}
-
- out, err := ioutil.ReadAll(r)
- return out, err
}
-// executeSync synchronously executes a new process.
-func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) {
- pid, err := c.Execute(args)
+// TestRlimitsExec sets limit to number of open files and checks that the limit
+// is propagated to exec'd processes.
+func TestRlimitsExec(t *testing.T) {
+ spec := testutil.NewSpecWithArgs("sleep", "100")
+ spec.Process.Rlimits = []specs.POSIXRlimit{
+ {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100},
+ }
+
+ conf := testutil.TestConfig(t)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
- return 0, fmt.Errorf("error executing: %v", err)
+ t.Fatalf("error setting up container: %v", err)
}
- ws, err := c.WaitPID(pid)
+ defer cleanup()
+
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
- return 0, fmt.Errorf("error waiting: %v", err)
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
}
- return ws, nil
-}
-func TestMain(m *testing.M) {
- log.SetLevel(log.Debug)
- flag.Parse()
- if err := testutil.ConfigureExePath(); err != nil {
- panic(err.Error())
+ got, err := executeCombinedOutput(cont, "/bin/sh", "-c", "ulimit -n")
+ if err != nil {
+ t.Fatal(err)
+ }
+ if want := "100\n"; string(got) != want {
+ t.Errorf("ulimit result, got: %q, want: %q", got, want)
}
- specutils.MaybeRunAsRoot()
- os.Exit(m.Run())
}