diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/BUILD | 1 | ||||
-rw-r--r-- | runsc/boot/controller.go | 2 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 7 | ||||
-rw-r--r-- | runsc/boot/fs.go | 10 | ||||
-rw-r--r-- | runsc/boot/loader.go | 30 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 10 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 14 | ||||
-rw-r--r-- | runsc/config/config.go | 24 | ||||
-rw-r--r-- | runsc/config/flags.go | 1 | ||||
-rw-r--r-- | runsc/container/container_test.go | 141 |
10 files changed, 147 insertions, 93 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index d51347fe1..a79afbdc4 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -38,7 +38,6 @@ go_library( "//pkg/fspath", "//pkg/log", "//pkg/memutil", - "//pkg/metric", "//pkg/rand", "//pkg/refs", "//pkg/refsvfs2", diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 9b270cbf2..d52cf5a00 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -439,7 +439,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Load the state. loadOpts := state.LoadOpts{Source: specFile} - if err := loadOpts.Load(ctx, k, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { + if err := loadOpts.Load(ctx, k, nil, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { return err } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 49b503f99..752fea0e1 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -200,6 +200,12 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.MatchAny{}, + seccomp.EqualTo(unix.MAP_SHARED | unix.MAP_FIXED), + }, + { + seccomp.MatchAny{}, + seccomp.MatchAny{}, + seccomp.MatchAny{}, seccomp.EqualTo(unix.MAP_PRIVATE), }, { @@ -265,7 +271,6 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.MatchAny{}, seccomp.MatchAny{}, seccomp.EqualTo(unix.MSG_DONTWAIT), - seccomp.EqualTo(0), }, }, unix.SYS_RESTART_SYSCALL: {}, diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index bf4a41f77..c4590aab1 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -763,12 +763,10 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con p9FS := mustFindFilesystem("9p") opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) - if conf.OverlayfsStaleRead { - // We can't check for overlayfs here because sandbox is chroot'ed and gofer - // can only send mount options for specs.Mounts (specs.Root is missing - // Options field). So assume root is always on top of overlayfs. - opts = append(opts, "overlayfs_stale_read") - } + // We can't check for overlayfs here because sandbox is chroot'ed and gofer + // can only send mount options for specs.Mounts (specs.Root is missing + // Options field). So assume root is always on top of overlayfs. + opts = append(opts, "overlayfs_stale_read") rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil) if err != nil { diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index b73ac101f..8d71d7447 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -34,11 +34,9 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/refsvfs2" - "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/fdimport" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -218,8 +216,6 @@ func New(args Args) (*Loader, error) { return nil, fmt.Errorf("setting up memory usage: %w", err) } - metric.CreateSentryMetrics() - // Is this a VFSv2 kernel? if args.Conf.VFS2 { kernel.VFS2Enabled = true @@ -282,19 +278,15 @@ func New(args Args) (*Loader, error) { } // Create timekeeper. - tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) - if err != nil { - return nil, fmt.Errorf("creating timekeeper: %w", err) - } + tk := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) tk.SetClocks(time.NewCalibratedClocks()) - k.SetTimekeeper(tk) if err := enableStrace(args.Conf); err != nil { return nil, fmt.Errorf("enabling strace: %w", err) } // Create root network namespace/stack. - netns, err := newRootNetworkNamespace(args.Conf, k, k) + netns, err := newRootNetworkNamespace(args.Conf, tk, k) if err != nil { return nil, fmt.Errorf("creating network: %w", err) } @@ -336,6 +328,7 @@ func New(args Args) (*Loader, error) { // to createVFS in order to mount (among other things) procfs. if err = k.Init(kernel.InitKernelArgs{ FeatureSet: cpuid.HostFeatureSet(), + Timekeeper: tk, RootUserNamespace: creds.UserNamespace, RootNetworkNamespace: netns, ApplicationCores: uint(args.NumCPU), @@ -967,10 +960,15 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { } args.Envv = envv } + args.PIDNamespace = tg.PIDNamespace() + + args.Limits, err = createLimitSet(l.root.spec) + if err != nil { + return 0, fmt.Errorf("creating limits: %w", err) + } // Start the process. proc := control.Proc{Kernel: l.k} - args.PIDNamespace = tg.PIDNamespace() newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args) if err != nil { return 0, err @@ -1224,7 +1222,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) if err == nil { // Send signal directly to the identified process. - return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(execTG, &linux.SignalInfo{Signo: signo}) } // The caller may be signaling a process not started directly via exec. @@ -1237,7 +1235,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er if tg.Leader().ContainerID() != cid { return fmt.Errorf("process %d belongs to a different container: %q", tgid, tg.Leader().ContainerID()) } - return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}) } // signalForegrondProcessGroup looks up foreground process group from the TTY @@ -1273,7 +1271,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s // No foreground process group has been set. Signal the // original thread group. log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid) - return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}) } // Send the signal to all processes in the process group. var lastErr error @@ -1281,7 +1279,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s if tg.ProcessGroup() != pg { continue } - if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil { + if err := l.k.SendExternalSignalThreadGroup(tg, &linux.SignalInfo{Signo: signo}); err != nil { lastErr = err } } @@ -1296,7 +1294,7 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error { // sent to the entire container. l.k.Pause() defer l.k.Unpause() - return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo}) + return l.k.SendContainerSignal(cid, &linux.SignalInfo{Signo: signo}) } // threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 7be5176b0..52aa33529 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -210,12 +210,10 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c fd := c.fds.remove() data := p9MountData(fd, conf.FileAccess, true /* vfs2 */) - if conf.OverlayfsStaleRead { - // We can't check for overlayfs here because sandbox is chroot'ed and gofer - // can only send mount options for specs.Mounts (specs.Root is missing - // Options field). So assume root is always on top of overlayfs. - data = append(data, "overlayfs_stale_read") - } + // We can't check for overlayfs here because sandbox is chroot'ed and gofer + // can only send mount options for specs.Mounts (specs.Root is missing + // Options field). So assume root is always on top of overlayfs. + data = append(data, "overlayfs_stale_read") log.Infof("Mounting root over 9P, ioFD: %d", fd) opts := &vfs.MountOptions{ diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 6a755ecb6..5ded7b946 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -473,14 +473,12 @@ func adjustMountOptions(conf *config.Config, path string, opts []string) ([]stri rv := make([]string, len(opts)) copy(rv, opts) - if conf.OverlayfsStaleRead { - statfs := unix.Statfs_t{} - if err := unix.Statfs(path, &statfs); err != nil { - return nil, err - } - if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC { - rv = append(rv, "overlayfs_stale_read") - } + statfs := unix.Statfs_t{} + if err := unix.Statfs(path, &statfs); err != nil { + return nil, err + } + if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC { + rv = append(rv, "overlayfs_stale_read") } return rv, nil } diff --git a/runsc/config/config.go b/runsc/config/config.go index fa550ebf7..3d8c7a0ab 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -151,12 +151,6 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"` - // OverlayfsStaleRead instructs the sandbox to assume that the root mount - // is on a Linux overlayfs mount, which does not necessarily preserve - // coherence between read-only and subsequent writable file descriptors - // representing the "same" file. - OverlayfsStaleRead bool `flag:"overlayfs-stale-read"` - // CPUNumFromQuota sets CPU number count to available CPU quota, using // least integer value greater than or equal to quota. // @@ -245,14 +239,14 @@ func (f *FileAccessType) Get() interface{} { } // String implements flag.Value. -func (f *FileAccessType) String() string { - switch *f { +func (f FileAccessType) String() string { + switch f { case FileAccessShared: return "shared" case FileAccessExclusive: return "exclusive" } - panic(fmt.Sprintf("Invalid file access type %v", *f)) + panic(fmt.Sprintf("Invalid file access type %d", f)) } // NetworkType tells which network stack to use. @@ -294,8 +288,8 @@ func (n *NetworkType) Get() interface{} { } // String implements flag.Value. -func (n *NetworkType) String() string { - switch *n { +func (n NetworkType) String() string { + switch n { case NetworkSandbox: return "sandbox" case NetworkHost: @@ -303,7 +297,7 @@ func (n *NetworkType) String() string { case NetworkNone: return "none" } - panic(fmt.Sprintf("Invalid network type %v", *n)) + panic(fmt.Sprintf("Invalid network type %d", n)) } // QueueingDiscipline is used to specify the kind of Queueing Discipline to @@ -341,14 +335,14 @@ func (q *QueueingDiscipline) Get() interface{} { } // String implements flag.Value. -func (q *QueueingDiscipline) String() string { - switch *q { +func (q QueueingDiscipline) String() string { + switch q { case QDiscNone: return "none" case QDiscFIFO: return "fifo" } - panic(fmt.Sprintf("Invalid qdisc %v", *q)) + panic(fmt.Sprintf("Invalid qdisc %d", q)) } func leakModePtr(v refs.LeakMode) *refs.LeakMode { diff --git a/runsc/config/flags.go b/runsc/config/flags.go index c3dca2352..6f1b5927a 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -72,7 +72,6 @@ func RegisterFlags() { flag.Var(fileAccessTypePtr(FileAccessShared), "file-access-mounts", "specifies which filesystem validation to use for volumes other than the root mount: shared (default), exclusive.") flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") flag.Bool("verity", false, "specifies whether a verity file system will be mounted.") - flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.") flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.") diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 0e79877b7..249324c5a 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -47,6 +47,62 @@ import ( "gvisor.dev/gvisor/runsc/specutils" ) +func TestMain(m *testing.M) { + log.SetLevel(log.Debug) + flag.Parse() + if err := testutil.ConfigureExePath(); err != nil { + panic(err.Error()) + } + specutils.MaybeRunAsRoot() + os.Exit(m.Run()) +} + +func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { + args := &control.ExecArgs{ + Filename: name, + Argv: append([]string{name}, arg...), + } + return cont.executeSync(args) +} + +func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) { + r, w, err := os.Pipe() + if err != nil { + return nil, err + } + defer r.Close() + + args := &control.ExecArgs{ + Filename: name, + Argv: append([]string{name}, arg...), + FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}}, + } + ws, err := cont.executeSync(args) + w.Close() + if err != nil { + return nil, err + } + if ws != 0 { + return nil, fmt.Errorf("exec failed, status: %v", ws) + } + + out, err := ioutil.ReadAll(r) + return out, err +} + +// executeSync synchronously executes a new process. +func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) { + pid, err := c.Execute(args) + if err != nil { + return 0, fmt.Errorf("error executing: %v", err) + } + ws, err := c.WaitPID(pid) + if err != nil { + return 0, fmt.Errorf("error waiting: %v", err) + } + return ws, nil +} + // waitForProcessList waits for the given process list to show up in the container. func waitForProcessList(cont *Container, want []*control.Process) error { cb := func() error { @@ -2470,58 +2526,67 @@ func TestBindMountByOption(t *testing.T) { } } -func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { - args := &control.ExecArgs{ - Filename: name, - Argv: append([]string{name}, arg...), +// TestRlimits sets limit to number of open files and checks that the limit +// is propagated to the container. +func TestRlimits(t *testing.T) { + file, err := ioutil.TempFile(testutil.TmpDir(), "ulimit") + if err != nil { + t.Fatal(err) } - return cont.executeSync(args) -} + cmd := fmt.Sprintf("ulimit -n > %q", file.Name()) -func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) { - r, w, err := os.Pipe() - if err != nil { - return nil, err + spec := testutil.NewSpecWithArgs("sh", "-c", cmd) + spec.Process.Rlimits = []specs.POSIXRlimit{ + {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100}, } - defer r.Close() - args := &control.ExecArgs{ - Filename: name, - Argv: append([]string{name}, arg...), - FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}}, + conf := testutil.TestConfig(t) + if err := run(spec, conf); err != nil { + t.Fatalf("Error running container: %v", err) } - ws, err := cont.executeSync(args) - w.Close() + got, err := ioutil.ReadFile(file.Name()) if err != nil { - return nil, err + t.Fatal(err) } - if ws != 0 { - return nil, fmt.Errorf("exec failed, status: %v", ws) + if want := "100\n"; string(got) != want { + t.Errorf("ulimit result, got: %q, want: %q", got, want) } - - out, err := ioutil.ReadAll(r) - return out, err } -// executeSync synchronously executes a new process. -func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) { - pid, err := c.Execute(args) +// TestRlimitsExec sets limit to number of open files and checks that the limit +// is propagated to exec'd processes. +func TestRlimitsExec(t *testing.T) { + spec := testutil.NewSpecWithArgs("sleep", "100") + spec.Process.Rlimits = []specs.POSIXRlimit{ + {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100}, + } + + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { - return 0, fmt.Errorf("error executing: %v", err) + t.Fatalf("error setting up container: %v", err) } - ws, err := c.WaitPID(pid) + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { - return 0, fmt.Errorf("error waiting: %v", err) + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) } - return ws, nil -} -func TestMain(m *testing.M) { - log.SetLevel(log.Debug) - flag.Parse() - if err := testutil.ConfigureExePath(); err != nil { - panic(err.Error()) + got, err := executeCombinedOutput(cont, "/bin/sh", "-c", "ulimit -n") + if err != nil { + t.Fatal(err) + } + if want := "100\n"; string(got) != want { + t.Errorf("ulimit result, got: %q, want: %q", got, want) } - specutils.MaybeRunAsRoot() - os.Exit(m.Run()) } |