summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/BUILD8
-rw-r--r--runsc/boot/config.go5
-rw-r--r--runsc/boot/fds.go108
-rw-r--r--runsc/boot/fs.go13
-rw-r--r--runsc/boot/loader.go216
-rw-r--r--runsc/boot/loader_amd64.go26
-rw-r--r--runsc/boot/loader_arm64.go26
-rw-r--r--runsc/boot/loader_test.go19
-rw-r--r--runsc/boot/network.go50
-rw-r--r--runsc/boot/vfs.go57
-rw-r--r--runsc/cmd/syscalls.go23
-rw-r--r--runsc/container/BUILD2
-rw-r--r--runsc/container/container.go23
-rw-r--r--runsc/container/container_test.go147
-rw-r--r--runsc/container/multi_container_test.go2
-rw-r--r--runsc/main.go8
-rw-r--r--runsc/sandbox/network.go5
-rw-r--r--runsc/specutils/specutils.go14
18 files changed, 370 insertions, 382 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index ed3c8f546..a907c103b 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -12,12 +12,9 @@ go_library(
"controller.go",
"debug.go",
"events.go",
- "fds.go",
"fs.go",
"limits.go",
"loader.go",
- "loader_amd64.go",
- "loader_arm64.go",
"network.go",
"strace.go",
"vfs.go",
@@ -43,6 +40,7 @@ go_library(
"//pkg/sentry/arch:registers_go_proto",
"//pkg/sentry/control",
"//pkg/sentry/devices/memdev",
+ "//pkg/sentry/fdimport",
"//pkg/sentry/fs",
"//pkg/sentry/fs/dev",
"//pkg/sentry/fs/gofer",
@@ -53,6 +51,7 @@ go_library(
"//pkg/sentry/fs/tmpfs",
"//pkg/sentry/fs/tty",
"//pkg/sentry/fs/user",
+ "//pkg/sentry/fsimpl/devpts",
"//pkg/sentry/fsimpl/devtmpfs",
"//pkg/sentry/fsimpl/gofer",
"//pkg/sentry/fsimpl/host",
@@ -76,7 +75,6 @@ go_library(
"//pkg/sentry/socket/unix",
"//pkg/sentry/state",
"//pkg/sentry/strace",
- "//pkg/sentry/syscalls/linux",
"//pkg/sentry/syscalls/linux/vfs2",
"//pkg/sentry/time",
"//pkg/sentry/unimpl:unimplemented_syscall_go_proto",
@@ -88,6 +86,7 @@ go_library(
"//pkg/tcpip",
"//pkg/tcpip/link/fdbased",
"//pkg/tcpip/link/loopback",
+ "//pkg/tcpip/link/qdisc/fifo",
"//pkg/tcpip/link/sniffer",
"//pkg/tcpip/network/arp",
"//pkg/tcpip/network/ipv4",
@@ -124,7 +123,6 @@ go_test(
"//pkg/p9",
"//pkg/sentry/contexttest",
"//pkg/sentry/fs",
- "//pkg/sentry/kernel",
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/unet",
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 715a19112..6d6a705f8 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -187,6 +187,10 @@ type Config struct {
// SoftwareGSO indicates that software segmentation offload is enabled.
SoftwareGSO bool
+ // QDisc indicates the type of queuening discipline to use by default
+ // for non-loopback interfaces.
+ QDisc QueueingDiscipline
+
// LogPackets indicates that all network packets should be logged.
LogPackets bool
@@ -294,6 +298,7 @@ func (c *Config) ToFlags() []string {
"--gso=" + strconv.FormatBool(c.HardwareGSO),
"--software-gso=" + strconv.FormatBool(c.SoftwareGSO),
"--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead),
+ "--qdisc=" + c.QDisc.String(),
}
if c.CPUNumFromQuota {
f = append(f, "--cpu-num-from-quota")
diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go
deleted file mode 100644
index 7e7a31fbd..000000000
--- a/runsc/boot/fds.go
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package boot
-
-import (
- "fmt"
-
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/fs/host"
- vfshost "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
-)
-
-// createFDTable creates an FD table that contains stdin, stdout, and stderr.
-// If console is true, then ioctl calls will be passed through to the host FD.
-// Upon success, createFDMap dups then closes stdioFDs.
-func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) {
- if len(stdioFDs) != 3 {
- return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
- }
-
- if kernel.VFS2Enabled {
- return createFDTableVFS2(ctx, console, stdioFDs)
- }
-
- k := kernel.KernelFromContext(ctx)
- fdTable := k.NewFDTable()
- defer fdTable.DecRef()
-
- var ttyFile *fs.File
- for appFD, hostFD := range stdioFDs {
- var appFile *fs.File
-
- if console && appFD < 3 {
- // Import the file as a host TTY file.
- if ttyFile == nil {
- var err error
- appFile, err = host.ImportFile(ctx, hostFD, true /* isTTY */)
- if err != nil {
- return nil, err
- }
- defer appFile.DecRef()
-
- // Remember this in the TTY file, as we will
- // use it for the other stdio FDs.
- ttyFile = appFile
- } else {
- // Re-use the existing TTY file, as all three
- // stdio FDs must point to the same fs.File in
- // order to share TTY state, specifically the
- // foreground process group id.
- appFile = ttyFile
- }
- } else {
- // Import the file as a regular host file.
- var err error
- appFile, err = host.ImportFile(ctx, hostFD, false /* isTTY */)
- if err != nil {
- return nil, err
- }
- defer appFile.DecRef()
- }
-
- // Add the file to the FD map.
- if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
- return nil, err
- }
- }
-
- fdTable.IncRef()
- return fdTable, nil
-}
-
-func createFDTableVFS2(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) {
- k := kernel.KernelFromContext(ctx)
- fdTable := k.NewFDTable()
- defer fdTable.DecRef()
-
- for appFD, hostFD := range stdioFDs {
- // TODO(gvisor.dev/issue/1482): Add TTY support.
- appFile, err := vfshost.ImportFD(ctx, k.HostMount(), hostFD, false)
- if err != nil {
- return nil, err
- }
-
- if err := fdTable.NewFDAtVFS2(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
- appFile.DecRef()
- return nil, err
- }
- appFile.DecRef()
- }
-
- fdTable.IncRef()
- return fdTable, nil
-}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 98cce60af..4875452e2 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -219,6 +219,9 @@ func mountFlags(opts []string) fs.MountSourceFlags {
mf.NoAtime = true
case "noexec":
mf.NoExec = true
+ case "bind", "rbind":
+ // When options include either "bind" or "rbind",
+ // it's converted to a 9P mount.
default:
log.Warningf("ignoring unknown mount option %q", o)
}
@@ -765,6 +768,16 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
useOverlay bool
)
+ for _, opt := range m.Options {
+ // When options include either "bind" or "rbind", this behaves as
+ // bind mount even if the mount type is equal to a filesystem supported
+ // on runsc.
+ if opt == "bind" || opt == "rbind" {
+ m.Type = bind
+ break
+ }
+ }
+
switch m.Type {
case devpts, devtmpfs, proc, sysfs:
fsName = m.Type
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index f6ea4c102..8c8bad11c 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -27,16 +27,18 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/memutil"
"gvisor.dev/gvisor/pkg/rand"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/fdimport"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/host"
"gvisor.dev/gvisor/pkg/sentry/fs/user"
- vfs2host "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
+ hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -75,8 +77,6 @@ import (
_ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
)
-var syscallTable *kernel.SyscallTable
-
// Loader keeps state needed to start the kernel and run the container..
type Loader struct {
// k is the kernel.
@@ -143,6 +143,9 @@ type execProcess struct {
// tty will be nil if the process is not attached to a terminal.
tty *host.TTYFileOperations
+ // tty will be nil if the process is not attached to a terminal.
+ ttyVFS2 *hostvfs2.TTYFileDescription
+
// pidnsPath is the pid namespace path in spec
pidnsPath string
}
@@ -199,14 +202,12 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("setting up memory usage: %v", err)
}
- // Patch the syscall table.
- kernel.VFS2Enabled = args.Conf.VFS2
- if kernel.VFS2Enabled {
- vfs2.Override(syscallTable.Table)
+ // Is this a VFSv2 kernel?
+ if args.Conf.VFS2 {
+ kernel.VFS2Enabled = true
+ vfs2.Override()
}
- kernel.RegisterSyscallTable(syscallTable)
-
// Create kernel and platform.
p, err := createPlatform(args.Conf, args.Device)
if err != nil {
@@ -333,7 +334,7 @@ func New(args Args) (*Loader, error) {
if kernel.VFS2Enabled {
// Set up host mount that will be used for imported fds.
- hostFilesystem := vfs2host.NewFilesystem(k.VFS())
+ hostFilesystem := hostvfs2.NewFilesystem(k.VFS())
defer hostFilesystem.DecRef()
hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{})
if err != nil {
@@ -528,6 +529,8 @@ func (l *Loader) run() error {
// If we are restoring, we do not want to create a process.
// l.restore is set by the container manager when a restore call is made.
+ var ttyFile *host.TTYFileOperations
+ var ttyFileVFS2 *hostvfs2.TTYFileDescription
if !l.restore {
if l.conf.ProfileEnable {
pprof.Initialize()
@@ -542,13 +545,14 @@ func (l *Loader) run() error {
// Create the FD map, which will set stdin, stdout, and stderr. If console
// is true, then ioctl calls will be passed through to the host fd.
ctx := l.rootProcArgs.NewContext(l.k)
- fdTable, err := createFDTable(ctx, l.console, l.stdioFDs)
+ var err error
+
+ // CreateProcess takes a reference on FDMap if successful. We won't need
+ // ours either way.
+ l.rootProcArgs.FDTable, ttyFile, ttyFileVFS2, err = createFDTable(ctx, l.console, l.stdioFDs)
if err != nil {
return fmt.Errorf("importing fds: %v", err)
}
- // CreateProcess takes a reference on FDMap if successful. We won't need
- // ours either way.
- l.rootProcArgs.FDTable = fdTable
// Setup the root container file system.
l.startGoferMonitor(l.sandboxID, l.goferFDs)
@@ -591,14 +595,16 @@ func (l *Loader) run() error {
ep.pidnsPath = ns.Path
}
if l.console {
- ttyFile, _ := l.rootProcArgs.FDTable.Get(0)
- defer ttyFile.DecRef()
- ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations)
-
- // Set the foreground process group on the TTY to the global
- // init process group, since that is what we are about to
- // start running.
- ep.tty.InitForegroundProcessGroup(ep.tg.ProcessGroup())
+ // Set the foreground process group on the TTY to the global init process
+ // group, since that is what we are about to start running.
+ switch {
+ case ttyFileVFS2 != nil:
+ ep.ttyVFS2 = ttyFileVFS2
+ ttyFileVFS2.InitForegroundProcessGroup(ep.tg.ProcessGroup())
+ case ttyFile != nil:
+ ep.tty = ttyFile
+ ttyFile.InitForegroundProcessGroup(ep.tg.ProcessGroup())
+ }
}
// Handle signals by forwarding them to the root container process
@@ -719,7 +725,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
// Create the FD map, which will set stdin, stdout, and stderr.
ctx := procArgs.NewContext(l.k)
- fdTable, err := createFDTable(ctx, false, stdioFDs)
+ fdTable, _, _, err := createFDTable(ctx, false, stdioFDs)
if err != nil {
return fmt.Errorf("importing fds: %v", err)
}
@@ -804,14 +810,14 @@ func (l *Loader) destroyContainer(cid string) error {
l.mu.Lock()
defer l.mu.Unlock()
- _, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid})
+ tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid})
if err != nil {
// Container doesn't exist.
return err
}
- // The container exists, has it been started?
- if started {
+ // The container exists, but has it been started?
+ if tg != nil {
if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
}
@@ -853,48 +859,65 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
l.mu.Lock()
defer l.mu.Unlock()
- tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
+ tg, err := l.tryThreadGroupFromIDLocked(execID{cid: args.ContainerID})
if err != nil {
return 0, err
}
- if !started {
+ if tg == nil {
return 0, fmt.Errorf("container %q not started", args.ContainerID)
}
- // TODO(gvisor.dev/issue/1623): Add VFS2 support
-
// Get the container MountNamespace from the Task.
- tg.Leader().WithMuLocked(func(t *kernel.Task) {
+ if kernel.VFS2Enabled {
// task.MountNamespace() does not take a ref, so we must do so ourselves.
- args.MountNamespace = t.MountNamespace()
- args.MountNamespace.IncRef()
- })
- if args.MountNamespace != nil {
- defer args.MountNamespace.DecRef()
+ args.MountNamespaceVFS2 = tg.Leader().MountNamespaceVFS2()
+ args.MountNamespaceVFS2.IncRef()
+ } else {
+ tg.Leader().WithMuLocked(func(t *kernel.Task) {
+ // task.MountNamespace() does not take a ref, so we must do so ourselves.
+ args.MountNamespace = t.MountNamespace()
+ args.MountNamespace.IncRef()
+ })
}
// Add the HOME environment variable if it is not already set.
- root := args.MountNamespace.Root()
- defer root.DecRef()
- ctx := fs.WithRoot(l.k.SupervisorContext(), root)
- envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
- if err != nil {
- return 0, err
+ if kernel.VFS2Enabled {
+ defer args.MountNamespaceVFS2.DecRef()
+
+ root := args.MountNamespaceVFS2.Root()
+ defer root.DecRef()
+ ctx := vfs.WithRoot(l.k.SupervisorContext(), root)
+ envv, err := user.MaybeAddExecUserHomeVFS2(ctx, args.MountNamespaceVFS2, args.KUID, args.Envv)
+ if err != nil {
+ return 0, err
+ }
+ args.Envv = envv
+ } else {
+ defer args.MountNamespace.DecRef()
+
+ root := args.MountNamespace.Root()
+ defer root.DecRef()
+ ctx := fs.WithRoot(l.k.SupervisorContext(), root)
+ envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
+ if err != nil {
+ return 0, err
+ }
+ args.Envv = envv
}
- args.Envv = envv
// Start the process.
proc := control.Proc{Kernel: l.k}
args.PIDNamespace = tg.PIDNamespace()
- newTG, tgid, ttyFile, err := control.ExecAsync(&proc, args)
+ newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args)
if err != nil {
return 0, err
}
eid := execID{cid: args.ContainerID, pid: tgid}
l.processes[eid] = &execProcess{
- tg: newTG,
- tty: ttyFile,
+ tg: newTG,
+ tty: ttyFile,
+ ttyVFS2: ttyFileVFS2,
}
log.Debugf("updated processes: %v", l.processes)
@@ -905,7 +928,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
// Don't defer unlock, as doing so would make it impossible for
// multiple clients to wait on the same container.
- tg, _, err := l.threadGroupFromID(execID{cid: cid})
+ tg, err := l.threadGroupFromID(execID{cid: cid})
if err != nil {
return fmt.Errorf("can't wait for container %q: %v", cid, err)
}
@@ -924,7 +947,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
// Try to find a process that was exec'd
eid := execID{cid: cid, pid: tgid}
- execTG, _, err := l.threadGroupFromID(eid)
+ execTG, err := l.threadGroupFromID(eid)
if err == nil {
ws := l.wait(execTG)
*waitStatus = ws
@@ -938,7 +961,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
// The caller may be waiting on a process not started directly via exec.
// In this case, find the process in the container's PID namespace.
- initTG, _, err := l.threadGroupFromID(execID{cid: cid})
+ initTG, err := l.threadGroupFromID(execID{cid: cid})
if err != nil {
return fmt.Errorf("waiting for PID %d: %v", tgid, err)
}
@@ -1089,8 +1112,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
return fmt.Errorf("PID (%d) cannot be set when signaling all processes", pid)
}
// Check that the container has actually started before signaling it.
- _, _, err := l.threadGroupFromID(execID{cid: cid})
- if err != nil {
+ if _, err := l.threadGroupFromID(execID{cid: cid}); err != nil {
return err
}
if err := l.signalAllProcesses(cid, signo); err != nil {
@@ -1104,7 +1126,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
}
func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) error {
- execTG, _, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
+ execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
if err == nil {
// Send signal directly to the identified process.
return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo})
@@ -1113,7 +1135,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
// The caller may be signaling a process not started directly via exec.
// In this case, find the process in the container's PID namespace and
// signal it.
- initTG, _, err := l.threadGroupFromID(execID{cid: cid})
+ initTG, err := l.threadGroupFromID(execID{cid: cid})
if err != nil {
return fmt.Errorf("no thread group found: %v", err)
}
@@ -1127,17 +1149,35 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
}
+// signalForegrondProcessGroup looks up foreground process group from the TTY
+// for the given "tgid" inside container "cid", and send the signal to it.
func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, signo int32) error {
- // Lookup foreground process group from the TTY for the given process,
- // and send the signal to it.
- tg, tty, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
+ l.mu.Lock()
+ tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid, pid: tgid})
+ if err != nil {
+ l.mu.Unlock()
+ return fmt.Errorf("no thread group found: %v", err)
+ }
+ if tg == nil {
+ l.mu.Unlock()
+ return fmt.Errorf("container %q not started", cid)
+ }
+
+ tty, ttyVFS2, err := l.ttyFromIDLocked(execID{cid: cid, pid: tgid})
+ l.mu.Unlock()
if err != nil {
return fmt.Errorf("no thread group found: %v", err)
}
- if tty == nil {
+
+ var pg *kernel.ProcessGroup
+ switch {
+ case ttyVFS2 != nil:
+ pg = ttyVFS2.ForegroundProcessGroup()
+ case tty != nil:
+ pg = tty.ForegroundProcessGroup()
+ default:
return fmt.Errorf("no TTY attached")
}
- pg := tty.ForegroundProcessGroup()
if pg == nil {
// No foreground process group has been set. Signal the
// original thread group.
@@ -1168,33 +1208,57 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo})
}
-// threadGroupFromID same as threadGroupFromIDLocked except that it acquires
-// mutex before calling it.
-func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
+// threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it
+// acquires mutex before calling it and fails in case container hasn't started
+// yet.
+func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, error) {
l.mu.Lock()
defer l.mu.Unlock()
- tg, tty, ok, err := l.threadGroupFromIDLocked(key)
+ tg, err := l.tryThreadGroupFromIDLocked(key)
if err != nil {
- return nil, nil, err
+ return nil, err
}
- if !ok {
- return nil, nil, fmt.Errorf("container %q not started", key.cid)
+ if tg == nil {
+ return nil, fmt.Errorf("container %q not started", key.cid)
}
- return tg, tty, nil
+ return tg, nil
}
-// threadGroupFromIDLocked returns the thread group and TTY for the given
-// execution ID. TTY may be nil if the process is not attached to a terminal.
-// Also returns a boolean indicating whether the container has already started.
-// Returns error if execution ID is invalid or if the container cannot be
-// found (maybe it has been deleted). Caller must hold 'mu'.
-func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) {
+// tryThreadGroupFromIDLocked returns the thread group for the given execution
+// ID. It may return nil in case the container has not started yet. Returns
+// error if execution ID is invalid or if the container cannot be found (maybe
+// it has been deleted). Caller must hold 'mu'.
+func (l *Loader) tryThreadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, error) {
ep := l.processes[key]
if ep == nil {
- return nil, nil, false, fmt.Errorf("container %q not found", key.cid)
+ return nil, fmt.Errorf("container %q not found", key.cid)
}
- if ep.tg == nil {
- return nil, nil, false, nil
+ return ep.tg, nil
+}
+
+// ttyFromIDLocked returns the TTY files for the given execution ID. It may
+// return nil in case the container has not started yet. Returns error if
+// execution ID is invalid or if the container cannot be found (maybe it has
+// been deleted). Caller must hold 'mu'.
+func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+ ep := l.processes[key]
+ if ep == nil {
+ return nil, nil, fmt.Errorf("container %q not found", key.cid)
+ }
+ return ep.tty, ep.ttyVFS2, nil
+}
+
+func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+ if len(stdioFDs) != 3 {
+ return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
+ }
+
+ k := kernel.KernelFromContext(ctx)
+ fdTable := k.NewFDTable()
+ ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs)
+ if err != nil {
+ fdTable.DecRef()
+ return nil, nil, nil, err
}
- return ep.tg, ep.tty, true, nil
+ return fdTable, ttyFile, ttyFileVFS2, nil
}
diff --git a/runsc/boot/loader_amd64.go b/runsc/boot/loader_amd64.go
deleted file mode 100644
index 78df86611..000000000
--- a/runsc/boot/loader_amd64.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package boot
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
-)
-
-func init() {
- // Set the global syscall table.
- syscallTable = linux.AMD64
-}
diff --git a/runsc/boot/loader_arm64.go b/runsc/boot/loader_arm64.go
deleted file mode 100644
index 250785010..000000000
--- a/runsc/boot/loader_arm64.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package boot
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
-)
-
-func init() {
- // Set the global syscall table.
- syscallTable = linux.ARM64
-}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 55d27a632..e448fd773 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -31,7 +31,6 @@ import (
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/unet"
@@ -69,11 +68,6 @@ func testSpec() *specs.Spec {
}
}
-func resetSyscallTable() {
- kernel.VFS2Enabled = false
- kernel.FlushSyscallTablesTestOnly()
-}
-
// startGofer starts a new gofer routine serving 'root' path. It returns the
// sandbox side of the connection, and a function that when called will stop the
// gofer.
@@ -150,13 +144,11 @@ func createLoader(vfsEnabled bool, spec *specs.Spec) (*Loader, func(), error) {
// TestRun runs a simple application in a sandbox and checks that it succeeds.
func TestRun(t *testing.T) {
- defer resetSyscallTable()
doRun(t, false)
}
// TestRunVFS2 runs TestRun in VFSv2.
func TestRunVFS2(t *testing.T) {
- defer resetSyscallTable()
doRun(t, true)
}
@@ -199,13 +191,11 @@ func doRun(t *testing.T, vfsEnabled bool) {
// TestStartSignal tests that the controller Start message will cause
// WaitForStartSignal to return.
func TestStartSignal(t *testing.T) {
- defer resetSyscallTable()
doStartSignal(t, false)
}
// TestStartSignalVFS2 does TestStartSignal with VFS2.
func TestStartSignalVFS2(t *testing.T) {
- defer resetSyscallTable()
doStartSignal(t, true)
}
@@ -438,7 +428,6 @@ func createMountTestcases(vfs2 bool) []*CreateMountTestcase {
// Test that MountNamespace can be created with various specs.
func TestCreateMountNamespace(t *testing.T) {
-
for _, tc := range createMountTestcases(false /* vfs2 */) {
t.Run(tc.name, func(t *testing.T) {
conf := testConfig()
@@ -476,15 +465,13 @@ func TestCreateMountNamespace(t *testing.T) {
// Test that MountNamespace can be created with various specs.
func TestCreateMountNamespaceVFS2(t *testing.T) {
-
for _, tc := range createMountTestcases(true /* vfs2 */) {
t.Run(tc.name, func(t *testing.T) {
- defer resetSyscallTable()
-
spec := testSpec()
spec.Mounts = tc.spec.Mounts
spec.Root = tc.spec.Root
+ t.Logf("Using root: %q", spec.Root.Path)
l, loaderCleanup, err := createLoader(true /* VFS2 Enabled */, spec)
if err != nil {
t.Fatalf("failed to create loader: %v", err)
@@ -497,7 +484,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) {
t.Fatalf("failed process hints: %v", err)
}
- ctx := l.rootProcArgs.NewContext(l.k)
+ ctx := l.k.SupervisorContext()
mns, err := mntr.setupVFS2(ctx, l.conf, &l.rootProcArgs)
if err != nil {
t.Fatalf("failed to setupVFS2: %v", err)
@@ -506,7 +493,6 @@ func TestCreateMountNamespaceVFS2(t *testing.T) {
root := mns.Root()
defer root.DecRef()
for _, p := range tc.expectedPaths {
-
target := &vfs.PathOperation{
Root: root,
Start: root,
@@ -518,7 +504,6 @@ func TestCreateMountNamespaceVFS2(t *testing.T) {
} else {
d.DecRef()
}
-
}
})
}
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index bee6ee336..0af30456e 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -17,6 +17,7 @@ package boot
import (
"fmt"
"net"
+ "runtime"
"strings"
"syscall"
@@ -24,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo"
"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
"gvisor.dev/gvisor/pkg/tcpip/network/arp"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -75,6 +77,44 @@ type DefaultRoute struct {
Name string
}
+// QueueingDiscipline is used to specify the kind of Queueing Discipline to
+// apply for a give FDBasedLink.
+type QueueingDiscipline int
+
+const (
+ // QDiscNone disables any queueing for the underlying FD.
+ QDiscNone QueueingDiscipline = iota
+
+ // QDiscFIFO applies a simple fifo based queue to the underlying
+ // FD.
+ QDiscFIFO
+)
+
+// MakeQueueingDiscipline if possible the equivalent QueuingDiscipline for s
+// else returns an error.
+func MakeQueueingDiscipline(s string) (QueueingDiscipline, error) {
+ switch s {
+ case "none":
+ return QDiscNone, nil
+ case "fifo":
+ return QDiscFIFO, nil
+ default:
+ return 0, fmt.Errorf("unsupported qdisc specified: %q", s)
+ }
+}
+
+// String implements fmt.Stringer.
+func (q QueueingDiscipline) String() string {
+ switch q {
+ case QDiscNone:
+ return "none"
+ case QDiscFIFO:
+ return "fifo"
+ default:
+ panic(fmt.Sprintf("Invalid queueing discipline: %d", q))
+ }
+}
+
// FDBasedLink configures an fd-based link.
type FDBasedLink struct {
Name string
@@ -84,6 +124,7 @@ type FDBasedLink struct {
GSOMaxSize uint32
SoftwareGSOEnabled bool
LinkAddress net.HardwareAddr
+ QDisc QueueingDiscipline
// NumChannels controls how many underlying FD's are to be used to
// create this endpoint.
@@ -185,6 +226,8 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
}
mac := tcpip.LinkAddress(link.LinkAddress)
+ log.Infof("gso max size is: %d", link.GSOMaxSize)
+
linkEP, err := fdbased.New(&fdbased.Options{
FDs: FDs,
MTU: uint32(link.MTU),
@@ -199,6 +242,13 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
return err
}
+ switch link.QDisc {
+ case QDiscNone:
+ case QDiscFIFO:
+ log.Infof("Enabling FIFO QDisc on %q", link.Name)
+ linkEP = fifo.New(linkEP, runtime.GOMAXPROCS(0), 1000)
+ }
+
log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
return err
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 0b9b0b436..d1397ed2c 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -26,6 +26,7 @@ import (
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/devices/memdev"
"gvisor.dev/gvisor/pkg/sentry/fs"
+ devpts2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
devtmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
goferimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
procimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
@@ -41,37 +42,28 @@ import (
)
func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error {
-
- vfsObj.MustRegisterFilesystemType(rootFsName, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserList: true,
- })
-
- vfsObj.MustRegisterFilesystemType(bind, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(devpts2.Name, &devpts2.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserList: true,
+ // TODO(b/29356795): Users may mount this once the terminals are in a
+ // usable state.
+ AllowUserMount: false,
})
-
- vfsObj.MustRegisterFilesystemType(devpts, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- AllowUserList: true,
- })
-
- vfsObj.MustRegisterFilesystemType(devtmpfs, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(devtmpfsimpl.Name, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
AllowUserList: true,
})
- vfsObj.MustRegisterFilesystemType(proc, &procimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- AllowUserList: true,
+ vfsObj.MustRegisterFilesystemType(goferimpl.Name, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserList: true,
})
- vfsObj.MustRegisterFilesystemType(sysfs, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(procimpl.Name, &procimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
AllowUserList: true,
})
- vfsObj.MustRegisterFilesystemType(tmpfs, &tmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(sysimpl.Name, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
AllowUserList: true,
})
- vfsObj.MustRegisterFilesystemType(nonefs, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(tmpfsimpl.Name, &tmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
AllowUserList: true,
})
@@ -108,7 +100,6 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte
}
func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
-
exe := procArgs.Argv[0]
// Absolute paths can be used directly.
@@ -120,11 +111,9 @@ func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessAr
// Paths with '/' in them should be joined to the working directory, or
// to the root if working directory is not set.
if strings.IndexByte(exe, '/') > 0 {
-
if !path.IsAbs(procArgs.WorkingDirectory) {
return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory)
}
-
procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
return nil
}
@@ -144,21 +133,17 @@ func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessAr
creds := procArgs.Credentials
for _, p := range paths {
-
binPath := path.Join(p, exe)
-
pop := &vfs.PathOperation{
Root: root,
Start: root,
Path: fspath.Parse(binPath),
FollowFinalSymlink: true,
}
-
opts := &vfs.OpenOptions{
FileExec: true,
Flags: linux.O_RDONLY,
}
-
dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts)
if err == syserror.ENOENT || err == syserror.EACCES {
// Didn't find it here.
@@ -181,35 +166,32 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs
// Create context with root credentials to mount the filesystem (the current
// user may not be privileged enough).
+ rootCreds := auth.NewRootCredentials(procArgs.Credentials.UserNamespace)
rootProcArgs := *procArgs
rootProcArgs.WorkingDirectory = "/"
- rootProcArgs.Credentials = auth.NewRootCredentials(procArgs.Credentials.UserNamespace)
+ rootProcArgs.Credentials = rootCreds
rootProcArgs.Umask = 0022
rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals
rootCtx := procArgs.NewContext(c.k)
- creds := procArgs.Credentials
- if err := registerFilesystems(rootCtx, c.k.VFS(), creds); err != nil {
+ if err := registerFilesystems(rootCtx, c.k.VFS(), rootCreds); err != nil {
return nil, fmt.Errorf("register filesystems: %w", err)
}
- mns, err := c.createMountNamespaceVFS2(ctx, conf, creds)
+ mns, err := c.createMountNamespaceVFS2(rootCtx, conf, rootCreds)
if err != nil {
return nil, fmt.Errorf("creating mount namespace: %w", err)
}
-
rootProcArgs.MountNamespaceVFS2 = mns
// Mount submounts.
- if err := c.mountSubmountsVFS2(rootCtx, conf, mns, creds); err != nil {
+ if err := c.mountSubmountsVFS2(rootCtx, conf, mns, rootCreds); err != nil {
return nil, fmt.Errorf("mounting submounts vfs2: %w", err)
}
-
return mns, nil
}
func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
-
fd := c.fds.remove()
opts := strings.Join(p9MountOptionsVFS2(fd, conf.FileAccess), ",")
@@ -222,7 +204,6 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *C
}
func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error {
-
c.prepareMountsVFS2()
for _, submount := range c.mounts {
@@ -256,7 +237,6 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
if err != nil {
return fmt.Errorf("mountOptions failed: %w", err)
}
-
if fsName == "" {
// Filesystem is not supported (e.g. cgroup), just skip it.
return nil
@@ -277,7 +257,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
// All writes go to upper, be paranoid and make lower readonly.
opts.ReadOnly = useOverlay
- if err := c.k.VFS().MountAt(ctx, creds, "", target, submount.Type, opts); err != nil {
+ if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil {
return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
}
log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts)
@@ -336,7 +316,6 @@ func p9MountOptionsVFS2(fd int, fa FileAccessType) []string {
}
func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error {
-
target := &vfs.PathOperation{
Root: root,
Start: root,
@@ -345,12 +324,10 @@ func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath s
_, err := c.k.VFS().StatAt(ctx, creds, target, &vfs.StatOptions{})
switch {
-
case err == syserror.ENOENT:
if err := c.makeSyntheticMount(ctx, path.Dir(currentPath), root, creds); err != nil {
return err
}
-
mkdirOpts := &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}
if err := c.k.VFS().MkdirAt(ctx, creds, target, mkdirOpts); err != nil {
return fmt.Errorf("failed to makedir for mount %+v: %w", target, err)
diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go
index 7072547be..a37d66139 100644
--- a/runsc/cmd/syscalls.go
+++ b/runsc/cmd/syscalls.go
@@ -32,9 +32,10 @@ import (
// Syscalls implements subcommands.Command for the "syscalls" command.
type Syscalls struct {
- output string
- os string
- arch string
+ format string
+ os string
+ arch string
+ filename string
}
// CompatibilityInfo is a map of system and architecture to compatibility doc.
@@ -95,16 +96,17 @@ func (*Syscalls) Usage() string {
// SetFlags implements subcommands.Command.SetFlags.
func (s *Syscalls) SetFlags(f *flag.FlagSet) {
- f.StringVar(&s.output, "o", "table", "Output format (table, csv, json).")
+ f.StringVar(&s.format, "format", "table", "Output format (table, csv, json).")
f.StringVar(&s.os, "os", osAll, "The OS (e.g. linux)")
f.StringVar(&s.arch, "arch", archAll, "The CPU architecture (e.g. amd64).")
+ f.StringVar(&s.filename, "filename", "", "Output filename (otherwise stdout).")
}
// Execute implements subcommands.Command.Execute.
func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- out, ok := outputMap[s.output]
+ out, ok := outputMap[s.format]
if !ok {
- Fatalf("Unsupported output format %q", s.output)
+ Fatalf("Unsupported output format %q", s.format)
}
// Build map of all supported architectures.
@@ -124,7 +126,14 @@ func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface
Fatalf("%v", err)
}
- if err := out(os.Stdout, info); err != nil {
+ w := os.Stdout // Default.
+ if s.filename != "" {
+ w, err = os.OpenFile(s.filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
+ if err != nil {
+ Fatalf("Error opening %q: %v", s.filename, err)
+ }
+ }
+ if err := out(w, info); err != nil {
Fatalf("Error writing output: %v", err)
}
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 331b8e866..46154df60 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -15,8 +15,10 @@ go_library(
"//test:__subpackages__",
],
deps = [
+ "//pkg/abi/linux",
"//pkg/log",
"//pkg/sentry/control",
+ "//pkg/sentry/sighandling",
"//pkg/sync",
"//runsc/boot",
"//runsc/cgroup",
diff --git a/runsc/container/container.go b/runsc/container/container.go
index e0153832c..8539f252d 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -22,7 +22,6 @@ import (
"io/ioutil"
"os"
"os/exec"
- "os/signal"
"regexp"
"strconv"
"strings"
@@ -31,8 +30,10 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/sighandling"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/cgroup"
"gvisor.dev/gvisor/runsc/sandbox"
@@ -621,21 +622,15 @@ func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error {
// forwarding signals.
func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() {
log.Debugf("Forwarding all signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
- sigCh := make(chan os.Signal, 1)
- signal.Notify(sigCh)
- go func() {
- for s := range sigCh {
- log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", s, c.ID, pid, fgProcess)
- if err := c.Sandbox.SignalProcess(c.ID, pid, s.(syscall.Signal), fgProcess); err != nil {
- log.Warningf("error forwarding signal %d to container %q: %v", s, c.ID, err)
- }
+ stop := sighandling.StartSignalForwarding(func(sig linux.Signal) {
+ log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", sig, c.ID, pid, fgProcess)
+ if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.Signal(sig), fgProcess); err != nil {
+ log.Warningf("error forwarding signal %d to container %q: %v", sig, c.ID, err)
}
- log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
- }()
-
+ })
return func() {
- signal.Stop(sigCh)
- close(sigCh)
+ log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
+ stop()
}
}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index a1d4d3b7e..acf988aa0 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -256,6 +256,8 @@ var (
func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
// Always load the default config.
cs := make(map[string]*boot.Config)
+ cs["default"] = testutil.TestConfig(t)
+
for _, o := range opts {
switch o {
case overlay:
@@ -281,7 +283,7 @@ func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
return cs
}
-func configsWithVFS2(t *testing.T, opts []configOption) map[string]*boot.Config {
+func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*boot.Config {
vfs1 := configs(t, opts...)
vfs2 := configs(t, opts...)
@@ -302,7 +304,7 @@ func TestLifecycle(t *testing.T) {
childReaper.Start()
defer childReaper.Stop()
- for name, conf := range configsWithVFS2(t, all) {
+ for name, conf := range configsWithVFS2(t, all...) {
t.Run(name, func(t *testing.T) {
// The container will just sleep for a long time. We will kill it before
// it finishes sleeping.
@@ -476,7 +478,7 @@ func TestExePath(t *testing.T) {
t.Fatalf("error making directory: %v", err)
}
- for name, conf := range configsWithVFS2(t, []configOption{overlay}) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
for _, test := range []struct {
path string
@@ -1297,7 +1299,7 @@ func TestCapabilities(t *testing.T) {
// TestRunNonRoot checks that sandbox can be configured when running as
// non-privileged user.
func TestRunNonRoot(t *testing.T) {
- for name, conf := range configs(t, noOverlay...) {
+ for name, conf := range configsWithVFS2(t, noOverlay...) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("/bin/true")
@@ -1341,7 +1343,7 @@ func TestRunNonRoot(t *testing.T) {
// TestMountNewDir checks that runsc will create destination directory if it
// doesn't exit.
func TestMountNewDir(t *testing.T) {
- for name, conf := range configsWithVFS2(t, []configOption{overlay}) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
root, err := ioutil.TempDir(testutil.TmpDir(), "root")
if err != nil {
@@ -1370,7 +1372,7 @@ func TestMountNewDir(t *testing.T) {
}
func TestReadonlyRoot(t *testing.T) {
- for name, conf := range configsWithVFS2(t, []configOption{overlay}) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("/bin/touch", "/foo")
spec.Root.Readonly = true
@@ -1488,7 +1490,7 @@ func TestUIDMap(t *testing.T) {
}
func TestReadonlyMount(t *testing.T) {
- for name, conf := range configsWithVFS2(t, []configOption{overlay}) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
@@ -1535,6 +1537,28 @@ func TestReadonlyMount(t *testing.T) {
}
}
+func TestBindMountByOption(t *testing.T) {
+ for _, conf := range configs(t, overlay) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "bind-mount")
+ spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
+ if err != nil {
+ t.Fatalf("ioutil.TempDir() failed: %v", err)
+ }
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Destination: dir,
+ Source: dir,
+ Type: "none",
+ Options: []string{"rw", "bind"},
+ })
+
+ if err := run(spec, conf); err != nil {
+ t.Fatalf("error running sandbox: %v", err)
+ }
+ }
+}
+
// TestAbbreviatedIDs checks that runsc supports using abbreviated container
// IDs in place of full IDs.
func TestAbbreviatedIDs(t *testing.T) {
@@ -1742,7 +1766,7 @@ func TestUserLog(t *testing.T) {
}
func TestWaitOnExitedSandbox(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all) {
+ for name, conf := range configsWithVFS2(t, all...) {
t.Run(name, func(t *testing.T) {
// Run a shell that sleeps for 1 second and then exits with a
// non-zero code.
@@ -2167,63 +2191,70 @@ func TestTTYField(t *testing.T) {
}
for _, test := range testCases {
- t.Run(test.name, func(t *testing.T) {
- conf := testutil.TestConfig(t)
-
- // We will run /bin/sleep, possibly with an open TTY.
- cmd := []string{"/bin/sleep", "10000"}
- if test.useTTY {
- // Run inside the "pty-runner".
- cmd = append([]string{testApp, "pty-runner"}, cmd...)
- }
-
- spec := testutil.NewSpecWithArgs(cmd...)
- _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer cleanup()
+ for _, vfs2 := range []bool{false, true} {
+ name := test.name
+ if vfs2 {
+ name += "-vfs2"
+ }
+ t.Run(name, func(t *testing.T) {
+ conf := testutil.TestConfig(t)
+ conf.VFS2 = vfs2
+
+ // We will run /bin/sleep, possibly with an open TTY.
+ cmd := []string{"/bin/sleep", "10000"}
+ if test.useTTY {
+ // Run inside the "pty-runner".
+ cmd = append([]string{testApp, "pty-runner"}, cmd...)
+ }
- // Create and start the container.
- args := Args{
- ID: testutil.RandomContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- c, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer c.Destroy()
- if err := c.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ spec := testutil.NewSpecWithArgs(cmd...)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Wait for sleep to be running, and check the TTY
- // field.
- var gotTTYField string
- cb := func() error {
- ps, err := c.Processes()
+ // Create and start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
- err = fmt.Errorf("error getting process data from container: %v", err)
- return &backoff.PermanentError{Err: err}
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
+ if err := c.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
}
- for _, p := range ps {
- if strings.Contains(p.Cmd, "sleep") {
- gotTTYField = p.TTY
- return nil
+
+ // Wait for sleep to be running, and check the TTY
+ // field.
+ var gotTTYField string
+ cb := func() error {
+ ps, err := c.Processes()
+ if err != nil {
+ err = fmt.Errorf("error getting process data from container: %v", err)
+ return &backoff.PermanentError{Err: err}
+ }
+ for _, p := range ps {
+ if strings.Contains(p.Cmd, "sleep") {
+ gotTTYField = p.TTY
+ return nil
+ }
}
+ return fmt.Errorf("sleep not running")
+ }
+ if err := testutil.Poll(cb, 30*time.Second); err != nil {
+ t.Fatalf("error waiting for sleep process: %v", err)
}
- return fmt.Errorf("sleep not running")
- }
- if err := testutil.Poll(cb, 30*time.Second); err != nil {
- t.Fatalf("error waiting for sleep process: %v", err)
- }
- if gotTTYField != test.wantTTYField {
- t.Errorf("tty field got %q, want %q", gotTTYField, test.wantTTYField)
- }
- })
+ if gotTTYField != test.wantTTYField {
+ t.Errorf("tty field got %q, want %q", gotTTYField, test.wantTTYField)
+ }
+ })
+ }
}
}
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index e3704b453..f6861b1dd 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -1394,7 +1394,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
Destination: "/mydir/test",
Source: "/some/dir",
Type: "tmpfs",
- Options: []string{"rw", "rbind", "relatime"},
+ Options: []string{"rw", "relatime"},
}
podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
diff --git a/runsc/main.go b/runsc/main.go
index 8e594c58e..0625a06e0 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -72,6 +72,7 @@ var (
network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
hardwareGSO = flag.Bool("gso", true, "enable hardware segmentation offload if it is supported by a network device.")
softwareGSO = flag.Bool("software-gso", true, "enable software segmentation offload when hardware ofload can't be enabled.")
+ qDisc = flag.String("qdisc", "fifo", "specifies which queueing discipline to apply by default to the non loopback nics used by the sandbox.")
fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
@@ -198,6 +199,11 @@ func main() {
cmd.Fatalf("%v", err)
}
+ queueingDiscipline, err := boot.MakeQueueingDiscipline(*qDisc)
+ if err != nil {
+ cmd.Fatalf("%s", err)
+ }
+
// Sets the reference leak check mode. Also set it in config below to
// propagate it to child processes.
refs.SetLeakMode(refsLeakMode)
@@ -232,7 +238,7 @@ func main() {
OverlayfsStaleRead: *overlayfsStaleRead,
CPUNumFromQuota: *cpuNumFromQuota,
VFS2: *vfs2Enabled,
-
+ QDisc: queueingDiscipline,
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
TestOnlyTestNameEnv: *testOnlyTestNameEnv,
}
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index bc093fba5..209bfdb20 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -62,7 +62,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
// Build the path to the net namespace of the sandbox process.
// This is what we will copy.
nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
- if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.NumNetworkChannels); err != nil {
+ if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.NumNetworkChannels, conf.QDisc); err != nil {
return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err)
}
case boot.NetworkHost:
@@ -115,7 +115,7 @@ func isRootNS() (bool, error) {
// createInterfacesAndRoutesFromNS scrapes the interface and routes from the
// net namespace with the given path, creates them in the sandbox, and removes
// them from the host.
-func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, numNetworkChannels int) error {
+func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, numNetworkChannels int, qDisc boot.QueueingDiscipline) error {
// Join the network namespace that we will be copying.
restore, err := joinNetNS(nsPath)
if err != nil {
@@ -201,6 +201,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
MTU: iface.MTU,
Routes: routes,
NumChannels: numNetworkChannels,
+ QDisc: qDisc,
}
// Get the link for the interface.
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 837d5e238..202518b58 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -311,7 +311,19 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth.
// Is9PMount returns true if the given mount can be mounted as an external gofer.
func Is9PMount(m specs.Mount) bool {
- return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m)
+ var isBind bool
+ switch m.Type {
+ case "bind":
+ isBind = true
+ default:
+ for _, opt := range m.Options {
+ if opt == "bind" || opt == "rbind" {
+ isBind = true
+ break
+ }
+ }
+ }
+ return isBind && m.Source != "" && IsSupportedDevMount(m)
}
// IsSupportedDevMount returns true if the mount is a supported /dev mount.