summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/BUILD14
-rw-r--r--runsc/boot/compat.go2
-rw-r--r--runsc/boot/config.go9
-rw-r--r--runsc/boot/controller.go24
-rw-r--r--runsc/boot/fds.go38
-rw-r--r--runsc/boot/filter/config.go13
-rw-r--r--runsc/boot/fs.go24
-rw-r--r--runsc/boot/loader.go216
-rw-r--r--runsc/boot/loader_amd64.go5
-rw-r--r--runsc/boot/loader_arm64.go5
-rw-r--r--runsc/boot/loader_test.go49
-rw-r--r--runsc/boot/network.go27
-rw-r--r--runsc/boot/pprof/BUILD11
-rw-r--r--runsc/boot/pprof/pprof.go (renamed from runsc/boot/pprof.go)6
-rw-r--r--runsc/boot/user.go64
-rw-r--r--runsc/boot/vfs.go310
-rw-r--r--runsc/cmd/BUILD3
-rw-r--r--runsc/cmd/boot.go75
-rw-r--r--runsc/cmd/capability_test.go2
-rw-r--r--runsc/cmd/chroot.go2
-rw-r--r--runsc/cmd/debug.go64
-rw-r--r--runsc/cmd/gofer.go7
-rw-r--r--runsc/cmd/statefile.go143
-rw-r--r--runsc/container/console_test.go10
-rw-r--r--runsc/container/container.go43
-rw-r--r--runsc/container/container_test.go92
-rw-r--r--runsc/container/multi_container_test.go44
-rw-r--r--runsc/container/shared_volume_test.go4
-rw-r--r--runsc/container/test_app/test_app.go2
-rw-r--r--runsc/dockerutil/dockerutil.go12
-rw-r--r--runsc/fsgofer/fsgofer.go4
-rw-r--r--runsc/main.go49
-rw-r--r--runsc/sandbox/network.go55
-rw-r--r--runsc/sandbox/sandbox.go185
-rw-r--r--runsc/specutils/namespace.go3
-rw-r--r--runsc/specutils/specutils.go11
-rw-r--r--runsc/testutil/testutil.go26
37 files changed, 1310 insertions, 343 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index ae4dd102a..5451f1eba 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -19,9 +19,9 @@ go_library(
"loader_amd64.go",
"loader_arm64.go",
"network.go",
- "pprof.go",
"strace.go",
"user.go",
+ "vfs.go",
],
visibility = [
"//runsc:__subpackages__",
@@ -34,6 +34,7 @@ go_library(
"//pkg/control/server",
"//pkg/cpuid",
"//pkg/eventchannel",
+ "//pkg/fspath",
"//pkg/log",
"//pkg/memutil",
"//pkg/rand",
@@ -41,6 +42,7 @@ go_library(
"//pkg/sentry/arch",
"//pkg/sentry/arch:registers_go_proto",
"//pkg/sentry/control",
+ "//pkg/sentry/devices/memdev",
"//pkg/sentry/fs",
"//pkg/sentry/fs/dev",
"//pkg/sentry/fs/gofer",
@@ -50,6 +52,12 @@ go_library(
"//pkg/sentry/fs/sys",
"//pkg/sentry/fs/tmpfs",
"//pkg/sentry/fs/tty",
+ "//pkg/sentry/fsimpl/devtmpfs",
+ "//pkg/sentry/fsimpl/gofer",
+ "//pkg/sentry/fsimpl/host",
+ "//pkg/sentry/fsimpl/proc",
+ "//pkg/sentry/fsimpl/sys",
+ "//pkg/sentry/fsimpl/tmpfs",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel:uncaught_signal_go_proto",
@@ -72,6 +80,7 @@ go_library(
"//pkg/sentry/time",
"//pkg/sentry/unimpl:unimplemented_syscall_go_proto",
"//pkg/sentry/usage",
+ "//pkg/sentry/vfs",
"//pkg/sentry/watchdog",
"//pkg/sync",
"//pkg/syserror",
@@ -91,6 +100,7 @@ go_library(
"//pkg/usermem",
"//runsc/boot/filter",
"//runsc/boot/platforms",
+ "//runsc/boot/pprof",
"//runsc/specutils",
"@com_github_golang_protobuf//proto:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
@@ -114,10 +124,12 @@ go_test(
"//pkg/p9",
"//pkg/sentry/contexttest",
"//pkg/sentry/fs",
+ "//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sync",
"//pkg/unet",
"//runsc/fsgofer",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index 8995d678e..b7cfb35bf 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -65,7 +65,7 @@ func newCompatEmitter(logFD int) (*compatEmitter, error) {
if logFD > 0 {
f := os.NewFile(uintptr(logFD), "user log file")
- target := &log.MultiEmitter{c.sink, &log.K8sJSONEmitter{log.Writer{Next: f}}}
+ target := &log.MultiEmitter{c.sink, log.K8sJSONEmitter{&log.Writer{Next: f}}}
c.sink = &log.BasicLogger{Level: log.Info, Emitter: target}
}
return c, nil
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 35391030f..715a19112 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -158,6 +158,9 @@ type Config struct {
// DebugLog is the path to log debug information to, if not empty.
DebugLog string
+ // PanicLog is the path to log GO's runtime messages, if not empty.
+ PanicLog string
+
// DebugLogFormat is the log format for debug.
DebugLogFormat string
@@ -269,6 +272,7 @@ func (c *Config) ToFlags() []string {
"--log=" + c.LogFilename,
"--log-format=" + c.LogFormat,
"--debug-log=" + c.DebugLog,
+ "--panic-log=" + c.PanicLog,
"--debug-log-format=" + c.DebugLogFormat,
"--file-access=" + c.FileAccess.String(),
"--overlay=" + strconv.FormatBool(c.Overlay),
@@ -301,5 +305,10 @@ func (c *Config) ToFlags() []string {
if len(c.TestOnlyTestNameEnv) != 0 {
f = append(f, "--TESTONLY-test-name-env="+c.TestOnlyTestNameEnv)
}
+
+ if c.VFS2 {
+ f = append(f, "--vfs2=true")
+ }
+
return f
}
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 9c9e94864..8125d5061 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -32,6 +32,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/watchdog"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot/pprof"
"gvisor.dev/gvisor/runsc/specutils"
)
@@ -100,11 +101,14 @@ const (
// Profiling related commands (see pprof.go for more details).
const (
- StartCPUProfile = "Profile.StartCPUProfile"
- StopCPUProfile = "Profile.StopCPUProfile"
- HeapProfile = "Profile.HeapProfile"
- StartTrace = "Profile.StartTrace"
- StopTrace = "Profile.StopTrace"
+ StartCPUProfile = "Profile.StartCPUProfile"
+ StopCPUProfile = "Profile.StopCPUProfile"
+ HeapProfile = "Profile.HeapProfile"
+ GoroutineProfile = "Profile.GoroutineProfile"
+ BlockProfile = "Profile.BlockProfile"
+ MutexProfile = "Profile.MutexProfile"
+ StartTrace = "Profile.StartTrace"
+ StopTrace = "Profile.StopTrace"
)
// Logging related commands (see logging.go for more details).
@@ -142,7 +146,7 @@ func newController(fd int, l *Loader) (*controller, error) {
}
srv.Register(manager)
- if eps, ok := l.k.NetworkStack().(*netstack.Stack); ok {
+ if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok {
net := &Network{
Stack: eps.Stack,
}
@@ -341,7 +345,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
return fmt.Errorf("creating memory file: %v", err)
}
k.SetMemoryFile(mf)
- networkStack := cm.l.k.NetworkStack()
+ networkStack := cm.l.k.RootNetworkNamespace().Stack()
cm.l.k = k
// Set up the restore environment.
@@ -365,9 +369,9 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
}
if cm.l.conf.ProfileEnable {
- // initializePProf opens /proc/self/maps, so has to be
- // called before installing seccomp filters.
- initializePProf()
+ // pprof.Initialize opens /proc/self/maps, so has to be called before
+ // installing seccomp filters.
+ pprof.Initialize()
}
// Seccomp filters have to be applied before parsing the state file.
diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go
index 417d2d5fb..7e49f6f9f 100644
--- a/runsc/boot/fds.go
+++ b/runsc/boot/fds.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/host"
+ vfshost "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
"gvisor.dev/gvisor/pkg/sentry/kernel"
)
@@ -31,10 +32,13 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F
return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
}
+ if kernel.VFS2Enabled {
+ return createFDTableVFS2(ctx, console, stdioFDs)
+ }
+
k := kernel.KernelFromContext(ctx)
fdTable := k.NewFDTable()
defer fdTable.DecRef()
- mounter := fs.FileOwnerFromContext(ctx)
var ttyFile *fs.File
for appFD, hostFD := range stdioFDs {
@@ -44,7 +48,7 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F
// Import the file as a host TTY file.
if ttyFile == nil {
var err error
- appFile, err = host.ImportFile(ctx, hostFD, mounter, true /* isTTY */)
+ appFile, err = host.ImportFile(ctx, hostFD, true /* isTTY */)
if err != nil {
return nil, err
}
@@ -63,7 +67,7 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F
} else {
// Import the file as a regular host file.
var err error
- appFile, err = host.ImportFile(ctx, hostFD, mounter, false /* isTTY */)
+ appFile, err = host.ImportFile(ctx, hostFD, false /* isTTY */)
if err != nil {
return nil, err
}
@@ -79,3 +83,31 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F
fdTable.IncRef()
return fdTable, nil
}
+
+func createFDTableVFS2(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) {
+ k := kernel.KernelFromContext(ctx)
+ fdTable := k.NewFDTable()
+ defer fdTable.DecRef()
+
+ hostMount, err := vfshost.NewMount(k.VFS())
+ if err != nil {
+ return nil, fmt.Errorf("creating host mount: %w", err)
+ }
+
+ for appFD, hostFD := range stdioFDs {
+ // TODO(gvisor.dev/issue/1482): Add TTY support.
+ appFile, err := vfshost.ImportFD(hostMount, hostFD, false)
+ if err != nil {
+ return nil, err
+ }
+
+ if err := fdTable.NewFDAtVFS2(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
+ appFile.DecRef()
+ return nil, err
+ }
+ appFile.DecRef()
+ }
+
+ fdTable.IncRef()
+ return fdTable, nil
+}
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index c69f4c602..1828d116a 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -44,7 +44,7 @@ var allowedSyscalls = seccomp.SyscallRules{
{
seccomp.AllowAny{},
seccomp.AllowAny{},
- seccomp.AllowValue(0),
+ seccomp.AllowValue(syscall.O_CLOEXEC),
},
},
syscall.SYS_EPOLL_CREATE1: {},
@@ -229,7 +229,9 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_NANOSLEEP: {},
syscall.SYS_PPOLL: {},
syscall.SYS_PREAD64: {},
+ syscall.SYS_PREADV: {},
syscall.SYS_PWRITE64: {},
+ syscall.SYS_PWRITEV: {},
syscall.SYS_READ: {},
syscall.SYS_RECVMSG: []seccomp.Rule{
{
@@ -282,12 +284,21 @@ var allowedSyscalls = seccomp.SyscallRules{
{seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
},
syscall.SYS_SIGALTSTACK: {},
+ unix.SYS_STATX: {},
syscall.SYS_SYNC_FILE_RANGE: {},
syscall.SYS_TGKILL: []seccomp.Rule{
{
seccomp.AllowValue(uint64(os.Getpid())),
},
},
+ syscall.SYS_UTIMENSAT: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(0), /* null pathname */
+ seccomp.AllowAny{},
+ seccomp.AllowValue(0), /* flags */
+ },
+ },
syscall.SYS_WRITE: {},
// The only user in rawfile.NonBlockingWrite3 always passes iovcnt with
// values 2 or 3. Three iovec-s are passed, when the PACKET_VNET_HDR
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 0f62842ea..98cce60af 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -278,6 +278,9 @@ func subtargets(root string, mnts []specs.Mount) []string {
}
func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+ if conf.VFS2 {
+ return setupContainerVFS2(ctx, conf, mntr, procArgs)
+ }
mns, err := mntr.setupFS(conf, procArgs)
if err != nil {
return err
@@ -573,6 +576,9 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin
// should be mounted (e.g. a volume shared between containers). It must be
// called for the root container only.
func (c *containerMounter) processHints(conf *Config) error {
+ if conf.VFS2 {
+ return nil
+ }
ctx := c.k.SupervisorContext()
for _, hint := range c.hints.mounts {
// TODO(b/142076984): Only support tmpfs for now. Bind mounts require a
@@ -781,9 +787,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
default:
- // TODO(nlacasse): Support all the mount types and make this a fatal error.
- // Most applications will "just work" without them, so this is a warning
- // for now.
log.Warningf("ignoring unknown filesystem type %q", m.Type)
}
return fsName, opts, useOverlay, nil
@@ -824,7 +827,20 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil)
if err != nil {
- return fmt.Errorf("creating mount with source %q: %v", m.Source, err)
+ err := fmt.Errorf("creating mount with source %q: %v", m.Source, err)
+ // Check to see if this is a common error due to a Linux bug.
+ // This error is generated here in order to cause it to be
+ // printed to the user using Docker via 'runsc create' etc. rather
+ // than simply printed to the logs for the 'runsc boot' command.
+ //
+ // We check the error message string rather than type because the
+ // actual error types (syscall.EIO, syscall.EPIPE) are lost by file system
+ // implementation (e.g. p9).
+ // TODO(gvisor.dev/issue/1765): Remove message when bug is resolved.
+ if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) {
+ return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug"))
+ }
+ return err
}
// If there are submounts, we need to overlay the mount on top of a ramfs
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 9f0d5d7af..cf1f47bc7 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -26,7 +26,6 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
@@ -49,6 +48,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/watchdog"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
"gvisor.dev/gvisor/pkg/tcpip/network/arp"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -60,6 +60,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
"gvisor.dev/gvisor/runsc/boot/filter"
_ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms.
+ "gvisor.dev/gvisor/runsc/boot/pprof"
"gvisor.dev/gvisor/runsc/specutils"
// Include supported socket providers.
@@ -71,6 +72,8 @@ import (
_ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
)
+var syscallTable *kernel.SyscallTable
+
// Loader keeps state needed to start the kernel and run the container..
type Loader struct {
// k is the kernel.
@@ -154,13 +157,17 @@ type Args struct {
Spec *specs.Spec
// Conf is the system configuration.
Conf *Config
- // ControllerFD is the FD to the URPC controller.
+ // ControllerFD is the FD to the URPC controller. The Loader takes ownership
+ // of this FD and may close it at any time.
ControllerFD int
- // Device is an optional argument that is passed to the platform.
+ // Device is an optional argument that is passed to the platform. The Loader
+ // takes ownership of this file and may close it at any time.
Device *os.File
- // GoferFDs is an array of FDs used to connect with the Gofer.
+ // GoferFDs is an array of FDs used to connect with the Gofer. The Loader
+ // takes ownership of these FDs and may close them at any time.
GoferFDs []int
- // StdioFDs is the stdio for the application.
+ // StdioFDs is the stdio for the application. The Loader takes ownership of
+ // these FDs and may close them at any time.
StdioFDs []int
// Console is set to true if using TTY.
Console bool
@@ -173,6 +180,9 @@ type Args struct {
UserLogFD int
}
+// make sure stdioFDs are always the same on initial start and on restore
+const startingStdioFD = 64
+
// New initializes a new kernel loader configured by spec.
// New also handles setting up a kernel for restoring a container.
func New(args Args) (*Loader, error) {
@@ -186,13 +196,14 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("setting up memory usage: %v", err)
}
- if args.Conf.VFS2 {
- st, ok := kernel.LookupSyscallTable(abi.Linux, arch.Host)
- if ok {
- vfs2.Override(st.Table)
- }
+ // Patch the syscall table.
+ kernel.VFS2Enabled = args.Conf.VFS2
+ if kernel.VFS2Enabled {
+ vfs2.Override(syscallTable.Table)
}
+ kernel.RegisterSyscallTable(syscallTable)
+
// Create kernel and platform.
p, err := createPlatform(args.Conf, args.Device)
if err != nil {
@@ -230,11 +241,8 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("enabling strace: %v", err)
}
- // Create an empty network stack because the network namespace may be empty at
- // this point. Netns is configured before Run() is called. Netstack is
- // configured using a control uRPC message. Host network is configured inside
- // Run().
- networkStack, err := newEmptyNetworkStack(args.Conf, k, k)
+ // Create root network namespace/stack.
+ netns, err := newRootNetworkNamespace(args.Conf, k, k)
if err != nil {
return nil, fmt.Errorf("creating network: %v", err)
}
@@ -277,7 +285,7 @@ func New(args Args) (*Loader, error) {
FeatureSet: cpuid.HostFeatureSet(),
Timekeeper: tk,
RootUserNamespace: creds.UserNamespace,
- NetworkStack: networkStack,
+ RootNetworkNamespace: netns,
ApplicationCores: uint(args.NumCPU),
Vdso: vdso,
RootUTSNamespace: kernel.NewUTSNamespace(args.Spec.Hostname, args.Spec.Hostname, creds.UserNamespace),
@@ -320,6 +328,24 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("creating pod mount hints: %v", err)
}
+ // Make host FDs stable between invocations. Host FDs must map to the exact
+ // same number when the sandbox is restored. Otherwise the wrong FD will be
+ // used.
+ var stdioFDs []int
+ newfd := startingStdioFD
+ for _, fd := range args.StdioFDs {
+ err := syscall.Dup3(fd, newfd, syscall.O_CLOEXEC)
+ if err != nil {
+ return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err)
+ }
+ stdioFDs = append(stdioFDs, newfd)
+ err = syscall.Close(fd)
+ if err != nil {
+ return nil, fmt.Errorf("close original stdioFDs failed: %v", err)
+ }
+ newfd++
+ }
+
eid := execID{cid: args.ID}
l := &Loader{
k: k,
@@ -328,7 +354,7 @@ func New(args Args) (*Loader, error) {
watchdog: dog,
spec: args.Spec,
goferFDs: args.GoferFDs,
- stdioFDs: args.StdioFDs,
+ stdioFDs: stdioFDs,
rootProcArgs: procArgs,
sandboxID: args.ID,
processes: map[execID]*execProcess{eid: {}},
@@ -368,11 +394,16 @@ func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.
return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %v", err)
}
+ wd := spec.Process.Cwd
+ if wd == "" {
+ wd = "/"
+ }
+
// Create the process arguments.
procArgs := kernel.CreateProcessArgs{
Argv: spec.Process.Args,
Envv: spec.Process.Env,
- WorkingDirectory: spec.Process.Cwd, // Defaults to '/' if empty.
+ WorkingDirectory: wd,
Credentials: creds,
Umask: 0022,
Limits: ls,
@@ -466,7 +497,7 @@ func (l *Loader) run() error {
// Delay host network configuration to this point because network namespace
// is configured after the loader is created and before Run() is called.
log.Debugf("Configuring host network")
- stack := l.k.NetworkStack().(*hostinet.Stack)
+ stack := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack)
if err := stack.Configure(); err != nil {
return err
}
@@ -485,7 +516,7 @@ func (l *Loader) run() error {
// l.restore is set by the container manager when a restore call is made.
if !l.restore {
if l.conf.ProfileEnable {
- initializePProf()
+ pprof.Initialize()
}
// Finally done with all configuration. Setup filters before user code
@@ -517,7 +548,15 @@ func (l *Loader) run() error {
}
// Add the HOME enviroment variable if it is not already set.
- envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+ var envv []string
+ if kernel.VFS2Enabled {
+ envv, err = maybeAddExecUserHomeVFS2(ctx, l.rootProcArgs.MountNamespaceVFS2,
+ l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+
+ } else {
+ envv, err = maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace,
+ l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+ }
if err != nil {
return err
}
@@ -570,6 +609,16 @@ func (l *Loader) run() error {
}
})
+ // l.stdioFDs are derived from dup() in boot.New() and they are now dup()ed again
+ // either in createFDTable() during initial start or in descriptor.initAfterLoad()
+ // during restore, we can release l.stdioFDs now.
+ for _, fd := range l.stdioFDs {
+ err := syscall.Close(fd)
+ if err != nil {
+ return fmt.Errorf("close dup()ed stdioFDs: %v", err)
+ }
+ }
+
log.Infof("Process should have started...")
l.watchdog.Start()
return l.k.Start()
@@ -795,16 +844,19 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
return 0, fmt.Errorf("container %q not started", args.ContainerID)
}
+ // TODO(gvisor.dev/issue/1623): Add VFS2 support
+
// Get the container MountNamespace from the Task.
tg.Leader().WithMuLocked(func(t *kernel.Task) {
- // task.MountNamespace() does not take a ref, so we must do so
- // ourselves.
+ // task.MountNamespace() does not take a ref, so we must do so ourselves.
args.MountNamespace = t.MountNamespace()
args.MountNamespace.IncRef()
})
- defer args.MountNamespace.DecRef()
+ if args.MountNamespace != nil {
+ defer args.MountNamespace.DecRef()
+ }
- // Add the HOME enviroment varible if it is not already set.
+ // Add the HOME environment variable if it is not already set.
root := args.MountNamespace.Root()
defer root.DecRef()
ctx := fs.WithRoot(l.k.SupervisorContext(), root)
@@ -905,48 +957,92 @@ func (l *Loader) WaitExit() kernel.ExitStatus {
return l.k.GlobalInit().ExitStatus()
}
-func newEmptyNetworkStack(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) {
+func newRootNetworkNamespace(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (*inet.Namespace, error) {
+ // Create an empty network stack because the network namespace may be empty at
+ // this point. Netns is configured before Run() is called. Netstack is
+ // configured using a control uRPC message. Host network is configured inside
+ // Run().
switch conf.Network {
case NetworkHost:
- return hostinet.NewStack(), nil
+ // No network namespacing support for hostinet yet, hence creator is nil.
+ return inet.NewRootNamespace(hostinet.NewStack(), nil), nil
case NetworkNone, NetworkSandbox:
- // NetworkNone sets up loopback using netstack.
- netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()}
- transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()}
- s := netstack.Stack{stack.New(stack.Options{
- NetworkProtocols: netProtos,
- TransportProtocols: transProtos,
- Clock: clock,
- Stats: netstack.Metrics,
- HandleLocal: true,
- // Enable raw sockets for users with sufficient
- // privileges.
- RawFactory: raw.EndpointFactory{},
- UniqueID: uniqueID,
- })}
-
- // Enable SACK Recovery.
- if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
- return nil, fmt.Errorf("failed to enable SACK: %v", err)
+ s, err := newEmptySandboxNetworkStack(clock, uniqueID)
+ if err != nil {
+ return nil, err
+ }
+ creator := &sandboxNetstackCreator{
+ clock: clock,
+ uniqueID: uniqueID,
}
+ return inet.NewRootNamespace(s, creator), nil
+
+ default:
+ panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
+ }
- // Set default TTLs as required by socket/netstack.
- s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
- s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+}
- // Enable Receive Buffer Auto-Tuning.
- if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
- }
+func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) {
+ netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()}
+ transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()}
+ s := netstack.Stack{stack.New(stack.Options{
+ NetworkProtocols: netProtos,
+ TransportProtocols: transProtos,
+ Clock: clock,
+ Stats: netstack.Metrics,
+ HandleLocal: true,
+ // Enable raw sockets for users with sufficient
+ // privileges.
+ RawFactory: raw.EndpointFactory{},
+ UniqueID: uniqueID,
+ })}
- s.FillDefaultIPTables()
+ // Enable SACK Recovery.
+ if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
+ return nil, fmt.Errorf("failed to enable SACK: %v", err)
+ }
- return &s, nil
+ // Set default TTLs as required by socket/netstack.
+ s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+ s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
- default:
- panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
+ // Enable Receive Buffer Auto-Tuning.
+ if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
+ return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
}
+
+ s.FillDefaultIPTables()
+
+ return &s, nil
+}
+
+// sandboxNetstackCreator implements kernel.NetworkStackCreator.
+//
+// +stateify savable
+type sandboxNetstackCreator struct {
+ clock tcpip.Clock
+ uniqueID stack.UniqueID
+}
+
+// CreateStack implements kernel.NetworkStackCreator.CreateStack.
+func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) {
+ s, err := newEmptySandboxNetworkStack(f.clock, f.uniqueID)
+ if err != nil {
+ return nil, err
+ }
+
+ // Setup loopback.
+ n := &Network{Stack: s.(*netstack.Stack).Stack}
+ nicID := tcpip.NICID(f.uniqueID.UniqueID())
+ link := DefaultLoopbackLink
+ linkEP := loopback.New()
+ if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
+ return nil, err
+ }
+
+ return s, nil
}
// signal sends a signal to one or more processes in a container. If PID is 0,
@@ -994,7 +1090,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
execTG, _, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
if err == nil {
// Send signal directly to the identified process.
- return execTG.SendSignal(&arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo})
}
// The caller may be signaling a process not started directly via exec.
@@ -1011,7 +1107,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
if tg.Leader().ContainerID() != cid {
return fmt.Errorf("process %d is part of a different container: %q", tgid, tg.Leader().ContainerID())
}
- return tg.SendSignal(&arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
}
func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, signo int32) error {
@@ -1029,7 +1125,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
// No foreground process group has been set. Signal the
// original thread group.
log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid)
- return tg.SendSignal(&arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
}
// Send the signal to all processes in the process group.
var lastErr error
@@ -1037,7 +1133,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
if tg.ProcessGroup() != pg {
continue
}
- if err := tg.SendSignal(&arch.SignalInfo{Signo: signo}); err != nil {
+ if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil {
lastErr = err
}
}
diff --git a/runsc/boot/loader_amd64.go b/runsc/boot/loader_amd64.go
index b9669f2ac..78df86611 100644
--- a/runsc/boot/loader_amd64.go
+++ b/runsc/boot/loader_amd64.go
@@ -17,11 +17,10 @@
package boot
import (
- "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
)
func init() {
- // Register the global syscall table.
- kernel.RegisterSyscallTable(linux.AMD64)
+ // Set the global syscall table.
+ syscallTable = linux.AMD64
}
diff --git a/runsc/boot/loader_arm64.go b/runsc/boot/loader_arm64.go
index cf64d28c8..250785010 100644
--- a/runsc/boot/loader_arm64.go
+++ b/runsc/boot/loader_arm64.go
@@ -17,11 +17,10 @@
package boot
import (
- "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
)
func init() {
- // Register the global syscall table.
- kernel.RegisterSyscallTable(linux.ARM64)
+ // Set the global syscall table.
+ syscallTable = linux.ARM64
}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 44aa63196..e7c71734f 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -24,11 +24,13 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/control/server"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/unet"
"gvisor.dev/gvisor/runsc/fsgofer"
@@ -65,6 +67,11 @@ func testSpec() *specs.Spec {
}
}
+func resetSyscallTable() {
+ kernel.VFS2Enabled = false
+ kernel.FlushSyscallTablesTestOnly()
+}
+
// startGofer starts a new gofer routine serving 'root' path. It returns the
// sandbox side of the connection, and a function that when called will stop the
// gofer.
@@ -100,7 +107,7 @@ func startGofer(root string) (int, func(), error) {
return sandboxEnd, cleanup, nil
}
-func createLoader() (*Loader, func(), error) {
+func createLoader(vfsEnabled bool) (*Loader, func(), error) {
fd, err := server.CreateSocket(ControlSocketAddr(fmt.Sprintf("%010d", rand.Int())[:10]))
if err != nil {
return nil, nil, err
@@ -108,12 +115,23 @@ func createLoader() (*Loader, func(), error) {
conf := testConfig()
spec := testSpec()
+ conf.VFS2 = vfsEnabled
+
sandEnd, cleanup, err := startGofer(spec.Root.Path)
if err != nil {
return nil, nil, err
}
- stdio := []int{int(os.Stdin.Fd()), int(os.Stdout.Fd()), int(os.Stderr.Fd())}
+ // Loader takes ownership of stdio.
+ var stdio []int
+ for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
+ newFd, err := unix.Dup(int(f.Fd()))
+ if err != nil {
+ return nil, nil, err
+ }
+ stdio = append(stdio, newFd)
+ }
+
args := Args{
ID: "foo",
Spec: spec,
@@ -132,10 +150,22 @@ func createLoader() (*Loader, func(), error) {
// TestRun runs a simple application in a sandbox and checks that it succeeds.
func TestRun(t *testing.T) {
- l, cleanup, err := createLoader()
+ defer resetSyscallTable()
+ doRun(t, false)
+}
+
+// TestRunVFS2 runs TestRun in VFSv2.
+func TestRunVFS2(t *testing.T) {
+ defer resetSyscallTable()
+ doRun(t, true)
+}
+
+func doRun(t *testing.T, vfsEnabled bool) {
+ l, cleanup, err := createLoader(vfsEnabled)
if err != nil {
t.Fatalf("error creating loader: %v", err)
}
+
defer l.Destroy()
defer cleanup()
@@ -169,7 +199,18 @@ func TestRun(t *testing.T) {
// TestStartSignal tests that the controller Start message will cause
// WaitForStartSignal to return.
func TestStartSignal(t *testing.T) {
- l, cleanup, err := createLoader()
+ defer resetSyscallTable()
+ doStartSignal(t, false)
+}
+
+// TestStartSignalVFS2 does TestStartSignal with VFS2.
+func TestStartSignalVFS2(t *testing.T) {
+ defer resetSyscallTable()
+ doStartSignal(t, true)
+}
+
+func doStartSignal(t *testing.T, vfsEnabled bool) {
+ l, cleanup, err := createLoader(vfsEnabled)
if err != nil {
t.Fatalf("error creating loader: %v", err)
}
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 6a8765ec8..bee6ee336 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -17,6 +17,7 @@ package boot
import (
"fmt"
"net"
+ "strings"
"syscall"
"gvisor.dev/gvisor/pkg/log"
@@ -31,6 +32,32 @@ import (
"gvisor.dev/gvisor/pkg/urpc"
)
+var (
+ // DefaultLoopbackLink contains IP addresses and routes of "127.0.0.1/8" and
+ // "::1/8" on "lo" interface.
+ DefaultLoopbackLink = LoopbackLink{
+ Name: "lo",
+ Addresses: []net.IP{
+ net.IP("\x7f\x00\x00\x01"),
+ net.IPv6loopback,
+ },
+ Routes: []Route{
+ {
+ Destination: net.IPNet{
+ IP: net.IPv4(0x7f, 0, 0, 0),
+ Mask: net.IPv4Mask(0xff, 0, 0, 0),
+ },
+ },
+ {
+ Destination: net.IPNet{
+ IP: net.IPv6loopback,
+ Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)),
+ },
+ },
+ },
+ }
+)
+
// Network exposes methods that can be used to configure a network stack.
type Network struct {
Stack *stack.Stack
diff --git a/runsc/boot/pprof/BUILD b/runsc/boot/pprof/BUILD
new file mode 100644
index 000000000..29cb42b2f
--- /dev/null
+++ b/runsc/boot/pprof/BUILD
@@ -0,0 +1,11 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "pprof",
+ srcs = ["pprof.go"],
+ visibility = [
+ "//runsc:__subpackages__",
+ ],
+)
diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof/pprof.go
index 463362f02..1ded20dee 100644
--- a/runsc/boot/pprof.go
+++ b/runsc/boot/pprof/pprof.go
@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package boot
+// Package pprof provides a stub to initialize custom profilers.
+package pprof
-func initializePProf() {
+// Initialize will be called at boot for initializing custom profilers.
+func Initialize() {
}
diff --git a/runsc/boot/user.go b/runsc/boot/user.go
index f0aa52135..332e4fce5 100644
--- a/runsc/boot/user.go
+++ b/runsc/boot/user.go
@@ -23,8 +23,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -84,6 +86,48 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.K
File: f,
}
+ return findHomeInPasswd(uint32(uid), r, defaultHome)
+}
+
+type fileReaderVFS2 struct {
+ ctx context.Context
+ fd *vfs.FileDescription
+}
+
+func (r *fileReaderVFS2) Read(buf []byte) (int, error) {
+ n, err := r.fd.Read(r.ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{})
+ return int(n), err
+}
+
+func getExecUserHomeVFS2(ctx context.Context, mns *vfs.MountNamespace, uid auth.KUID) (string, error) {
+ const defaultHome = "/"
+
+ root := mns.Root()
+ defer root.DecRef()
+
+ creds := auth.CredentialsFromContext(ctx)
+
+ target := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse("/etc/passwd"),
+ }
+
+ opts := &vfs.OpenOptions{
+ Flags: linux.O_RDONLY,
+ }
+
+ fd, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, target, opts)
+ if err != nil {
+ return defaultHome, nil
+ }
+ defer fd.DecRef()
+
+ r := &fileReaderVFS2{
+ ctx: ctx,
+ fd: fd,
+ }
+
homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome)
if err != nil {
return "", err
@@ -111,6 +155,26 @@ func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.
if err != nil {
return nil, fmt.Errorf("error reading exec user: %v", err)
}
+
+ return append(envv, "HOME="+homeDir), nil
+}
+
+func maybeAddExecUserHomeVFS2(ctx context.Context, vmns *vfs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) {
+ // Check if the envv already contains HOME.
+ for _, env := range envv {
+ if strings.HasPrefix(env, "HOME=") {
+ // We have it. Return the original slice unmodified.
+ return envv, nil
+ }
+ }
+
+ // Read /etc/passwd for the user's HOME directory and set the HOME
+ // environment variable as required by POSIX if it is not overridden by
+ // the user.
+ homeDir, err := getExecUserHomeVFS2(ctx, vmns, uid)
+ if err != nil {
+ return nil, fmt.Errorf("error reading exec user: %v", err)
+ }
return append(envv, "HOME="+homeDir), nil
}
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
new file mode 100644
index 000000000..82083c57d
--- /dev/null
+++ b/runsc/boot/vfs.go
@@ -0,0 +1,310 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "fmt"
+ "path"
+ "strconv"
+ "strings"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/sentry/devices/memdev"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ devtmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ goferimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
+ procimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
+ sysimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
+ tmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error {
+
+ vfsObj.MustRegisterFilesystemType(rootFsName, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserList: true,
+ })
+
+ vfsObj.MustRegisterFilesystemType(bind, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserList: true,
+ })
+
+ vfsObj.MustRegisterFilesystemType(devpts, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+
+ vfsObj.MustRegisterFilesystemType(devtmpfs, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(proc, &procimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(sysfs, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(tmpfs, &tmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(nonefs, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+
+ // Setup files in devtmpfs.
+ if err := memdev.Register(vfsObj); err != nil {
+ return fmt.Errorf("registering memdev: %w", err)
+ }
+ a, err := devtmpfsimpl.NewAccessor(ctx, vfsObj, creds, devtmpfsimpl.Name)
+ if err != nil {
+ return fmt.Errorf("creating devtmpfs accessor: %w", err)
+ }
+ defer a.Release()
+
+ if err := a.UserspaceInit(ctx); err != nil {
+ return fmt.Errorf("initializing userspace: %w", err)
+ }
+ if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil {
+ return fmt.Errorf("creating devtmpfs files: %w", err)
+ }
+ return nil
+}
+
+func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+ if err := mntr.k.VFS().Init(); err != nil {
+ return fmt.Errorf("failed to initialize VFS: %w", err)
+ }
+ mns, err := mntr.setupVFS2(ctx, conf, procArgs)
+ if err != nil {
+ return fmt.Errorf("failed to setupFS: %w", err)
+ }
+ procArgs.MountNamespaceVFS2 = mns
+ return setExecutablePathVFS2(ctx, procArgs)
+}
+
+func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
+
+ exe := procArgs.Argv[0]
+
+ // Absolute paths can be used directly.
+ if path.IsAbs(exe) {
+ procArgs.Filename = exe
+ return nil
+ }
+
+ // Paths with '/' in them should be joined to the working directory, or
+ // to the root if working directory is not set.
+ if strings.IndexByte(exe, '/') > 0 {
+
+ if !path.IsAbs(procArgs.WorkingDirectory) {
+ return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory)
+ }
+
+ procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
+ return nil
+ }
+
+ // Paths with a '/' are relative to the CWD.
+ if strings.IndexByte(exe, '/') > 0 {
+ procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
+ return nil
+ }
+
+ // Otherwise, We must lookup the name in the paths, starting from the
+ // root directory.
+ root := procArgs.MountNamespaceVFS2.Root()
+ defer root.DecRef()
+
+ paths := fs.GetPath(procArgs.Envv)
+ creds := procArgs.Credentials
+
+ for _, p := range paths {
+
+ binPath := path.Join(p, exe)
+
+ pop := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(binPath),
+ FollowFinalSymlink: true,
+ }
+
+ opts := &vfs.OpenOptions{
+ FileExec: true,
+ Flags: linux.O_RDONLY,
+ }
+
+ dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts)
+ if err == syserror.ENOENT || err == syserror.EACCES {
+ // Didn't find it here.
+ continue
+ }
+ if err != nil {
+ return err
+ }
+ dentry.DecRef()
+
+ procArgs.Filename = binPath
+ return nil
+ }
+
+ return fmt.Errorf("executable %q not found in $PATH=%q", exe, strings.Join(paths, ":"))
+}
+
+func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
+ log.Infof("Configuring container's file system with VFS2")
+
+ // Create context with root credentials to mount the filesystem (the current
+ // user may not be privileged enough).
+ rootProcArgs := *procArgs
+ rootProcArgs.WorkingDirectory = "/"
+ rootProcArgs.Credentials = auth.NewRootCredentials(procArgs.Credentials.UserNamespace)
+ rootProcArgs.Umask = 0022
+ rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals
+ rootCtx := procArgs.NewContext(c.k)
+
+ creds := procArgs.Credentials
+ if err := registerFilesystems(rootCtx, c.k.VFS(), creds); err != nil {
+ return nil, fmt.Errorf("register filesystems: %w", err)
+ }
+
+ fd := c.fds.remove()
+
+ opts := strings.Join(p9MountOptionsVFS2(fd, conf.FileAccess), ",")
+
+ log.Infof("Mounting root over 9P, ioFD: %d", fd)
+ mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", rootFsName, &vfs.GetFilesystemOptions{Data: opts})
+ if err != nil {
+ return nil, fmt.Errorf("setting up mountnamespace: %w", err)
+ }
+
+ rootProcArgs.MountNamespaceVFS2 = mns
+
+ // Mount submounts.
+ if err := c.mountSubmountsVFS2(rootCtx, conf, mns, creds); err != nil {
+ return nil, fmt.Errorf("mounting submounts vfs2: %w", err)
+ }
+
+ return mns, nil
+}
+
+func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error {
+
+ for _, submount := range c.mounts {
+ log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options)
+ if err := c.mountSubmountVFS2(ctx, conf, mns, creds, &submount); err != nil {
+ return err
+ }
+ }
+
+ // TODO(gvisor.dev/issue/1487): implement mountTmp from fs.go.
+
+ return c.checkDispenser()
+}
+
+// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 version.
+func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *specs.Mount) error {
+ root := mns.Root()
+ defer root.DecRef()
+ target := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(submount.Destination),
+ }
+
+ _, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, *submount)
+ if err != nil {
+ return fmt.Errorf("mountOptions failed: %w", err)
+ }
+
+ opts := &vfs.MountOptions{
+ GetFilesystemOptions: vfs.GetFilesystemOptions{
+ Data: strings.Join(options, ","),
+ },
+ InternalMount: true,
+ }
+
+ // All writes go to upper, be paranoid and make lower readonly.
+ opts.ReadOnly = useOverlay
+
+ if err := c.k.VFS().MountAt(ctx, creds, "", target, submount.Type, opts); err != nil {
+ return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
+ }
+ log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts)
+ return nil
+}
+
+// getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
+// used for mounts.
+func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Mount) (string, []string, bool, error) {
+ var (
+ fsName string
+ opts []string
+ useOverlay bool
+ )
+
+ switch m.Type {
+ case devpts, devtmpfs, proc, sysfs:
+ fsName = m.Type
+ case nonefs:
+ fsName = sysfs
+ case tmpfs:
+ fsName = m.Type
+
+ var err error
+ opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+ if err != nil {
+ return "", nil, false, err
+ }
+
+ case bind:
+ fd := c.fds.remove()
+ fsName = "9p"
+ opts = p9MountOptionsVFS2(fd, c.getMountAccessType(m))
+ // If configured, add overlay to all writable mounts.
+ useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
+
+ default:
+ log.Warningf("ignoring unknown filesystem type %q", m.Type)
+ }
+ return fsName, opts, useOverlay, nil
+}
+
+// p9MountOptions creates a slice of options for a p9 mount.
+// TODO(gvisor.dev/issue/1200): Remove this version in favor of the one in
+// fs.go when privateunixsocket lands.
+func p9MountOptionsVFS2(fd int, fa FileAccessType) []string {
+ opts := []string{
+ "trans=fd",
+ "rfdno=" + strconv.Itoa(fd),
+ "wfdno=" + strconv.Itoa(fd),
+ }
+ if fa == FileAccessShared {
+ opts = append(opts, "cache=remote_revalidating")
+ }
+ return opts
+}
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index 2a88b85a9..d0bb4613a 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -31,6 +31,7 @@ go_library(
"spec.go",
"start.go",
"state.go",
+ "statefile.go",
"syscalls.go",
"wait.go",
],
@@ -43,6 +44,8 @@ go_library(
"//pkg/sentry/control",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
+ "//pkg/state",
+ "//pkg/state/statefile",
"//pkg/sync",
"//pkg/unet",
"//pkg/urpc",
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 0f3da69a0..0938944a6 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -23,6 +23,7 @@ import (
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/boot/platforms"
@@ -82,8 +83,13 @@ type Boot struct {
// sandbox (e.g. gofer) and sent through this FD.
mountsFD int
- // pidns is set if the sanadbox is in its own pid namespace.
+ // pidns is set if the sandbox is in its own pid namespace.
pidns bool
+
+ // attached is set to true to kill the sandbox process when the parent process
+ // terminates. This flag is set when the command execve's itself because
+ // parent death signal doesn't propagate through execve when uid/gid changes.
+ attached bool
}
// Name implements subcommands.Command.Name.
@@ -118,6 +124,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
+ f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates")
}
// Execute implements subcommands.Command.Execute. It starts a sandbox in a
@@ -133,29 +140,32 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
conf := args[0].(*boot.Config)
+ if b.attached {
+ // Ensure this process is killed after parent process terminates when
+ // attached mode is enabled. In the unfortunate event that the parent
+ // terminates before this point, this process leaks.
+ if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil {
+ Fatalf("error setting parent death signal: %v", err)
+ }
+ }
+
if b.setUpRoot {
if err := setUpChroot(b.pidns); err != nil {
Fatalf("error setting up chroot: %v", err)
}
- if !b.applyCaps {
- // Remove --setup-root arg to call myself.
- var args []string
- for _, arg := range os.Args {
- if !strings.Contains(arg, "setup-root") {
- args = append(args, arg)
- }
- }
- if !conf.Rootless {
- // Note that we've already read the spec from the spec FD, and
- // we will read it again after the exec call. This works
- // because the ReadSpecFromFile function seeks to the beginning
- // of the file before reading.
- if err := callSelfAsNobody(args); err != nil {
- Fatalf("%v", err)
- }
- panic("callSelfAsNobody must never return success")
+ if !b.applyCaps && !conf.Rootless {
+ // Remove --apply-caps arg to call myself. It has already been done.
+ args := prepareArgs(b.attached, "setup-root")
+
+ // Note that we've already read the spec from the spec FD, and
+ // we will read it again after the exec call. This works
+ // because the ReadSpecFromFile function seeks to the beginning
+ // of the file before reading.
+ if err := callSelfAsNobody(args); err != nil {
+ Fatalf("%v", err)
}
+ panic("callSelfAsNobody must never return success")
}
}
@@ -181,13 +191,9 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
caps.Permitted = append(caps.Permitted, c)
}
- // Remove --apply-caps arg to call myself.
- var args []string
- for _, arg := range os.Args {
- if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") {
- args = append(args, arg)
- }
- }
+ // Remove --apply-caps and --setup-root arg to call myself. Both have
+ // already been done.
+ args := prepareArgs(b.attached, "setup-root", "apply-caps")
// Note that we've already read the spec from the spec FD, and
// we will read it again after the exec call. This works
@@ -258,3 +264,22 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
l.Destroy()
return subcommands.ExitSuccess
}
+
+func prepareArgs(attached bool, exclude ...string) []string {
+ var args []string
+ for _, arg := range os.Args {
+ for _, excl := range exclude {
+ if strings.Contains(arg, excl) {
+ goto skip
+ }
+ }
+ args = append(args, arg)
+ if attached && arg == "boot" {
+ // Strategicaly place "--attached" after the command. This is needed
+ // to ensure the new process is killed when the parent process terminates.
+ args = append(args, "--attached")
+ }
+ skip:
+ }
+ return args
+}
diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go
index 0c27f7313..9360d7442 100644
--- a/runsc/cmd/capability_test.go
+++ b/runsc/cmd/capability_test.go
@@ -85,7 +85,7 @@ func TestCapabilities(t *testing.T) {
Inheritable: caps,
}
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
// Use --network=host to make sandbox use spec's capabilities.
conf.Network = boot.NetworkHost
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
index b5a0ce17d..189244765 100644
--- a/runsc/cmd/chroot.go
+++ b/runsc/cmd/chroot.go
@@ -50,7 +50,7 @@ func pivotRoot(root string) error {
// new_root, so after umounting the old_root, we will see only
// the new_root in "/".
if err := syscall.PivotRoot(".", "."); err != nil {
- return fmt.Errorf("error changing root filesystem: %v", err)
+ return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err)
}
if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 79965460e..b5de2588b 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -32,17 +32,20 @@ import (
// Debug implements subcommands.Command for the "debug" command.
type Debug struct {
- pid int
- stacks bool
- signal int
- profileHeap string
- profileCPU string
- trace string
- strace string
- logLevel string
- logPackets string
- duration time.Duration
- ps bool
+ pid int
+ stacks bool
+ signal int
+ profileHeap string
+ profileCPU string
+ profileGoroutine string
+ profileBlock string
+ profileMutex string
+ trace string
+ strace string
+ logLevel string
+ logPackets string
+ duration time.Duration
+ ps bool
}
// Name implements subcommands.Command.
@@ -66,6 +69,9 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log")
f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.")
f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
+ f.StringVar(&d.profileGoroutine, "profile-goroutine", "", "writes goroutine profile to the given file.")
+ f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
+ f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
f.DurationVar(&d.duration, "duration", time.Second, "amount of time to wait for CPU and trace profiles")
f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
@@ -147,6 +153,42 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
log.Infof("Heap profile written to %q", d.profileHeap)
}
+ if d.profileGoroutine != "" {
+ f, err := os.Create(d.profileGoroutine)
+ if err != nil {
+ return Errorf(err.Error())
+ }
+ defer f.Close()
+
+ if err := c.Sandbox.GoroutineProfile(f); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Goroutine profile written to %q", d.profileGoroutine)
+ }
+ if d.profileBlock != "" {
+ f, err := os.Create(d.profileBlock)
+ if err != nil {
+ return Errorf(err.Error())
+ }
+ defer f.Close()
+
+ if err := c.Sandbox.BlockProfile(f); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Block profile written to %q", d.profileBlock)
+ }
+ if d.profileMutex != "" {
+ f, err := os.Create(d.profileMutex)
+ if err != nil {
+ return Errorf(err.Error())
+ }
+ defer f.Close()
+
+ if err := c.Sandbox.MutexProfile(f); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Mutex profile written to %q", d.profileMutex)
+ }
delay := false
if d.profileCPU != "" {
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 6e06f3c0f..28f0d54b9 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -272,9 +272,8 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
root := spec.Root.Path
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
- // FIXME: runsc can't be re-executed without
- // /proc, so we create a tmpfs mount, mount ./proc and ./root
- // there, then move this mount to the root and after
+ // runsc can't be re-executed without /proc, so we create a tmpfs mount,
+ // mount ./proc and ./root there, then move this mount to the root and after
// setCapsAndCallSelf, runsc will chroot into /root.
//
// We need a directory to construct a new root and we know that
@@ -335,7 +334,7 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
if err := pivotRoot("/proc"); err != nil {
- Fatalf("faild to change the root file system: %v", err)
+ Fatalf("failed to change the root file system: %v", err)
}
if err := os.Chdir("/"); err != nil {
Fatalf("failed to change working directory")
diff --git a/runsc/cmd/statefile.go b/runsc/cmd/statefile.go
new file mode 100644
index 000000000..e6f1907da
--- /dev/null
+++ b/runsc/cmd/statefile.go
@@ -0,0 +1,143 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "fmt"
+ "os"
+
+ "github.com/google/subcommands"
+ "gvisor.dev/gvisor/pkg/state"
+ "gvisor.dev/gvisor/pkg/state/statefile"
+ "gvisor.dev/gvisor/runsc/flag"
+)
+
+// Statefile implements subcommands.Command for the "statefile" command.
+type Statefile struct {
+ list bool
+ get string
+ key string
+ output string
+ html bool
+}
+
+// Name implements subcommands.Command.
+func (*Statefile) Name() string {
+ return "state"
+}
+
+// Synopsis implements subcommands.Command.
+func (*Statefile) Synopsis() string {
+ return "shows information about a statefile"
+}
+
+// Usage implements subcommands.Command.
+func (*Statefile) Usage() string {
+ return `statefile [flags] <statefile>`
+}
+
+// SetFlags implements subcommands.Command.
+func (s *Statefile) SetFlags(f *flag.FlagSet) {
+ f.BoolVar(&s.list, "list", false, "lists the metdata in the statefile.")
+ f.StringVar(&s.get, "get", "", "extracts the given metadata key.")
+ f.StringVar(&s.key, "key", "", "the integrity key for the file.")
+ f.StringVar(&s.output, "output", "", "target to write the result.")
+ f.BoolVar(&s.html, "html", false, "outputs in HTML format.")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (s *Statefile) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ // Check arguments.
+ if s.list && s.get != "" {
+ Fatalf("error: can't specify -list and -get simultaneously.")
+ }
+
+ // Setup output.
+ var output = os.Stdout // Default.
+ if s.output != "" {
+ f, err := os.OpenFile(s.output, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
+ if err != nil {
+ Fatalf("error opening output: %v", err)
+ }
+ defer func() {
+ if err := f.Close(); err != nil {
+ Fatalf("error flushing output: %v", err)
+ }
+ }()
+ output = f
+ }
+
+ // Open the file.
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+ input, err := os.Open(f.Arg(0))
+ if err != nil {
+ Fatalf("error opening input: %v\n", err)
+ }
+
+ if s.html {
+ fmt.Fprintf(output, "<html><body>\n")
+ defer fmt.Fprintf(output, "</body></html>\n")
+ }
+
+ // Dump the full file?
+ if !s.list && s.get == "" {
+ var key []byte
+ if s.key != "" {
+ key = []byte(s.key)
+ }
+ rc, _, err := statefile.NewReader(input, key)
+ if err != nil {
+ Fatalf("error parsing statefile: %v", err)
+ }
+ if err := state.PrettyPrint(output, rc, s.html); err != nil {
+ Fatalf("error printing state: %v", err)
+ }
+ return subcommands.ExitSuccess
+ }
+
+ // Load just the metadata.
+ metadata, err := statefile.MetadataUnsafe(input)
+ if err != nil {
+ Fatalf("error reading metadata: %v", err)
+ }
+
+ // Is it a single key?
+ if s.get != "" {
+ val, ok := metadata[s.get]
+ if !ok {
+ Fatalf("metadata key %s: not found", s.get)
+ }
+ fmt.Fprintf(output, "%s\n", val)
+ return subcommands.ExitSuccess
+ }
+
+ // List all keys.
+ if s.html {
+ fmt.Fprintf(output, " <ul>\n")
+ defer fmt.Fprintf(output, " </ul>\n")
+ }
+ for key := range metadata {
+ if s.html {
+ fmt.Fprintf(output, " <li>%s</li>\n", key)
+ } else {
+ fmt.Fprintf(output, "%s\n", key)
+ }
+ }
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index c2518d52b..af245b6d8 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -118,7 +118,7 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) {
// Test that an pty FD is sent over the console socket if one is provided.
func TestConsoleSocket(t *testing.T) {
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
spec := testutil.NewSpecWithArgs("true")
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
@@ -163,7 +163,7 @@ func TestConsoleSocket(t *testing.T) {
// Test that job control signals work on a console created with "exec -ti".
func TestJobControlSignalExec(t *testing.T) {
spec := testutil.NewSpecWithArgs("/bin/sleep", "10000")
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
if err != nil {
@@ -286,7 +286,7 @@ func TestJobControlSignalExec(t *testing.T) {
// Test that job control signals work on a console created with "run -ti".
func TestJobControlSignalRootContainer(t *testing.T) {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
// Don't let bash execute from profile or rc files, otherwise our PID
// counts get messed up.
spec := testutil.NewSpecWithArgs("/bin/bash", "--noprofile", "--norc")
@@ -333,13 +333,13 @@ func TestJobControlSignalRootContainer(t *testing.T) {
// file. Writes after a certain point will block unless we drain the
// PTY, so we must continually copy from it.
//
- // We log the output to stdout for debugabilitly, and also to a buffer,
+ // We log the output to stderr for debugabilitly, and also to a buffer,
// since we wait on particular output from bash below. We use a custom
// blockingBuffer which is thread-safe and also blocks on Read calls,
// which makes this a suitable Reader for WaitUntilRead.
ptyBuf := newBlockingBuffer()
tee := io.TeeReader(ptyMaster, ptyBuf)
- go io.Copy(os.Stdout, tee)
+ go io.Copy(os.Stderr, tee)
// Start the container.
if err := c.Start(conf); err != nil {
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 68782c4be..7233659b1 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -17,6 +17,7 @@ package container
import (
"context"
+ "errors"
"fmt"
"io/ioutil"
"os"
@@ -1066,27 +1067,19 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error {
// adjustGoferOOMScoreAdj sets the oom_store_adj for the container's gofer.
func (c *Container) adjustGoferOOMScoreAdj() error {
- if c.GoferPid != 0 && c.Spec.Process.OOMScoreAdj != nil {
- if err := setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj); err != nil {
- // Ignore NotExist error because it can be returned when the sandbox
- // exited while OOM score was being adjusted.
- if !os.IsNotExist(err) {
- return fmt.Errorf("setting gofer oom_score_adj for container %q: %v", c.ID, err)
- }
- log.Warningf("Gofer process (%d) not found setting oom_score_adj", c.GoferPid)
- }
+ if c.GoferPid == 0 || c.Spec.Process.OOMScoreAdj == nil {
+ return nil
}
-
- return nil
+ return setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj)
}
// adjustSandboxOOMScoreAdj sets the oom_score_adj for the sandbox.
// oom_score_adj is set to the lowest oom_score_adj among the containers
// running in the sandbox.
//
-// TODO(gvisor.dev/issue/512): This call could race with other containers being
+// TODO(gvisor.dev/issue/238): This call could race with other containers being
// created at the same time and end up setting the wrong oom_score_adj to the
-// sandbox.
+// sandbox. Use rpc client to synchronize.
func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) error {
containers, err := loadSandbox(rootDir, s.ID)
if err != nil {
@@ -1154,29 +1147,29 @@ func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool)
}
// Set the lowest of all containers oom_score_adj to the sandbox.
- if err := setOOMScoreAdj(s.Pid, lowScore); err != nil {
- // Ignore NotExist error because it can be returned when the sandbox
- // exited while OOM score was being adjusted.
- if !os.IsNotExist(err) {
- return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", s.ID, err)
- }
- log.Warningf("Sandbox process (%d) not found setting oom_score_adj", s.Pid)
- }
-
- return nil
+ return setOOMScoreAdj(s.Pid, lowScore)
}
// setOOMScoreAdj sets oom_score_adj to the given value for the given PID.
// /proc must be available and mounted read-write. scoreAdj should be between
-// -1000 and 1000.
+// -1000 and 1000. It's a noop if the process has already exited.
func setOOMScoreAdj(pid int, scoreAdj int) error {
f, err := os.OpenFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid), os.O_WRONLY, 0644)
if err != nil {
+ // Ignore NotExist errors because it can race with process exit.
+ if os.IsNotExist(err) {
+ log.Warningf("Process (%d) not found setting oom_score_adj", pid)
+ return nil
+ }
return err
}
defer f.Close()
if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil {
- return err
+ if errors.Is(err, syscall.ESRCH) {
+ log.Warningf("Process (%d) exited while setting oom_score_adj", pid)
+ return nil
+ }
+ return fmt.Errorf("setting oom_score_adj to %q: %v", scoreAdj, err)
}
return nil
}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 04a7dc237..5db6d64aa 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -71,6 +71,7 @@ func waitForProcessCount(cont *Container, want int) error {
return &backoff.PermanentError{Err: err}
}
if got := len(pss); got != want {
+ log.Infof("Waiting for process count to reach %d. Current: %d", want, got)
return fmt.Errorf("wrong process count, got: %d, want: %d", got, want)
}
return nil
@@ -123,23 +124,6 @@ func procListsEqual(got, want []*control.Process) (bool, error) {
return true, nil
}
-// getAndCheckProcLists is similar to waitForProcessList, but does not wait and retry the
-// test for equality. This is because we already confirmed that exec occurred.
-func getAndCheckProcLists(cont *Container, want []*control.Process) error {
- got, err := cont.Processes()
- if err != nil {
- return fmt.Errorf("error getting process data from container: %v", err)
- }
- equal, err := procListsEqual(got, want)
- if err != nil {
- return err
- }
- if equal {
- return nil
- }
- return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want))
-}
-
func procListToString(pl []*control.Process) string {
strs := make([]string, 0, len(pl))
for _, p := range pl {
@@ -267,12 +251,12 @@ var noOverlay = []configOption{kvm, nonExclusiveFS}
var all = append(noOverlay, overlay)
// configs generates different configurations to run tests.
-func configs(opts ...configOption) []*boot.Config {
+func configs(t *testing.T, opts ...configOption) []*boot.Config {
// Always load the default config.
- cs := []*boot.Config{testutil.TestConfig()}
+ cs := []*boot.Config{testutil.TestConfig(t)}
for _, o := range opts {
- c := testutil.TestConfig()
+ c := testutil.TestConfig(t)
switch o {
case overlay:
c.Overlay = true
@@ -301,7 +285,7 @@ func TestLifecycle(t *testing.T) {
childReaper.Start()
defer childReaper.Stop()
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
// The container will just sleep for a long time. We will kill it before
// it finishes sleeping.
@@ -473,7 +457,7 @@ func TestExePath(t *testing.T) {
t.Fatal(err)
}
- for _, conf := range configs(overlay) {
+ for _, conf := range configs(t, overlay) {
t.Logf("Running test with conf: %+v", conf)
for _, test := range []struct {
path string
@@ -537,9 +521,19 @@ func TestExePath(t *testing.T) {
// Test the we can retrieve the application exit status from the container.
func TestAppExitStatus(t *testing.T) {
+ doAppExitStatus(t, false)
+}
+
+// This is TestAppExitStatus for VFSv2.
+func TestAppExitStatusVFS2(t *testing.T) {
+ doAppExitStatus(t, true)
+}
+
+func doAppExitStatus(t *testing.T, vfs2 bool) {
// First container will succeed.
succSpec := testutil.NewSpecWithArgs("true")
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
+ conf.VFS2 = vfs2
rootDir, bundleDir, err := testutil.SetupContainer(succSpec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -589,7 +583,7 @@ func TestAppExitStatus(t *testing.T) {
// TestExec verifies that a container can exec a new program.
func TestExec(t *testing.T) {
- for _, conf := range configs(overlay) {
+ for _, conf := range configs(t, overlay) {
t.Logf("Running test with conf: %+v", conf)
const uid = 343
@@ -683,7 +677,7 @@ func TestExec(t *testing.T) {
// TestKillPid verifies that we can signal individual exec'd processes.
func TestKillPid(t *testing.T) {
- for _, conf := range configs(overlay) {
+ for _, conf := range configs(t, overlay) {
t.Logf("Running test with conf: %+v", conf)
app, err := testutil.FindFile("runsc/container/test_app/test_app")
@@ -759,7 +753,7 @@ func TestKillPid(t *testing.T) {
// be the next consecutive number after the last number from the checkpointed container.
func TestCheckpointRestore(t *testing.T) {
// Skip overlay because test requires writing to host file.
- for _, conf := range configs(noOverlay...) {
+ for _, conf := range configs(t, noOverlay...) {
t.Logf("Running test with conf: %+v", conf)
dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test")
@@ -920,7 +914,7 @@ func TestCheckpointRestore(t *testing.T) {
// with filesystem Unix Domain Socket use.
func TestUnixDomainSockets(t *testing.T) {
// Skip overlay because test requires writing to host file.
- for _, conf := range configs(noOverlay...) {
+ for _, conf := range configs(t, noOverlay...) {
t.Logf("Running test with conf: %+v", conf)
// UDS path is limited to 108 chars for compatibility with older systems.
@@ -1058,7 +1052,7 @@ func TestUnixDomainSockets(t *testing.T) {
// recreated. Then it resumes the container, verify that the file gets created
// again.
func TestPauseResume(t *testing.T) {
- for _, conf := range configs(noOverlay...) {
+ for _, conf := range configs(t, noOverlay...) {
t.Run(fmt.Sprintf("conf: %+v", conf), func(t *testing.T) {
t.Logf("Running test with conf: %+v", conf)
@@ -1139,7 +1133,7 @@ func TestPauseResume(t *testing.T) {
// occurs given the correct state.
func TestPauseResumeStatus(t *testing.T) {
spec := testutil.NewSpecWithArgs("sleep", "20")
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -1205,7 +1199,7 @@ func TestCapabilities(t *testing.T) {
uid := auth.KUID(os.Getuid() + 1)
gid := auth.KGID(os.Getgid() + 1)
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
spec := testutil.NewSpecWithArgs("sleep", "100")
@@ -1294,7 +1288,7 @@ func TestCapabilities(t *testing.T) {
// TestRunNonRoot checks that sandbox can be configured when running as
// non-privileged user.
func TestRunNonRoot(t *testing.T) {
- for _, conf := range configs(noOverlay...) {
+ for _, conf := range configs(t, noOverlay...) {
t.Logf("Running test with conf: %+v", conf)
spec := testutil.NewSpecWithArgs("/bin/true")
@@ -1338,7 +1332,7 @@ func TestRunNonRoot(t *testing.T) {
// TestMountNewDir checks that runsc will create destination directory if it
// doesn't exit.
func TestMountNewDir(t *testing.T) {
- for _, conf := range configs(overlay) {
+ for _, conf := range configs(t, overlay) {
t.Logf("Running test with conf: %+v", conf)
root, err := ioutil.TempDir(testutil.TmpDir(), "root")
@@ -1367,7 +1361,7 @@ func TestMountNewDir(t *testing.T) {
}
func TestReadonlyRoot(t *testing.T) {
- for _, conf := range configs(overlay) {
+ for _, conf := range configs(t, overlay) {
t.Logf("Running test with conf: %+v", conf)
spec := testutil.NewSpecWithArgs("/bin/touch", "/foo")
@@ -1405,7 +1399,7 @@ func TestReadonlyRoot(t *testing.T) {
}
func TestUIDMap(t *testing.T) {
- for _, conf := range configs(noOverlay...) {
+ for _, conf := range configs(t, noOverlay...) {
t.Logf("Running test with conf: %+v", conf)
testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount")
if err != nil {
@@ -1486,7 +1480,7 @@ func TestUIDMap(t *testing.T) {
}
func TestReadonlyMount(t *testing.T) {
- for _, conf := range configs(overlay) {
+ for _, conf := range configs(t, overlay) {
t.Logf("Running test with conf: %+v", conf)
dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
@@ -1543,7 +1537,7 @@ func TestAbbreviatedIDs(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
cids := []string{
@@ -1601,7 +1595,7 @@ func TestAbbreviatedIDs(t *testing.T) {
func TestGoferExits(t *testing.T) {
spec := testutil.NewSpecWithArgs("/bin/sleep", "10000")
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -1670,7 +1664,7 @@ func TestRootNotMount(t *testing.T) {
spec.Root.Readonly = true
spec.Mounts = nil
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
if err := run(spec, conf); err != nil {
t.Fatalf("error running sandbox: %v", err)
}
@@ -1684,7 +1678,7 @@ func TestUserLog(t *testing.T) {
// sched_rr_get_interval = 148 - not implemented in gvisor.
spec := testutil.NewSpecWithArgs(app, "syscall", "--syscall=148")
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -1724,7 +1718,7 @@ func TestUserLog(t *testing.T) {
}
func TestWaitOnExitedSandbox(t *testing.T) {
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
// Run a shell that sleeps for 1 second and then exits with a
@@ -1779,7 +1773,7 @@ func TestWaitOnExitedSandbox(t *testing.T) {
func TestDestroyNotStarted(t *testing.T) {
spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -1806,7 +1800,7 @@ func TestDestroyNotStarted(t *testing.T) {
func TestDestroyStarting(t *testing.T) {
for i := 0; i < 10; i++ {
spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -1851,7 +1845,7 @@ func TestDestroyStarting(t *testing.T) {
}
func TestCreateWorkingDir(t *testing.T) {
- for _, conf := range configs(overlay) {
+ for _, conf := range configs(t, overlay) {
t.Logf("Running test with conf: %+v", conf)
tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create")
@@ -1924,7 +1918,7 @@ func TestMountPropagation(t *testing.T) {
},
}
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -1975,7 +1969,7 @@ func TestMountPropagation(t *testing.T) {
}
func TestMountSymlink(t *testing.T) {
- for _, conf := range configs(overlay) {
+ for _, conf := range configs(t, overlay) {
t.Logf("Running test with conf: %+v", conf)
dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
@@ -2055,7 +2049,7 @@ func TestNetRaw(t *testing.T) {
}
for _, enableRaw := range []bool{true, false} {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.EnableRaw = enableRaw
test := "--enabled"
@@ -2072,7 +2066,7 @@ func TestNetRaw(t *testing.T) {
// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works.
func TestOverlayfsStaleRead(t *testing.T) {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.OverlayfsStaleRead = true
in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in")
@@ -2091,7 +2085,7 @@ func TestOverlayfsStaleRead(t *testing.T) {
defer out.Close()
const want = "foobar"
- cmd := fmt.Sprintf("cat %q && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name())
+ cmd := fmt.Sprintf("cat %q >&2 && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name())
spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd)
if err := run(spec, conf); err != nil {
t.Fatalf("Error running container: %v", err)
@@ -2136,7 +2130,7 @@ func TestTTYField(t *testing.T) {
for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
// We will run /bin/sleep, possibly with an open TTY.
cmd := []string{"/bin/sleep", "10000"}
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 2da93ec5b..dc2fb42ce 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -135,7 +135,7 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
// TestMultiContainerSanity checks that it is possible to run 2 dead-simple
// containers in the same sandbox.
func TestMultiContainerSanity(t *testing.T) {
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
rootDir, err := testutil.SetupRootDir()
@@ -173,7 +173,7 @@ func TestMultiContainerSanity(t *testing.T) {
// TestMultiPIDNS checks that it is possible to run 2 dead-simple
// containers in the same sandbox with different pidns.
func TestMultiPIDNS(t *testing.T) {
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
rootDir, err := testutil.SetupRootDir()
@@ -218,7 +218,7 @@ func TestMultiPIDNS(t *testing.T) {
// TestMultiPIDNSPath checks the pidns path.
func TestMultiPIDNSPath(t *testing.T) {
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
rootDir, err := testutil.SetupRootDir()
@@ -289,7 +289,7 @@ func TestMultiContainerWait(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// The first container should run the entire duration of the test.
@@ -367,7 +367,7 @@ func TestExecWait(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// The first container should run the entire duration of the test.
@@ -463,7 +463,7 @@ func TestMultiContainerMount(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
containers, cleanup, err := startContainers(conf, sps, ids)
@@ -484,7 +484,7 @@ func TestMultiContainerMount(t *testing.T) {
// TestMultiContainerSignal checks that it is possible to signal individual
// containers without killing the entire sandbox.
func TestMultiContainerSignal(t *testing.T) {
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
rootDir, err := testutil.SetupRootDir()
@@ -585,7 +585,7 @@ func TestMultiContainerDestroy(t *testing.T) {
t.Fatal("error finding test_app:", err)
}
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
rootDir, err := testutil.SetupRootDir()
@@ -653,7 +653,7 @@ func TestMultiContainerProcesses(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// Note: use curly braces to keep 'sh' process around. Otherwise, shell
@@ -712,7 +712,7 @@ func TestMultiContainerKillAll(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
for _, tc := range []struct {
@@ -804,7 +804,7 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
[]string{"/bin/sleep", "100"},
[]string{"/bin/sleep", "100"})
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -858,7 +858,7 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
}
specs, ids := createSpecs(cmds...)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -943,7 +943,7 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// Make sure overlay is enabled, and none of the root filesystems are
@@ -1006,7 +1006,7 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) {
childrenSpecs := allSpecs[1:]
childrenIDs := allIDs[1:]
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
rootDir, bundleDir, err := testutil.SetupContainer(rootSpec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
@@ -1080,7 +1080,7 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) {
// Test that pod shared mounts are properly mounted in 2 containers and that
// changes from one container is reflected in the other.
func TestMultiContainerSharedMount(t *testing.T) {
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
rootDir, err := testutil.SetupRootDir()
@@ -1195,7 +1195,7 @@ func TestMultiContainerSharedMount(t *testing.T) {
// Test that pod mounts are mounted as readonly when requested.
func TestMultiContainerSharedMountReadonly(t *testing.T) {
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
rootDir, err := testutil.SetupRootDir()
@@ -1262,7 +1262,7 @@ func TestMultiContainerSharedMountReadonly(t *testing.T) {
// Test that shared pod mounts continue to work after container is restarted.
func TestMultiContainerSharedMountRestart(t *testing.T) {
- for _, conf := range configs(all...) {
+ for _, conf := range configs(t, all...) {
t.Logf("Running test with conf: %+v", conf)
rootDir, err := testutil.SetupRootDir()
@@ -1381,7 +1381,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// Setup the containers.
@@ -1463,7 +1463,7 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// Create the specs.
@@ -1500,7 +1500,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
sleep := []string{"sleep", "100"}
@@ -1587,7 +1587,7 @@ func TestMultiContainerLoadSandbox(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// Create containers for the sandbox.
@@ -1687,7 +1687,7 @@ func TestMultiContainerRunNonRoot(t *testing.T) {
}
defer os.RemoveAll(rootDir)
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
pod, cleanup, err := startContainers(conf, podSpecs, ids)
diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go
index dc4194134..f80852414 100644
--- a/runsc/container/shared_volume_test.go
+++ b/runsc/container/shared_volume_test.go
@@ -31,7 +31,7 @@ import (
// TestSharedVolume checks that modifications to a volume mount are propagated
// into and out of the sandbox.
func TestSharedVolume(t *testing.T) {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.FileAccess = boot.FileAccessShared
t.Logf("Running test with conf: %+v", conf)
@@ -190,7 +190,7 @@ func checkFile(c *Container, filename string, want []byte) error {
// TestSharedVolumeFile tests that changes to file content outside the sandbox
// is reflected inside.
func TestSharedVolumeFile(t *testing.T) {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.FileAccess = boot.FileAccessShared
t.Logf("Running test with conf: %+v", conf)
diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go
index 01c47c79f..5f1c4b7d6 100644
--- a/runsc/container/test_app/test_app.go
+++ b/runsc/container/test_app/test_app.go
@@ -96,7 +96,7 @@ func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{})
listener, err := net.Listen("unix", c.socketPath)
if err != nil {
- log.Fatal("error listening on socket %q:", c.socketPath, err)
+ log.Fatalf("error listening on socket %q: %v", c.socketPath, err)
}
go server(listener, outputFile)
diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go
index 1ff5e8cc3..f009486bc 100644
--- a/runsc/dockerutil/dockerutil.go
+++ b/runsc/dockerutil/dockerutil.go
@@ -36,8 +36,18 @@ import (
)
var (
+ // runtime is the runtime to use for tests. This will be applied to all
+ // containers. Note that the default here ("runsc") corresponds to the
+ // default used by the installations. This is important, because the
+ // default installer for vm_tests (in tools/installers:head, invoked
+ // via tools/vm:defs.bzl) will install with this name. So without
+ // changing anything, tests should have a runsc runtime available to
+ // them. Otherwise installers should update the existing runtime
+ // instead of installing a new one.
runtime = flag.String("runtime", "runsc", "specify which runtime to use")
- config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths")
+
+ // config is the default Docker daemon configuration path.
+ config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths")
)
// EnsureSupportedDockerVersion checks if correct docker is installed.
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index cadd83273..1942f50d7 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -767,22 +767,18 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
return err
}
-// TODO(b/127675828): support getxattr.
func (*localFile) GetXattr(string, uint64) (string, error) {
return "", syscall.EOPNOTSUPP
}
-// TODO(b/127675828): support setxattr.
func (*localFile) SetXattr(string, string, uint32) error {
return syscall.EOPNOTSUPP
}
-// TODO(b/148303075): support listxattr.
func (*localFile) ListXattr(uint64) (map[string]struct{}, error) {
return nil, syscall.EOPNOTSUPP
}
-// TODO(b/148303075): support removexattr.
func (*localFile) RemoveXattr(string) error {
return syscall.EOPNOTSUPP
}
diff --git a/runsc/main.go b/runsc/main.go
index 762b0f801..2baba90f8 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -54,9 +54,11 @@ var (
// Debugging flags.
debugLog = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
+ panicLog = flag.String("panic-log", "", "file path were panic reports and other Go's runtime messages are written.")
logPackets = flag.Bool("log-packets", false, "enable network packet logging.")
logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.")
debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.")
+ panicLogFD = flag.Int("panic-log-fd", -1, "file descriptor to write Go's runtime messages.")
debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.")
alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr.")
@@ -82,6 +84,7 @@ var (
rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
referenceLeakMode = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.")
cpuNumFromQuota = flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
+ vfs2Enabled = flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. This uses the new experimental VFS layer.")
// Test flags, not to be used outside tests, ever.
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
@@ -116,8 +119,8 @@ func main() {
subcommands.Register(new(cmd.Resume), "")
subcommands.Register(new(cmd.Run), "")
subcommands.Register(new(cmd.Spec), "")
- subcommands.Register(new(cmd.Start), "")
subcommands.Register(new(cmd.State), "")
+ subcommands.Register(new(cmd.Start), "")
subcommands.Register(new(cmd.Wait), "")
// Register internal commands with the internal group name. This causes
@@ -127,6 +130,7 @@ func main() {
subcommands.Register(new(cmd.Boot), internalGroup)
subcommands.Register(new(cmd.Debug), internalGroup)
subcommands.Register(new(cmd.Gofer), internalGroup)
+ subcommands.Register(new(cmd.Statefile), internalGroup)
// All subcommands must be registered before flag parsing.
flag.Parse()
@@ -205,6 +209,7 @@ func main() {
LogFilename: *logFilename,
LogFormat: *logFormat,
DebugLog: *debugLog,
+ PanicLog: *panicLog,
DebugLogFormat: *debugLogFormat,
FileAccess: fsAccess,
FSGoferHostUDS: *fsGoferHostUDS,
@@ -226,6 +231,7 @@ func main() {
ReferenceLeakMode: refsLeakMode,
OverlayfsStaleRead: *overlayfsStaleRead,
CPUNumFromQuota: *cpuNumFromQuota,
+ VFS2: *vfs2Enabled,
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
TestOnlyTestNameEnv: *testOnlyTestNameEnv,
@@ -257,20 +263,6 @@ func main() {
if *debugLogFD > -1 {
f := os.NewFile(uintptr(*debugLogFD), "debug log file")
- // Quick sanity check to make sure no other commands get passed
- // a log fd (they should use log dir instead).
- if subcommand != "boot" && subcommand != "gofer" {
- cmd.Fatalf("flag --debug-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand)
- }
-
- // If we are the boot process, then we own our stdio FDs and can do what we
- // want with them. Since Docker and Containerd both eat boot's stderr, we
- // dup our stderr to the provided log FD so that panics will appear in the
- // logs, rather than just disappear.
- if err := syscall.Dup3(int(f.Fd()), int(os.Stderr.Fd()), 0); err != nil {
- cmd.Fatalf("error dup'ing fd %d to stderr: %v", f.Fd(), err)
- }
-
e = newEmitter(*debugLogFormat, f)
} else if *debugLog != "" {
@@ -286,7 +278,25 @@ func main() {
e = newEmitter("text", ioutil.Discard)
}
- if *alsoLogToStderr {
+ if *panicLogFD > -1 || *debugLogFD > -1 {
+ fd := *panicLogFD
+ if fd < 0 {
+ fd = *debugLogFD
+ }
+ // Quick sanity check to make sure no other commands get passed
+ // a log fd (they should use log dir instead).
+ if subcommand != "boot" && subcommand != "gofer" {
+ cmd.Fatalf("flags --debug-log-fd and --panic-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand)
+ }
+
+ // If we are the boot process, then we own our stdio FDs and can do what we
+ // want with them. Since Docker and Containerd both eat boot's stderr, we
+ // dup our stderr to the provided log FD so that panics will appear in the
+ // logs, rather than just disappear.
+ if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil {
+ cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err)
+ }
+ } else if *alsoLogToStderr {
e = &log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)}
}
@@ -303,6 +313,7 @@ func main() {
log.Infof("\t\tFileAccess: %v, overlay: %t", conf.FileAccess, conf.Overlay)
log.Infof("\t\tNetwork: %v, logging: %t", conf.Network, conf.LogPackets)
log.Infof("\t\tStrace: %t, max size: %d, syscalls: %s", conf.Strace, conf.StraceLogSize, conf.StraceSyscalls)
+ log.Infof("\t\tVFS2 enabled: %v", conf.VFS2)
log.Infof("***************************")
if *testOnlyAllowRunAsCurrentUserWithoutChroot {
@@ -332,11 +343,11 @@ func main() {
func newEmitter(format string, logFile io.Writer) log.Emitter {
switch format {
case "text":
- return &log.GoogleEmitter{log.Writer{Next: logFile}}
+ return log.GoogleEmitter{&log.Writer{Next: logFile}}
case "json":
- return &log.JSONEmitter{log.Writer{Next: logFile}}
+ return log.JSONEmitter{&log.Writer{Next: logFile}}
case "json-k8s":
- return &log.K8sJSONEmitter{log.Writer{Next: logFile}}
+ return log.K8sJSONEmitter{&log.Writer{Next: logFile}}
}
cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format)
panic("unreachable")
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index ff48f5646..bc093fba5 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -21,7 +21,6 @@ import (
"path/filepath"
"runtime"
"strconv"
- "strings"
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -75,30 +74,8 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
}
func createDefaultLoopbackInterface(conn *urpc.Client) error {
- link := boot.LoopbackLink{
- Name: "lo",
- Addresses: []net.IP{
- net.IP("\x7f\x00\x00\x01"),
- net.IPv6loopback,
- },
- Routes: []boot.Route{
- {
- Destination: net.IPNet{
-
- IP: net.IPv4(0x7f, 0, 0, 0),
- Mask: net.IPv4Mask(0xff, 0, 0, 0),
- },
- },
- {
- Destination: net.IPNet{
- IP: net.IPv6loopback,
- Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)),
- },
- },
- },
- }
if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{
- LoopbackLinks: []boot.LoopbackLink{link},
+ LoopbackLinks: []boot.LoopbackLink{boot.DefaultLoopbackLink},
}, nil); err != nil {
return fmt.Errorf("creating loopback link and routes: %v", err)
}
@@ -174,13 +151,13 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
return fmt.Errorf("fetching interface addresses for %q: %v", iface.Name, err)
}
- // We build our own loopback devices.
+ // We build our own loopback device.
if iface.Flags&net.FlagLoopback != 0 {
- links, err := loopbackLinks(iface, allAddrs)
+ link, err := loopbackLink(iface, allAddrs)
if err != nil {
- return fmt.Errorf("getting loopback routes and links for iface %q: %v", iface.Name, err)
+ return fmt.Errorf("getting loopback link for iface %q: %v", iface.Name, err)
}
- args.LoopbackLinks = append(args.LoopbackLinks, links...)
+ args.LoopbackLinks = append(args.LoopbackLinks, link)
continue
}
@@ -339,25 +316,25 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) (
return &socketEntry{deviceFile, gsoMaxSize}, nil
}
-// loopbackLinks collects the links for a loopback interface.
-func loopbackLinks(iface net.Interface, addrs []net.Addr) ([]boot.LoopbackLink, error) {
- var links []boot.LoopbackLink
+// loopbackLink returns the link with addresses and routes for a loopback
+// interface.
+func loopbackLink(iface net.Interface, addrs []net.Addr) (boot.LoopbackLink, error) {
+ link := boot.LoopbackLink{
+ Name: iface.Name,
+ }
for _, addr := range addrs {
ipNet, ok := addr.(*net.IPNet)
if !ok {
- return nil, fmt.Errorf("address is not IPNet: %+v", addr)
+ return boot.LoopbackLink{}, fmt.Errorf("address is not IPNet: %+v", addr)
}
dst := *ipNet
dst.IP = dst.IP.Mask(dst.Mask)
- links = append(links, boot.LoopbackLink{
- Name: iface.Name,
- Addresses: []net.IP{ipNet.IP},
- Routes: []boot.Route{{
- Destination: dst,
- }},
+ link.Addresses = append(link.Addresses, ipNet.IP)
+ link.Routes = append(link.Routes, boot.Route{
+ Destination: dst,
})
}
- return links, nil
+ return link, nil
}
// routesForIface iterates over all routes for the given interface and converts
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index ec72bdbfd..e82bcef6f 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -18,10 +18,12 @@ package sandbox
import (
"context"
"fmt"
+ "io"
"math"
"os"
"os/exec"
"strconv"
+ "strings"
"syscall"
"time"
@@ -142,7 +144,19 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) {
// Wait until the sandbox has booted.
b := make([]byte, 1)
if l, err := clientSyncFile.Read(b); err != nil || l != 1 {
- return nil, fmt.Errorf("waiting for sandbox to start: %v", err)
+ err := fmt.Errorf("waiting for sandbox to start: %v", err)
+ // If the sandbox failed to start, it may be because the binary
+ // permissions were incorrect. Check the bits and return a more helpful
+ // error message.
+ //
+ // NOTE: The error message is checked because error types are lost over
+ // rpc calls.
+ if strings.Contains(err.Error(), io.EOF.Error()) {
+ if permsErr := checkBinaryPermissions(conf); permsErr != nil {
+ return nil, fmt.Errorf("%v: %v", err, permsErr)
+ }
+ }
+ return nil, err
}
c.Release()
@@ -369,8 +383,24 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
cmd.Args = append(cmd.Args, "--debug-log-fd="+strconv.Itoa(nextFD))
nextFD++
}
+ if conf.PanicLog != "" {
+ test := ""
+ if len(conf.TestOnlyTestNameEnv) != 0 {
+ // Fetch test name if one is provided and the test only flag was set.
+ if t, ok := specutils.EnvVar(args.Spec.Process.Env, conf.TestOnlyTestNameEnv); ok {
+ test = t
+ }
+ }
- cmd.Args = append(cmd.Args, "--panic-signal="+strconv.Itoa(int(syscall.SIGTERM)))
+ panicLogFile, err := specutils.DebugLogFile(conf.PanicLog, "panic", test)
+ if err != nil {
+ return fmt.Errorf("opening debug log file in %q: %v", conf.PanicLog, err)
+ }
+ defer panicLogFile.Close()
+ cmd.ExtraFiles = append(cmd.ExtraFiles, panicLogFile)
+ cmd.Args = append(cmd.Args, "--panic-log-fd="+strconv.Itoa(nextFD))
+ nextFD++
+ }
// Add the "boot" command to the args.
//
@@ -426,6 +456,12 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
nextFD++
}
+ // TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff
+ // isn't set.
+ if conf.Platform == "kvm" {
+ cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1")
+ }
+
// The current process' stdio must be passed to the application via the
// --stdio-fds flag. The stdio of the sandbox process itself must not
// be connected to the same FDs, otherwise we risk leaking sandbox
@@ -564,45 +600,32 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
cmd.Args = append(cmd.Args, "--setup-root")
+ const nobody = 65534
if conf.Rootless {
- log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
+ log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
- ContainerID: 0,
+ ContainerID: nobody,
HostID: os.Getuid(),
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
- ContainerID: 0,
+ ContainerID: nobody,
HostID: os.Getgid(),
Size: 1,
},
}
- cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
} else {
// Map nobody in the new namespace to nobody in the parent namespace.
//
// A sandbox process will construct an empty
- // root for itself, so it has to have the CAP_SYS_ADMIN
- // capability.
- //
- // FIXME(b/122554829): The current implementations of
- // os/exec doesn't allow to set ambient capabilities if
- // a process is started in a new user namespace. As a
- // workaround, we start the sandbox process with the 0
- // UID and then it constructs a chroot and sets UID to
- // nobody. https://github.com/golang/go/issues/2315
- const nobody = 65534
+ // root for itself, so it has to have
+ // CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
- ContainerID: 0,
- HostID: nobody - 1,
- Size: 1,
- },
- {
ContainerID: nobody,
HostID: nobody,
Size: 1,
@@ -615,11 +638,11 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
Size: 1,
},
}
-
- // Set credentials to run as user and group nobody.
- cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
}
+ // Set credentials to run as user and group nobody.
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
+ cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
@@ -677,6 +700,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
nextFD++
}
+ if args.Attached {
+ // Kill sandbox if parent process exits in attached mode.
+ cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
+ // Tells boot that any process it creates must have pdeathsig set.
+ cmd.Args = append(cmd.Args, "--attached")
+ }
+
// Add container as the last argument.
cmd.Args = append(cmd.Args, s.ID)
@@ -685,15 +715,22 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
log.Debugf("Donating FD %d: %q", i+3, f.Name())
}
- if args.Attached {
- // Kill sandbox if parent process exits in attached mode.
- cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
- }
-
log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr)
if err := specutils.StartInNS(cmd, nss); err != nil {
- return fmt.Errorf("Sandbox: %v", err)
+ err := fmt.Errorf("starting sandbox: %v", err)
+ // If the sandbox failed to start, it may be because the binary
+ // permissions were incorrect. Check the bits and return a more helpful
+ // error message.
+ //
+ // NOTE: The error message is checked because error types are lost over
+ // rpc calls.
+ if strings.Contains(err.Error(), syscall.EACCES.Error()) {
+ if permsErr := checkBinaryPermissions(conf); permsErr != nil {
+ return fmt.Errorf("%v: %v", err, permsErr)
+ }
+ }
+ return err
}
s.child = true
s.Pid = cmd.Process.Pid
@@ -972,6 +1009,66 @@ func (s *Sandbox) StopCPUProfile() error {
return nil
}
+// GoroutineProfile writes a goroutine profile to the given file.
+func (s *Sandbox) GoroutineProfile(f *os.File) error {
+ log.Debugf("Goroutine profile %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ opts := control.ProfileOpts{
+ FilePayload: urpc.FilePayload{
+ Files: []*os.File{f},
+ },
+ }
+ if err := conn.Call(boot.GoroutineProfile, &opts, nil); err != nil {
+ return fmt.Errorf("getting sandbox %q goroutine profile: %v", s.ID, err)
+ }
+ return nil
+}
+
+// BlockProfile writes a block profile to the given file.
+func (s *Sandbox) BlockProfile(f *os.File) error {
+ log.Debugf("Block profile %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ opts := control.ProfileOpts{
+ FilePayload: urpc.FilePayload{
+ Files: []*os.File{f},
+ },
+ }
+ if err := conn.Call(boot.BlockProfile, &opts, nil); err != nil {
+ return fmt.Errorf("getting sandbox %q block profile: %v", s.ID, err)
+ }
+ return nil
+}
+
+// MutexProfile writes a mutex profile to the given file.
+func (s *Sandbox) MutexProfile(f *os.File) error {
+ log.Debugf("Mutex profile %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ opts := control.ProfileOpts{
+ FilePayload: urpc.FilePayload{
+ Files: []*os.File{f},
+ },
+ }
+ if err := conn.Call(boot.MutexProfile, &opts, nil); err != nil {
+ return fmt.Errorf("getting sandbox %q mutex profile: %v", s.ID, err)
+ }
+ return nil
+}
+
// StartTrace start trace writing to the given file.
func (s *Sandbox) StartTrace(f *os.File) error {
log.Debugf("Trace start %q", s.ID)
@@ -1096,3 +1193,31 @@ func deviceFileForPlatform(name string) (*os.File, error) {
}
return f, nil
}
+
+// checkBinaryPermissions verifies that the required binary bits are set on
+// the runsc executable.
+func checkBinaryPermissions(conf *boot.Config) error {
+ // All platforms need the other exe bit
+ neededBits := os.FileMode(0001)
+ if conf.Platform == platforms.Ptrace {
+ // Ptrace needs the other read bit
+ neededBits |= os.FileMode(0004)
+ }
+
+ exePath, err := os.Executable()
+ if err != nil {
+ return fmt.Errorf("getting exe path: %v", err)
+ }
+
+ // Check the permissions of the runsc binary and print an error if it
+ // doesn't match expectations.
+ info, err := os.Stat(exePath)
+ if err != nil {
+ return fmt.Errorf("stat file: %v", err)
+ }
+
+ if info.Mode().Perm()&neededBits != neededBits {
+ return fmt.Errorf(specutils.FaqErrorMsg("runsc-perms", fmt.Sprintf("%s does not have the correct permissions", exePath)))
+ }
+ return nil
+}
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index c7dd3051c..60bb7b7ee 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -252,6 +252,9 @@ func MaybeRunAsRoot() error {
},
Credential: &syscall.Credential{Uid: 0, Gid: 0},
GidMappingsEnableSetgroups: false,
+
+ // Make sure child is killed when the parent terminates.
+ Pdeathsig: syscall.SIGKILL,
}
cmd.Env = os.Environ()
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index d3c2e4e78..837d5e238 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -92,6 +92,12 @@ func ValidateSpec(spec *specs.Spec) error {
log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile)
}
+ // PR_SET_NO_NEW_PRIVS is assumed to always be set.
+ // See kernel.Task.updateCredsForExecLocked.
+ if !spec.Process.NoNewPrivileges {
+ log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.")
+ }
+
// TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox.
if spec.Linux != nil && spec.Linux.Seccomp != nil {
log.Warningf("Seccomp spec is being ignored")
@@ -528,3 +534,8 @@ func EnvVar(env []string, name string) (string, bool) {
}
return "", false
}
+
+// FaqErrorMsg returns an error message pointing to the FAQ.
+func FaqErrorMsg(anchor, msg string) string {
+ return fmt.Sprintf("%s; see https://gvisor.dev/faq#%s for more details", msg, anchor)
+}
diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go
index 92d677e71..5e09f8f16 100644
--- a/runsc/testutil/testutil.go
+++ b/runsc/testutil/testutil.go
@@ -31,11 +31,13 @@ import (
"os"
"os/exec"
"os/signal"
+ "path"
"path/filepath"
"strconv"
"strings"
"sync/atomic"
"syscall"
+ "testing"
"time"
"github.com/cenkalti/backoff"
@@ -81,24 +83,24 @@ func ConfigureExePath() error {
// TestConfig returns the default configuration to use in tests. Note that
// 'RootDir' must be set by caller if required.
-func TestConfig() *boot.Config {
+func TestConfig(t *testing.T) *boot.Config {
logDir := ""
if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok {
logDir = dir + "/"
}
return &boot.Config{
- Debug: true,
- DebugLog: logDir,
- LogFormat: "text",
- DebugLogFormat: "text",
- AlsoLogToStderr: true,
- LogPackets: true,
- Network: boot.NetworkNone,
- Strace: true,
- Platform: "ptrace",
- FileAccess: boot.FileAccessExclusive,
+ Debug: true,
+ DebugLog: path.Join(logDir, "runsc.log."+t.Name()+".%TIMESTAMP%.%COMMAND%"),
+ LogFormat: "text",
+ DebugLogFormat: "text",
+ LogPackets: true,
+ Network: boot.NetworkNone,
+ Strace: true,
+ Platform: "ptrace",
+ FileAccess: boot.FileAccessExclusive,
+ NumNetworkChannels: 1,
+
TestOnlyAllowRunAsCurrentUserWithoutChroot: true,
- NumNetworkChannels: 1,
}
}