diff options
Diffstat (limited to 'runsc')
57 files changed, 3579 insertions, 4794 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index ae4dd102a..a907c103b 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -12,18 +12,15 @@ go_library( "controller.go", "debug.go", "events.go", - "fds.go", "fs.go", "limits.go", "loader.go", - "loader_amd64.go", - "loader_arm64.go", "network.go", - "pprof.go", "strace.go", - "user.go", + "vfs.go", ], visibility = [ + "//pkg/test:__subpackages__", "//runsc:__subpackages__", "//test:__subpackages__", ], @@ -34,6 +31,7 @@ go_library( "//pkg/control/server", "//pkg/cpuid", "//pkg/eventchannel", + "//pkg/fspath", "//pkg/log", "//pkg/memutil", "//pkg/rand", @@ -41,6 +39,8 @@ go_library( "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/control", + "//pkg/sentry/devices/memdev", + "//pkg/sentry/fdimport", "//pkg/sentry/fs", "//pkg/sentry/fs/dev", "//pkg/sentry/fs/gofer", @@ -50,6 +50,14 @@ go_library( "//pkg/sentry/fs/sys", "//pkg/sentry/fs/tmpfs", "//pkg/sentry/fs/tty", + "//pkg/sentry/fs/user", + "//pkg/sentry/fsimpl/devpts", + "//pkg/sentry/fsimpl/devtmpfs", + "//pkg/sentry/fsimpl/gofer", + "//pkg/sentry/fsimpl/host", + "//pkg/sentry/fsimpl/proc", + "//pkg/sentry/fsimpl/sys", + "//pkg/sentry/fsimpl/tmpfs", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel:uncaught_signal_go_proto", @@ -67,17 +75,18 @@ go_library( "//pkg/sentry/socket/unix", "//pkg/sentry/state", "//pkg/sentry/strace", - "//pkg/sentry/syscalls/linux", "//pkg/sentry/syscalls/linux/vfs2", "//pkg/sentry/time", "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", "//pkg/sentry/usage", + "//pkg/sentry/vfs", "//pkg/sentry/watchdog", "//pkg/sync", "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/link/fdbased", "//pkg/tcpip/link/loopback", + "//pkg/tcpip/link/qdisc/fifo", "//pkg/tcpip/link/sniffer", "//pkg/tcpip/network/arp", "//pkg/tcpip/network/ipv4", @@ -88,9 +97,9 @@ go_library( "//pkg/tcpip/transport/tcp", "//pkg/tcpip/transport/udp", "//pkg/urpc", - "//pkg/usermem", "//runsc/boot/filter", "//runsc/boot/platforms", + "//runsc/boot/pprof", "//runsc/specutils", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", @@ -105,19 +114,20 @@ go_test( "compat_test.go", "fs_test.go", "loader_test.go", - "user_test.go", ], library = ":boot", deps = [ "//pkg/control/server", + "//pkg/fspath", "//pkg/log", "//pkg/p9", "//pkg/sentry/contexttest", "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", + "//pkg/sentry/vfs", "//pkg/sync", "//pkg/unet", "//runsc/fsgofer", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index 8995d678e..84c67cbc2 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -65,7 +65,7 @@ func newCompatEmitter(logFD int) (*compatEmitter, error) { if logFD > 0 { f := os.NewFile(uintptr(logFD), "user log file") - target := &log.MultiEmitter{c.sink, &log.K8sJSONEmitter{log.Writer{Next: f}}} + target := &log.MultiEmitter{c.sink, log.K8sJSONEmitter{&log.Writer{Next: f}}} c.sink = &log.BasicLogger{Level: log.Info, Emitter: target} } return c, nil @@ -119,7 +119,13 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) { } if tr.shouldReport(regs) { - c.sink.Infof("Unsupported syscall: %s, regs: %+v", c.nameMap.Name(uintptr(sysnr)), regs) + name := c.nameMap.Name(uintptr(sysnr)) + c.sink.Infof("Unsupported syscall %s(%#x,%#x,%#x,%#x,%#x,%#x). It is "+ + "likely that you can safely ignore this message and that this is not "+ + "the cause of any error. Please, refer to %s/%s for more information.", + name, argVal(0, regs), argVal(1, regs), argVal(2, regs), argVal(3, regs), + argVal(4, regs), argVal(5, regs), syscallLink, name) + tr.onReported(regs) } } diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go index 42b0ca8b0..8eb76b2ba 100644 --- a/runsc/boot/compat_amd64.go +++ b/runsc/boot/compat_amd64.go @@ -24,8 +24,12 @@ import ( "gvisor.dev/gvisor/pkg/sentry/strace" ) -// reportLimit is the max number of events that should be reported per tracker. -const reportLimit = 100 +const ( + // reportLimit is the max number of events that should be reported per + // tracker. + reportLimit = 100 + syscallLink = "https://gvisor.dev/c/linux/amd64" +) // newRegs create a empty Registers instance. func newRegs() *rpb.Registers { @@ -36,22 +40,22 @@ func newRegs() *rpb.Registers { } } -func argVal(argIdx int, regs *rpb.Registers) uint32 { +func argVal(argIdx int, regs *rpb.Registers) uint64 { amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64 switch argIdx { case 0: - return uint32(amd64Regs.Rdi) + return amd64Regs.Rdi case 1: - return uint32(amd64Regs.Rsi) + return amd64Regs.Rsi case 2: - return uint32(amd64Regs.Rdx) + return amd64Regs.Rdx case 3: - return uint32(amd64Regs.R10) + return amd64Regs.R10 case 4: - return uint32(amd64Regs.R8) + return amd64Regs.R8 case 5: - return uint32(amd64Regs.R9) + return amd64Regs.R9 } panic(fmt.Sprintf("invalid syscall argument index %d", argIdx)) } diff --git a/runsc/boot/compat_arm64.go b/runsc/boot/compat_arm64.go index f784cd237..bce9d95b3 100644 --- a/runsc/boot/compat_arm64.go +++ b/runsc/boot/compat_arm64.go @@ -23,8 +23,12 @@ import ( "gvisor.dev/gvisor/pkg/sentry/strace" ) -// reportLimit is the max number of events that should be reported per tracker. -const reportLimit = 100 +const ( + // reportLimit is the max number of events that should be reported per + // tracker. + reportLimit = 100 + syscallLink = "https://gvisor.dev/c/linux/arm64" +) // newRegs create a empty Registers instance. func newRegs() *rpb.Registers { @@ -35,22 +39,22 @@ func newRegs() *rpb.Registers { } } -func argVal(argIdx int, regs *rpb.Registers) uint32 { +func argVal(argIdx int, regs *rpb.Registers) uint64 { arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64 switch argIdx { case 0: - return uint32(arm64Regs.R0) + return arm64Regs.R0 case 1: - return uint32(arm64Regs.R1) + return arm64Regs.R1 case 2: - return uint32(arm64Regs.R2) + return arm64Regs.R2 case 3: - return uint32(arm64Regs.R3) + return arm64Regs.R3 case 4: - return uint32(arm64Regs.R4) + return arm64Regs.R4 case 5: - return uint32(arm64Regs.R5) + return arm64Regs.R5 } panic(fmt.Sprintf("invalid syscall argument index %d", argIdx)) } diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 35391030f..bcec7e4db 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -158,6 +158,9 @@ type Config struct { // DebugLog is the path to log debug information to, if not empty. DebugLog string + // PanicLog is the path to log GO's runtime messages, if not empty. + PanicLog string + // DebugLogFormat is the log format for debug. DebugLogFormat string @@ -184,6 +187,10 @@ type Config struct { // SoftwareGSO indicates that software segmentation offload is enabled. SoftwareGSO bool + // QDisc indicates the type of queuening discipline to use by default + // for non-loopback interfaces. + QDisc QueueingDiscipline + // LogPackets indicates that all network packets should be logged. LogPackets bool @@ -234,8 +241,10 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeakMode refs.LeakMode - // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for - // write to workaround overlayfs limitation on kernels before 4.19. + // OverlayfsStaleRead instructs the sandbox to assume that the root mount + // is on a Linux overlayfs mount, which does not necessarily preserve + // coherence between read-only and subsequent writable file descriptors + // representing the "same" file. OverlayfsStaleRead bool // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in @@ -269,6 +278,7 @@ func (c *Config) ToFlags() []string { "--log=" + c.LogFilename, "--log-format=" + c.LogFormat, "--debug-log=" + c.DebugLog, + "--panic-log=" + c.PanicLog, "--debug-log-format=" + c.DebugLogFormat, "--file-access=" + c.FileAccess.String(), "--overlay=" + strconv.FormatBool(c.Overlay), @@ -290,6 +300,7 @@ func (c *Config) ToFlags() []string { "--gso=" + strconv.FormatBool(c.HardwareGSO), "--software-gso=" + strconv.FormatBool(c.SoftwareGSO), "--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead), + "--qdisc=" + c.QDisc.String(), } if c.CPUNumFromQuota { f = append(f, "--cpu-num-from-quota") @@ -301,5 +312,10 @@ func (c *Config) ToFlags() []string { if len(c.TestOnlyTestNameEnv) != 0 { f = append(f, "--TESTONLY-test-name-env="+c.TestOnlyTestNameEnv) } + + if c.VFS2 { + f = append(f, "--vfs2=true") + } + return f } diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 9c9e94864..8125d5061 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -32,6 +32,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/boot/pprof" "gvisor.dev/gvisor/runsc/specutils" ) @@ -100,11 +101,14 @@ const ( // Profiling related commands (see pprof.go for more details). const ( - StartCPUProfile = "Profile.StartCPUProfile" - StopCPUProfile = "Profile.StopCPUProfile" - HeapProfile = "Profile.HeapProfile" - StartTrace = "Profile.StartTrace" - StopTrace = "Profile.StopTrace" + StartCPUProfile = "Profile.StartCPUProfile" + StopCPUProfile = "Profile.StopCPUProfile" + HeapProfile = "Profile.HeapProfile" + GoroutineProfile = "Profile.GoroutineProfile" + BlockProfile = "Profile.BlockProfile" + MutexProfile = "Profile.MutexProfile" + StartTrace = "Profile.StartTrace" + StopTrace = "Profile.StopTrace" ) // Logging related commands (see logging.go for more details). @@ -142,7 +146,7 @@ func newController(fd int, l *Loader) (*controller, error) { } srv.Register(manager) - if eps, ok := l.k.NetworkStack().(*netstack.Stack); ok { + if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok { net := &Network{ Stack: eps.Stack, } @@ -341,7 +345,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { return fmt.Errorf("creating memory file: %v", err) } k.SetMemoryFile(mf) - networkStack := cm.l.k.NetworkStack() + networkStack := cm.l.k.RootNetworkNamespace().Stack() cm.l.k = k // Set up the restore environment. @@ -365,9 +369,9 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { } if cm.l.conf.ProfileEnable { - // initializePProf opens /proc/self/maps, so has to be - // called before installing seccomp filters. - initializePProf() + // pprof.Initialize opens /proc/self/maps, so has to be called before + // installing seccomp filters. + pprof.Initialize() } // Seccomp filters have to be applied before parsing the state file. diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go deleted file mode 100644 index 417d2d5fb..000000000 --- a/runsc/boot/fds.go +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -import ( - "fmt" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/host" - "gvisor.dev/gvisor/pkg/sentry/kernel" -) - -// createFDTable creates an FD table that contains stdin, stdout, and stderr. -// If console is true, then ioctl calls will be passed through to the host FD. -// Upon success, createFDMap dups then closes stdioFDs. -func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) { - if len(stdioFDs) != 3 { - return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) - } - - k := kernel.KernelFromContext(ctx) - fdTable := k.NewFDTable() - defer fdTable.DecRef() - mounter := fs.FileOwnerFromContext(ctx) - - var ttyFile *fs.File - for appFD, hostFD := range stdioFDs { - var appFile *fs.File - - if console && appFD < 3 { - // Import the file as a host TTY file. - if ttyFile == nil { - var err error - appFile, err = host.ImportFile(ctx, hostFD, mounter, true /* isTTY */) - if err != nil { - return nil, err - } - defer appFile.DecRef() - - // Remember this in the TTY file, as we will - // use it for the other stdio FDs. - ttyFile = appFile - } else { - // Re-use the existing TTY file, as all three - // stdio FDs must point to the same fs.File in - // order to share TTY state, specifically the - // foreground process group id. - appFile = ttyFile - } - } else { - // Import the file as a regular host file. - var err error - appFile, err = host.ImportFile(ctx, hostFD, mounter, false /* isTTY */) - if err != nil { - return nil, err - } - defer appFile.DecRef() - } - - // Add the file to the FD map. - if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { - return nil, err - } - } - - fdTable.IncRef() - return fdTable, nil -} diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index c69f4c602..60e33425f 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -44,7 +44,7 @@ var allowedSyscalls = seccomp.SyscallRules{ { seccomp.AllowAny{}, seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.AllowValue(syscall.O_CLOEXEC), }, }, syscall.SYS_EPOLL_CREATE1: {}, @@ -229,7 +229,11 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_NANOSLEEP: {}, syscall.SYS_PPOLL: {}, syscall.SYS_PREAD64: {}, + syscall.SYS_PREADV: {}, + unix.SYS_PREADV2: {}, syscall.SYS_PWRITE64: {}, + syscall.SYS_PWRITEV: {}, + unix.SYS_PWRITEV2: {}, syscall.SYS_READ: {}, syscall.SYS_RECVMSG: []seccomp.Rule{ { @@ -282,12 +286,29 @@ var allowedSyscalls = seccomp.SyscallRules{ {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)}, }, syscall.SYS_SIGALTSTACK: {}, + unix.SYS_STATX: {}, syscall.SYS_SYNC_FILE_RANGE: {}, + syscall.SYS_TEE: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(1), /* len */ + seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */ + }, + }, syscall.SYS_TGKILL: []seccomp.Rule{ { seccomp.AllowValue(uint64(os.Getpid())), }, }, + syscall.SYS_UTIMENSAT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(0), /* null pathname */ + seccomp.AllowAny{}, + seccomp.AllowValue(0), /* flags */ + }, + }, syscall.SYS_WRITE: {}, // The only user in rawfile.NonBlockingWrite3 always passes iovcnt with // values 2 or 3. Three iovec-s are passed, when the PACKET_VNET_HDR diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go index 5e5a3c998..209e646a7 100644 --- a/runsc/boot/filter/extra_filters_msan.go +++ b/runsc/boot/filter/extra_filters_msan.go @@ -26,6 +26,8 @@ import ( func instrumentationFilters() seccomp.SyscallRules { Report("MSAN is enabled: syscall filters less restrictive!") return seccomp.SyscallRules{ + syscall.SYS_CLONE: {}, + syscall.SYS_MMAP: {}, syscall.SYS_SCHED_GETAFFINITY: {}, syscall.SYS_SET_ROBUST_LIST: {}, } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 0f62842ea..b98a1eb50 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -37,6 +37,13 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/gofer" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" + procvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" + sysvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" + tmpfsvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" @@ -44,27 +51,19 @@ import ( ) const ( - // Filesystem name for 9p gofer mounts. - rootFsName = "9p" - // Device name for root mount. rootDevice = "9pfs-/" // MountPrefix is the annotation prefix for mount hints. MountPrefix = "dev.gvisor.spec.mount." - // Filesystems that runsc supports. - bind = "bind" - devpts = "devpts" - devtmpfs = "devtmpfs" - proc = "proc" - sysfs = "sysfs" - tmpfs = "tmpfs" - nonefs = "none" + // Supported filesystems that map to different internal filesystem. + bind = "bind" + nonefs = "none" ) // tmpfs has some extra supported options that we must pass through. -var tmpfsAllowedOptions = []string{"mode", "uid", "gid"} +var tmpfsAllowedData = []string{"mode", "uid", "gid"} func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) { // Upper layer uses the same flags as lower, but it must be read-write. @@ -108,12 +107,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // Always mount /dev. mounts = append(mounts, specs.Mount{ - Type: devtmpfs, + Type: devtmpfs.Name, Destination: "/dev", }) mounts = append(mounts, specs.Mount{ - Type: devpts, + Type: devpts.Name, Destination: "/dev/pts", }) @@ -137,13 +136,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount { var mandatoryMounts []specs.Mount if !procMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: proc, + Type: procvfs2.Name, Destination: "/proc", }) } if !sysMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: sysfs, + Type: sysvfs2.Name, Destination: "/sys", }) } @@ -155,13 +154,17 @@ func compileMounts(spec *specs.Spec) []specs.Mount { return mounts } -// p9MountOptions creates a slice of options for a p9 mount. -func p9MountOptions(fd int, fa FileAccessType) []string { +// p9MountData creates a slice of p9 mount data. +func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string { opts := []string{ "trans=fd", "rfdno=" + strconv.Itoa(fd), "wfdno=" + strconv.Itoa(fd), - "privateunixsocket=true", + } + if !vfs2 { + // privateunixsocket is always enabled in VFS2. VFS1 requires explicit + // enablement. + opts = append(opts, "privateunixsocket=true") } if fa == FileAccessShared { opts = append(opts, "cache=remote_revalidating") @@ -231,8 +234,8 @@ func isSupportedMountFlag(fstype, opt string) bool { case "rw", "ro", "noatime", "noexec": return true } - if fstype == tmpfs { - ok, err := parseMountOption(opt, tmpfsAllowedOptions...) + if fstype == tmpfsvfs2.Name { + ok, err := parseMountOption(opt, tmpfsAllowedData...) return ok && err == nil } return false @@ -278,6 +281,9 @@ func subtargets(root string, mnts []specs.Mount) []string { } func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error { + if conf.VFS2 { + return setupContainerVFS2(ctx, conf, mntr, procArgs) + } mns, err := mntr.setupFS(conf, procArgs) if err != nil { return err @@ -286,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, // Set namespace here so that it can be found in ctx. procArgs.MountNamespace = mns - return setExecutablePath(ctx, procArgs) -} - -// setExecutablePath sets the procArgs.Filename by searching the PATH for an -// executable matching the procArgs.Argv[0]. -func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - paths := fs.GetPath(procArgs.Envv) - exe := procArgs.Argv[0] - f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths) + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) if err != nil { - return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err) + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } procArgs.Filename = f return nil @@ -438,7 +437,7 @@ func (m *mountHint) setOptions(val string) error { } func (m *mountHint) isSupported() bool { - return m.mount.Type == tmpfs && m.share == pod + return m.mount.Type == tmpfsvfs2.Name && m.share == pod } // checkCompatible verifies that shared mount is compatible with master. @@ -573,11 +572,14 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin // should be mounted (e.g. a volume shared between containers). It must be // called for the root container only. func (c *containerMounter) processHints(conf *Config) error { + if conf.VFS2 { + return nil + } ctx := c.k.SupervisorContext() for _, hint := range c.hints.mounts { // TODO(b/142076984): Only support tmpfs for now. Bind mounts require a // common gofer to mount all shared volumes. - if hint.mount.Type != tmpfs { + if hint.mount.Type != tmpfsvfs2.Name { continue } log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type) @@ -714,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (* fd := c.fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) p9FS := mustFindFilesystem("9p") - opts := p9MountOptions(fd, conf.FileAccess) + opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) if conf.OverlayfsStaleRead { // We can't check for overlayfs here because sandbox is chroot'ed and gofer @@ -760,30 +762,27 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( ) switch m.Type { - case devpts, devtmpfs, proc, sysfs: + case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name: fsName = m.Type case nonefs: - fsName = sysfs - case tmpfs: + fsName = sysvfs2.Name + case tmpfsvfs2.Name: fsName = m.Type var err error - opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...) + opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) if err != nil { return "", nil, false, err } case bind: fd := c.fds.remove() - fsName = "9p" - opts = p9MountOptions(fd, c.getMountAccessType(m)) + fsName = gofervfs2.Name + opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly default: - // TODO(nlacasse): Support all the mount types and make this a fatal error. - // Most applications will "just work" without them, so this is a warning - // for now. log.Warningf("ignoring unknown filesystem type %q", m.Type) } return fsName, opts, useOverlay, nil @@ -824,7 +823,20 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil) if err != nil { - return fmt.Errorf("creating mount with source %q: %v", m.Source, err) + err := fmt.Errorf("creating mount with source %q: %v", m.Source, err) + // Check to see if this is a common error due to a Linux bug. + // This error is generated here in order to cause it to be + // printed to the user using Docker via 'runsc create' etc. rather + // than simply printed to the logs for the 'runsc boot' command. + // + // We check the error message string rather than type because the + // actual error types (syscall.EIO, syscall.EPIPE) are lost by file system + // implementation (e.g. p9). + // TODO(gvisor.dev/issue/1765): Remove message when bug is resolved. + if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) { + return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug")) + } + return err } // If there are submounts, we need to overlay the mount on top of a ramfs @@ -919,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // Add root mount. fd := c.fds.remove() - opts := p9MountOptions(fd, conf.FileAccess) + opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) mf := fs.MountSourceFlags{} if c.root.Readonly || conf.Overlay { @@ -931,7 +943,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn Flags: mf, DataString: strings.Join(opts, ","), } - renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount) + renv.MountSources[gofervfs2.Name] = append(renv.MountSources[gofervfs2.Name], rootMount) // Add submounts. var tmpMounted bool @@ -947,7 +959,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // TODO(b/67958150): handle '/tmp' properly (see mountTmp()). if !tmpMounted { tmpMount := specs.Mount{ - Type: tmpfs, + Type: tmpfsvfs2.Name, Destination: "/tmp", } if err := c.addRestoreMount(conf, renv, tmpMount); err != nil { @@ -1003,11 +1015,11 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M // No '/tmp' found (or fallthrough from above). Safe to mount internal // tmpfs. tmpMount := specs.Mount{ - Type: tmpfs, + Type: tmpfsvfs2.Name, Destination: "/tmp", // Sticky bit is added to prevent accidental deletion of files from // another user. This is normally done for /tmp. - Options: []string{"mode=1777"}, + Options: []string{"mode=01777"}, } return c.mountSubmount(ctx, conf, mns, root, tmpMount) diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 9f0d5d7af..002479612 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -26,16 +26,19 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/memutil" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/fdimport" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/fs/user" + hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -46,9 +49,11 @@ import ( "gvisor.dev/gvisor/pkg/sentry/syscalls/linux/vfs2" "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/arp" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" @@ -60,6 +65,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/transport/udp" "gvisor.dev/gvisor/runsc/boot/filter" _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms. + "gvisor.dev/gvisor/runsc/boot/pprof" "gvisor.dev/gvisor/runsc/specutils" // Include supported socket providers. @@ -137,6 +143,9 @@ type execProcess struct { // tty will be nil if the process is not attached to a terminal. tty *host.TTYFileOperations + // tty will be nil if the process is not attached to a terminal. + ttyVFS2 *hostvfs2.TTYFileDescription + // pidnsPath is the pid namespace path in spec pidnsPath string } @@ -154,13 +163,17 @@ type Args struct { Spec *specs.Spec // Conf is the system configuration. Conf *Config - // ControllerFD is the FD to the URPC controller. + // ControllerFD is the FD to the URPC controller. The Loader takes ownership + // of this FD and may close it at any time. ControllerFD int - // Device is an optional argument that is passed to the platform. + // Device is an optional argument that is passed to the platform. The Loader + // takes ownership of this file and may close it at any time. Device *os.File - // GoferFDs is an array of FDs used to connect with the Gofer. + // GoferFDs is an array of FDs used to connect with the Gofer. The Loader + // takes ownership of these FDs and may close them at any time. GoferFDs []int - // StdioFDs is the stdio for the application. + // StdioFDs is the stdio for the application. The Loader takes ownership of + // these FDs and may close them at any time. StdioFDs []int // Console is set to true if using TTY. Console bool @@ -173,6 +186,9 @@ type Args struct { UserLogFD int } +// make sure stdioFDs are always the same on initial start and on restore +const startingStdioFD = 64 + // New initializes a new kernel loader configured by spec. // New also handles setting up a kernel for restoring a container. func New(args Args) (*Loader, error) { @@ -186,11 +202,10 @@ func New(args Args) (*Loader, error) { return nil, fmt.Errorf("setting up memory usage: %v", err) } + // Is this a VFSv2 kernel? if args.Conf.VFS2 { - st, ok := kernel.LookupSyscallTable(abi.Linux, arch.Host) - if ok { - vfs2.Override(st.Table) - } + kernel.VFS2Enabled = true + vfs2.Override() } // Create kernel and platform. @@ -230,11 +245,8 @@ func New(args Args) (*Loader, error) { return nil, fmt.Errorf("enabling strace: %v", err) } - // Create an empty network stack because the network namespace may be empty at - // this point. Netns is configured before Run() is called. Netstack is - // configured using a control uRPC message. Host network is configured inside - // Run(). - networkStack, err := newEmptyNetworkStack(args.Conf, k, k) + // Create root network namespace/stack. + netns, err := newRootNetworkNamespace(args.Conf, k, k) if err != nil { return nil, fmt.Errorf("creating network: %v", err) } @@ -277,7 +289,7 @@ func New(args Args) (*Loader, error) { FeatureSet: cpuid.HostFeatureSet(), Timekeeper: tk, RootUserNamespace: creds.UserNamespace, - NetworkStack: networkStack, + RootNetworkNamespace: netns, ApplicationCores: uint(args.NumCPU), Vdso: vdso, RootUTSNamespace: kernel.NewUTSNamespace(args.Spec.Hostname, args.Spec.Hostname, creds.UserNamespace), @@ -320,6 +332,38 @@ func New(args Args) (*Loader, error) { return nil, fmt.Errorf("creating pod mount hints: %v", err) } + if kernel.VFS2Enabled { + // Set up host mount that will be used for imported fds. + hostFilesystem, err := hostvfs2.NewFilesystem(k.VFS()) + if err != nil { + return nil, fmt.Errorf("failed to create hostfs filesystem: %v", err) + } + defer hostFilesystem.DecRef() + hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create hostfs mount: %v", err) + } + k.SetHostMount(hostMount) + } + + // Make host FDs stable between invocations. Host FDs must map to the exact + // same number when the sandbox is restored. Otherwise the wrong FD will be + // used. + var stdioFDs []int + newfd := startingStdioFD + for _, fd := range args.StdioFDs { + err := syscall.Dup3(fd, newfd, syscall.O_CLOEXEC) + if err != nil { + return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err) + } + stdioFDs = append(stdioFDs, newfd) + err = syscall.Close(fd) + if err != nil { + return nil, fmt.Errorf("close original stdioFDs failed: %v", err) + } + newfd++ + } + eid := execID{cid: args.ID} l := &Loader{ k: k, @@ -328,7 +372,7 @@ func New(args Args) (*Loader, error) { watchdog: dog, spec: args.Spec, goferFDs: args.GoferFDs, - stdioFDs: args.StdioFDs, + stdioFDs: stdioFDs, rootProcArgs: procArgs, sandboxID: args.ID, processes: map[execID]*execProcess{eid: {}}, @@ -368,11 +412,16 @@ func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel. return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %v", err) } + wd := spec.Process.Cwd + if wd == "" { + wd = "/" + } + // Create the process arguments. procArgs := kernel.CreateProcessArgs{ Argv: spec.Process.Args, Envv: spec.Process.Env, - WorkingDirectory: spec.Process.Cwd, // Defaults to '/' if empty. + WorkingDirectory: wd, Credentials: creds, Umask: 0022, Limits: ls, @@ -466,7 +515,7 @@ func (l *Loader) run() error { // Delay host network configuration to this point because network namespace // is configured after the loader is created and before Run() is called. log.Debugf("Configuring host network") - stack := l.k.NetworkStack().(*hostinet.Stack) + stack := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack) if err := stack.Configure(); err != nil { return err } @@ -483,9 +532,11 @@ func (l *Loader) run() error { // If we are restoring, we do not want to create a process. // l.restore is set by the container manager when a restore call is made. + var ttyFile *host.TTYFileOperations + var ttyFileVFS2 *hostvfs2.TTYFileDescription if !l.restore { if l.conf.ProfileEnable { - initializePProf() + pprof.Initialize() } // Finally done with all configuration. Setup filters before user code @@ -497,13 +548,14 @@ func (l *Loader) run() error { // Create the FD map, which will set stdin, stdout, and stderr. If console // is true, then ioctl calls will be passed through to the host fd. ctx := l.rootProcArgs.NewContext(l.k) - fdTable, err := createFDTable(ctx, l.console, l.stdioFDs) + var err error + + // CreateProcess takes a reference on FDMap if successful. We won't need + // ours either way. + l.rootProcArgs.FDTable, ttyFile, ttyFileVFS2, err = createFDTable(ctx, l.console, l.stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } - // CreateProcess takes a reference on FDMap if successful. We won't need - // ours either way. - l.rootProcArgs.FDTable = fdTable // Setup the root container file system. l.startGoferMonitor(l.sandboxID, l.goferFDs) @@ -517,7 +569,15 @@ func (l *Loader) run() error { } // Add the HOME enviroment variable if it is not already set. - envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv) + var envv []string + if kernel.VFS2Enabled { + envv, err = user.MaybeAddExecUserHomeVFS2(ctx, l.rootProcArgs.MountNamespaceVFS2, + l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv) + + } else { + envv, err = user.MaybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, + l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv) + } if err != nil { return err } @@ -538,14 +598,16 @@ func (l *Loader) run() error { ep.pidnsPath = ns.Path } if l.console { - ttyFile, _ := l.rootProcArgs.FDTable.Get(0) - defer ttyFile.DecRef() - ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations) - - // Set the foreground process group on the TTY to the global - // init process group, since that is what we are about to - // start running. - ep.tty.InitForegroundProcessGroup(ep.tg.ProcessGroup()) + // Set the foreground process group on the TTY to the global init process + // group, since that is what we are about to start running. + switch { + case ttyFileVFS2 != nil: + ep.ttyVFS2 = ttyFileVFS2 + ttyFileVFS2.InitForegroundProcessGroup(ep.tg.ProcessGroup()) + case ttyFile != nil: + ep.tty = ttyFile + ttyFile.InitForegroundProcessGroup(ep.tg.ProcessGroup()) + } } // Handle signals by forwarding them to the root container process @@ -570,6 +632,19 @@ func (l *Loader) run() error { } }) + // l.stdioFDs are derived from dup() in boot.New() and they are now dup()ed again + // either in createFDTable() during initial start or in descriptor.initAfterLoad() + // during restore, we can release l.stdioFDs now. VFS2 takes ownership of the + // passed FDs, so only close for VFS1. + if !kernel.VFS2Enabled { + for _, fd := range l.stdioFDs { + err := syscall.Close(fd) + if err != nil { + return fmt.Errorf("close dup()ed stdioFDs: %v", err) + } + } + } + log.Infof("Process should have started...") l.watchdog.Start() return l.k.Start() @@ -653,7 +728,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file // Create the FD map, which will set stdin, stdout, and stderr. ctx := procArgs.NewContext(l.k) - fdTable, err := createFDTable(ctx, false, stdioFDs) + fdTable, _, _, err := createFDTable(ctx, false, stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } @@ -738,14 +813,14 @@ func (l *Loader) destroyContainer(cid string) error { l.mu.Lock() defer l.mu.Unlock() - _, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid}) + tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid}) if err != nil { // Container doesn't exist. return err } - // The container exists, has it been started? - if started { + // The container exists, but has it been started? + if tg != nil { if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil { return fmt.Errorf("sending SIGKILL to all container processes: %v", err) } @@ -787,45 +862,65 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { l.mu.Lock() defer l.mu.Unlock() - tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID}) + tg, err := l.tryThreadGroupFromIDLocked(execID{cid: args.ContainerID}) if err != nil { return 0, err } - if !started { + if tg == nil { return 0, fmt.Errorf("container %q not started", args.ContainerID) } // Get the container MountNamespace from the Task. - tg.Leader().WithMuLocked(func(t *kernel.Task) { - // task.MountNamespace() does not take a ref, so we must do so - // ourselves. - args.MountNamespace = t.MountNamespace() - args.MountNamespace.IncRef() - }) - defer args.MountNamespace.DecRef() + if kernel.VFS2Enabled { + // task.MountNamespace() does not take a ref, so we must do so ourselves. + args.MountNamespaceVFS2 = tg.Leader().MountNamespaceVFS2() + args.MountNamespaceVFS2.IncRef() + } else { + tg.Leader().WithMuLocked(func(t *kernel.Task) { + // task.MountNamespace() does not take a ref, so we must do so ourselves. + args.MountNamespace = t.MountNamespace() + args.MountNamespace.IncRef() + }) + } - // Add the HOME enviroment varible if it is not already set. - root := args.MountNamespace.Root() - defer root.DecRef() - ctx := fs.WithRoot(l.k.SupervisorContext(), root) - envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) - if err != nil { - return 0, err + // Add the HOME environment variable if it is not already set. + if kernel.VFS2Enabled { + defer args.MountNamespaceVFS2.DecRef() + + root := args.MountNamespaceVFS2.Root() + defer root.DecRef() + ctx := vfs.WithRoot(l.k.SupervisorContext(), root) + envv, err := user.MaybeAddExecUserHomeVFS2(ctx, args.MountNamespaceVFS2, args.KUID, args.Envv) + if err != nil { + return 0, err + } + args.Envv = envv + } else { + defer args.MountNamespace.DecRef() + + root := args.MountNamespace.Root() + defer root.DecRef() + ctx := fs.WithRoot(l.k.SupervisorContext(), root) + envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) + if err != nil { + return 0, err + } + args.Envv = envv } - args.Envv = envv // Start the process. proc := control.Proc{Kernel: l.k} args.PIDNamespace = tg.PIDNamespace() - newTG, tgid, ttyFile, err := control.ExecAsync(&proc, args) + newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args) if err != nil { return 0, err } eid := execID{cid: args.ContainerID, pid: tgid} l.processes[eid] = &execProcess{ - tg: newTG, - tty: ttyFile, + tg: newTG, + tty: ttyFile, + ttyVFS2: ttyFileVFS2, } log.Debugf("updated processes: %v", l.processes) @@ -836,7 +931,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // Don't defer unlock, as doing so would make it impossible for // multiple clients to wait on the same container. - tg, _, err := l.threadGroupFromID(execID{cid: cid}) + tg, err := l.threadGroupFromID(execID{cid: cid}) if err != nil { return fmt.Errorf("can't wait for container %q: %v", cid, err) } @@ -855,7 +950,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e // Try to find a process that was exec'd eid := execID{cid: cid, pid: tgid} - execTG, _, err := l.threadGroupFromID(eid) + execTG, err := l.threadGroupFromID(eid) if err == nil { ws := l.wait(execTG) *waitStatus = ws @@ -869,7 +964,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e // The caller may be waiting on a process not started directly via exec. // In this case, find the process in the container's PID namespace. - initTG, _, err := l.threadGroupFromID(execID{cid: cid}) + initTG, err := l.threadGroupFromID(execID{cid: cid}) if err != nil { return fmt.Errorf("waiting for PID %d: %v", tgid, err) } @@ -905,48 +1000,92 @@ func (l *Loader) WaitExit() kernel.ExitStatus { return l.k.GlobalInit().ExitStatus() } -func newEmptyNetworkStack(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { +func newRootNetworkNamespace(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (*inet.Namespace, error) { + // Create an empty network stack because the network namespace may be empty at + // this point. Netns is configured before Run() is called. Netstack is + // configured using a control uRPC message. Host network is configured inside + // Run(). switch conf.Network { case NetworkHost: - return hostinet.NewStack(), nil + // No network namespacing support for hostinet yet, hence creator is nil. + return inet.NewRootNamespace(hostinet.NewStack(), nil), nil case NetworkNone, NetworkSandbox: - // NetworkNone sets up loopback using netstack. - netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()} - transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()} - s := netstack.Stack{stack.New(stack.Options{ - NetworkProtocols: netProtos, - TransportProtocols: transProtos, - Clock: clock, - Stats: netstack.Metrics, - HandleLocal: true, - // Enable raw sockets for users with sufficient - // privileges. - RawFactory: raw.EndpointFactory{}, - UniqueID: uniqueID, - })} - - // Enable SACK Recovery. - if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { - return nil, fmt.Errorf("failed to enable SACK: %v", err) + s, err := newEmptySandboxNetworkStack(clock, uniqueID) + if err != nil { + return nil, err + } + creator := &sandboxNetstackCreator{ + clock: clock, + uniqueID: uniqueID, } + return inet.NewRootNamespace(s, creator), nil - // Set default TTLs as required by socket/netstack. - s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) - s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + default: + panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + } - // Enable Receive Buffer Auto-Tuning. - if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) - } +} + +func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { + netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()} + transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()} + s := netstack.Stack{stack.New(stack.Options{ + NetworkProtocols: netProtos, + TransportProtocols: transProtos, + Clock: clock, + Stats: netstack.Metrics, + HandleLocal: true, + // Enable raw sockets for users with sufficient + // privileges. + RawFactory: raw.EndpointFactory{}, + UniqueID: uniqueID, + })} - s.FillDefaultIPTables() + // Enable SACK Recovery. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { + return nil, fmt.Errorf("failed to enable SACK: %v", err) + } - return &s, nil + // Set default TTLs as required by socket/netstack. + s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) - default: - panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + // Enable Receive Buffer Auto-Tuning. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) + } + + s.FillIPTablesMetadata() + + return &s, nil +} + +// sandboxNetstackCreator implements kernel.NetworkStackCreator. +// +// +stateify savable +type sandboxNetstackCreator struct { + clock tcpip.Clock + uniqueID stack.UniqueID +} + +// CreateStack implements kernel.NetworkStackCreator.CreateStack. +func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) { + s, err := newEmptySandboxNetworkStack(f.clock, f.uniqueID) + if err != nil { + return nil, err + } + + // Setup loopback. + n := &Network{Stack: s.(*netstack.Stack).Stack} + nicID := tcpip.NICID(f.uniqueID.UniqueID()) + link := DefaultLoopbackLink + linkEP := loopback.New() + if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { + return nil, err } + + return s, nil } // signal sends a signal to one or more processes in a container. If PID is 0, @@ -976,8 +1115,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e return fmt.Errorf("PID (%d) cannot be set when signaling all processes", pid) } // Check that the container has actually started before signaling it. - _, _, err := l.threadGroupFromID(execID{cid: cid}) - if err != nil { + if _, err := l.threadGroupFromID(execID{cid: cid}); err != nil { return err } if err := l.signalAllProcesses(cid, signo); err != nil { @@ -991,16 +1129,16 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e } func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) error { - execTG, _, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) + execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) if err == nil { // Send signal directly to the identified process. - return execTG.SendSignal(&arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo}) } // The caller may be signaling a process not started directly via exec. // In this case, find the process in the container's PID namespace and // signal it. - initTG, _, err := l.threadGroupFromID(execID{cid: cid}) + initTG, err := l.threadGroupFromID(execID{cid: cid}) if err != nil { return fmt.Errorf("no thread group found: %v", err) } @@ -1011,25 +1149,43 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er if tg.Leader().ContainerID() != cid { return fmt.Errorf("process %d is part of a different container: %q", tgid, tg.Leader().ContainerID()) } - return tg.SendSignal(&arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) } +// signalForegrondProcessGroup looks up foreground process group from the TTY +// for the given "tgid" inside container "cid", and send the signal to it. func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, signo int32) error { - // Lookup foreground process group from the TTY for the given process, - // and send the signal to it. - tg, tty, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) + l.mu.Lock() + tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid, pid: tgid}) if err != nil { + l.mu.Unlock() return fmt.Errorf("no thread group found: %v", err) } - if tty == nil { + if tg == nil { + l.mu.Unlock() + return fmt.Errorf("container %q not started", cid) + } + + tty, ttyVFS2, err := l.ttyFromIDLocked(execID{cid: cid, pid: tgid}) + l.mu.Unlock() + if err != nil { + return fmt.Errorf("no thread group found: %v", err) + } + + var pg *kernel.ProcessGroup + switch { + case ttyVFS2 != nil: + pg = ttyVFS2.ForegroundProcessGroup() + case tty != nil: + pg = tty.ForegroundProcessGroup() + default: return fmt.Errorf("no TTY attached") } - pg := tty.ForegroundProcessGroup() if pg == nil { // No foreground process group has been set. Signal the // original thread group. log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid) - return tg.SendSignal(&arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) } // Send the signal to all processes in the process group. var lastErr error @@ -1037,7 +1193,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s if tg.ProcessGroup() != pg { continue } - if err := tg.SendSignal(&arch.SignalInfo{Signo: signo}); err != nil { + if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil { lastErr = err } } @@ -1055,33 +1211,57 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error { return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo}) } -// threadGroupFromID same as threadGroupFromIDLocked except that it acquires -// mutex before calling it. -func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) { +// threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it +// acquires mutex before calling it and fails in case container hasn't started +// yet. +func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, error) { l.mu.Lock() defer l.mu.Unlock() - tg, tty, ok, err := l.threadGroupFromIDLocked(key) + tg, err := l.tryThreadGroupFromIDLocked(key) if err != nil { - return nil, nil, err + return nil, err } - if !ok { - return nil, nil, fmt.Errorf("container %q not started", key.cid) + if tg == nil { + return nil, fmt.Errorf("container %q not started", key.cid) } - return tg, tty, nil + return tg, nil } -// threadGroupFromIDLocked returns the thread group and TTY for the given -// execution ID. TTY may be nil if the process is not attached to a terminal. -// Also returns a boolean indicating whether the container has already started. -// Returns error if execution ID is invalid or if the container cannot be -// found (maybe it has been deleted). Caller must hold 'mu'. -func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) { +// tryThreadGroupFromIDLocked returns the thread group for the given execution +// ID. It may return nil in case the container has not started yet. Returns +// error if execution ID is invalid or if the container cannot be found (maybe +// it has been deleted). Caller must hold 'mu'. +func (l *Loader) tryThreadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, error) { ep := l.processes[key] if ep == nil { - return nil, nil, false, fmt.Errorf("container %q not found", key.cid) + return nil, fmt.Errorf("container %q not found", key.cid) } - if ep.tg == nil { - return nil, nil, false, nil + return ep.tg, nil +} + +// ttyFromIDLocked returns the TTY files for the given execution ID. It may +// return nil in case the container has not started yet. Returns error if +// execution ID is invalid or if the container cannot be found (maybe it has +// been deleted). Caller must hold 'mu'. +func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { + ep := l.processes[key] + if ep == nil { + return nil, nil, fmt.Errorf("container %q not found", key.cid) + } + return ep.tty, ep.ttyVFS2, nil +} + +func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { + if len(stdioFDs) != 3 { + return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) + } + + k := kernel.KernelFromContext(ctx) + fdTable := k.NewFDTable() + ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs) + if err != nil { + fdTable.DecRef() + return nil, nil, nil, err } - return ep.tg, ep.tty, true, nil + return fdTable, ttyFile, ttyFileVFS2, nil } diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 44aa63196..e448fd773 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -24,11 +24,14 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/runsc/fsgofer" @@ -100,20 +103,29 @@ func startGofer(root string) (int, func(), error) { return sandboxEnd, cleanup, nil } -func createLoader() (*Loader, func(), error) { +func createLoader(vfsEnabled bool, spec *specs.Spec) (*Loader, func(), error) { fd, err := server.CreateSocket(ControlSocketAddr(fmt.Sprintf("%010d", rand.Int())[:10])) if err != nil { return nil, nil, err } conf := testConfig() - spec := testSpec() + conf.VFS2 = vfsEnabled sandEnd, cleanup, err := startGofer(spec.Root.Path) if err != nil { return nil, nil, err } - stdio := []int{int(os.Stdin.Fd()), int(os.Stdout.Fd()), int(os.Stderr.Fd())} + // Loader takes ownership of stdio. + var stdio []int + for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { + newFd, err := unix.Dup(int(f.Fd())) + if err != nil { + return nil, nil, err + } + stdio = append(stdio, newFd) + } + args := Args{ ID: "foo", Spec: spec, @@ -132,10 +144,20 @@ func createLoader() (*Loader, func(), error) { // TestRun runs a simple application in a sandbox and checks that it succeeds. func TestRun(t *testing.T) { - l, cleanup, err := createLoader() + doRun(t, false) +} + +// TestRunVFS2 runs TestRun in VFSv2. +func TestRunVFS2(t *testing.T) { + doRun(t, true) +} + +func doRun(t *testing.T, vfsEnabled bool) { + l, cleanup, err := createLoader(vfsEnabled, testSpec()) if err != nil { t.Fatalf("error creating loader: %v", err) } + defer l.Destroy() defer cleanup() @@ -169,7 +191,16 @@ func TestRun(t *testing.T) { // TestStartSignal tests that the controller Start message will cause // WaitForStartSignal to return. func TestStartSignal(t *testing.T) { - l, cleanup, err := createLoader() + doStartSignal(t, false) +} + +// TestStartSignalVFS2 does TestStartSignal with VFS2. +func TestStartSignalVFS2(t *testing.T) { + doStartSignal(t, true) +} + +func doStartSignal(t *testing.T, vfsEnabled bool) { + l, cleanup, err := createLoader(vfsEnabled, testSpec()) if err != nil { t.Fatalf("error creating loader: %v", err) } @@ -217,18 +248,19 @@ func TestStartSignal(t *testing.T) { } -// Test that MountNamespace can be created with various specs. -func TestCreateMountNamespace(t *testing.T) { - testCases := []struct { - name string - // Spec that will be used to create the mount manager. Note - // that we can't mount procfs without a kernel, so each spec - // MUST contain something other than procfs mounted at /proc. - spec specs.Spec - // Paths that are expected to exist in the resulting fs. - expectedPaths []string - }{ - { +type CreateMountTestcase struct { + name string + // Spec that will be used to create the mount manager. Note + // that we can't mount procfs without a kernel, so each spec + // MUST contain something other than procfs mounted at /proc. + spec specs.Spec + // Paths that are expected to exist in the resulting fs. + expectedPaths []string +} + +func createMountTestcases(vfs2 bool) []*CreateMountTestcase { + testCases := []*CreateMountTestcase{ + &CreateMountTestcase{ // Only proc. name: "only proc mount", spec: specs.Spec{ @@ -270,7 +302,7 @@ func TestCreateMountNamespace(t *testing.T) { // /dev, and /sys. expectedPaths: []string{"/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - { + &CreateMountTestcase{ // Mounts are nested inside each other. name: "nested mounts", spec: specs.Spec{ @@ -314,7 +346,7 @@ func TestCreateMountNamespace(t *testing.T) { expectedPaths: []string{"/foo", "/foo/bar", "/foo/bar/baz", "/foo/qux", "/foo/qux-quz", "/foo/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - { + &CreateMountTestcase{ name: "mount inside /dev", spec: specs.Spec{ Root: &specs.Root{ @@ -357,40 +389,46 @@ func TestCreateMountNamespace(t *testing.T) { }, expectedPaths: []string{"/proc", "/dev", "/dev/fd-foo", "/dev/foo", "/dev/bar", "/sys"}, }, - { - name: "mounts inside mandatory mounts", - spec: specs.Spec{ - Root: &specs.Root{ - Path: os.TempDir(), - Readonly: true, + } + + vfsCase := &CreateMountTestcase{ + name: "mounts inside mandatory mounts", + spec: specs.Spec{ + Root: &specs.Root{ + Path: os.TempDir(), + Readonly: true, + }, + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "tmpfs", }, - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "tmpfs", - }, - // We don't include /sys, and /tmp in - // the spec, since they will be added - // automatically. - // - // Instead, add submounts inside these - // directories and make sure they are - // visible under the mandatory mounts. - { - Destination: "/sys/bar", - Type: "tmpfs", - }, - { - Destination: "/tmp/baz", - Type: "tmpfs", - }, + // TODO (gvisor.dev/issue/1487): Re-add this case when sysfs supports + // MkDirAt in VFS2 (and remove the reduntant append). + // { + // Destination: "/sys/bar", + // Type: "tmpfs", + // }, + // + { + Destination: "/tmp/baz", + Type: "tmpfs", }, }, - expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"}, }, + expectedPaths: []string{"/proc", "/sys" /* "/sys/bar" ,*/, "/tmp", "/tmp/baz"}, } - for _, tc := range testCases { + if !vfs2 { + vfsCase.spec.Mounts = append(vfsCase.spec.Mounts, specs.Mount{Destination: "/sys/bar", Type: "tmpfs"}) + vfsCase.expectedPaths = append(vfsCase.expectedPaths, "/sys/bar") + } + return append(testCases, vfsCase) +} + +// Test that MountNamespace can be created with various specs. +func TestCreateMountNamespace(t *testing.T) { + for _, tc := range createMountTestcases(false /* vfs2 */) { t.Run(tc.name, func(t *testing.T) { conf := testConfig() ctx := contexttest.Context(t) @@ -425,6 +463,52 @@ func TestCreateMountNamespace(t *testing.T) { } } +// Test that MountNamespace can be created with various specs. +func TestCreateMountNamespaceVFS2(t *testing.T) { + for _, tc := range createMountTestcases(true /* vfs2 */) { + t.Run(tc.name, func(t *testing.T) { + spec := testSpec() + spec.Mounts = tc.spec.Mounts + spec.Root = tc.spec.Root + + t.Logf("Using root: %q", spec.Root.Path) + l, loaderCleanup, err := createLoader(true /* VFS2 Enabled */, spec) + if err != nil { + t.Fatalf("failed to create loader: %v", err) + } + defer l.Destroy() + defer loaderCleanup() + + mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints) + if err := mntr.processHints(l.conf); err != nil { + t.Fatalf("failed process hints: %v", err) + } + + ctx := l.k.SupervisorContext() + mns, err := mntr.setupVFS2(ctx, l.conf, &l.rootProcArgs) + if err != nil { + t.Fatalf("failed to setupVFS2: %v", err) + } + + root := mns.Root() + defer root.DecRef() + for _, p := range tc.expectedPaths { + target := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(p), + } + + if d, err := l.k.VFS().GetDentryAt(ctx, l.rootProcArgs.Credentials, target, &vfs.GetDentryOptions{}); err != nil { + t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err) + } else { + d.DecRef() + } + } + }) + } +} + // TestRestoreEnvironment tests that the correct mounts are collected from the spec and config // in order to build the environment for restoring. func TestRestoreEnvironment(t *testing.T) { diff --git a/runsc/boot/network.go b/runsc/boot/network.go index 6a8765ec8..0af30456e 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -17,12 +17,15 @@ package boot import ( "fmt" "net" + "runtime" + "strings" "syscall" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/arp" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" @@ -31,6 +34,32 @@ import ( "gvisor.dev/gvisor/pkg/urpc" ) +var ( + // DefaultLoopbackLink contains IP addresses and routes of "127.0.0.1/8" and + // "::1/8" on "lo" interface. + DefaultLoopbackLink = LoopbackLink{ + Name: "lo", + Addresses: []net.IP{ + net.IP("\x7f\x00\x00\x01"), + net.IPv6loopback, + }, + Routes: []Route{ + { + Destination: net.IPNet{ + IP: net.IPv4(0x7f, 0, 0, 0), + Mask: net.IPv4Mask(0xff, 0, 0, 0), + }, + }, + { + Destination: net.IPNet{ + IP: net.IPv6loopback, + Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)), + }, + }, + }, + } +) + // Network exposes methods that can be used to configure a network stack. type Network struct { Stack *stack.Stack @@ -48,6 +77,44 @@ type DefaultRoute struct { Name string } +// QueueingDiscipline is used to specify the kind of Queueing Discipline to +// apply for a give FDBasedLink. +type QueueingDiscipline int + +const ( + // QDiscNone disables any queueing for the underlying FD. + QDiscNone QueueingDiscipline = iota + + // QDiscFIFO applies a simple fifo based queue to the underlying + // FD. + QDiscFIFO +) + +// MakeQueueingDiscipline if possible the equivalent QueuingDiscipline for s +// else returns an error. +func MakeQueueingDiscipline(s string) (QueueingDiscipline, error) { + switch s { + case "none": + return QDiscNone, nil + case "fifo": + return QDiscFIFO, nil + default: + return 0, fmt.Errorf("unsupported qdisc specified: %q", s) + } +} + +// String implements fmt.Stringer. +func (q QueueingDiscipline) String() string { + switch q { + case QDiscNone: + return "none" + case QDiscFIFO: + return "fifo" + default: + panic(fmt.Sprintf("Invalid queueing discipline: %d", q)) + } +} + // FDBasedLink configures an fd-based link. type FDBasedLink struct { Name string @@ -57,6 +124,7 @@ type FDBasedLink struct { GSOMaxSize uint32 SoftwareGSOEnabled bool LinkAddress net.HardwareAddr + QDisc QueueingDiscipline // NumChannels controls how many underlying FD's are to be used to // create this endpoint. @@ -158,6 +226,8 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct } mac := tcpip.LinkAddress(link.LinkAddress) + log.Infof("gso max size is: %d", link.GSOMaxSize) + linkEP, err := fdbased.New(&fdbased.Options{ FDs: FDs, MTU: uint32(link.MTU), @@ -172,6 +242,13 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct return err } + switch link.QDisc { + case QDiscNone: + case QDiscFIFO: + log.Infof("Enabling FIFO QDisc on %q", link.Name) + linkEP = fifo.New(linkEP, runtime.GOMAXPROCS(0), 1000) + } + log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels) if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { return err diff --git a/runsc/boot/pprof/BUILD b/runsc/boot/pprof/BUILD new file mode 100644 index 000000000..29cb42b2f --- /dev/null +++ b/runsc/boot/pprof/BUILD @@ -0,0 +1,11 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "pprof", + srcs = ["pprof.go"], + visibility = [ + "//runsc:__subpackages__", + ], +) diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof/pprof.go index 463362f02..1ded20dee 100644 --- a/runsc/boot/pprof.go +++ b/runsc/boot/pprof/pprof.go @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -package boot +// Package pprof provides a stub to initialize custom profilers. +package pprof -func initializePProf() { +// Initialize will be called at boot for initializing custom profilers. +func Initialize() { } diff --git a/runsc/boot/user.go b/runsc/boot/user.go deleted file mode 100644 index f0aa52135..000000000 --- a/runsc/boot/user.go +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -import ( - "bufio" - "fmt" - "io" - "strconv" - "strings" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/usermem" -) - -type fileReader struct { - // Ctx is the context for the file reader. - Ctx context.Context - - // File is the file to read from. - File *fs.File -} - -// Read implements io.Reader.Read. -func (r *fileReader) Read(buf []byte) (int, error) { - n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf)) - return int(n), err -} - -// getExecUserHome returns the home directory of the executing user read from -// /etc/passwd as read from the container filesystem. -func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) { - // The default user home directory to return if no user matching the user - // if found in the /etc/passwd found in the image. - const defaultHome = "/" - - // Open the /etc/passwd file from the dirent via the root mount namespace. - mnsRoot := rootMns.Root() - maxTraversals := uint(linux.MaxSymlinkTraversals) - dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals) - if err != nil { - // NOTE: Ignore errors opening the passwd file. If the passwd file - // doesn't exist we will return the default home directory. - return defaultHome, nil - } - defer dirent.DecRef() - - // Check read permissions on the file. - if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil { - // NOTE: Ignore permissions errors here and return default root dir. - return defaultHome, nil - } - - // Only open regular files. We don't open other files like named pipes as - // they may block and might present some attack surface to the container. - // Note that runc does not seem to do this kind of checking. - if !fs.IsRegular(dirent.Inode.StableAttr) { - return defaultHome, nil - } - - f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false}) - if err != nil { - return "", err - } - defer f.DecRef() - - r := &fileReader{ - Ctx: ctx, - File: f, - } - - homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome) - if err != nil { - return "", err - } - - return homeDir, nil -} - -// maybeAddExecUserHome returns a new slice with the HOME enviroment variable -// set if the slice does not already contain it, otherwise it returns the -// original slice unmodified. -func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) { - // Check if the envv already contains HOME. - for _, env := range envv { - if strings.HasPrefix(env, "HOME=") { - // We have it. Return the original slice unmodified. - return envv, nil - } - } - - // Read /etc/passwd for the user's HOME directory and set the HOME - // environment variable as required by POSIX if it is not overridden by - // the user. - homeDir, err := getExecUserHome(ctx, mns, uid) - if err != nil { - return nil, fmt.Errorf("error reading exec user: %v", err) - } - return append(envv, "HOME="+homeDir), nil -} - -// findHomeInPasswd parses a passwd file and returns the given user's home -// directory. This function does it's best to replicate the runc's behavior. -func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) { - s := bufio.NewScanner(passwd) - - for s.Scan() { - if err := s.Err(); err != nil { - return "", err - } - - line := strings.TrimSpace(s.Text()) - if line == "" { - continue - } - - // Pull out part of passwd entry. Loosely parse the passwd entry as some - // passwd files could be poorly written and for compatibility with runc. - // - // Per 'man 5 passwd' - // /etc/passwd contains one line for each user account, with seven - // fields delimited by colons (“:”). These fields are: - // - // - login name - // - optional encrypted password - // - numerical user ID - // - numerical group ID - // - user name or comment field - // - user home directory - // - optional user command interpreter - parts := strings.Split(line, ":") - - found := false - homeDir := "" - for i, p := range parts { - switch i { - case 2: - parsedUID, err := strconv.ParseUint(p, 10, 32) - if err == nil && parsedUID == uint64(uid) { - found = true - } - case 5: - homeDir = p - } - } - if found { - // NOTE: If the uid is present but the home directory is not - // present in the /etc/passwd entry we return an empty string. This - // is, for better or worse, what runc does. - return homeDir, nil - } - } - - return defaultHome, nil -} diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go deleted file mode 100644 index fb4e13dfb..000000000 --- a/runsc/boot/user_test.go +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -import ( - "io/ioutil" - "os" - "path/filepath" - "strings" - "syscall" - "testing" - - specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" -) - -func setupTempDir() (string, error) { - tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test") - if err != nil { - return "", err - } - return tmpDir, nil -} - -func setupPasswd(contents string, perms os.FileMode) func() (string, error) { - return func() (string, error) { - tmpDir, err := setupTempDir() - if err != nil { - return "", err - } - - if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { - return "", err - } - - f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd")) - if err != nil { - return "", err - } - defer f.Close() - - _, err = f.WriteString(contents) - if err != nil { - return "", err - } - - err = f.Chmod(perms) - if err != nil { - return "", err - } - return tmpDir, nil - } -} - -// TestGetExecUserHome tests the getExecUserHome function. -func TestGetExecUserHome(t *testing.T) { - tests := map[string]struct { - uid auth.KUID - createRoot func() (string, error) - expected string - }{ - "success": { - uid: 1000, - createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666), - expected: "/home/adin", - }, - "no_passwd": { - uid: 1000, - createRoot: setupTempDir, - expected: "/", - }, - "no_perms": { - uid: 1000, - createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000), - expected: "/", - }, - "directory": { - uid: 1000, - createRoot: func() (string, error) { - tmpDir, err := setupTempDir() - if err != nil { - return "", err - } - - if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { - return "", err - } - - if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { - return "", err - } - - return tmpDir, nil - }, - expected: "/", - }, - // Currently we don't allow named pipes. - "named_pipe": { - uid: 1000, - createRoot: func() (string, error) { - tmpDir, err := setupTempDir() - if err != nil { - return "", err - } - - if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { - return "", err - } - - if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { - return "", err - } - - return tmpDir, nil - }, - expected: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - tmpDir, err := tc.createRoot() - if err != nil { - t.Fatalf("failed to create root dir: %v", err) - } - - sandEnd, cleanup, err := startGofer(tmpDir) - if err != nil { - t.Fatalf("failed to create gofer: %v", err) - } - defer cleanup() - - ctx := contexttest.Context(t) - conf := &Config{ - RootDir: "unused_root_dir", - Network: NetworkNone, - DisableSeccomp: true, - } - - spec := &specs.Spec{ - Root: &specs.Root{ - Path: tmpDir, - Readonly: true, - }, - // Add /proc mount as tmpfs to avoid needing a kernel. - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "tmpfs", - }, - }, - } - - mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{}) - mns, err := mntr.createMountNamespace(ctx, conf) - if err != nil { - t.Fatalf("failed to create mount namespace: %v", err) - } - ctx = fs.WithRoot(ctx, mns.Root()) - if err := mntr.mountSubmounts(ctx, conf, mns); err != nil { - t.Fatalf("failed to create mount namespace: %v", err) - } - - got, err := getExecUserHome(ctx, mns, tc.uid) - if err != nil { - t.Fatalf("failed to get user home: %v", err) - } - - if got != tc.expected { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} - -// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing. -func TestFindHomeInPasswd(t *testing.T) { - tests := map[string]struct { - uid uint32 - passwd string - expected string - def string - }{ - "empty": { - uid: 1000, - passwd: "", - expected: "/", - def: "/", - }, - "whitespace": { - uid: 1000, - passwd: " ", - expected: "/", - def: "/", - }, - "full": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh", - expected: "/home/adin", - def: "/", - }, - // For better or worse, this is how runc works. - "partial": { - uid: 1000, - passwd: "adin::1000:1111:", - expected: "", - def: "/", - }, - "multiple": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - "duplicate": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh", - expected: "/home/adin", - def: "/", - }, - "empty_lines": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def) - if err != nil { - t.Fatalf("error parsing passwd: %v", err) - } - if tc.expected != got { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go new file mode 100644 index 000000000..7ed6801b4 --- /dev/null +++ b/runsc/boot/vfs.go @@ -0,0 +1,375 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "fmt" + "path" + "sort" + "strings" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/devices/memdev" + "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error { + vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserList: true, + // TODO(b/29356795): Users may mount this once the terminals are in a + // usable state. + AllowUserMount: false, + }) + vfsObj.MustRegisterFilesystemType(devtmpfs.Name, &devtmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserList: true, + }) + vfsObj.MustRegisterFilesystemType(proc.Name, &proc.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + vfsObj.MustRegisterFilesystemType(sys.Name, &sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + + // Setup files in devtmpfs. + if err := memdev.Register(vfsObj); err != nil { + return fmt.Errorf("registering memdev: %w", err) + } + a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name) + if err != nil { + return fmt.Errorf("creating devtmpfs accessor: %w", err) + } + defer a.Release() + + if err := a.UserspaceInit(ctx); err != nil { + return fmt.Errorf("initializing userspace: %w", err) + } + if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil { + return fmt.Errorf("creating devtmpfs files: %w", err) + } + return nil +} + +func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error { + if err := mntr.k.VFS().Init(); err != nil { + return fmt.Errorf("failed to initialize VFS: %w", err) + } + mns, err := mntr.setupVFS2(ctx, conf, procArgs) + if err != nil { + return fmt.Errorf("failed to setupFS: %w", err) + } + procArgs.MountNamespaceVFS2 = mns + + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) + if err != nil { + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) + } + procArgs.Filename = f + return nil +} + +func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) { + log.Infof("Configuring container's file system with VFS2") + + // Create context with root credentials to mount the filesystem (the current + // user may not be privileged enough). + rootCreds := auth.NewRootCredentials(procArgs.Credentials.UserNamespace) + rootProcArgs := *procArgs + rootProcArgs.WorkingDirectory = "/" + rootProcArgs.Credentials = rootCreds + rootProcArgs.Umask = 0022 + rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals + rootCtx := procArgs.NewContext(c.k) + + if err := registerFilesystems(rootCtx, c.k.VFS(), rootCreds); err != nil { + return nil, fmt.Errorf("register filesystems: %w", err) + } + + mns, err := c.createMountNamespaceVFS2(rootCtx, conf, rootCreds) + if err != nil { + return nil, fmt.Errorf("creating mount namespace: %w", err) + } + rootProcArgs.MountNamespaceVFS2 = mns + + // Mount submounts. + if err := c.mountSubmountsVFS2(rootCtx, conf, mns, rootCreds); err != nil { + return nil, fmt.Errorf("mounting submounts vfs2: %w", err) + } + return mns, nil +} + +func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { + fd := c.fds.remove() + opts := strings.Join(p9MountData(fd, conf.FileAccess, true /* vfs2 */), ",") + + log.Infof("Mounting root over 9P, ioFD: %d", fd) + mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts}) + if err != nil { + return nil, fmt.Errorf("setting up mount namespace: %w", err) + } + return mns, nil +} + +func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error { + mounts, err := c.prepareMountsVFS2() + if err != nil { + return err + } + + for i := range mounts { + submount := &mounts[i] + log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options) + if err := c.mountSubmountVFS2(ctx, conf, mns, creds, submount); err != nil { + return err + } + } + + if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil { + return fmt.Errorf(`mount submount "\tmp": %w`, err) + } + return nil +} + +type mountAndFD struct { + specs.Mount + fd int +} + +func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { + // Associate bind mounts with their FDs before sorting since there is an + // undocumented assumption that FDs are dispensed in the order in which + // they are required by mounts. + var mounts []mountAndFD + for _, m := range c.mounts { + fd := -1 + // Only bind mounts use host FDs; see + // containerMounter.getMountNameAndOptionsVFS2. + if m.Type == bind { + fd = c.fds.remove() + } + mounts = append(mounts, mountAndFD{ + Mount: m, + fd: fd, + }) + } + if err := c.checkDispenser(); err != nil { + return nil, err + } + + // Sort the mounts so that we don't place children before parents. + sort.Slice(mounts, func(i, j int) bool { + return len(mounts[i].Destination) < len(mounts[j].Destination) + }) + + return mounts, nil +} + +func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error { + root := mns.Root() + defer root.DecRef() + target := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(submount.Destination), + } + fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount) + if err != nil { + return fmt.Errorf("mountOptions failed: %w", err) + } + if len(fsName) == 0 { + // Filesystem is not supported (e.g. cgroup), just skip it. + return nil + } + + if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil { + return err + } + if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil { + return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts) + } + log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts.GetFilesystemOptions.Data) + return nil +} + +// getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values +// used for mounts. +func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, *vfs.MountOptions, error) { + var ( + fsName string + data []string + ) + + // Find filesystem name and FS specific data field. + switch m.Type { + case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: + fsName = m.Type + case nonefs: + fsName = sys.Name + case tmpfs.Name: + fsName = m.Type + + var err error + data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) + if err != nil { + return "", nil, err + } + + case bind: + fsName = gofer.Name + data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) + + default: + log.Warningf("ignoring unknown filesystem type %q", m.Type) + } + + opts := &vfs.MountOptions{ + GetFilesystemOptions: vfs.GetFilesystemOptions{ + Data: strings.Join(data, ","), + }, + InternalMount: true, + } + + for _, o := range m.Options { + switch o { + case "rw": + opts.ReadOnly = false + case "ro": + opts.ReadOnly = true + case "noatime": + opts.Flags.NoATime = true + case "noexec": + opts.Flags.NoExec = true + default: + log.Warningf("ignoring unknown mount option %q", o) + } + } + + if conf.Overlay { + // All writes go to upper, be paranoid and make lower readonly. + opts.ReadOnly = true + } + return fsName, opts, nil +} + +func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error { + target := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(currentPath), + } + _, err := c.k.VFS().StatAt(ctx, creds, target, &vfs.StatOptions{}) + if err == nil { + // Mount point exists, nothing else to do. + return nil + } + if err != syserror.ENOENT { + return fmt.Errorf("stat failed for %q during mount point creation: %w", currentPath, err) + } + + // Recurse to ensure parent is created and then create the mount point. + if err := c.makeSyntheticMount(ctx, path.Dir(currentPath), root, creds); err != nil { + return err + } + log.Debugf("Creating dir %q for mount point", currentPath) + mkdirOpts := &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true} + if err := c.k.VFS().MkdirAt(ctx, creds, target, mkdirOpts); err != nil { + return fmt.Errorf("failed to create directory %q for mount: %w", currentPath, err) + } + return nil +} + +// mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so. +// Technically we don't have to mount tmpfs at /tmp, as we could just rely on +// the host /tmp, but this is a nice optimization, and fixes some apps that call +// mknod in /tmp. It's unsafe to mount tmpfs if: +// 1. /tmp is mounted explicitly: we should not override user's wish +// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp +// +// Note that when there are submounts inside of '/tmp', directories for the +// mount points must be present, making '/tmp' not empty anymore. +func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds *auth.Credentials, mns *vfs.MountNamespace) error { + for _, m := range c.mounts { + // m.Destination has been cleaned, so it's to use equality here. + if m.Destination == "/tmp" { + log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m) + return nil + } + } + + root := mns.Root() + defer root.DecRef() + pop := vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse("/tmp"), + } + // TODO(gvisor.dev/issue/2782): Use O_PATH when available. + statx, err := c.k.VFS().StatAt(ctx, creds, &pop, &vfs.StatOptions{}) + switch err { + case nil: + // Found '/tmp' in filesystem, check if it's empty. + if linux.FileMode(statx.Mode).FileType() != linux.ModeDirectory { + // Not a dir?! Leave it be. + return nil + } + if statx.Nlink > 2 { + // If more than "." and ".." is found, skip internal tmpfs to prevent + // hiding existing files. + log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`) + return nil + } + log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`) + fallthrough + + case syserror.ENOENT: + // No '/tmp' found (or fallthrough from above). It's safe to mount internal + // tmpfs. + tmpMount := specs.Mount{ + Type: tmpfs.Name, + Destination: "/tmp", + // Sticky bit is added to prevent accidental deletion of files from + // another user. This is normally done for /tmp. + Options: []string{"mode=01777"}, + } + return c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount}) + + default: + return fmt.Errorf(`stating "/tmp" inside container: %w`, err) + } +} diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD index d4c7bdfbb..c087e1a3c 100644 --- a/runsc/cgroup/BUILD +++ b/runsc/cgroup/BUILD @@ -7,8 +7,8 @@ go_library( srcs = ["cgroup.go"], visibility = ["//:sandbox"], deps = [ + "//pkg/cleanup", "//pkg/log", - "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", ], diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 653ca5f52..ef01820ef 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -19,6 +19,7 @@ package cgroup import ( "bufio" "context" + "errors" "fmt" "io/ioutil" "os" @@ -30,29 +31,31 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/runsc/specutils" ) const ( cgroupRoot = "/sys/fs/cgroup" ) -var controllers = map[string]controller{ - "blkio": &blockIO{}, - "cpu": &cpu{}, - "cpuset": &cpuSet{}, - "memory": &memory{}, - "net_cls": &networkClass{}, - "net_prio": &networkPrio{}, +var controllers = map[string]config{ + "blkio": config{ctrlr: &blockIO{}}, + "cpu": config{ctrlr: &cpu{}}, + "cpuset": config{ctrlr: &cpuSet{}}, + "memory": config{ctrlr: &memory{}}, + "net_cls": config{ctrlr: &networkClass{}}, + "net_prio": config{ctrlr: &networkPrio{}}, + "pids": config{ctrlr: &pids{}}, // These controllers either don't have anything in the OCI spec or is - // irrevalant for a sandbox, e.g. pids. - "devices": &noop{}, - "freezer": &noop{}, - "perf_event": &noop{}, - "pids": &noop{}, - "systemd": &noop{}, + // irrelevant for a sandbox. + "devices": config{ctrlr: &noop{}}, + "freezer": config{ctrlr: &noop{}}, + "hugetlb": config{ctrlr: &noop{}, optional: true}, + "perf_event": config{ctrlr: &noop{}}, + "rdma": config{ctrlr: &noop{}, optional: true}, + "systemd": config{ctrlr: &noop{}}, } func setOptionalValueInt(path, name string, val *int64) error { @@ -196,8 +199,9 @@ func LoadPaths(pid string) (map[string]string, error) { return paths, nil } -// Cgroup represents a group inside all controllers. For example: Name='/foo/bar' -// maps to /sys/fs/cgroup/<controller>/foo/bar on all controllers. +// Cgroup represents a group inside all controllers. For example: +// Name='/foo/bar' maps to /sys/fs/cgroup/<controller>/foo/bar on +// all controllers. type Cgroup struct { Name string `json:"name"` Parents map[string]string `json:"parents"` @@ -242,16 +246,20 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { // The Cleanup object cleans up partially created cgroups when an error occurs. // Errors occuring during cleanup itself are ignored. - clean := specutils.MakeCleanup(func() { _ = c.Uninstall() }) + clean := cleanup.Make(func() { _ = c.Uninstall() }) defer clean.Clean() - for key, ctrl := range controllers { + for key, cfg := range controllers { path := c.makePath(key) if err := os.MkdirAll(path, 0755); err != nil { + if cfg.optional && errors.Is(err, syscall.EROFS) { + log.Infof("Skipping cgroup %q", key) + continue + } return err } if res != nil { - if err := ctrl.set(res, path); err != nil { + if err := cfg.ctrlr.set(res, path); err != nil { return err } } @@ -321,10 +329,13 @@ func (c *Cgroup) Join() (func(), error) { } // Now join the cgroups. - for key := range controllers { + for key, cfg := range controllers { path := c.makePath(key) log.Debugf("Joining cgroup %q", path) if err := setValue(path, "cgroup.procs", "0"); err != nil { + if cfg.optional && os.IsNotExist(err) { + continue + } return undo, err } } @@ -375,6 +386,11 @@ func (c *Cgroup) makePath(controllerName string) string { return filepath.Join(cgroupRoot, controllerName, path) } +type config struct { + ctrlr controller + optional bool +} + type controller interface { set(*specs.LinuxResources, string) error } @@ -525,3 +541,13 @@ func (*networkPrio) set(spec *specs.LinuxResources, path string) error { } return nil } + +type pids struct{} + +func (*pids) set(spec *specs.LinuxResources, path string) error { + if spec.Pids == nil { + return nil + } + val := strconv.FormatInt(spec.Pids.Limit, 10) + return setValue(path, "pids.max", val) +} diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 2a88b85a9..af3538ef0 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -31,6 +31,7 @@ go_library( "spec.go", "start.go", "state.go", + "statefile.go", "syscalls.go", "wait.go", ], @@ -43,11 +44,13 @@ go_library( "//pkg/sentry/control", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/platform", + "//pkg/state", + "//pkg/state/statefile", "//pkg/sync", "//pkg/unet", "//pkg/urpc", "//runsc/boot", - "//runsc/boot/platforms", "//runsc/console", "//runsc/container", "//runsc/flag", @@ -79,11 +82,11 @@ go_test( "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/kernel/auth", + "//pkg/test/testutil", "//pkg/urpc", "//runsc/boot", "//runsc/container", "//runsc/specutils", - "//runsc/testutil", "@com_github_google_go-cmp//cmp:go_default_library", "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 0f3da69a0..01204ab4d 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -23,9 +23,10 @@ import ( "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/runsc/boot" - "gvisor.dev/gvisor/runsc/boot/platforms" "gvisor.dev/gvisor/runsc/flag" "gvisor.dev/gvisor/runsc/specutils" ) @@ -82,8 +83,13 @@ type Boot struct { // sandbox (e.g. gofer) and sent through this FD. mountsFD int - // pidns is set if the sanadbox is in its own pid namespace. + // pidns is set if the sandbox is in its own pid namespace. pidns bool + + // attached is set to true to kill the sandbox process when the parent process + // terminates. This flag is set when the command execve's itself because + // parent death signal doesn't propagate through execve when uid/gid changes. + attached bool } // Name implements subcommands.Command.Name. @@ -118,6 +124,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) { f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup") f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).") + f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates") } // Execute implements subcommands.Command.Execute. It starts a sandbox in a @@ -129,33 +136,36 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } // Ensure that if there is a panic, all goroutine stacks are printed. - debug.SetTraceback("all") + debug.SetTraceback("system") conf := args[0].(*boot.Config) + if b.attached { + // Ensure this process is killed after parent process terminates when + // attached mode is enabled. In the unfortunate event that the parent + // terminates before this point, this process leaks. + if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil { + Fatalf("error setting parent death signal: %v", err) + } + } + if b.setUpRoot { if err := setUpChroot(b.pidns); err != nil { Fatalf("error setting up chroot: %v", err) } - if !b.applyCaps { - // Remove --setup-root arg to call myself. - var args []string - for _, arg := range os.Args { - if !strings.Contains(arg, "setup-root") { - args = append(args, arg) - } - } - if !conf.Rootless { - // Note that we've already read the spec from the spec FD, and - // we will read it again after the exec call. This works - // because the ReadSpecFromFile function seeks to the beginning - // of the file before reading. - if err := callSelfAsNobody(args); err != nil { - Fatalf("%v", err) - } - panic("callSelfAsNobody must never return success") + if !b.applyCaps && !conf.Rootless { + // Remove --apply-caps arg to call myself. It has already been done. + args := prepareArgs(b.attached, "setup-root") + + // Note that we've already read the spec from the spec FD, and + // we will read it again after the exec call. This works + // because the ReadSpecFromFile function seeks to the beginning + // of the file before reading. + if err := callSelfAsNobody(args); err != nil { + Fatalf("%v", err) } + panic("callSelfAsNobody must never return success") } } @@ -173,7 +183,12 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) if caps == nil { caps = &specs.LinuxCapabilities{} } - if conf.Platform == platforms.Ptrace { + + gPlatform, err := platform.Lookup(conf.Platform) + if err != nil { + Fatalf("loading platform: %v", err) + } + if gPlatform.Requirements().RequiresCapSysPtrace { // Ptrace platform requires extra capabilities. const c = "CAP_SYS_PTRACE" caps.Bounding = append(caps.Bounding, c) @@ -181,13 +196,9 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) caps.Permitted = append(caps.Permitted, c) } - // Remove --apply-caps arg to call myself. - var args []string - for _, arg := range os.Args { - if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") { - args = append(args, arg) - } - } + // Remove --apply-caps and --setup-root arg to call myself. Both have + // already been done. + args := prepareArgs(b.attached, "setup-root", "apply-caps") // Note that we've already read the spec from the spec FD, and // we will read it again after the exec call. This works @@ -258,3 +269,22 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) l.Destroy() return subcommands.ExitSuccess } + +func prepareArgs(attached bool, exclude ...string) []string { + var args []string + for _, arg := range os.Args { + for _, excl := range exclude { + if strings.Contains(arg, excl) { + goto skip + } + } + args = append(args, arg) + if attached && arg == "boot" { + // Strategicaly place "--attached" after the command. This is needed + // to ensure the new process is killed when the parent process terminates. + args = append(args, "--attached") + } + skip: + } + return args +} diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go index 0c27f7313..a84067112 100644 --- a/runsc/cmd/capability_test.go +++ b/runsc/cmd/capability_test.go @@ -23,10 +23,10 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" ) func init() { @@ -85,21 +85,20 @@ func TestCapabilities(t *testing.T) { Inheritable: caps, } - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) // Use --network=host to make sandbox use spec's capabilities. conf.Network = boot.NetworkHost - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := container.Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go index b5a0ce17d..189244765 100644 --- a/runsc/cmd/chroot.go +++ b/runsc/cmd/chroot.go @@ -50,7 +50,7 @@ func pivotRoot(root string) error { // new_root, so after umounting the old_root, we will see only // the new_root in "/". if err := syscall.PivotRoot(".", "."); err != nil { - return fmt.Errorf("error changing root filesystem: %v", err) + return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err) } if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil { diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index 79965460e..b5de2588b 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -32,17 +32,20 @@ import ( // Debug implements subcommands.Command for the "debug" command. type Debug struct { - pid int - stacks bool - signal int - profileHeap string - profileCPU string - trace string - strace string - logLevel string - logPackets string - duration time.Duration - ps bool + pid int + stacks bool + signal int + profileHeap string + profileCPU string + profileGoroutine string + profileBlock string + profileMutex string + trace string + strace string + logLevel string + logPackets string + duration time.Duration + ps bool } // Name implements subcommands.Command. @@ -66,6 +69,9 @@ func (d *Debug) SetFlags(f *flag.FlagSet) { f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log") f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.") f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.") + f.StringVar(&d.profileGoroutine, "profile-goroutine", "", "writes goroutine profile to the given file.") + f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.") + f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.") f.DurationVar(&d.duration, "duration", time.Second, "amount of time to wait for CPU and trace profiles") f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.") f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox") @@ -147,6 +153,42 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } log.Infof("Heap profile written to %q", d.profileHeap) } + if d.profileGoroutine != "" { + f, err := os.Create(d.profileGoroutine) + if err != nil { + return Errorf(err.Error()) + } + defer f.Close() + + if err := c.Sandbox.GoroutineProfile(f); err != nil { + return Errorf(err.Error()) + } + log.Infof("Goroutine profile written to %q", d.profileGoroutine) + } + if d.profileBlock != "" { + f, err := os.Create(d.profileBlock) + if err != nil { + return Errorf(err.Error()) + } + defer f.Close() + + if err := c.Sandbox.BlockProfile(f); err != nil { + return Errorf(err.Error()) + } + log.Infof("Block profile written to %q", d.profileBlock) + } + if d.profileMutex != "" { + f, err := os.Create(d.profileMutex) + if err != nil { + return Errorf(err.Error()) + } + defer f.Close() + + if err := c.Sandbox.MutexProfile(f); err != nil { + return Errorf(err.Error()) + } + log.Infof("Mutex profile written to %q", d.profileMutex) + } delay := false if d.profileCPU != "" { diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index b184bd402..7d1310c96 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -166,15 +166,33 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su return Errorf("Error write spec: %v", err) } - runArgs := container.Args{ + containerArgs := container.Args{ ID: cid, Spec: spec, BundleDir: tmpDir, Attached: true, } - ws, err := container.Run(conf, runArgs) + ct, err := container.New(conf, containerArgs) if err != nil { - return Errorf("running container: %v", err) + return Errorf("creating container: %v", err) + } + defer ct.Destroy() + + if err := ct.Start(conf); err != nil { + return Errorf("starting container: %v", err) + } + + // Forward signals to init in the container. Thus if we get SIGINT from + // ^C, the container gracefully exit, and we can clean up. + // + // N.B. There is a still a window before this where a signal may kill + // this process, skipping cleanup. + stopForwarding := ct.ForwardSignals(0 /* pid */, false /* fgProcess */) + defer stopForwarding() + + ws, err := ct.Wait() + if err != nil { + return Errorf("waiting for container: %v", err) } *waitStatus = ws @@ -237,20 +255,27 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) { for _, cmd := range cmds { log.Debugf("Run %q", cmd) args := strings.Split(cmd, " ") - c := exec.Command(args[0], args[1:]...) - if err := c.Run(); err != nil { + cmd := exec.Command(args[0], args[1:]...) + if err := cmd.Run(); err != nil { + c.cleanupNet(cid, dev, "", "", "") return nil, fmt.Errorf("failed to run %q: %v", cmd, err) } } - if err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec); err != nil { + resolvPath, err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec) + if err != nil { + c.cleanupNet(cid, dev, "", "", "") return nil, err } - if err := makeFile("/etc/hostname", cid+"\n", spec); err != nil { + hostnamePath, err := makeFile("/etc/hostname", cid+"\n", spec) + if err != nil { + c.cleanupNet(cid, dev, resolvPath, "", "") return nil, err } hosts := fmt.Sprintf("127.0.0.1\tlocalhost\n%s\t%s\n", c.ip, cid) - if err := makeFile("/etc/hosts", hosts, spec); err != nil { + hostsPath, err := makeFile("/etc/hosts", hosts, spec) + if err != nil { + c.cleanupNet(cid, dev, resolvPath, hostnamePath, "") return nil, err } @@ -263,19 +288,22 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) { } spec.Linux.Namespaces = append(spec.Linux.Namespaces, netns) - return func() { c.cleanNet(cid, dev) }, nil + return func() { c.cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath) }, nil } -func (c *Do) cleanNet(cid, dev string) { - veth, peer := deviceNames(cid) +// cleanupNet tries to cleanup the network setup in setupNet. +// +// It may be called when setupNet is only partially complete, in which case it +// will cleanup as much as possible, logging warnings for the rest. +// +// Unfortunately none of this can be automatically cleaned up on process exit, +// we must do so explicitly. +func (c *Do) cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath string) { + _, peer := deviceNames(cid) cmds := []string{ fmt.Sprintf("ip link delete %s", peer), fmt.Sprintf("ip netns delete %s", cid), - - fmt.Sprintf("iptables -t nat -D POSTROUTING -s %s/24 -o %s -j MASQUERADE", c.ip, dev), - fmt.Sprintf("iptables -D FORWARD -i %s -o %s -j ACCEPT", dev, veth), - fmt.Sprintf("iptables -D FORWARD -o %s -i %s -j ACCEPT", dev, veth), } for _, cmd := range cmds { @@ -286,6 +314,10 @@ func (c *Do) cleanNet(cid, dev string) { log.Warningf("Failed to run %q: %v", cmd, err) } } + + tryRemove(resolvPath) + tryRemove(hostnamePath) + tryRemove(hostsPath) } func deviceNames(cid string) (string, string) { @@ -306,13 +338,16 @@ func defaultDevice() (string, error) { return parts[4], nil } -func makeFile(dest, content string, spec *specs.Spec) error { +func makeFile(dest, content string, spec *specs.Spec) (string, error) { tmpFile, err := ioutil.TempFile("", filepath.Base(dest)) if err != nil { - return err + return "", err } if _, err := tmpFile.WriteString(content); err != nil { - return err + if err := os.Remove(tmpFile.Name()); err != nil { + log.Warningf("Failed to remove %q: %v", tmpFile, err) + } + return "", err } spec.Mounts = append(spec.Mounts, specs.Mount{ Source: tmpFile.Name(), @@ -320,7 +355,17 @@ func makeFile(dest, content string, spec *specs.Spec) error { Type: "bind", Options: []string{"ro"}, }) - return nil + return tmpFile.Name(), nil +} + +func tryRemove(path string) { + if path == "" { + return + } + + if err := os.Remove(path); err != nil { + log.Warningf("Failed to remove %q: %v", path, err) + } } func calculatePeerIP(ip string) (string, error) { diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 6e06f3c0f..10448a759 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -168,7 +168,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Start with root mount, then add any other additional mount as needed. ats := make([]p9.Attacher, 0, len(spec.Mounts)+1) ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{ - ROMount: spec.Root.Readonly, + ROMount: spec.Root.Readonly || conf.Overlay, PanicOnWrite: g.panicOnWrite, }) if err != nil { @@ -181,7 +181,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) for _, m := range spec.Mounts { if specutils.Is9PMount(m) { cfg := fsgofer.Config{ - ROMount: isReadonlyMount(m.Options), + ROMount: isReadonlyMount(m.Options) || conf.Overlay, PanicOnWrite: g.panicOnWrite, HostUDS: conf.FSGoferHostUDS, } @@ -272,9 +272,8 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error { root := spec.Root.Path if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - // FIXME: runsc can't be re-executed without - // /proc, so we create a tmpfs mount, mount ./proc and ./root - // there, then move this mount to the root and after + // runsc can't be re-executed without /proc, so we create a tmpfs mount, + // mount ./proc and ./root there, then move this mount to the root and after // setCapsAndCallSelf, runsc will chroot into /root. // // We need a directory to construct a new root and we know that @@ -335,7 +334,7 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error { if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { if err := pivotRoot("/proc"); err != nil { - Fatalf("faild to change the root file system: %v", err) + Fatalf("failed to change the root file system: %v", err) } if err := os.Chdir("/"); err != nil { Fatalf("failed to change working directory") diff --git a/runsc/cmd/help.go b/runsc/cmd/help.go index c7d210140..cd85dabbb 100644 --- a/runsc/cmd/help.go +++ b/runsc/cmd/help.go @@ -65,16 +65,10 @@ func (h *Help) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{} switch f.NArg() { case 0: fmt.Fprintf(h.cdr.Output, "Usage: %s <flags> <subcommand> <subcommand args>\n\n", h.cdr.Name()) - fmt.Fprintf(h.cdr.Output, `runsc is a command line client for running applications packaged in the Open -Container Initiative (OCI) format. Applications run by runsc are run in an -isolated gVisor sandbox that emulates a Linux environment. + fmt.Fprintf(h.cdr.Output, `runsc is the gVisor container runtime. -gVisor is a user-space kernel, written in Go, that implements a substantial -portion of the Linux system call interface. It provides an additional layer -of isolation between running applications and the host operating system. - -Functionality is provided by subcommands. For additonal help on individual -subcommands use "%s %s <subcommand>". +Functionality is provided by subcommands. For help with a specific subcommand, +use "%s %s <subcommand>". `, h.cdr.Name(), h.Name()) h.cdr.VisitGroups(func(g *subcommands.CommandGroup) { diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go index 8e2b36e85..a2b0a4b14 100644 --- a/runsc/cmd/spec.go +++ b/runsc/cmd/spec.go @@ -16,6 +16,7 @@ package cmd import ( "context" + "fmt" "io/ioutil" "os" "path/filepath" @@ -24,7 +25,8 @@ import ( "gvisor.dev/gvisor/runsc/flag" ) -var specTemplate = []byte(`{ +func genSpec(cwd string) []byte { + var template = fmt.Sprintf(`{ "ociVersion": "1.0.0", "process": { "terminal": true, @@ -39,7 +41,7 @@ var specTemplate = []byte(`{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm" ], - "cwd": "/", + "cwd": "%s", "capabilities": { "bounding": [ "CAP_AUDIT_WRITE", @@ -123,11 +125,15 @@ var specTemplate = []byte(`{ } ] } -}`) +}`, cwd) + + return []byte(template) +} // Spec implements subcommands.Command for the "spec" command. type Spec struct { bundle string + cwd string } // Name implements subcommands.Command.Name. @@ -165,6 +171,8 @@ EXAMPLE: // SetFlags implements subcommands.Command.SetFlags. func (s *Spec) SetFlags(f *flag.FlagSet) { f.StringVar(&s.bundle, "bundle", ".", "path to the root of the OCI bundle") + f.StringVar(&s.cwd, "cwd", "/", "working directory that will be set for the executable, "+ + "this value MUST be an absolute path") } // Execute implements subcommands.Command.Execute. @@ -174,7 +182,9 @@ func (s *Spec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("file %q already exists", confPath) } - if err := ioutil.WriteFile(confPath, specTemplate, 0664); err != nil { + var spec = genSpec(s.cwd) + + if err := ioutil.WriteFile(confPath, spec, 0664); err != nil { Fatalf("writing to %q: %v", confPath, err) } diff --git a/runsc/cmd/statefile.go b/runsc/cmd/statefile.go new file mode 100644 index 000000000..e6f1907da --- /dev/null +++ b/runsc/cmd/statefile.go @@ -0,0 +1,143 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "fmt" + "os" + + "github.com/google/subcommands" + "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/state/statefile" + "gvisor.dev/gvisor/runsc/flag" +) + +// Statefile implements subcommands.Command for the "statefile" command. +type Statefile struct { + list bool + get string + key string + output string + html bool +} + +// Name implements subcommands.Command. +func (*Statefile) Name() string { + return "state" +} + +// Synopsis implements subcommands.Command. +func (*Statefile) Synopsis() string { + return "shows information about a statefile" +} + +// Usage implements subcommands.Command. +func (*Statefile) Usage() string { + return `statefile [flags] <statefile>` +} + +// SetFlags implements subcommands.Command. +func (s *Statefile) SetFlags(f *flag.FlagSet) { + f.BoolVar(&s.list, "list", false, "lists the metdata in the statefile.") + f.StringVar(&s.get, "get", "", "extracts the given metadata key.") + f.StringVar(&s.key, "key", "", "the integrity key for the file.") + f.StringVar(&s.output, "output", "", "target to write the result.") + f.BoolVar(&s.html, "html", false, "outputs in HTML format.") +} + +// Execute implements subcommands.Command.Execute. +func (s *Statefile) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + // Check arguments. + if s.list && s.get != "" { + Fatalf("error: can't specify -list and -get simultaneously.") + } + + // Setup output. + var output = os.Stdout // Default. + if s.output != "" { + f, err := os.OpenFile(s.output, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644) + if err != nil { + Fatalf("error opening output: %v", err) + } + defer func() { + if err := f.Close(); err != nil { + Fatalf("error flushing output: %v", err) + } + }() + output = f + } + + // Open the file. + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + input, err := os.Open(f.Arg(0)) + if err != nil { + Fatalf("error opening input: %v\n", err) + } + + if s.html { + fmt.Fprintf(output, "<html><body>\n") + defer fmt.Fprintf(output, "</body></html>\n") + } + + // Dump the full file? + if !s.list && s.get == "" { + var key []byte + if s.key != "" { + key = []byte(s.key) + } + rc, _, err := statefile.NewReader(input, key) + if err != nil { + Fatalf("error parsing statefile: %v", err) + } + if err := state.PrettyPrint(output, rc, s.html); err != nil { + Fatalf("error printing state: %v", err) + } + return subcommands.ExitSuccess + } + + // Load just the metadata. + metadata, err := statefile.MetadataUnsafe(input) + if err != nil { + Fatalf("error reading metadata: %v", err) + } + + // Is it a single key? + if s.get != "" { + val, ok := metadata[s.get] + if !ok { + Fatalf("metadata key %s: not found", s.get) + } + fmt.Fprintf(output, "%s\n", val) + return subcommands.ExitSuccess + } + + // List all keys. + if s.html { + fmt.Fprintf(output, " <ul>\n") + defer fmt.Fprintf(output, " </ul>\n") + } + for key := range metadata { + if s.html { + fmt.Fprintf(output, " <li>%s</li>\n", key) + } else { + fmt.Fprintf(output, "%s\n", key) + } + } + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go index 7072547be..a37d66139 100644 --- a/runsc/cmd/syscalls.go +++ b/runsc/cmd/syscalls.go @@ -32,9 +32,10 @@ import ( // Syscalls implements subcommands.Command for the "syscalls" command. type Syscalls struct { - output string - os string - arch string + format string + os string + arch string + filename string } // CompatibilityInfo is a map of system and architecture to compatibility doc. @@ -95,16 +96,17 @@ func (*Syscalls) Usage() string { // SetFlags implements subcommands.Command.SetFlags. func (s *Syscalls) SetFlags(f *flag.FlagSet) { - f.StringVar(&s.output, "o", "table", "Output format (table, csv, json).") + f.StringVar(&s.format, "format", "table", "Output format (table, csv, json).") f.StringVar(&s.os, "os", osAll, "The OS (e.g. linux)") f.StringVar(&s.arch, "arch", archAll, "The CPU architecture (e.g. amd64).") + f.StringVar(&s.filename, "filename", "", "Output filename (otherwise stdout).") } // Execute implements subcommands.Command.Execute. func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - out, ok := outputMap[s.output] + out, ok := outputMap[s.format] if !ok { - Fatalf("Unsupported output format %q", s.output) + Fatalf("Unsupported output format %q", s.format) } // Build map of all supported architectures. @@ -124,7 +126,14 @@ func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface Fatalf("%v", err) } - if err := out(os.Stdout, info); err != nil { + w := os.Stdout // Default. + if s.filename != "" { + w, err = os.OpenFile(s.filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) + if err != nil { + Fatalf("Error opening %q: %v", s.filename, err) + } + } + if err := out(w, info); err != nil { Fatalf("Error writing output: %v", err) } diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 0aaeea3a8..49cfb0837 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -15,8 +15,11 @@ go_library( "//test:__subpackages__", ], deps = [ + "//pkg/abi/linux", + "//pkg/cleanup", "//pkg/log", "//pkg/sentry/control", + "//pkg/sentry/sighandling", "//pkg/sync", "//runsc/boot", "//runsc/cgroup", @@ -33,33 +36,36 @@ go_test( size = "large", srcs = [ "console_test.go", + "container_norace_test.go", + "container_race_test.go", "container_test.go", "multi_container_test.go", "shared_volume_test.go", ], data = [ "//runsc", - "//runsc/container/test_app", + "//test/cmd/test_app", ], library = ":container", - shard_count = 5, + shard_count = 10, tags = [ "requires-kvm", ], deps = [ "//pkg/abi/linux", "//pkg/bits", + "//pkg/cleanup", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sync", + "//pkg/test/testutil", "//pkg/unet", "//pkg/urpc", "//runsc/boot", "//runsc/boot/platforms", "//runsc/specutils", - "//runsc/testutil", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_kr_pty//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index c2518d52b..3813c6b93 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -29,9 +29,9 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/urpc" - "gvisor.dev/gvisor/runsc/testutil" ) // socketPath creates a path inside bundleDir and ensures that the returned @@ -58,25 +58,26 @@ func socketPath(bundleDir string) (string, error) { } // createConsoleSocket creates a socket at the given path that will receive a -// console fd from the sandbox. If no error occurs, it returns the server -// socket and a cleanup function. -func createConsoleSocket(path string) (*unet.ServerSocket, func() error, error) { +// console fd from the sandbox. If an error occurs, t.Fatalf will be called. +// The function returning should be deferred as cleanup. +func createConsoleSocket(t *testing.T, path string) (*unet.ServerSocket, func()) { + t.Helper() srv, err := unet.BindAndListen(path, false) if err != nil { - return nil, nil, fmt.Errorf("error binding and listening to socket %q: %v", path, err) + t.Fatalf("error binding and listening to socket %q: %v", path, err) } - cleanup := func() error { + cleanup := func() { + // Log errors; nothing can be done. if err := srv.Close(); err != nil { - return fmt.Errorf("error closing socket %q: %v", path, err) + t.Logf("error closing socket %q: %v", path, err) } if err := os.Remove(path); err != nil { - return fmt.Errorf("error removing socket %q: %v", path, err) + t.Logf("error removing socket %q: %v", path, err) } - return nil } - return srv, cleanup, nil + return srv, cleanup } // receiveConsolePTY accepts a connection on the server socket and reads fds. @@ -118,63 +119,59 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) { // Test that an pty FD is sent over the console socket if one is provided. func TestConsoleSocket(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - spec := testutil.NewSpecWithArgs("true") - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configsWithVFS2(t, all...) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("true") + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - sock, err := socketPath(bundleDir) - if err != nil { - t.Fatalf("error getting socket path: %v", err) - } - srv, cleanup, err := createConsoleSocket(sock) - if err != nil { - t.Fatalf("error creating socket at %q: %v", sock, err) - } - defer cleanup() - - // Create the container and pass the socket name. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - ConsoleSocket: sock, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() + sock, err := socketPath(bundleDir) + if err != nil { + t.Fatalf("error getting socket path: %v", err) + } + srv, cleanup := createConsoleSocket(t, sock) + defer cleanup() + + // Create the container and pass the socket name. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: sock, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() - // Make sure we get a console PTY. - ptyMaster, err := receiveConsolePTY(srv) - if err != nil { - t.Fatalf("error receiving console FD: %v", err) - } - ptyMaster.Close() + // Make sure we get a console PTY. + ptyMaster, err := receiveConsolePTY(srv) + if err != nil { + t.Fatalf("error receiving console FD: %v", err) + } + ptyMaster.Close() + }) } } // Test that job control signals work on a console created with "exec -ti". func TestJobControlSignalExec(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -286,32 +283,28 @@ func TestJobControlSignalExec(t *testing.T) { // Test that job control signals work on a console created with "run -ti". func TestJobControlSignalRootContainer(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) // Don't let bash execute from profile or rc files, otherwise our PID // counts get messed up. spec := testutil.NewSpecWithArgs("/bin/bash", "--noprofile", "--norc") spec.Process.Terminal = true - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() sock, err := socketPath(bundleDir) if err != nil { t.Fatalf("error getting socket path: %v", err) } - srv, cleanup, err := createConsoleSocket(sock) - if err != nil { - t.Fatalf("error creating socket at %q: %v", sock, err) - } + srv, cleanup := createConsoleSocket(t, sock) defer cleanup() // Create the container and pass the socket name. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, ConsoleSocket: sock, @@ -333,13 +326,13 @@ func TestJobControlSignalRootContainer(t *testing.T) { // file. Writes after a certain point will block unless we drain the // PTY, so we must continually copy from it. // - // We log the output to stdout for debugabilitly, and also to a buffer, + // We log the output to stderr for debugabilitly, and also to a buffer, // since we wait on particular output from bash below. We use a custom // blockingBuffer which is thread-safe and also blocks on Read calls, // which makes this a suitable Reader for WaitUntilRead. ptyBuf := newBlockingBuffer() tee := io.TeeReader(ptyMaster, ptyBuf) - go io.Copy(os.Stdout, tee) + go io.Copy(os.Stderr, tee) // Start the container. if err := c.Start(conf); err != nil { @@ -368,7 +361,7 @@ func TestJobControlSignalRootContainer(t *testing.T) { {PID: 1, Cmd: "bash", Threads: []kernel.ThreadID{1}}, } if err := waitForProcessList(c, expectedPL); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } // Execute sleep via the terminal. @@ -377,7 +370,7 @@ func TestJobControlSignalRootContainer(t *testing.T) { // Wait for sleep to start. expectedPL = append(expectedPL, &control.Process{PID: 2, PPID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{2}}) if err := waitForProcessList(c, expectedPL); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } // Reset the pty buffer, so there is less output for us to scan later. diff --git a/runsc/container/container.go b/runsc/container/container.go index 68782c4be..6d297d0df 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -17,11 +17,11 @@ package container import ( "context" + "errors" "fmt" "io/ioutil" "os" "os/exec" - "os/signal" "regexp" "strconv" "strings" @@ -30,8 +30,11 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/sighandling" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/cgroup" "gvisor.dev/gvisor/runsc/sandbox" @@ -273,7 +276,7 @@ func New(conf *boot.Config, args Args) (*Container, error) { } if err := os.MkdirAll(conf.RootDir, 0711); err != nil { - return nil, fmt.Errorf("creating container root directory: %v", err) + return nil, fmt.Errorf("creating container root directory %q: %v", conf.RootDir, err) } c := &Container{ @@ -291,7 +294,7 @@ func New(conf *boot.Config, args Args) (*Container, error) { } // The Cleanup object cleans up partially created containers when an error // occurs. Any errors occurring during cleanup itself are ignored. - cu := specutils.MakeCleanup(func() { _ = c.Destroy() }) + cu := cleanup.Make(func() { _ = c.Destroy() }) defer cu.Clean() // Lock the container metadata file to prevent concurrent creations of @@ -400,7 +403,7 @@ func (c *Container) Start(conf *boot.Config) error { if err := c.Saver.lock(); err != nil { return err } - unlock := specutils.MakeCleanup(func() { c.Saver.unlock() }) + unlock := cleanup.Make(func() { c.Saver.unlock() }) defer unlock.Clean() if err := c.requireStatus("start", Created); err != nil { @@ -420,7 +423,7 @@ func (c *Container) Start(conf *boot.Config) error { return err } } else { - // Join cgroup to strt gofer process to ensure it's part of the cgroup from + // Join cgroup to start gofer process to ensure it's part of the cgroup from // the start (and all their children processes). if err := runInCgroup(c.Sandbox.Cgroup, func() error { // Create the gofer process. @@ -504,7 +507,7 @@ func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) { } // Clean up partially created container if an error occurs. // Any errors returned by Destroy() itself are ignored. - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { c.Destroy() }) defer cu.Clean() @@ -620,21 +623,15 @@ func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error { // forwarding signals. func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() { log.Debugf("Forwarding all signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess) - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh) - go func() { - for s := range sigCh { - log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", s, c.ID, pid, fgProcess) - if err := c.Sandbox.SignalProcess(c.ID, pid, s.(syscall.Signal), fgProcess); err != nil { - log.Warningf("error forwarding signal %d to container %q: %v", s, c.ID, err) - } + stop := sighandling.StartSignalForwarding(func(sig linux.Signal) { + log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", sig, c.ID, pid, fgProcess) + if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.Signal(sig), fgProcess); err != nil { + log.Warningf("error forwarding signal %d to container %q: %v", sig, c.ID, err) } - log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess) - }() - + }) return func() { - signal.Stop(sigCh) - close(sigCh) + log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess) + stop() } } @@ -1066,27 +1063,19 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error { // adjustGoferOOMScoreAdj sets the oom_store_adj for the container's gofer. func (c *Container) adjustGoferOOMScoreAdj() error { - if c.GoferPid != 0 && c.Spec.Process.OOMScoreAdj != nil { - if err := setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj); err != nil { - // Ignore NotExist error because it can be returned when the sandbox - // exited while OOM score was being adjusted. - if !os.IsNotExist(err) { - return fmt.Errorf("setting gofer oom_score_adj for container %q: %v", c.ID, err) - } - log.Warningf("Gofer process (%d) not found setting oom_score_adj", c.GoferPid) - } + if c.GoferPid == 0 || c.Spec.Process.OOMScoreAdj == nil { + return nil } - - return nil + return setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj) } // adjustSandboxOOMScoreAdj sets the oom_score_adj for the sandbox. // oom_score_adj is set to the lowest oom_score_adj among the containers // running in the sandbox. // -// TODO(gvisor.dev/issue/512): This call could race with other containers being +// TODO(gvisor.dev/issue/238): This call could race with other containers being // created at the same time and end up setting the wrong oom_score_adj to the -// sandbox. +// sandbox. Use rpc client to synchronize. func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) error { containers, err := loadSandbox(rootDir, s.ID) if err != nil { @@ -1154,29 +1143,29 @@ func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) } // Set the lowest of all containers oom_score_adj to the sandbox. - if err := setOOMScoreAdj(s.Pid, lowScore); err != nil { - // Ignore NotExist error because it can be returned when the sandbox - // exited while OOM score was being adjusted. - if !os.IsNotExist(err) { - return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", s.ID, err) - } - log.Warningf("Sandbox process (%d) not found setting oom_score_adj", s.Pid) - } - - return nil + return setOOMScoreAdj(s.Pid, lowScore) } // setOOMScoreAdj sets oom_score_adj to the given value for the given PID. // /proc must be available and mounted read-write. scoreAdj should be between -// -1000 and 1000. +// -1000 and 1000. It's a noop if the process has already exited. func setOOMScoreAdj(pid int, scoreAdj int) error { f, err := os.OpenFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid), os.O_WRONLY, 0644) if err != nil { + // Ignore NotExist errors because it can race with process exit. + if os.IsNotExist(err) { + log.Warningf("Process (%d) not found setting oom_score_adj", pid) + return nil + } return err } defer f.Close() if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil { - return err + if errors.Is(err, syscall.ESRCH) { + log.Warningf("Process (%d) exited while setting oom_score_adj", pid) + return nil + } + return fmt.Errorf("setting oom_score_adj to %q: %v", scoreAdj, err) } return nil } diff --git a/runsc/boot/loader_amd64.go b/runsc/container/container_norace_test.go index b9669f2ac..838c1e20a 100644 --- a/runsc/boot/loader_amd64.go +++ b/runsc/container/container_norace_test.go @@ -1,4 +1,4 @@ -// Copyright 2019 The gVisor Authors. +// Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,16 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build amd64 +// +build !race -package boot +package container -import ( - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" -) - -func init() { - // Register the global syscall table. - kernel.RegisterSyscallTable(linux.AMD64) -} +// Allow both kvm and ptrace for non-race builds. +var platformOptions = []configOption{ptrace, kvm} diff --git a/runsc/boot/loader_arm64.go b/runsc/container/container_race_test.go index cf64d28c8..9fb4c4fc0 100644 --- a/runsc/boot/loader_arm64.go +++ b/runsc/container/container_race_test.go @@ -1,4 +1,4 @@ -// Copyright 2019 The gVisor Authors. +// Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,16 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build arm64 +// +build race -package boot +package container -import ( - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" -) - -func init() { - // Register the global syscall table. - kernel.RegisterSyscallTable(linux.ARM64) -} +// Only enabled ptrace with race builds. +var platformOptions = []configOption{ptrace} diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 04a7dc237..e7715b6f7 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -39,10 +39,10 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" ) // waitForProcessList waits for the given process list to show up in the container. @@ -71,6 +71,7 @@ func waitForProcessCount(cont *Container, want int) error { return &backoff.PermanentError{Err: err} } if got := len(pss); got != want { + log.Infof("Waiting for process count to reach %d. Current: %d", want, got) return fmt.Errorf("wrong process count, got: %d, want: %d", got, want) } return nil @@ -123,23 +124,6 @@ func procListsEqual(got, want []*control.Process) (bool, error) { return true, nil } -// getAndCheckProcLists is similar to waitForProcessList, but does not wait and retry the -// test for equality. This is because we already confirmed that exec occurred. -func getAndCheckProcLists(cont *Container, want []*control.Process) error { - got, err := cont.Processes() - if err != nil { - return fmt.Errorf("error getting process data from container: %v", err) - } - equal, err := procListsEqual(got, want) - if err != nil { - return err - } - if equal { - return nil - } - return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want)) -} - func procListToString(pl []*control.Process) string { strs := make([]string, 0, len(pl)) for _, p := range pl { @@ -231,16 +215,15 @@ func readOutputNum(file string, position int) (int, error) { // run starts the sandbox and waits for it to exit, checking that the // application succeeded. func run(spec *specs.Spec, conf *boot.Config) error { - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { return fmt.Errorf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create, start and wait for the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, Attached: true, @@ -259,39 +242,64 @@ type configOption int const ( overlay configOption = iota + ptrace kvm nonExclusiveFS ) -var noOverlay = []configOption{kvm, nonExclusiveFS} -var all = append(noOverlay, overlay) +var ( + noOverlay = append(platformOptions, nonExclusiveFS) + all = append(noOverlay, overlay) +) // configs generates different configurations to run tests. -func configs(opts ...configOption) []*boot.Config { +func configs(t *testing.T, opts ...configOption) map[string]*boot.Config { // Always load the default config. - cs := []*boot.Config{testutil.TestConfig()} - + cs := make(map[string]*boot.Config) for _, o := range opts { - c := testutil.TestConfig() switch o { case overlay: + c := testutil.TestConfig(t) c.Overlay = true + cs["overlay"] = c + case ptrace: + c := testutil.TestConfig(t) + c.Platform = platforms.Ptrace + cs["ptrace"] = c case kvm: - // TODO(b/112165693): KVM tests are flaky. Disable until fixed. - continue - + c := testutil.TestConfig(t) c.Platform = platforms.KVM + cs["kvm"] = c case nonExclusiveFS: + c := testutil.TestConfig(t) c.FileAccess = boot.FileAccessShared + cs["non-exclusive"] = c default: panic(fmt.Sprintf("unknown config option %v", o)) - } - cs = append(cs, c) } return cs } +func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*boot.Config { + vfs1 := configs(t, opts...) + + var optsVFS2 []configOption + for _, opt := range opts { + // TODO(gvisor.dev/issue/1487): Enable overlay tests. + if opt != overlay { + optsVFS2 = append(optsVFS2, opt) + } + } + + for key, value := range configs(t, optsVFS2...) { + value.VFS2 = true + vfs1[key+"VFS2"] = value + } + + return vfs1 +} + // TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle. // It verifies after each step that the container can be loaded from disk, and // has the correct status. @@ -301,133 +309,133 @@ func TestLifecycle(t *testing.T) { childReaper.Start() defer childReaper.Stop() - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - // The container will just sleep for a long time. We will kill it before - // it finishes sleeping. - spec := testutil.NewSpecWithArgs("sleep", "100") - - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) - - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, - } - // Create the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() + for name, conf := range configsWithVFS2(t, all...) { + t.Run(name, func(t *testing.T) { + // The container will just sleep for a long time. We will kill it before + // it finishes sleeping. + spec := testutil.NewSpecWithArgs("sleep", "100") - // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) - if err != nil { - t.Fatalf("error loading container: %v", err) - } - if got, want := c.Status, Created; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // List should return the container id. - ids, err := List(rootDir) - if err != nil { - t.Fatalf("error listing containers: %v", err) - } - if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) { - t.Errorf("container list got %v, want %v", got, want) - } + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{1}, + }, + } + // Create the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() - // Start the container. - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Load the container from disk and check the status. + c, err = Load(rootDir, args.ID) + if err != nil { + t.Fatalf("error loading container: %v", err) + } + if got, want := c.Status, Created; got != want { + t.Errorf("container status got %v, want %v", got, want) + } - // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) - if err != nil { - t.Fatalf("error loading container: %v", err) - } - if got, want := c.Status, Running; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + // List should return the container id. + ids, err := List(rootDir) + if err != nil { + t.Fatalf("error listing containers: %v", err) + } + if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) { + t.Errorf("container list got %v, want %v", got, want) + } - // Verify that "sleep 100" is running. - if err := waitForProcessList(c, expectedPL); err != nil { - t.Error(err) - } + // Start the container. + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Wait on the container. - var wg sync.WaitGroup - wg.Add(1) - ch := make(chan struct{}) - go func() { - ch <- struct{}{} - ws, err := c.Wait() + // Load the container from disk and check the status. + c, err = Load(rootDir, args.ID) if err != nil { - t.Fatalf("error waiting on container: %v", err) + t.Fatalf("error loading container: %v", err) } - if got, want := ws.Signal(), syscall.SIGTERM; got != want { - t.Fatalf("got signal %v, want %v", got, want) + if got, want := c.Status, Running; got != want { + t.Errorf("container status got %v, want %v", got, want) } - wg.Done() - }() - // Wait a bit to ensure that we've started waiting on the - // container before we signal. - <-ch - time.Sleep(100 * time.Millisecond) - // Send the container a SIGTERM which will cause it to stop. - if err := c.SignalContainer(syscall.SIGTERM, false); err != nil { - t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err) - } - // Wait for it to die. - wg.Wait() + // Verify that "sleep 100" is running. + if err := waitForProcessList(c, expectedPL); err != nil { + t.Error(err) + } - // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) - if err != nil { - t.Fatalf("error loading container: %v", err) - } - if got, want := c.Status, Stopped; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + // Wait on the container. + ch := make(chan error) + go func() { + ws, err := c.Wait() + if err != nil { + ch <- err + } + if got, want := ws.Signal(), syscall.SIGTERM; got != want { + ch <- fmt.Errorf("got signal %v, want %v", got, want) + } + ch <- nil + }() - // Destroy the container. - if err := c.Destroy(); err != nil { - t.Fatalf("error destroying container: %v", err) - } + // Wait a bit to ensure that we've started waiting on + // the container before we signal. + time.Sleep(time.Second) - // List should not return the container id. - ids, err = List(rootDir) - if err != nil { - t.Fatalf("error listing containers: %v", err) - } - if len(ids) != 0 { - t.Errorf("expected container list to be empty, but got %v", ids) - } + // Send the container a SIGTERM which will cause it to stop. + if err := c.SignalContainer(syscall.SIGTERM, false); err != nil { + t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err) + } - // Loading the container by id should fail. - if _, err = Load(rootDir, args.ID); err == nil { - t.Errorf("expected loading destroyed container to fail, but it did not") - } + // Wait for it to die. + if err := <-ch; err != nil { + t.Fatalf("error waiting for container: %v", err) + } + + // Load the container from disk and check the status. + c, err = Load(rootDir, args.ID) + if err != nil { + t.Fatalf("error loading container: %v", err) + } + if got, want := c.Status, Stopped; got != want { + t.Errorf("container status got %v, want %v", got, want) + } + + // Destroy the container. + if err := c.Destroy(); err != nil { + t.Fatalf("error destroying container: %v", err) + } + + // List should not return the container id. + ids, err = List(rootDir) + if err != nil { + t.Fatalf("error listing containers: %v", err) + } + if len(ids) != 0 { + t.Errorf("expected container list to be empty, but got %v", ids) + } + + // Loading the container by id should fail. + if _, err = Load(rootDir, args.ID); err == nil { + t.Errorf("expected loading destroyed container to fail, but it did not") + } + }) } } @@ -436,12 +444,14 @@ func TestExePath(t *testing.T) { // Create two directories that will be prepended to PATH. firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first") if err != nil { - t.Fatal(err) + t.Fatalf("error creating temporary directory: %v", err) } + defer os.RemoveAll(firstPath) secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second") if err != nil { - t.Fatal(err) + t.Fatalf("error creating temporary directory: %v", err) } + defer os.RemoveAll(secondPath) // Create two minimal executables in the second path, two of which // will be masked by files in first path. @@ -449,11 +459,11 @@ func TestExePath(t *testing.T) { path := filepath.Join(secondPath, p) f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777) if err != nil { - t.Fatal(err) + t.Fatalf("error opening path: %v", err) } defer f.Close() if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil { - t.Fatal(err) + t.Fatalf("error writing contents: %v", err) } } @@ -462,7 +472,7 @@ func TestExePath(t *testing.T) { nonExecutable := filepath.Join(firstPath, "masked1") f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666) if err != nil { - t.Fatal(err) + t.Fatalf("error opening file: %v", err) } f2.Close() @@ -470,85 +480,95 @@ func TestExePath(t *testing.T) { // executable in the second. nonRegular := filepath.Join(firstPath, "masked2") if err := os.Mkdir(nonRegular, 0777); err != nil { - t.Fatal(err) - } - - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - for _, test := range []struct { - path string - success bool - }{ - {path: "true", success: true}, - {path: "bin/true", success: true}, - {path: "/bin/true", success: true}, - {path: "thisfiledoesntexit", success: false}, - {path: "bin/thisfiledoesntexit", success: false}, - {path: "/bin/thisfiledoesntexit", success: false}, - - {path: "unmasked", success: true}, - {path: filepath.Join(firstPath, "unmasked"), success: false}, - {path: filepath.Join(secondPath, "unmasked"), success: true}, - - {path: "masked1", success: true}, - {path: filepath.Join(firstPath, "masked1"), success: false}, - {path: filepath.Join(secondPath, "masked1"), success: true}, - - {path: "masked2", success: true}, - {path: filepath.Join(firstPath, "masked2"), success: false}, - {path: filepath.Join(secondPath, "masked2"), success: true}, - } { - spec := testutil.NewSpecWithArgs(test.path) - spec.Process.Env = []string{ - fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")), - } - - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("exec: %s, error setting up container: %v", test.path, err) - } - - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - Attached: true, - } - ws, err := Run(conf, args) + t.Fatalf("error making directory: %v", err) + } + + for name, conf := range configsWithVFS2(t, overlay) { + t.Run(name, func(t *testing.T) { + for _, test := range []struct { + path string + success bool + }{ + {path: "true", success: true}, + {path: "bin/true", success: true}, + {path: "/bin/true", success: true}, + {path: "thisfiledoesntexit", success: false}, + {path: "bin/thisfiledoesntexit", success: false}, + {path: "/bin/thisfiledoesntexit", success: false}, + + {path: "unmasked", success: true}, + {path: filepath.Join(firstPath, "unmasked"), success: false}, + {path: filepath.Join(secondPath, "unmasked"), success: true}, + + {path: "masked1", success: true}, + {path: filepath.Join(firstPath, "masked1"), success: false}, + {path: filepath.Join(secondPath, "masked1"), success: true}, + + {path: "masked2", success: true}, + {path: filepath.Join(firstPath, "masked2"), success: false}, + {path: filepath.Join(secondPath, "masked2"), success: true}, + } { + t.Run(fmt.Sprintf("path=%s,success=%t", test.path, test.success), func(t *testing.T) { + spec := testutil.NewSpecWithArgs(test.path) + spec.Process.Env = []string{ + fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")), + } - os.RemoveAll(rootDir) - os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("exec: error setting up container: %v", err) + } + defer cleanup() - if test.success { - if err != nil { - t.Errorf("exec: %s, error running container: %v", test.path, err) - } - if ws.ExitStatus() != 0 { - t.Errorf("exec: %s, got exit status %v want %v", test.path, ws.ExitStatus(), 0) - } - } else { - if err == nil { - t.Errorf("exec: %s, got: no error, want: error", test.path) - } + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + Attached: true, + } + ws, err := Run(conf, args) + + if test.success { + if err != nil { + t.Errorf("exec: error running container: %v", err) + } + if ws.ExitStatus() != 0 { + t.Errorf("exec: got exit status %v want %v", ws.ExitStatus(), 0) + } + } else { + if err == nil { + t.Errorf("exec: got: no error, want: error") + } + } + }) } - } + }) } } // Test the we can retrieve the application exit status from the container. func TestAppExitStatus(t *testing.T) { + doAppExitStatus(t, false) +} + +// This is TestAppExitStatus for VFSv2. +func TestAppExitStatusVFS2(t *testing.T) { + doAppExitStatus(t, true) +} + +func doAppExitStatus(t *testing.T, vfs2 bool) { // First container will succeed. succSpec := testutil.NewSpecWithArgs("true") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(succSpec, conf) + conf := testutil.TestConfig(t) + conf.VFS2 = vfs2 + _, bundleDir, cleanup, err := testutil.SetupContainer(succSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: succSpec, BundleDir: bundleDir, Attached: true, @@ -565,15 +585,14 @@ func TestAppExitStatus(t *testing.T) { wantStatus := 123 errSpec := testutil.NewSpecWithArgs("bash", "-c", fmt.Sprintf("exit %d", wantStatus)) - rootDir2, bundleDir2, err := testutil.SetupContainer(errSpec, conf) + _, bundleDir2, cleanup2, err := testutil.SetupContainer(errSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir2) - defer os.RemoveAll(bundleDir2) + defer cleanup2() args2 := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: errSpec, BundleDir: bundleDir2, Attached: true, @@ -589,166 +608,163 @@ func TestAppExitStatus(t *testing.T) { // TestExec verifies that a container can exec a new program. func TestExec(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) + for name, conf := range configsWithVFS2(t, overlay) { + t.Run(name, func(t *testing.T) { + const uid = 343 + spec := testutil.NewSpecWithArgs("sleep", "100") - const uid = 343 - spec := testutil.NewSpecWithArgs("sleep", "100") + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{1}, + }, + { + UID: uid, + PID: 2, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{2}, + }, + } - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, - { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{2}, - }, - } + // Verify that "sleep 100" is running. + if err := waitForProcessList(cont, expectedPL[:1]); err != nil { + t.Error(err) + } - // Verify that "sleep 100" is running. - if err := waitForProcessList(cont, expectedPL[:1]); err != nil { - t.Error(err) - } + execArgs := &control.ExecArgs{ + Filename: "/bin/sleep", + Argv: []string{"/bin/sleep", "5"}, + WorkingDirectory: "/", + KUID: uid, + } - execArgs := &control.ExecArgs{ - Filename: "/bin/sleep", - Argv: []string{"/bin/sleep", "5"}, - WorkingDirectory: "/", - KUID: uid, - } + // Verify that "sleep 100" and "sleep 5" are running + // after exec. First, start running exec (whick + // blocks). + ch := make(chan error) + go func() { + exitStatus, err := cont.executeSync(execArgs) + if err != nil { + ch <- err + } else if exitStatus != 0 { + ch <- fmt.Errorf("failed with exit status: %v", exitStatus) + } else { + ch <- nil + } + }() - // Verify that "sleep 100" and "sleep 5" are running after exec. - // First, start running exec (whick blocks). - status := make(chan error, 1) - go func() { - exitStatus, err := cont.executeSync(execArgs) - if err != nil { - log.Debugf("error executing: %v", err) - status <- err - } else if exitStatus != 0 { - log.Debugf("bad status: %d", exitStatus) - status <- fmt.Errorf("failed with exit status: %v", exitStatus) - } else { - status <- nil + if err := waitForProcessList(cont, expectedPL); err != nil { + t.Fatalf("error waiting for processes: %v", err) } - }() - if err := waitForProcessList(cont, expectedPL); err != nil { - t.Fatal(err) - } - - // Ensure that exec finished without error. - select { - case <-time.After(10 * time.Second): - t.Fatalf("container timed out waiting for exec to finish.") - case st := <-status: - if st != nil { - t.Errorf("container failed to exec %v: %v", args, err) + // Ensure that exec finished without error. + select { + case <-time.After(10 * time.Second): + t.Fatalf("container timed out waiting for exec to finish.") + case err := <-ch: + if err != nil { + t.Errorf("container failed to exec %v: %v", args, err) + } } - } + }) } } // TestKillPid verifies that we can signal individual exec'd processes. func TestKillPid(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - app, err := testutil.FindFile("runsc/container/test_app/test_app") - if err != nil { - t.Fatal("error finding test_app:", err) - } + for name, conf := range configsWithVFS2(t, overlay) { + t.Run(name, func(t *testing.T) { + app, err := testutil.FindFile("test/cmd/test_app/test_app") + if err != nil { + t.Fatal("error finding test_app:", err) + } - const nProcs = 4 - spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true") - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + const nProcs = 4 + spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true") + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Verify that all processes are running. - if err := waitForProcessCount(cont, nProcs); err != nil { - t.Fatalf("timed out waiting for processes to start: %v", err) - } + // Verify that all processes are running. + if err := waitForProcessCount(cont, nProcs); err != nil { + t.Fatalf("timed out waiting for processes to start: %v", err) + } - // Kill the child process with the largest PID. - procs, err := cont.Processes() - if err != nil { - t.Fatalf("failed to get process list: %v", err) - } - var pid int32 - for _, p := range procs { - if pid < int32(p.PID) { - pid = int32(p.PID) + // Kill the child process with the largest PID. + procs, err := cont.Processes() + if err != nil { + t.Fatalf("failed to get process list: %v", err) + } + var pid int32 + for _, p := range procs { + if pid < int32(p.PID) { + pid = int32(p.PID) + } + } + if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil { + t.Fatalf("failed to signal process %d: %v", pid, err) } - } - if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil { - t.Fatalf("failed to signal process %d: %v", pid, err) - } - // Verify that one process is gone. - if err := waitForProcessCount(cont, nProcs-1); err != nil { - t.Fatal(err) - } + // Verify that one process is gone. + if err := waitForProcessCount(cont, nProcs-1); err != nil { + t.Fatalf("error waiting for processes: %v", err) + } - procs, err = cont.Processes() - if err != nil { - t.Fatalf("failed to get process list: %v", err) - } - for _, p := range procs { - if pid == int32(p.PID) { - t.Fatalf("pid %d is still alive, which should be killed", pid) + procs, err = cont.Processes() + if err != nil { + t.Fatalf("failed to get process list: %v", err) } - } + for _, p := range procs { + if pid == int32(p.PID) { + t.Fatalf("pid %d is still alive, which should be killed", pid) + } + } + }) } } @@ -759,160 +775,160 @@ func TestKillPid(t *testing.T) { // be the next consecutive number after the last number from the checkpointed container. func TestCheckpointRestore(t *testing.T) { // Skip overlay because test requires writing to host file. - for _, conf := range configs(noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test") - if err != nil { - t.Fatalf("ioutil.TempDir failed: %v", err) - } - if err := os.Chmod(dir, 0777); err != nil { - t.Fatalf("error chmoding file: %q, %v", dir, err) - } + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test") + if err != nil { + t.Fatalf("ioutil.TempDir failed: %v", err) + } + defer os.RemoveAll(dir) + if err := os.Chmod(dir, 0777); err != nil { + t.Fatalf("error chmoding file: %q, %v", dir, err) + } - outputPath := filepath.Join(dir, "output") - outputFile, err := createWriteableOutputFile(outputPath) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile.Close() + outputPath := filepath.Join(dir, "output") + outputFile, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile.Close() - script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath) - spec := testutil.NewSpecWithArgs("bash", "-c", script) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath) + spec := testutil.NewSpecWithArgs("bash", "-c", script) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Set the image path, which is where the checkpoint image will be saved. - imagePath := filepath.Join(dir, "test-image-file") + // Set the image path, which is where the checkpoint image will be saved. + imagePath := filepath.Join(dir, "test-image-file") - // Create the image file and open for writing. - file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) - if err != nil { - t.Fatalf("error opening new file at imagePath: %v", err) - } - defer file.Close() + // Create the image file and open for writing. + file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) + if err != nil { + t.Fatalf("error opening new file at imagePath: %v", err) + } + defer file.Close() - // Wait until application has ran. - if err := waitForFileNotEmpty(outputFile); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(file); err != nil { - t.Fatalf("error checkpointing container to empty file: %v", err) - } - defer os.RemoveAll(imagePath) + // Checkpoint running container; save state into new file. + if err := cont.Checkpoint(file); err != nil { + t.Fatalf("error checkpointing container to empty file: %v", err) + } + defer os.RemoveAll(imagePath) - lastNum, err := readOutputNum(outputPath, -1) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + lastNum, err := readOutputNum(outputPath, -1) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Delete and recreate file before restoring. - if err := os.Remove(outputPath); err != nil { - t.Fatalf("error removing file") - } - outputFile2, err := createWriteableOutputFile(outputPath) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile2.Close() + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile2, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile2.Close() - // Restore into a new container. - args2 := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont2, err := New(conf, args2) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont2.Destroy() + // Restore into a new container. + args2 := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont2, err := New(conf, args2) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont2.Destroy() - if err := cont2.Restore(spec, conf, imagePath); err != nil { - t.Fatalf("error restoring container: %v", err) - } + if err := cont2.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } - // Wait until application has ran. - if err := waitForFileNotEmpty(outputFile2); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - firstNum, err := readOutputNum(outputPath, 0) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + firstNum, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Check that lastNum is one less than firstNum and that the container picks - // up from where it left off. - if lastNum+1 != firstNum { - t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum) - } - cont2.Destroy() + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. + if lastNum+1 != firstNum { + t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum) + } + cont2.Destroy() - // Restore into another container! - // Delete and recreate file before restoring. - if err := os.Remove(outputPath); err != nil { - t.Fatalf("error removing file") - } - outputFile3, err := createWriteableOutputFile(outputPath) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile3.Close() + // Restore into another container! + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile3, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile3.Close() - // Restore into a new container. - args3 := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont3, err := New(conf, args3) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont3.Destroy() + // Restore into a new container. + args3 := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont3, err := New(conf, args3) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont3.Destroy() - if err := cont3.Restore(spec, conf, imagePath); err != nil { - t.Fatalf("error restoring container: %v", err) - } + if err := cont3.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } - // Wait until application has ran. - if err := waitForFileNotEmpty(outputFile3); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile3); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - firstNum2, err := readOutputNum(outputPath, 0) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + firstNum2, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Check that lastNum is one less than firstNum and that the container picks - // up from where it left off. - if lastNum+1 != firstNum2 { - t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2) - } - cont3.Destroy() + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. + if lastNum+1 != firstNum2 { + t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2) + } + cont3.Destroy() + }) } } @@ -920,135 +936,134 @@ func TestCheckpointRestore(t *testing.T) { // with filesystem Unix Domain Socket use. func TestUnixDomainSockets(t *testing.T) { // Skip overlay because test requires writing to host file. - for _, conf := range configs(noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - - // UDS path is limited to 108 chars for compatibility with older systems. - // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is - // not exceeded. Assumes '/tmp' exists in the system. - dir, err := ioutil.TempDir("/tmp", "uds-test") - if err != nil { - t.Fatalf("ioutil.TempDir failed: %v", err) - } - defer os.RemoveAll(dir) + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + // UDS path is limited to 108 chars for compatibility with older systems. + // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is + // not exceeded. Assumes '/tmp' exists in the system. + dir, err := ioutil.TempDir("/tmp", "uds-test") + if err != nil { + t.Fatalf("ioutil.TempDir failed: %v", err) + } + defer os.RemoveAll(dir) - outputPath := filepath.Join(dir, "uds_output") - outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile.Close() + outputPath := filepath.Join(dir, "uds_output") + outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile.Close() - app, err := testutil.FindFile("runsc/container/test_app/test_app") - if err != nil { - t.Fatal("error finding test_app:", err) - } + app, err := testutil.FindFile("test/cmd/test_app/test_app") + if err != nil { + t.Fatal("error finding test_app:", err) + } - socketPath := filepath.Join(dir, "uds_socket") - defer os.Remove(socketPath) + socketPath := filepath.Join(dir, "uds_socket") + defer os.Remove(socketPath) - spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath) - spec.Process.User = specs.User{ - UID: uint32(os.Getuid()), - GID: uint32(os.Getgid()), - } - spec.Mounts = []specs.Mount{{ - Type: "bind", - Destination: dir, - Source: dir, - }} + spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath) + spec.Process.User = specs.User{ + UID: uint32(os.Getuid()), + GID: uint32(os.Getgid()), + } + spec.Mounts = []specs.Mount{{ + Type: "bind", + Destination: dir, + Source: dir, + }} - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Set the image path, the location where the checkpoint image will be saved. - imagePath := filepath.Join(dir, "test-image-file") + // Set the image path, the location where the checkpoint image will be saved. + imagePath := filepath.Join(dir, "test-image-file") - // Create the image file and open for writing. - file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) - if err != nil { - t.Fatalf("error opening new file at imagePath: %v", err) - } - defer file.Close() - defer os.RemoveAll(imagePath) + // Create the image file and open for writing. + file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) + if err != nil { + t.Fatalf("error opening new file at imagePath: %v", err) + } + defer file.Close() + defer os.RemoveAll(imagePath) - // Wait until application has ran. - if err := waitForFileNotEmpty(outputFile); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(file); err != nil { - t.Fatalf("error checkpointing container to empty file: %v", err) - } + // Checkpoint running container; save state into new file. + if err := cont.Checkpoint(file); err != nil { + t.Fatalf("error checkpointing container to empty file: %v", err) + } - // Read last number outputted before checkpoint. - lastNum, err := readOutputNum(outputPath, -1) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + // Read last number outputted before checkpoint. + lastNum, err := readOutputNum(outputPath, -1) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Delete and recreate file before restoring. - if err := os.Remove(outputPath); err != nil { - t.Fatalf("error removing file") - } - outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile2.Close() + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile2.Close() - // Restore into a new container. - argsRestore := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - contRestore, err := New(conf, argsRestore) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer contRestore.Destroy() + // Restore into a new container. + argsRestore := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + contRestore, err := New(conf, argsRestore) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer contRestore.Destroy() - if err := contRestore.Restore(spec, conf, imagePath); err != nil { - t.Fatalf("error restoring container: %v", err) - } + if err := contRestore.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } - // Wait until application has ran. - if err := waitForFileNotEmpty(outputFile2); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - // Read first number outputted after restore. - firstNum, err := readOutputNum(outputPath, 0) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + // Read first number outputted after restore. + firstNum, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Check that lastNum is one less than firstNum. - if lastNum+1 != firstNum { - t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum) - } - contRestore.Destroy() + // Check that lastNum is one less than firstNum. + if lastNum+1 != firstNum { + t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum) + } + contRestore.Destroy() + }) } } @@ -1058,10 +1073,8 @@ func TestUnixDomainSockets(t *testing.T) { // recreated. Then it resumes the container, verify that the file gets created // again. func TestPauseResume(t *testing.T) { - for _, conf := range configs(noOverlay...) { - t.Run(fmt.Sprintf("conf: %+v", conf), func(t *testing.T) { - t.Logf("Running test with conf: %+v", conf) - + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "lock") if err != nil { t.Fatalf("error creating temp dir: %v", err) @@ -1072,16 +1085,15 @@ func TestPauseResume(t *testing.T) { script := fmt.Sprintf("while [[ true ]]; do touch %q; sleep 0.1; done", running) spec := testutil.NewSpecWithArgs("/bin/bash", "-c", script) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1139,17 +1151,16 @@ func TestPauseResume(t *testing.T) { // occurs given the correct state. func TestPauseResumeStatus(t *testing.T) { spec := testutil.NewSpecWithArgs("sleep", "20") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1205,359 +1216,365 @@ func TestCapabilities(t *testing.T) { uid := auth.KUID(os.Getuid() + 1) gid := auth.KGID(os.Getgid() + 1) - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - spec := testutil.NewSpecWithArgs("sleep", "100") - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configsWithVFS2(t, all...) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("sleep", "100") + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, - { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "exe", - Threads: []kernel.ThreadID{2}, - }, - } - if err := waitForProcessList(cont, expectedPL[:1]); err != nil { - t.Fatalf("Failed to wait for sleep to start, err: %v", err) - } + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{1}, + }, + { + UID: uid, + PID: 2, + PPID: 0, + C: 0, + Cmd: "exe", + Threads: []kernel.ThreadID{2}, + }, + } + if err := waitForProcessList(cont, expectedPL[:1]); err != nil { + t.Fatalf("Failed to wait for sleep to start, err: %v", err) + } - // Create an executable that can't be run with the specified UID:GID. - // This shouldn't be callable within the container until we add the - // CAP_DAC_OVERRIDE capability to skip the access check. - exePath := filepath.Join(rootDir, "exe") - if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil { - t.Fatalf("couldn't create executable: %v", err) - } - defer os.Remove(exePath) - - // Need to traverse the intermediate directory. - os.Chmod(rootDir, 0755) - - execArgs := &control.ExecArgs{ - Filename: exePath, - Argv: []string{exePath}, - WorkingDirectory: "/", - KUID: uid, - KGID: gid, - Capabilities: &auth.TaskCapabilities{}, - } + // Create an executable that can't be run with the specified UID:GID. + // This shouldn't be callable within the container until we add the + // CAP_DAC_OVERRIDE capability to skip the access check. + exePath := filepath.Join(rootDir, "exe") + if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil { + t.Fatalf("couldn't create executable: %v", err) + } + defer os.Remove(exePath) + + // Need to traverse the intermediate directory. + os.Chmod(rootDir, 0755) + + execArgs := &control.ExecArgs{ + Filename: exePath, + Argv: []string{exePath}, + WorkingDirectory: "/", + KUID: uid, + KGID: gid, + Capabilities: &auth.TaskCapabilities{}, + } - // "exe" should fail because we don't have the necessary permissions. - if _, err := cont.executeSync(execArgs); err == nil { - t.Fatalf("container executed without error, but an error was expected") - } + // "exe" should fail because we don't have the necessary permissions. + if _, err := cont.executeSync(execArgs); err == nil { + t.Fatalf("container executed without error, but an error was expected") + } - // Now we run with the capability enabled and should succeed. - execArgs.Capabilities = &auth.TaskCapabilities{ - EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), - } - // "exe" should not fail this time. - if _, err := cont.executeSync(execArgs); err != nil { - t.Fatalf("container failed to exec %v: %v", args, err) - } + // Now we run with the capability enabled and should succeed. + execArgs.Capabilities = &auth.TaskCapabilities{ + EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), + } + // "exe" should not fail this time. + if _, err := cont.executeSync(execArgs); err != nil { + t.Fatalf("container failed to exec %v: %v", args, err) + } + }) } } // TestRunNonRoot checks that sandbox can be configured when running as // non-privileged user. func TestRunNonRoot(t *testing.T) { - for _, conf := range configs(noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - - spec := testutil.NewSpecWithArgs("/bin/true") - - // Set a random user/group with no access to "blocked" dir. - spec.Process.User.UID = 343 - spec.Process.User.GID = 2401 - spec.Process.Capabilities = nil + for name, conf := range configsWithVFS2(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("/bin/true") + + // Set a random user/group with no access to "blocked" dir. + spec.Process.User.UID = 343 + spec.Process.User.GID = 2401 + spec.Process.Capabilities = nil + + // User running inside container can't list '$TMP/blocked' and would fail to + // mount it. + dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + if err := os.Chmod(dir, 0700); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", dir, err) + } + dir = path.Join(dir, "test") + if err := os.Mkdir(dir, 0755); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", dir, err) + } - // User running inside container can't list '$TMP/blocked' and would fail to - // mount it. - dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - if err := os.Chmod(dir, 0700); err != nil { - t.Fatalf("os.MkDir(%q) failed: %v", dir, err) - } - dir = path.Join(dir, "test") - if err := os.Mkdir(dir, 0755); err != nil { - t.Fatalf("os.MkDir(%q) failed: %v", dir, err) - } + src, err := ioutil.TempDir(testutil.TmpDir(), "src") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } - src, err := ioutil.TempDir(testutil.TmpDir(), "src") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: src, + Type: "bind", + }) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: dir, - Source: src, - Type: "bind", + if err := run(spec, conf); err != nil { + t.Fatalf("error running sandbox: %v", err) + } }) - - if err := run(spec, conf); err != nil { - t.Fatalf("error running sandbox: %v", err) - } } } // TestMountNewDir checks that runsc will create destination directory if it // doesn't exit. func TestMountNewDir(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) + for name, conf := range configsWithVFS2(t, overlay) { + t.Run(name, func(t *testing.T) { + root, err := ioutil.TempDir(testutil.TmpDir(), "root") + if err != nil { + t.Fatal("ioutil.TempDir() failed:", err) + } - root, err := ioutil.TempDir(testutil.TmpDir(), "root") - if err != nil { - t.Fatal("ioutil.TempDir() failed:", err) - } + srcDir := path.Join(root, "src", "dir", "anotherdir") + if err := os.MkdirAll(srcDir, 0755); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err) + } - srcDir := path.Join(root, "src", "dir", "anotherdir") - if err := os.MkdirAll(srcDir, 0755); err != nil { - t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err) - } + mountDir := path.Join(root, "dir", "anotherdir") - mountDir := path.Join(root, "dir", "anotherdir") + spec := testutil.NewSpecWithArgs("/bin/ls", mountDir) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: mountDir, + Source: srcDir, + Type: "bind", + }) - spec := testutil.NewSpecWithArgs("/bin/ls", mountDir) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: mountDir, - Source: srcDir, - Type: "bind", + if err := run(spec, conf); err != nil { + t.Fatalf("error running sandbox: %v", err) + } }) - - if err := run(spec, conf); err != nil { - t.Fatalf("error running sandbox: %v", err) - } } } func TestReadonlyRoot(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - spec := testutil.NewSpecWithArgs("/bin/touch", "/foo") - spec.Root.Readonly = true - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configsWithVFS2(t, overlay) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("/bin/touch", "/foo") + spec.Root.Readonly = true + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create, start and wait for the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create, start and wait for the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { - t.Fatalf("container failed, waitStatus: %v", ws) - } + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { + t.Fatalf("container failed, waitStatus: %v", ws) + } + }) } } func TestUIDMap(t *testing.T) { - for _, conf := range configs(noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(testDir) - testFile := path.Join(testDir, "testfile") - - spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile") - uid := os.Getuid() - gid := os.Getgid() - spec.Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - {Type: specs.UserNamespace}, - {Type: specs.PIDNamespace}, - {Type: specs.MountNamespace}, - }, - UIDMappings: []specs.LinuxIDMapping{ - { - ContainerID: 0, - HostID: uint32(uid), - Size: 1, + for name, conf := range configsWithVFS2(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + defer os.RemoveAll(testDir) + testFile := path.Join(testDir, "testfile") + + spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile") + uid := os.Getuid() + gid := os.Getgid() + spec.Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + {Type: specs.UserNamespace}, + {Type: specs.PIDNamespace}, + {Type: specs.MountNamespace}, }, - }, - GIDMappings: []specs.LinuxIDMapping{ - { - ContainerID: 0, - HostID: uint32(gid), - Size: 1, + UIDMappings: []specs.LinuxIDMapping{ + { + ContainerID: 0, + HostID: uint32(uid), + Size: 1, + }, }, - }, - } + GIDMappings: []specs.LinuxIDMapping{ + { + ContainerID: 0, + HostID: uint32(gid), + Size: 1, + }, + }, + } - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp", - Source: testDir, - Type: "bind", - }) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp", + Source: testDir, + Type: "bind", + }) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create, start and wait for the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create, start and wait for the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if !ws.Exited() || ws.ExitStatus() != 0 { - t.Fatalf("container failed, waitStatus: %v", ws) - } - st := syscall.Stat_t{} - if err := syscall.Stat(testFile, &st); err != nil { - t.Fatalf("error stat /testfile: %v", err) - } + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if !ws.Exited() || ws.ExitStatus() != 0 { + t.Fatalf("container failed, waitStatus: %v", ws) + } + st := syscall.Stat_t{} + if err := syscall.Stat(testFile, &st); err != nil { + t.Fatalf("error stat /testfile: %v", err) + } - if st.Uid != uint32(uid) || st.Gid != uint32(gid) { - t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid) - } + if st.Uid != uint32(uid) || st.Gid != uint32(gid) { + t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid) + } + }) } } func TestReadonlyMount(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount") - spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: dir, - Source: dir, - Type: "bind", - Options: []string{"ro"}, - }) - spec.Root.Readonly = false - - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configsWithVFS2(t, overlay) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount") + spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: dir, + Type: "bind", + Options: []string{"ro"}, + }) + spec.Root.Readonly = false + + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create, start and wait for the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create, start and wait for the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { - t.Fatalf("container failed, waitStatus: %v", ws) - } + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { + t.Fatalf("container failed, waitStatus: %v", ws) + } + }) } } // TestAbbreviatedIDs checks that runsc supports using abbreviated container // IDs in place of full IDs. func TestAbbreviatedIDs(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + doAbbreviatedIDsTest(t, false) +} + +func TestAbbreviatedIDsVFS2(t *testing.T) { + doAbbreviatedIDsTest(t, true) +} + +func doAbbreviatedIDsTest(t *testing.T, vfs2 bool) { + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir + conf.VFS2 = vfs2 cids := []string{ - "foo-" + testutil.UniqueContainerID(), - "bar-" + testutil.UniqueContainerID(), - "baz-" + testutil.UniqueContainerID(), + "foo-" + testutil.RandomContainerID(), + "bar-" + testutil.RandomContainerID(), + "baz-" + testutil.RandomContainerID(), } for _, cid := range cids { spec := testutil.NewSpecWithArgs("sleep", "100") - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ @@ -1600,18 +1617,27 @@ func TestAbbreviatedIDs(t *testing.T) { } func TestGoferExits(t *testing.T) { + doGoferExitTest(t, false) +} + +func TestGoferExitsVFS2(t *testing.T) { + doGoferExitTest(t, true) +} + +func doGoferExitTest(t *testing.T, vfs2 bool) { spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + conf.VFS2 = vfs2 + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1640,7 +1666,7 @@ func TestGoferExits(t *testing.T) { } func TestRootNotMount(t *testing.T) { - appSym, err := testutil.FindFile("runsc/container/test_app/test_app") + appSym, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -1670,27 +1696,26 @@ func TestRootNotMount(t *testing.T) { spec.Root.Readonly = true spec.Mounts = nil - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) if err := run(spec, conf); err != nil { t.Fatalf("error running sandbox: %v", err) } } func TestUserLog(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } // sched_rr_get_interval = 148 - not implemented in gvisor. spec := testutil.NewSpecWithArgs(app, "syscall", "--syscall=148") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() dir, err := ioutil.TempDir(testutil.TmpDir(), "user_log_test") if err != nil { @@ -1700,7 +1725,7 @@ func TestUserLog(t *testing.T) { // Create, start and wait for the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, UserLog: userLog, @@ -1718,78 +1743,85 @@ func TestUserLog(t *testing.T) { if err != nil { t.Fatalf("error opening user log file %q: %v", userLog, err) } - if want := "Unsupported syscall: sched_rr_get_interval"; !strings.Contains(string(out), want) { + if want := "Unsupported syscall sched_rr_get_interval("; !strings.Contains(string(out), want) { t.Errorf("user log file doesn't contain %q, out: %s", want, string(out)) } } func TestWaitOnExitedSandbox(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - // Run a shell that sleeps for 1 second and then exits with a - // non-zero code. - const wantExit = 17 - cmd := fmt.Sprintf("sleep 1; exit %d", wantExit) - spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configsWithVFS2(t, all...) { + t.Run(name, func(t *testing.T) { + // Run a shell that sleeps for 1 second and then exits with a + // non-zero code. + const wantExit = 17 + cmd := fmt.Sprintf("sleep 1; exit %d", wantExit) + spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and Start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and Start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Wait on the sandbox. This will make an RPC to the sandbox - // and get the actual exit status of the application. - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if got := ws.ExitStatus(); got != wantExit { - t.Errorf("got exit status %d, want %d", got, wantExit) - } + // Wait on the sandbox. This will make an RPC to the sandbox + // and get the actual exit status of the application. + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if got := ws.ExitStatus(); got != wantExit { + t.Errorf("got exit status %d, want %d", got, wantExit) + } - // Now the sandbox has exited, but the zombie sandbox process - // still exists. Calling Wait() now will return the sandbox - // exit status. - ws, err = c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if got := ws.ExitStatus(); got != wantExit { - t.Errorf("got exit status %d, want %d", got, wantExit) - } + // Now the sandbox has exited, but the zombie sandbox process + // still exists. Calling Wait() now will return the sandbox + // exit status. + ws, err = c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if got := ws.ExitStatus(); got != wantExit { + t.Errorf("got exit status %d, want %d", got, wantExit) + } + }) } } func TestDestroyNotStarted(t *testing.T) { + doDestroyNotStartedTest(t, false) +} + +func TestDestroyNotStartedVFS2(t *testing.T) { + doDestroyNotStartedTest(t, true) +} + +func doDestroyNotStartedTest(t *testing.T, vfs2 bool) { spec := testutil.NewSpecWithArgs("/bin/sleep", "100") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + conf.VFS2 = vfs2 + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create the container and check that it can be destroyed. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1804,19 +1836,27 @@ func TestDestroyNotStarted(t *testing.T) { // TestDestroyStarting attempts to force a race between start and destroy. func TestDestroyStarting(t *testing.T) { + doDestroyNotStartedTest(t, false) +} + +func TestDestroyStartedVFS2(t *testing.T) { + doDestroyNotStartedTest(t, true) +} + +func doDestroyStartingTest(t *testing.T, vfs2 bool) { for i := 0; i < 10; i++ { spec := testutil.NewSpecWithArgs("/bin/sleep", "100") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + conf.VFS2 = vfs2 + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create the container and check that it can be destroyed. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1851,23 +1891,23 @@ func TestDestroyStarting(t *testing.T) { } func TestCreateWorkingDir(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - dir := path.Join(tmpDir, "new/working/dir") + for name, conf := range configsWithVFS2(t, overlay) { + t.Run(name, func(t *testing.T) { + tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + dir := path.Join(tmpDir, "new/working/dir") - // touch will fail if the directory doesn't exist. - spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) - spec.Process.Cwd = dir - spec.Root.Readonly = true + // touch will fail if the directory doesn't exist. + spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) + spec.Process.Cwd = dir + spec.Root.Readonly = true - if err := run(spec, conf); err != nil { - t.Fatalf("Error running container: %v", err) - } + if err := run(spec, conf); err != nil { + t.Fatalf("Error running container: %v", err) + } + }) } } @@ -1924,16 +1964,15 @@ func TestMountPropagation(t *testing.T) { }, } - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1975,87 +2014,87 @@ func TestMountPropagation(t *testing.T) { } func TestMountSymlink(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } + for name, conf := range configsWithVFS2(t, overlay) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + defer os.RemoveAll(dir) - source := path.Join(dir, "source") - target := path.Join(dir, "target") - for _, path := range []string{source, target} { - if err := os.MkdirAll(path, 0777); err != nil { - t.Fatalf("os.MkdirAll(): %v", err) + source := path.Join(dir, "source") + target := path.Join(dir, "target") + for _, path := range []string{source, target} { + if err := os.MkdirAll(path, 0777); err != nil { + t.Fatalf("os.MkdirAll(): %v", err) + } } - } - f, err := os.Create(path.Join(source, "file")) - if err != nil { - t.Fatalf("os.Create(): %v", err) - } - f.Close() + f, err := os.Create(path.Join(source, "file")) + if err != nil { + t.Fatalf("os.Create(): %v", err) + } + f.Close() - link := path.Join(dir, "link") - if err := os.Symlink(target, link); err != nil { - t.Fatalf("os.Symlink(%q, %q): %v", target, link, err) - } + link := path.Join(dir, "link") + if err := os.Symlink(target, link); err != nil { + t.Fatalf("os.Symlink(%q, %q): %v", target, link, err) + } - spec := testutil.NewSpecWithArgs("/bin/sleep", "1000") + spec := testutil.NewSpecWithArgs("/bin/sleep", "1000") - // Mount to a symlink to ensure the mount code will follow it and mount - // at the symlink target. - spec.Mounts = append(spec.Mounts, specs.Mount{ - Type: "bind", - Destination: link, - Source: source, - }) + // Mount to a symlink to ensure the mount code will follow it and mount + // at the symlink target. + spec.Mounts = append(spec.Mounts, specs.Mount{ + Type: "bind", + Destination: link, + Source: source, + }) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("creating container: %v", err) - } - defer cont.Destroy() + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("creating container: %v", err) + } + defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("starting container: %v", err) - } + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } - // Check that symlink was resolved and mount was created where the symlink - // is pointing to. - file := path.Join(target, "file") - execArgs := &control.ExecArgs{ - Filename: "/usr/bin/test", - Argv: []string{"test", "-f", file}, - } - if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { - t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) - } + // Check that symlink was resolved and mount was created where the symlink + // is pointing to. + file := path.Join(target, "file") + execArgs := &control.ExecArgs{ + Filename: "/usr/bin/test", + Argv: []string{"test", "-f", file}, + } + if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { + t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) + } + }) } } // Check that --net-raw disables the CAP_NET_RAW capability. func TestNetRaw(t *testing.T) { capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10) - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } for _, enableRaw := range []bool{true, false} { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.EnableRaw = enableRaw test := "--enabled" @@ -2070,49 +2109,12 @@ func TestNetRaw(t *testing.T) { } } -// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works. -func TestOverlayfsStaleRead(t *testing.T) { - conf := testutil.TestConfig() - conf.OverlayfsStaleRead = true - - in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer in.Close() - if _, err := in.WriteString("stale data"); err != nil { - t.Fatalf("in.Write() failed: %v", err) - } - - out, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.out") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer out.Close() - - const want = "foobar" - cmd := fmt.Sprintf("cat %q && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name()) - spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd) - if err := run(spec, conf); err != nil { - t.Fatalf("Error running container: %v", err) - } - - gotBytes, err := ioutil.ReadAll(out) - if err != nil { - t.Fatalf("out.Read() failed: %v", err) - } - got := strings.TrimSpace(string(gotBytes)) - if want != got { - t.Errorf("Wrong content in out file, got: %q. want: %q", got, want) - } -} - // TestTTYField checks TTY field returned by container.Processes(). func TestTTYField(t *testing.T) { stop := testutil.StartReaper() defer stop() - testApp, err := testutil.FindFile("runsc/container/test_app/test_app") + testApp, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -2135,64 +2137,70 @@ func TestTTYField(t *testing.T) { } for _, test := range testCases { - t.Run(test.name, func(t *testing.T) { - conf := testutil.TestConfig() - - // We will run /bin/sleep, possibly with an open TTY. - cmd := []string{"/bin/sleep", "10000"} - if test.useTTY { - // Run inside the "pty-runner". - cmd = append([]string{testApp, "pty-runner"}, cmd...) + for _, vfs2 := range []bool{false, true} { + name := test.name + if vfs2 { + name += "-vfs2" } + t.Run(name, func(t *testing.T) { + conf := testutil.TestConfig(t) + conf.VFS2 = vfs2 + + // We will run /bin/sleep, possibly with an open TTY. + cmd := []string{"/bin/sleep", "10000"} + if test.useTTY { + // Run inside the "pty-runner". + cmd = append([]string{testApp, "pty-runner"}, cmd...) + } - spec := testutil.NewSpecWithArgs(cmd...) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) - - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + spec := testutil.NewSpecWithArgs(cmd...) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Wait for sleep to be running, and check the TTY - // field. - var gotTTYField string - cb := func() error { - ps, err := c.Processes() + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { - err = fmt.Errorf("error getting process data from container: %v", err) - return &backoff.PermanentError{Err: err} + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) } - for _, p := range ps { - if strings.Contains(p.Cmd, "sleep") { - gotTTYField = p.TTY - return nil + + // Wait for sleep to be running, and check the TTY + // field. + var gotTTYField string + cb := func() error { + ps, err := c.Processes() + if err != nil { + err = fmt.Errorf("error getting process data from container: %v", err) + return &backoff.PermanentError{Err: err} + } + for _, p := range ps { + if strings.Contains(p.Cmd, "sleep") { + gotTTYField = p.TTY + return nil + } } + return fmt.Errorf("sleep not running") + } + if err := testutil.Poll(cb, 30*time.Second); err != nil { + t.Fatalf("error waiting for sleep process: %v", err) } - return fmt.Errorf("sleep not running") - } - if err := testutil.Poll(cb, 30*time.Second); err != nil { - t.Fatalf("error waiting for sleep process: %v", err) - } - if gotTTYField != test.wantTTYField { - t.Errorf("tty field got %q, want %q", gotTTYField, test.wantTTYField) - } - }) + if gotTTYField != test.wantTTYField { + t.Errorf("tty field got %q, want %q", gotTTYField, test.wantTTYField) + } + }) + } } } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 2da93ec5b..207206dd2 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -27,18 +27,19 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" ) func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { var specs []*specs.Spec var ids []string - rootID := testutil.UniqueContainerID() + rootID := testutil.RandomContainerID() for i, cmd := range cmds { spec := testutil.NewSpecWithArgs(cmd...) @@ -52,7 +53,7 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeContainer, specutils.ContainerdSandboxIDAnnotation: rootID, } - ids = append(ids, testutil.UniqueContainerID()) + ids = append(ids, testutil.RandomContainerID()) } specs = append(specs, spec) } @@ -64,23 +65,16 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.") } + cu := cleanup.Cleanup{} + defer cu.Clean() + var containers []*Container - var bundles []string - cleanup := func() { - for _, c := range containers { - c.Destroy() - } - for _, b := range bundles { - os.RemoveAll(b) - } - } for i, spec := range specs { - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { - cleanup() return nil, nil, fmt.Errorf("error setting up container: %v", err) } - bundles = append(bundles, bundleDir) + cu.Add(cleanup) args := Args{ ID: ids[i], @@ -89,17 +83,17 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C } cont, err := New(conf, args) if err != nil { - cleanup() return nil, nil, fmt.Errorf("error creating container: %v", err) } + cu.Add(func() { cont.Destroy() }) containers = append(containers, cont) if err := cont.Start(conf); err != nil { - cleanup() return nil, nil, fmt.Errorf("error starting container: %v", err) } } - return containers, cleanup, nil + + return containers, cu.Release(), nil } type execDesc struct { @@ -135,161 +129,161 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) { // TestMultiContainerSanity checks that it is possible to run 2 dead-simple // containers in the same sandbox. func TestMultiContainerSanity(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir + for name, conf := range configsWithVFS2(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir - // Setup the containers. - sleep := []string{"sleep", "100"} - specs, ids := createSpecs(sleep, sleep) - containers, cleanup, err := startContainers(conf, specs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + // Setup the containers. + sleep := []string{"sleep", "100"} + specs, ids := createSpecs(sleep, sleep) + containers, cleanup, err := startContainers(conf, specs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } - expectedPL = []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, - } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Check via ps that multiple processes are running. + expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + }) } } // TestMultiPIDNS checks that it is possible to run 2 dead-simple // containers in the same sandbox with different pidns. func TestMultiPIDNS(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. - sleep := []string{"sleep", "100"} - testSpecs, ids := createSpecs(sleep, sleep) - testSpecs[1].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep) + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + }, }, - }, - } + } - containers, cleanup, err := startContainers(conf, testSpecs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } - expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Check via ps that multiple processes are running. + expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + }) } } // TestMultiPIDNSPath checks the pidns path. func TestMultiPIDNSPath(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. - sleep := []string{"sleep", "100"} - testSpecs, ids := createSpecs(sleep, sleep, sleep) - testSpecs[0].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - Path: "/proc/1/ns/pid", + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep, sleep) + testSpecs[0].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, }, - }, - } - testSpecs[1].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - Path: "/proc/1/ns/pid", + } + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, }, - }, - } - testSpecs[2].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - Path: "/proc/2/ns/pid", + } + testSpecs[2].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/2/ns/pid", + }, }, - }, - } + } - containers, cleanup, err := startContainers(conf, testSpecs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } - if err := waitForProcessList(containers[2], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Check via ps that multiple processes are running. + expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + if err := waitForProcessList(containers[2], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - expectedPL = []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, - } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + expectedPL = []*control.Process{ + {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + }) } } func TestMultiContainerWait(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // The first container should run the entire duration of the test. @@ -361,13 +355,13 @@ func TestMultiContainerWait(t *testing.T) { // TestExecWait ensures what we can wait containers and individual processes in the // sandbox that have already exited. func TestExecWait(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // The first container should run the entire duration of the test. @@ -457,13 +451,13 @@ func TestMultiContainerMount(t *testing.T) { }) // Setup the containers. - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir containers, cleanup, err := startContainers(conf, sps, ids) @@ -484,176 +478,176 @@ func TestMultiContainerMount(t *testing.T) { // TestMultiContainerSignal checks that it is possible to signal individual // containers without killing the entire sandbox. func TestMultiContainerSignal(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir - // Setup the containers. - sleep := []string{"sleep", "100"} - specs, ids := createSpecs(sleep, sleep) - containers, cleanup, err := startContainers(conf, specs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + // Setup the containers. + sleep := []string{"sleep", "100"} + specs, ids := createSpecs(sleep, sleep) + containers, cleanup, err := startContainers(conf, specs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that container 1 process is running. - expectedPL := []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, - } + // Check via ps that container 1 process is running. + expectedPL := []*control.Process{ + {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - // Kill process 2. - if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil { - t.Errorf("failed to kill process 2: %v", err) - } + // Kill process 2. + if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil { + t.Errorf("failed to kill process 2: %v", err) + } - // Make sure process 1 is still running. - expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Make sure process 1 is still running. + expectedPL = []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - // goferPid is reset when container is destroyed. - goferPid := containers[1].GoferPid + // goferPid is reset when container is destroyed. + goferPid := containers[1].GoferPid - // Destroy container and ensure container's gofer process has exited. - if err := containers[1].Destroy(); err != nil { - t.Errorf("failed to destroy container: %v", err) - } - _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) { - cpid, err := syscall.Wait4(goferPid, nil, 0, nil) - return uintptr(cpid), 0, err - }) - if err != syscall.ECHILD { - t.Errorf("error waiting for gofer to exit: %v", err) - } - // Make sure process 1 is still running. - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Destroy container and ensure container's gofer process has exited. + if err := containers[1].Destroy(); err != nil { + t.Errorf("failed to destroy container: %v", err) + } + _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) { + cpid, err := syscall.Wait4(goferPid, nil, 0, nil) + return uintptr(cpid), 0, err + }) + if err != syscall.ECHILD { + t.Errorf("error waiting for gofer to exit: %v", err) + } + // Make sure process 1 is still running. + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - // Now that process 2 is gone, ensure we get an error trying to - // signal it again. - if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil { - t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID) - } + // Now that process 2 is gone, ensure we get an error trying to + // signal it again. + if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil { + t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID) + } - // Kill process 1. - if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil { - t.Errorf("failed to kill process 1: %v", err) - } + // Kill process 1. + if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil { + t.Errorf("failed to kill process 1: %v", err) + } - // Ensure that container's gofer and sandbox process are no more. - err = blockUntilWaitable(containers[0].GoferPid) - if err != nil && err != syscall.ECHILD { - t.Errorf("error waiting for gofer to exit: %v", err) - } + // Ensure that container's gofer and sandbox process are no more. + err = blockUntilWaitable(containers[0].GoferPid) + if err != nil && err != syscall.ECHILD { + t.Errorf("error waiting for gofer to exit: %v", err) + } - err = blockUntilWaitable(containers[0].Sandbox.Pid) - if err != nil && err != syscall.ECHILD { - t.Errorf("error waiting for sandbox to exit: %v", err) - } + err = blockUntilWaitable(containers[0].Sandbox.Pid) + if err != nil && err != syscall.ECHILD { + t.Errorf("error waiting for sandbox to exit: %v", err) + } - // The sentry should be gone, so signaling should yield an error. - if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil { - t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID) - } + // The sentry should be gone, so signaling should yield an error. + if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil { + t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID) + } - if err := containers[0].Destroy(); err != nil { - t.Errorf("failed to destroy container: %v", err) - } + if err := containers[0].Destroy(); err != nil { + t.Errorf("failed to destroy container: %v", err) + } + }) } } // TestMultiContainerDestroy checks that container are properly cleaned-up when // they are destroyed. func TestMultiContainerDestroy(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // First container will remain intact while the second container is killed. - podSpecs, ids := createSpecs( - []string{"sleep", "100"}, - []string{app, "fork-bomb"}) - - // Run the fork bomb in a PID namespace to prevent processes to be - // re-parented to PID=1 in the root container. - podSpecs[1].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{{Type: "pid"}}, - } - containers, cleanup, err := startContainers(conf, podSpecs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // First container will remain intact while the second container is killed. + podSpecs, ids := createSpecs( + []string{"sleep", "100"}, + []string{app, "fork-bomb"}) + + // Run the fork bomb in a PID namespace to prevent processes to be + // re-parented to PID=1 in the root container. + podSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{{Type: "pid"}}, + } + containers, cleanup, err := startContainers(conf, podSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Exec more processes to ensure signal all works for exec'd processes too. - args := &control.ExecArgs{ - Filename: app, - Argv: []string{app, "fork-bomb"}, - } - if _, err := containers[1].Execute(args); err != nil { - t.Fatalf("error exec'ing: %v", err) - } + // Exec more processes to ensure signal all works for exec'd processes too. + args := &control.ExecArgs{ + Filename: app, + Argv: []string{app, "fork-bomb"}, + } + if _, err := containers[1].Execute(args); err != nil { + t.Fatalf("error exec'ing: %v", err) + } - // Let it brew... - time.Sleep(500 * time.Millisecond) + // Let it brew... + time.Sleep(500 * time.Millisecond) - if err := containers[1].Destroy(); err != nil { - t.Fatalf("error destroying container: %v", err) - } + if err := containers[1].Destroy(); err != nil { + t.Fatalf("error destroying container: %v", err) + } - // Check that destroy killed all processes belonging to the container and - // waited for them to exit before returning. - pss, err := containers[0].Sandbox.Processes("") - if err != nil { - t.Fatalf("error getting process data from sandbox: %v", err) - } - expectedPL := []*control.Process{{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}} - if r, err := procListsEqual(pss, expectedPL); !r { - t.Errorf("container got process list: %s, want: %s: error: %v", - procListToString(pss), procListToString(expectedPL), err) - } + // Check that destroy killed all processes belonging to the container and + // waited for them to exit before returning. + pss, err := containers[0].Sandbox.Processes("") + if err != nil { + t.Fatalf("error getting process data from sandbox: %v", err) + } + expectedPL := []*control.Process{{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}} + if r, err := procListsEqual(pss, expectedPL); !r { + t.Errorf("container got process list: %s, want: %s: error: %v", + procListToString(pss), procListToString(expectedPL), err) + } - // Check that cont.Destroy is safe to call multiple times. - if err := containers[1].Destroy(); err != nil { - t.Errorf("error destroying container: %v", err) - } + // Check that cont.Destroy is safe to call multiple times. + if err := containers[1].Destroy(); err != nil { + t.Errorf("error destroying container: %v", err) + } + }) } } func TestMultiContainerProcesses(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Note: use curly braces to keep 'sh' process around. Otherwise, shell @@ -706,13 +700,13 @@ func TestMultiContainerProcesses(t *testing.T) { // TestMultiContainerKillAll checks that all process that belong to a container // are killed when SIGKILL is sent to *all* processes in that container. func TestMultiContainerKillAll(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir for _, tc := range []struct { @@ -721,7 +715,7 @@ func TestMultiContainerKillAll(t *testing.T) { {killContainer: true}, {killContainer: false}, } { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -739,11 +733,11 @@ func TestMultiContainerKillAll(t *testing.T) { // Wait until all processes are created. rootProcCount := int(math.Pow(2, 3) - 1) if err := waitForProcessCount(containers[0], rootProcCount); err != nil { - t.Fatal(err) + t.Fatalf("error waitting for processes: %v", err) } procCount := int(math.Pow(2, 5) - 1) if err := waitForProcessCount(containers[1], procCount); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } // Exec more processes to ensure signal works for exec'd processes too. @@ -757,7 +751,7 @@ func TestMultiContainerKillAll(t *testing.T) { // Wait for these new processes to start. procCount += int(math.Pow(2, 3) - 1) if err := waitForProcessCount(containers[1], procCount); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } if tc.killContainer { @@ -790,11 +784,11 @@ func TestMultiContainerKillAll(t *testing.T) { // Check that all processes are gone. if err := waitForProcessCount(containers[1], 0); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } // Check that root container was not affected. if err := waitForProcessCount(containers[0], rootProcCount); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } } } @@ -804,18 +798,17 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) { []string{"/bin/sleep", "100"}, []string{"/bin/sleep", "100"}) - conf := testutil.TestConfig() - rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf) + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(rootBundleDir) + defer cleanup() rootArgs := Args{ ID: ids[0], Spec: specs[0], - BundleDir: rootBundleDir, + BundleDir: bundleDir, } root, err := New(conf, rootArgs) if err != nil { @@ -827,11 +820,11 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) { } // Create and destroy sub-container. - bundleDir, err := testutil.SetupBundleDir(specs[1]) + bundleDir, cleanupSub, err := testutil.SetupBundleDir(specs[1]) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanupSub() args := Args{ ID: ids[1], @@ -858,18 +851,17 @@ func TestMultiContainerDestroyStarting(t *testing.T) { } specs, ids := createSpecs(cmds...) - conf := testutil.TestConfig() - rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf) + conf := testutil.TestConfig(t) + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(rootBundleDir) + defer cleanup() rootArgs := Args{ ID: ids[0], Spec: specs[0], - BundleDir: rootBundleDir, + BundleDir: bundleDir, } root, err := New(conf, rootArgs) if err != nil { @@ -886,16 +878,16 @@ func TestMultiContainerDestroyStarting(t *testing.T) { continue // skip root container } - bundleDir, err := testutil.SetupBundleDir(specs[i]) + bundleDir, cleanup, err := testutil.SetupBundleDir(specs[i]) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() rootArgs := Args{ ID: ids[i], Spec: specs[i], - BundleDir: rootBundleDir, + BundleDir: bundleDir, } cont, err := New(conf, rootArgs) if err != nil { @@ -937,13 +929,13 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) { script := fmt.Sprintf("if [ -f %q ]; then exit 1; else touch %q; fi", filename, filename) cmd := []string{"sh", "-c", script} - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Make sure overlay is enabled, and none of the root filesystems are @@ -977,7 +969,7 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) { // TestMultiContainerContainerDestroyStress tests that IO operations continue // to work after containers have been stopped and gofers killed. func TestMultiContainerContainerDestroyStress(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -1006,13 +998,12 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { childrenSpecs := allSpecs[1:] childrenIDs := allIDs[1:] - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(rootSpec, conf) + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(rootSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Start root container. rootArgs := Args{ @@ -1038,11 +1029,11 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { var children []*Container for j, spec := range specs { - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() args := Args{ ID: ids[j], @@ -1080,308 +1071,308 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { // Test that pod shared mounts are properly mounted in 2 containers and that // changes from one container is reflected in the other. func TestMultiContainerSharedMount(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. - sleep := []string{"sleep", "100"} - podSpec, ids := createSpecs(sleep, sleep) - mnt0 := specs.Mount{ - Destination: "/mydir/test", - Source: "/some/dir", - Type: "tmpfs", - Options: nil, - } - podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: nil, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) - mnt1 := mnt0 - mnt1.Destination = "/mydir2/test2" - podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) - createSharedMount(mnt0, "test-mount", podSpec...) + createSharedMount(mnt0, "test-mount", podSpec...) - containers, cleanup, err := startContainers(conf, podSpec, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - file0 := path.Join(mnt0.Destination, "abc") - file1 := path.Join(mnt1.Destination, "abc") - execs := []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, - desc: "directory is mounted in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, - desc: "directory is mounted in container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/touch", file0}, - desc: "create file in container0", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-f", file0}, - desc: "file appears in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-f", file1}, - desc: "file appears in container1", - }, - { - c: containers[1], - cmd: []string{"/bin/rm", file1}, - desc: "file removed from container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "!", "-f", file0}, - desc: "file removed from container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "!", "-f", file1}, - desc: "file removed from container1", - }, - { - c: containers[1], - cmd: []string{"/bin/mkdir", file1}, - desc: "create directory in container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-d", file0}, - desc: "dir appears in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-d", file1}, - desc: "dir appears in container1", - }, - { - c: containers[0], - cmd: []string{"/bin/rmdir", file0}, - desc: "create directory in container0", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "!", "-d", file0}, - desc: "dir removed from container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "!", "-d", file1}, - desc: "dir removed from container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, + desc: "directory is mounted in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, + desc: "directory is mounted in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + desc: "create file in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file appears in container1", + }, + { + c: containers[1], + cmd: []string{"/bin/rm", file1}, + desc: "file removed from container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-f", file0}, + desc: "file removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-f", file1}, + desc: "file removed from container1", + }, + { + c: containers[1], + cmd: []string{"/bin/mkdir", file1}, + desc: "create directory in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", file0}, + desc: "dir appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", file1}, + desc: "dir appears in container1", + }, + { + c: containers[0], + cmd: []string{"/bin/rmdir", file0}, + desc: "create directory in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-d", file0}, + desc: "dir removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-d", file1}, + desc: "dir removed from container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + }) } } // Test that pod mounts are mounted as readonly when requested. func TestMultiContainerSharedMountReadonly(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. - sleep := []string{"sleep", "100"} - podSpec, ids := createSpecs(sleep, sleep) - mnt0 := specs.Mount{ - Destination: "/mydir/test", - Source: "/some/dir", - Type: "tmpfs", - Options: []string{"ro"}, - } - podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: []string{"ro"}, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) - mnt1 := mnt0 - mnt1.Destination = "/mydir2/test2" - podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) - createSharedMount(mnt0, "test-mount", podSpec...) + createSharedMount(mnt0, "test-mount", podSpec...) - containers, cleanup, err := startContainers(conf, podSpec, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - file0 := path.Join(mnt0.Destination, "abc") - file1 := path.Join(mnt1.Destination, "abc") - execs := []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, - desc: "directory is mounted in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, - desc: "directory is mounted in container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/touch", file0}, - want: 1, - desc: "fails to write to container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/touch", file1}, - want: 1, - desc: "fails to write to container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, + desc: "directory is mounted in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, + desc: "directory is mounted in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + want: 1, + desc: "fails to write to container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/touch", file1}, + want: 1, + desc: "fails to write to container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + }) } } // Test that shared pod mounts continue to work after container is restarted. func TestMultiContainerSharedMountRestart(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. - sleep := []string{"sleep", "100"} - podSpec, ids := createSpecs(sleep, sleep) - mnt0 := specs.Mount{ - Destination: "/mydir/test", - Source: "/some/dir", - Type: "tmpfs", - Options: nil, - } - podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: nil, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) - mnt1 := mnt0 - mnt1.Destination = "/mydir2/test2" - podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) - createSharedMount(mnt0, "test-mount", podSpec...) + createSharedMount(mnt0, "test-mount", podSpec...) - containers, cleanup, err := startContainers(conf, podSpec, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - file0 := path.Join(mnt0.Destination, "abc") - file1 := path.Join(mnt1.Destination, "abc") - execs := []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/touch", file0}, - desc: "create file in container0", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-f", file0}, - desc: "file appears in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-f", file1}, - desc: "file appears in container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + desc: "create file in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file appears in container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } - containers[1].Destroy() + containers[1].Destroy() - bundleDir, err := testutil.SetupBundleDir(podSpec[1]) - if err != nil { - t.Fatalf("error restarting container: %v", err) - } - defer os.RemoveAll(bundleDir) + bundleDir, cleanup, err := testutil.SetupBundleDir(podSpec[1]) + if err != nil { + t.Fatalf("error restarting container: %v", err) + } + defer cleanup() - args := Args{ - ID: ids[1], - Spec: podSpec[1], - BundleDir: bundleDir, - } - containers[1], err = New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - if err := containers[1].Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + args := Args{ + ID: ids[1], + Spec: podSpec[1], + BundleDir: bundleDir, + } + containers[1], err = New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + if err := containers[1].Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - execs = []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-f", file0}, - desc: "file is still in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-f", file1}, - desc: "file is still in container1", - }, - { - c: containers[1], - cmd: []string{"/bin/rm", file1}, - desc: "file removed from container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "!", "-f", file0}, - desc: "file removed from container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "!", "-f", file1}, - desc: "file removed from container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + execs = []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file is still in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file is still in container1", + }, + { + c: containers[1], + cmd: []string{"/bin/rm", file1}, + desc: "file removed from container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-f", file0}, + desc: "file removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-f", file1}, + desc: "file removed from container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + }) } } // Test that unsupported pod mounts options are ignored when matching master and // slave mounts. func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Setup the containers. @@ -1428,7 +1419,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { // Test that one container can send an FD to another container, even though // they have distinct MountNamespaces. func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -1457,13 +1448,13 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { Type: "tmpfs", } - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Create the specs. @@ -1494,13 +1485,13 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { // Test that container is destroyed when Gofer is killed. func TestMultiContainerGoferKilled(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir sleep := []string{"sleep", "100"} @@ -1581,13 +1572,13 @@ func TestMultiContainerLoadSandbox(t *testing.T) { sleep := []string{"sleep", "100"} specs, ids := createSpecs(sleep, sleep, sleep) - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Create containers for the sandbox. @@ -1614,7 +1605,7 @@ func TestMultiContainerLoadSandbox(t *testing.T) { } // Create a valid but empty container directory. - randomCID := testutil.UniqueContainerID() + randomCID := testutil.RandomContainerID() dir = filepath.Join(conf.RootDir, randomCID) if err := os.MkdirAll(dir, 0755); err != nil { t.Fatalf("os.MkdirAll(%q)=%v", dir, err) @@ -1681,13 +1672,13 @@ func TestMultiContainerRunNonRoot(t *testing.T) { Type: "bind", }) - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir pod, cleanup, err := startContainers(conf, podSpecs, ids) diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go index dc4194134..bac177a88 100644 --- a/runsc/container/shared_volume_test.go +++ b/runsc/container/shared_volume_test.go @@ -24,16 +24,15 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" - "gvisor.dev/gvisor/runsc/testutil" ) // TestSharedVolume checks that modifications to a volume mount are propagated // into and out of the sandbox. func TestSharedVolume(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.FileAccess = boot.FileAccessShared - t.Logf("Running test with conf: %+v", conf) // Main process just sleeps. We will use "exec" to probe the state of // the filesystem. @@ -44,16 +43,15 @@ func TestSharedVolume(t *testing.T) { t.Fatalf("TempDir failed: %v", err) } - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -190,9 +188,8 @@ func checkFile(c *Container, filename string, want []byte) error { // TestSharedVolumeFile tests that changes to file content outside the sandbox // is reflected inside. func TestSharedVolumeFile(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.FileAccess = boot.FileAccessShared - t.Logf("Running test with conf: %+v", conf) // Main process just sleeps. We will use "exec" to probe the state of // the filesystem. @@ -203,16 +200,15 @@ func TestSharedVolumeFile(t *testing.T) { t.Fatalf("TempDir failed: %v", err) } - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } diff --git a/runsc/container/test_app/BUILD b/runsc/container/test_app/BUILD deleted file mode 100644 index 0defbd9fc..000000000 --- a/runsc/container/test_app/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_binary") - -package(licenses = ["notice"]) - -go_binary( - name = "test_app", - testonly = 1, - srcs = [ - "fds.go", - "test_app.go", - ], - pure = True, - visibility = ["//runsc/container:__pkg__"], - deps = [ - "//pkg/unet", - "//runsc/flag", - "//runsc/testutil", - "@com_github_google_subcommands//:go_default_library", - "@com_github_kr_pty//:go_default_library", - ], -) diff --git a/runsc/container/test_app/fds.go b/runsc/container/test_app/fds.go deleted file mode 100644 index 2a146a2c3..000000000 --- a/runsc/container/test_app/fds.go +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "context" - "io/ioutil" - "log" - "os" - "time" - - "github.com/google/subcommands" - "gvisor.dev/gvisor/pkg/unet" - "gvisor.dev/gvisor/runsc/flag" - "gvisor.dev/gvisor/runsc/testutil" -) - -const fileContents = "foobarbaz" - -// fdSender will open a file and send the FD over a unix domain socket. -type fdSender struct { - socketPath string -} - -// Name implements subcommands.Command.Name. -func (*fdSender) Name() string { - return "fd_sender" -} - -// Synopsis implements subcommands.Command.Synopsys. -func (*fdSender) Synopsis() string { - return "creates a file and sends the FD over the socket" -} - -// Usage implements subcommands.Command.Usage. -func (*fdSender) Usage() string { - return "fd_sender <flags>" -} - -// SetFlags implements subcommands.Command.SetFlags. -func (fds *fdSender) SetFlags(f *flag.FlagSet) { - f.StringVar(&fds.socketPath, "socket", "", "path to socket") -} - -// Execute implements subcommands.Command.Execute. -func (fds *fdSender) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if fds.socketPath == "" { - log.Fatalf("socket flag must be set") - } - - dir, err := ioutil.TempDir("", "") - if err != nil { - log.Fatalf("TempDir failed: %v", err) - } - - fileToSend, err := ioutil.TempFile(dir, "") - if err != nil { - log.Fatalf("TempFile failed: %v", err) - } - defer fileToSend.Close() - - if _, err := fileToSend.WriteString(fileContents); err != nil { - log.Fatalf("Write(%q) failed: %v", fileContents, err) - } - - // Receiver may not be started yet, so try connecting in a poll loop. - var s *unet.Socket - if err := testutil.Poll(func() error { - var err error - s, err = unet.Connect(fds.socketPath, true /* SEQPACKET, so we can send empty message with FD */) - return err - }, 10*time.Second); err != nil { - log.Fatalf("Error connecting to socket %q: %v", fds.socketPath, err) - } - defer s.Close() - - w := s.Writer(true) - w.ControlMessage.PackFDs(int(fileToSend.Fd())) - if _, err := w.WriteVec([][]byte{[]byte{'a'}}); err != nil { - log.Fatalf("Error sending FD %q over socket %q: %v", fileToSend.Fd(), fds.socketPath, err) - } - - log.Print("FD SENDER exiting successfully") - return subcommands.ExitSuccess -} - -// fdReceiver receives an FD from a unix domain socket and does things to it. -type fdReceiver struct { - socketPath string -} - -// Name implements subcommands.Command.Name. -func (*fdReceiver) Name() string { - return "fd_receiver" -} - -// Synopsis implements subcommands.Command.Synopsys. -func (*fdReceiver) Synopsis() string { - return "reads an FD from a unix socket, and then does things to it" -} - -// Usage implements subcommands.Command.Usage. -func (*fdReceiver) Usage() string { - return "fd_receiver <flags>" -} - -// SetFlags implements subcommands.Command.SetFlags. -func (fdr *fdReceiver) SetFlags(f *flag.FlagSet) { - f.StringVar(&fdr.socketPath, "socket", "", "path to socket") -} - -// Execute implements subcommands.Command.Execute. -func (fdr *fdReceiver) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if fdr.socketPath == "" { - log.Fatalf("Flags cannot be empty, given: socket: %q", fdr.socketPath) - } - - ss, err := unet.BindAndListen(fdr.socketPath, true /* packet */) - if err != nil { - log.Fatalf("BindAndListen(%q) failed: %v", fdr.socketPath, err) - } - defer ss.Close() - - var s *unet.Socket - c := make(chan error, 1) - go func() { - var err error - s, err = ss.Accept() - c <- err - }() - - select { - case err := <-c: - if err != nil { - log.Fatalf("Accept() failed: %v", err) - } - case <-time.After(10 * time.Second): - log.Fatalf("Timeout waiting for accept") - } - - r := s.Reader(true) - r.EnableFDs(1) - b := [][]byte{{'a'}} - if n, err := r.ReadVec(b); n != 1 || err != nil { - log.Fatalf("ReadVec got n=%d err %v (wanted 0, nil)", n, err) - } - - fds, err := r.ExtractFDs() - if err != nil { - log.Fatalf("ExtractFD() got err %v", err) - } - if len(fds) != 1 { - log.Fatalf("ExtractFD() got %d FDs, wanted 1", len(fds)) - } - fd := fds[0] - - file := os.NewFile(uintptr(fd), "received file") - defer file.Close() - if _, err := file.Seek(0, os.SEEK_SET); err != nil { - log.Fatalf("Seek(0, 0) failed: %v", err) - } - - got, err := ioutil.ReadAll(file) - if err != nil { - log.Fatalf("ReadAll failed: %v", err) - } - if string(got) != fileContents { - log.Fatalf("ReadAll got %q want %q", string(got), fileContents) - } - - log.Print("FD RECEIVER exiting successfully") - return subcommands.ExitSuccess -} diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go deleted file mode 100644 index 01c47c79f..000000000 --- a/runsc/container/test_app/test_app.go +++ /dev/null @@ -1,394 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary test_app is like a swiss knife for tests that need to run anything -// inside the sandbox. New functionality can be added with new commands. -package main - -import ( - "context" - "fmt" - "io" - "io/ioutil" - "log" - "net" - "os" - "os/exec" - "regexp" - "strconv" - sys "syscall" - "time" - - "github.com/google/subcommands" - "github.com/kr/pty" - "gvisor.dev/gvisor/runsc/flag" - "gvisor.dev/gvisor/runsc/testutil" -) - -func main() { - subcommands.Register(subcommands.HelpCommand(), "") - subcommands.Register(subcommands.FlagsCommand(), "") - subcommands.Register(new(capability), "") - subcommands.Register(new(fdReceiver), "") - subcommands.Register(new(fdSender), "") - subcommands.Register(new(forkBomb), "") - subcommands.Register(new(ptyRunner), "") - subcommands.Register(new(reaper), "") - subcommands.Register(new(syscall), "") - subcommands.Register(new(taskTree), "") - subcommands.Register(new(uds), "") - - flag.Parse() - - exitCode := subcommands.Execute(context.Background()) - os.Exit(int(exitCode)) -} - -type uds struct { - fileName string - socketPath string -} - -// Name implements subcommands.Command.Name. -func (*uds) Name() string { - return "uds" -} - -// Synopsis implements subcommands.Command.Synopsys. -func (*uds) Synopsis() string { - return "creates unix domain socket client and server. Client sends a contant flow of sequential numbers. Server prints them to --file" -} - -// Usage implements subcommands.Command.Usage. -func (*uds) Usage() string { - return "uds <flags>" -} - -// SetFlags implements subcommands.Command.SetFlags. -func (c *uds) SetFlags(f *flag.FlagSet) { - f.StringVar(&c.fileName, "file", "", "name of output file") - f.StringVar(&c.socketPath, "socket", "", "path to socket") -} - -// Execute implements subcommands.Command.Execute. -func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if c.fileName == "" || c.socketPath == "" { - log.Fatalf("Flags cannot be empty, given: fileName: %q, socketPath: %q", c.fileName, c.socketPath) - return subcommands.ExitFailure - } - outputFile, err := os.OpenFile(c.fileName, os.O_WRONLY|os.O_CREATE, 0666) - if err != nil { - log.Fatal("error opening output file:", err) - } - - defer os.Remove(c.socketPath) - - listener, err := net.Listen("unix", c.socketPath) - if err != nil { - log.Fatal("error listening on socket %q:", c.socketPath, err) - } - - go server(listener, outputFile) - for i := 0; ; i++ { - conn, err := net.Dial("unix", c.socketPath) - if err != nil { - log.Fatal("error dialing:", err) - } - if _, err := conn.Write([]byte(strconv.Itoa(i))); err != nil { - log.Fatal("error writing:", err) - } - conn.Close() - time.Sleep(100 * time.Millisecond) - } -} - -func server(listener net.Listener, out *os.File) { - buf := make([]byte, 16) - - for { - c, err := listener.Accept() - if err != nil { - log.Fatal("error accepting connection:", err) - } - nr, err := c.Read(buf) - if err != nil { - log.Fatal("error reading from buf:", err) - } - data := buf[0:nr] - fmt.Fprint(out, string(data)+"\n") - } -} - -type taskTree struct { - depth int - width int - pause bool -} - -// Name implements subcommands.Command. -func (*taskTree) Name() string { - return "task-tree" -} - -// Synopsis implements subcommands.Command. -func (*taskTree) Synopsis() string { - return "creates a tree of tasks" -} - -// Usage implements subcommands.Command. -func (*taskTree) Usage() string { - return "task-tree <flags>" -} - -// SetFlags implements subcommands.Command. -func (c *taskTree) SetFlags(f *flag.FlagSet) { - f.IntVar(&c.depth, "depth", 1, "number of levels to create") - f.IntVar(&c.width, "width", 1, "number of tasks at each level") - f.BoolVar(&c.pause, "pause", false, "whether the tasks should pause perpetually") -} - -// Execute implements subcommands.Command. -func (c *taskTree) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - stop := testutil.StartReaper() - defer stop() - - if c.depth == 0 { - log.Printf("Child sleeping, PID: %d\n", os.Getpid()) - select {} - } - log.Printf("Parent %d sleeping, PID: %d\n", c.depth, os.Getpid()) - - var cmds []*exec.Cmd - for i := 0; i < c.width; i++ { - cmd := exec.Command( - "/proc/self/exe", c.Name(), - "--depth", strconv.Itoa(c.depth-1), - "--width", strconv.Itoa(c.width), - "--pause", strconv.FormatBool(c.pause)) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - if err := cmd.Start(); err != nil { - log.Fatal("failed to call self:", err) - } - cmds = append(cmds, cmd) - } - - for _, c := range cmds { - c.Wait() - } - - if c.pause { - select {} - } - - return subcommands.ExitSuccess -} - -type forkBomb struct { - delay time.Duration -} - -// Name implements subcommands.Command. -func (*forkBomb) Name() string { - return "fork-bomb" -} - -// Synopsis implements subcommands.Command. -func (*forkBomb) Synopsis() string { - return "creates child process until the end of times" -} - -// Usage implements subcommands.Command. -func (*forkBomb) Usage() string { - return "fork-bomb <flags>" -} - -// SetFlags implements subcommands.Command. -func (c *forkBomb) SetFlags(f *flag.FlagSet) { - f.DurationVar(&c.delay, "delay", 100*time.Millisecond, "amount of time to delay creation of child") -} - -// Execute implements subcommands.Command. -func (c *forkBomb) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - time.Sleep(c.delay) - - cmd := exec.Command("/proc/self/exe", c.Name()) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - log.Fatal("failed to call self:", err) - } - return subcommands.ExitSuccess -} - -type reaper struct{} - -// Name implements subcommands.Command. -func (*reaper) Name() string { - return "reaper" -} - -// Synopsis implements subcommands.Command. -func (*reaper) Synopsis() string { - return "reaps all children in a loop" -} - -// Usage implements subcommands.Command. -func (*reaper) Usage() string { - return "reaper <flags>" -} - -// SetFlags implements subcommands.Command. -func (*reaper) SetFlags(*flag.FlagSet) {} - -// Execute implements subcommands.Command. -func (c *reaper) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - stop := testutil.StartReaper() - defer stop() - select {} -} - -type syscall struct { - sysno uint64 -} - -// Name implements subcommands.Command. -func (*syscall) Name() string { - return "syscall" -} - -// Synopsis implements subcommands.Command. -func (*syscall) Synopsis() string { - return "syscall makes a syscall" -} - -// Usage implements subcommands.Command. -func (*syscall) Usage() string { - return "syscall <flags>" -} - -// SetFlags implements subcommands.Command. -func (s *syscall) SetFlags(f *flag.FlagSet) { - f.Uint64Var(&s.sysno, "syscall", 0, "syscall to call") -} - -// Execute implements subcommands.Command. -func (s *syscall) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if _, _, errno := sys.Syscall(uintptr(s.sysno), 0, 0, 0); errno != 0 { - fmt.Printf("syscall(%d, 0, 0...) failed: %v\n", s.sysno, errno) - } else { - fmt.Printf("syscall(%d, 0, 0...) success\n", s.sysno) - } - return subcommands.ExitSuccess -} - -type capability struct { - enabled uint64 - disabled uint64 -} - -// Name implements subcommands.Command. -func (*capability) Name() string { - return "capability" -} - -// Synopsis implements subcommands.Command. -func (*capability) Synopsis() string { - return "checks if effective capabilities are set/unset" -} - -// Usage implements subcommands.Command. -func (*capability) Usage() string { - return "capability [--enabled=number] [--disabled=number]" -} - -// SetFlags implements subcommands.Command. -func (c *capability) SetFlags(f *flag.FlagSet) { - f.Uint64Var(&c.enabled, "enabled", 0, "") - f.Uint64Var(&c.disabled, "disabled", 0, "") -} - -// Execute implements subcommands.Command. -func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if c.enabled == 0 && c.disabled == 0 { - fmt.Println("One of the flags must be set") - return subcommands.ExitUsageError - } - - status, err := ioutil.ReadFile("/proc/self/status") - if err != nil { - fmt.Printf("Error reading %q: %v\n", "proc/self/status", err) - return subcommands.ExitFailure - } - re := regexp.MustCompile("CapEff:\t([0-9a-f]+)\n") - matches := re.FindStringSubmatch(string(status)) - if matches == nil || len(matches) != 2 { - fmt.Printf("Effective capabilities not found in\n%s\n", status) - return subcommands.ExitFailure - } - caps, err := strconv.ParseUint(matches[1], 16, 64) - if err != nil { - fmt.Printf("failed to convert capabilities %q: %v\n", matches[1], err) - return subcommands.ExitFailure - } - - if c.enabled != 0 && (caps&c.enabled) != c.enabled { - fmt.Printf("Missing capabilities, want: %#x: got: %#x\n", c.enabled, caps) - return subcommands.ExitFailure - } - if c.disabled != 0 && (caps&c.disabled) != 0 { - fmt.Printf("Extra capabilities found, dont_want: %#x: got: %#x\n", c.disabled, caps) - return subcommands.ExitFailure - } - - return subcommands.ExitSuccess -} - -type ptyRunner struct{} - -// Name implements subcommands.Command. -func (*ptyRunner) Name() string { - return "pty-runner" -} - -// Synopsis implements subcommands.Command. -func (*ptyRunner) Synopsis() string { - return "runs the given command with an open pty terminal" -} - -// Usage implements subcommands.Command. -func (*ptyRunner) Usage() string { - return "pty-runner [command]" -} - -// SetFlags implements subcommands.Command.SetFlags. -func (*ptyRunner) SetFlags(f *flag.FlagSet) {} - -// Execute implements subcommands.Command. -func (*ptyRunner) Execute(_ context.Context, fs *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus { - c := exec.Command(fs.Args()[0], fs.Args()[1:]...) - f, err := pty.Start(c) - if err != nil { - fmt.Printf("pty.Start failed: %v", err) - return subcommands.ExitFailure - } - defer f.Close() - - // Copy stdout from the command to keep this process alive until the - // subprocess exits. - io.Copy(os.Stdout, f) - - return subcommands.ExitSuccess -} diff --git a/runsc/criutil/BUILD b/runsc/criutil/BUILD deleted file mode 100644 index 8a571a000..000000000 --- a/runsc/criutil/BUILD +++ /dev/null @@ -1,11 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "criutil", - testonly = 1, - srcs = ["criutil.go"], - visibility = ["//:sandbox"], - deps = ["//runsc/testutil"], -) diff --git a/runsc/criutil/criutil.go b/runsc/criutil/criutil.go deleted file mode 100644 index 773f5a1c4..000000000 --- a/runsc/criutil/criutil.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package criutil contains utility functions for interacting with the -// Container Runtime Interface (CRI), principally via the crictl command line -// tool. This requires critools to be installed on the local system. -package criutil - -import ( - "encoding/json" - "fmt" - "os" - "os/exec" - "strings" - "time" - - "gvisor.dev/gvisor/runsc/testutil" -) - -const endpointPrefix = "unix://" - -// Crictl contains information required to run the crictl utility. -type Crictl struct { - executable string - timeout time.Duration - imageEndpoint string - runtimeEndpoint string -} - -// NewCrictl returns a Crictl configured with a timeout and an endpoint over -// which it will talk to containerd. -func NewCrictl(timeout time.Duration, endpoint string) *Crictl { - // Bazel doesn't pass PATH through, assume the location of crictl - // unless specified by environment variable. - executable := os.Getenv("CRICTL_PATH") - if executable == "" { - executable = "/usr/local/bin/crictl" - } - return &Crictl{ - executable: executable, - timeout: timeout, - imageEndpoint: endpointPrefix + endpoint, - runtimeEndpoint: endpointPrefix + endpoint, - } -} - -// Pull pulls an container image. It corresponds to `crictl pull`. -func (cc *Crictl) Pull(imageName string) error { - _, err := cc.run("pull", imageName) - return err -} - -// RunPod creates a sandbox. It corresponds to `crictl runp`. -func (cc *Crictl) RunPod(sbSpecFile string) (string, error) { - podID, err := cc.run("runp", sbSpecFile) - if err != nil { - return "", fmt.Errorf("runp failed: %v", err) - } - // Strip the trailing newline from crictl output. - return strings.TrimSpace(podID), nil -} - -// Create creates a container within a sandbox. It corresponds to `crictl -// create`. -func (cc *Crictl) Create(podID, contSpecFile, sbSpecFile string) (string, error) { - podID, err := cc.run("create", podID, contSpecFile, sbSpecFile) - if err != nil { - return "", fmt.Errorf("create failed: %v", err) - } - // Strip the trailing newline from crictl output. - return strings.TrimSpace(podID), nil -} - -// Start starts a container. It corresponds to `crictl start`. -func (cc *Crictl) Start(contID string) (string, error) { - output, err := cc.run("start", contID) - if err != nil { - return "", fmt.Errorf("start failed: %v", err) - } - return output, nil -} - -// Stop stops a container. It corresponds to `crictl stop`. -func (cc *Crictl) Stop(contID string) error { - _, err := cc.run("stop", contID) - return err -} - -// Exec execs a program inside a container. It corresponds to `crictl exec`. -func (cc *Crictl) Exec(contID string, args ...string) (string, error) { - a := []string{"exec", contID} - a = append(a, args...) - output, err := cc.run(a...) - if err != nil { - return "", fmt.Errorf("exec failed: %v", err) - } - return output, nil -} - -// Rm removes a container. It corresponds to `crictl rm`. -func (cc *Crictl) Rm(contID string) error { - _, err := cc.run("rm", contID) - return err -} - -// StopPod stops a pod. It corresponds to `crictl stopp`. -func (cc *Crictl) StopPod(podID string) error { - _, err := cc.run("stopp", podID) - return err -} - -// containsConfig is a minimal copy of -// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto -// It only contains fields needed for testing. -type containerConfig struct { - Status containerStatus -} - -type containerStatus struct { - Network containerNetwork -} - -type containerNetwork struct { - IP string -} - -// PodIP returns a pod's IP address. -func (cc *Crictl) PodIP(podID string) (string, error) { - output, err := cc.run("inspectp", podID) - if err != nil { - return "", err - } - conf := &containerConfig{} - if err := json.Unmarshal([]byte(output), conf); err != nil { - return "", fmt.Errorf("failed to unmarshal JSON: %v, %s", err, output) - } - if conf.Status.Network.IP == "" { - return "", fmt.Errorf("no IP found in config: %s", output) - } - return conf.Status.Network.IP, nil -} - -// RmPod removes a container. It corresponds to `crictl rmp`. -func (cc *Crictl) RmPod(podID string) error { - _, err := cc.run("rmp", podID) - return err -} - -// StartContainer pulls the given image ands starts the container in the -// sandbox with the given podID. -func (cc *Crictl) StartContainer(podID, image, sbSpec, contSpec string) (string, error) { - // Write the specs to files that can be read by crictl. - sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec) - if err != nil { - return "", fmt.Errorf("failed to write sandbox spec: %v", err) - } - contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec) - if err != nil { - return "", fmt.Errorf("failed to write container spec: %v", err) - } - - return cc.startContainer(podID, image, sbSpecFile, contSpecFile) -} - -func (cc *Crictl) startContainer(podID, image, sbSpecFile, contSpecFile string) (string, error) { - if err := cc.Pull(image); err != nil { - return "", fmt.Errorf("failed to pull %s: %v", image, err) - } - - contID, err := cc.Create(podID, contSpecFile, sbSpecFile) - if err != nil { - return "", fmt.Errorf("failed to create container in pod %q: %v", podID, err) - } - - if _, err := cc.Start(contID); err != nil { - return "", fmt.Errorf("failed to start container %q in pod %q: %v", contID, podID, err) - } - - return contID, nil -} - -// StopContainer stops and deletes the container with the given container ID. -func (cc *Crictl) StopContainer(contID string) error { - if err := cc.Stop(contID); err != nil { - return fmt.Errorf("failed to stop container %q: %v", contID, err) - } - - if err := cc.Rm(contID); err != nil { - return fmt.Errorf("failed to remove container %q: %v", contID, err) - } - - return nil -} - -// StartPodAndContainer pulls an image, then starts a sandbox and container in -// that sandbox. It returns the pod ID and container ID. -func (cc *Crictl) StartPodAndContainer(image, sbSpec, contSpec string) (string, string, error) { - // Write the specs to files that can be read by crictl. - sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec) - if err != nil { - return "", "", fmt.Errorf("failed to write sandbox spec: %v", err) - } - contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec) - if err != nil { - return "", "", fmt.Errorf("failed to write container spec: %v", err) - } - - podID, err := cc.RunPod(sbSpecFile) - if err != nil { - return "", "", err - } - - contID, err := cc.startContainer(podID, image, sbSpecFile, contSpecFile) - - return podID, contID, err -} - -// StopPodAndContainer stops a container and pod. -func (cc *Crictl) StopPodAndContainer(podID, contID string) error { - if err := cc.StopContainer(contID); err != nil { - return fmt.Errorf("failed to stop container %q in pod %q: %v", contID, podID, err) - } - - if err := cc.StopPod(podID); err != nil { - return fmt.Errorf("failed to stop pod %q: %v", podID, err) - } - - if err := cc.RmPod(podID); err != nil { - return fmt.Errorf("failed to remove pod %q: %v", podID, err) - } - - return nil -} - -// run runs crictl with the given args and returns an error if it takes longer -// than cc.Timeout to run. -func (cc *Crictl) run(args ...string) (string, error) { - defaultArgs := []string{ - "--image-endpoint", cc.imageEndpoint, - "--runtime-endpoint", cc.runtimeEndpoint, - } - cmd := exec.Command(cc.executable, append(defaultArgs, args...)...) - - // Run the command with a timeout. - done := make(chan string) - errCh := make(chan error) - go func() { - output, err := cmd.CombinedOutput() - if err != nil { - errCh <- fmt.Errorf("error: \"%v\", output: %s", err, string(output)) - return - } - done <- string(output) - }() - select { - case output := <-done: - return output, nil - case err := <-errCh: - return "", err - case <-time.After(cc.timeout): - if err := testutil.KillCommand(cmd); err != nil { - return "", fmt.Errorf("timed out, then couldn't kill process %+v: %v", cmd, err) - } - return "", fmt.Errorf("timed out: %+v", cmd) - } -} diff --git a/runsc/dockerutil/BUILD b/runsc/dockerutil/BUILD deleted file mode 100644 index 8621af901..000000000 --- a/runsc/dockerutil/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "dockerutil", - testonly = 1, - srcs = ["dockerutil.go"], - visibility = ["//:sandbox"], - deps = [ - "//runsc/testutil", - "@com_github_kr_pty//:go_default_library", - ], -) diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go deleted file mode 100644 index 1ff5e8cc3..000000000 --- a/runsc/dockerutil/dockerutil.go +++ /dev/null @@ -1,476 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package dockerutil is a collection of utility functions, primarily for -// testing. -package dockerutil - -import ( - "encoding/json" - "flag" - "fmt" - "io/ioutil" - "log" - "os" - "os/exec" - "path" - "regexp" - "strconv" - "strings" - "syscall" - "time" - - "github.com/kr/pty" - "gvisor.dev/gvisor/runsc/testutil" -) - -var ( - runtime = flag.String("runtime", "runsc", "specify which runtime to use") - config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths") -) - -// EnsureSupportedDockerVersion checks if correct docker is installed. -func EnsureSupportedDockerVersion() { - cmd := exec.Command("docker", "version") - out, err := cmd.CombinedOutput() - if err != nil { - log.Fatalf("Error running %q: %v", "docker version", err) - } - re := regexp.MustCompile(`Version:\s+(\d+)\.(\d+)\.\d.*`) - matches := re.FindStringSubmatch(string(out)) - if len(matches) != 3 { - log.Fatalf("Invalid docker output: %s", out) - } - major, _ := strconv.Atoi(matches[1]) - minor, _ := strconv.Atoi(matches[2]) - if major < 17 || (major == 17 && minor < 9) { - log.Fatalf("Docker version 17.09.0 or greater is required, found: %02d.%02d", major, minor) - } -} - -// RuntimePath returns the binary path for the current runtime. -func RuntimePath() (string, error) { - // Read the configuration data; the file must exist. - configBytes, err := ioutil.ReadFile(*config) - if err != nil { - return "", err - } - - // Unmarshal the configuration. - c := make(map[string]interface{}) - if err := json.Unmarshal(configBytes, &c); err != nil { - return "", err - } - - // Decode the expected configuration. - r, ok := c["runtimes"] - if !ok { - return "", fmt.Errorf("no runtimes declared: %v", c) - } - rs, ok := r.(map[string]interface{}) - if !ok { - // The runtimes are not a map. - return "", fmt.Errorf("unexpected format: %v", c) - } - r, ok = rs[*runtime] - if !ok { - // The expected runtime is not declared. - return "", fmt.Errorf("runtime %q not found: %v", *runtime, c) - } - rs, ok = r.(map[string]interface{}) - if !ok { - // The runtime is not a map. - return "", fmt.Errorf("unexpected format: %v", c) - } - p, ok := rs["path"].(string) - if !ok { - // The runtime does not declare a path. - return "", fmt.Errorf("unexpected format: %v", c) - } - return p, nil -} - -// MountMode describes if the mount should be ro or rw. -type MountMode int - -const ( - // ReadOnly is what the name says. - ReadOnly MountMode = iota - // ReadWrite is what the name says. - ReadWrite -) - -// String returns the mount mode argument for this MountMode. -func (m MountMode) String() string { - switch m { - case ReadOnly: - return "ro" - case ReadWrite: - return "rw" - } - panic(fmt.Sprintf("invalid mode: %d", m)) -} - -// MountArg formats the volume argument to mount in the container. -func MountArg(source, target string, mode MountMode) string { - return fmt.Sprintf("-v=%s:%s:%v", source, target, mode) -} - -// LinkArg formats the link argument. -func LinkArg(source *Docker, target string) string { - return fmt.Sprintf("--link=%s:%s", source.Name, target) -} - -// PrepareFiles creates temp directory to copy files there. The sandbox doesn't -// have access to files in the test dir. -func PrepareFiles(names ...string) (string, error) { - dir, err := ioutil.TempDir("", "image-test") - if err != nil { - return "", fmt.Errorf("ioutil.TempDir failed: %v", err) - } - if err := os.Chmod(dir, 0777); err != nil { - return "", fmt.Errorf("os.Chmod(%q, 0777) failed: %v", dir, err) - } - for _, name := range names { - src, err := testutil.FindFile(name) - if err != nil { - return "", fmt.Errorf("testutil.Preparefiles(%q) failed: %v", name, err) - } - dst := path.Join(dir, path.Base(name)) - if err := testutil.Copy(src, dst); err != nil { - return "", fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err) - } - } - return dir, nil -} - -// do executes docker command. -func do(args ...string) (string, error) { - log.Printf("Running: docker %s\n", args) - cmd := exec.Command("docker", args...) - out, err := cmd.CombinedOutput() - if err != nil { - return "", fmt.Errorf("error executing docker %s: %v\nout: %s", args, err, out) - } - return string(out), nil -} - -// doWithPty executes docker command with stdio attached to a pty. -func doWithPty(args ...string) (*exec.Cmd, *os.File, error) { - log.Printf("Running with pty: docker %s\n", args) - cmd := exec.Command("docker", args...) - ptmx, err := pty.Start(cmd) - if err != nil { - return nil, nil, fmt.Errorf("error executing docker %s with a pty: %v", args, err) - } - return cmd, ptmx, nil -} - -// Pull pulls a docker image. This is used in tests to isolate the -// time to pull the image off the network from the time to actually -// start the container, to avoid timeouts over slow networks. -func Pull(image string) error { - _, err := do("pull", image) - return err -} - -// Docker contains the name and the runtime of a docker container. -type Docker struct { - Runtime string - Name string -} - -// MakeDocker sets up the struct for a Docker container. -// Names of containers will be unique. -func MakeDocker(namePrefix string) Docker { - return Docker{ - Name: testutil.RandomName(namePrefix), - Runtime: *runtime, - } -} - -// logDockerID logs a container id, which is needed to find container runsc logs. -func (d *Docker) logDockerID() { - id, err := d.ID() - if err != nil { - log.Printf("%v\n", err) - } - log.Printf("Name: %s ID: %v\n", d.Name, id) -} - -// Create calls 'docker create' with the arguments provided. -func (d *Docker) Create(args ...string) error { - a := []string{"create", "--runtime", d.Runtime, "--name", d.Name} - a = append(a, args...) - _, err := do(a...) - if err == nil { - d.logDockerID() - } - return err -} - -// Start calls 'docker start'. -func (d *Docker) Start() error { - if _, err := do("start", d.Name); err != nil { - return fmt.Errorf("error starting container %q: %v", d.Name, err) - } - return nil -} - -// Stop calls 'docker stop'. -func (d *Docker) Stop() error { - if _, err := do("stop", d.Name); err != nil { - return fmt.Errorf("error stopping container %q: %v", d.Name, err) - } - return nil -} - -// Run calls 'docker run' with the arguments provided. The container starts -// running in the background and the call returns immediately. -func (d *Docker) Run(args ...string) error { - a := d.runArgs("-d") - a = append(a, args...) - _, err := do(a...) - if err == nil { - d.logDockerID() - } - return err -} - -// RunWithPty is like Run but with an attached pty. -func (d *Docker) RunWithPty(args ...string) (*exec.Cmd, *os.File, error) { - a := d.runArgs("-it") - a = append(a, args...) - return doWithPty(a...) -} - -// RunFg calls 'docker run' with the arguments provided in the foreground. It -// blocks until the container exits and returns the output. -func (d *Docker) RunFg(args ...string) (string, error) { - a := d.runArgs(args...) - out, err := do(a...) - if err == nil { - d.logDockerID() - } - return string(out), err -} - -func (d *Docker) runArgs(args ...string) []string { - // Environment variable RUNSC_TEST_NAME is picked up by the runtime and added - // to the log name, so one can easily identify the corresponding logs for - // this test. - rv := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-e", "RUNSC_TEST_NAME=" + d.Name} - return append(rv, args...) -} - -// Logs calls 'docker logs'. -func (d *Docker) Logs() (string, error) { - return do("logs", d.Name) -} - -// Exec calls 'docker exec' with the arguments provided. -func (d *Docker) Exec(args ...string) (string, error) { - return d.ExecWithFlags(nil, args...) -} - -// ExecWithFlags calls 'docker exec <flags> name <args>'. -func (d *Docker) ExecWithFlags(flags []string, args ...string) (string, error) { - a := []string{"exec"} - a = append(a, flags...) - a = append(a, d.Name) - a = append(a, args...) - return do(a...) -} - -// ExecAsUser calls 'docker exec' as the given user with the arguments -// provided. -func (d *Docker) ExecAsUser(user string, args ...string) (string, error) { - a := []string{"exec", "--user", user, d.Name} - a = append(a, args...) - return do(a...) -} - -// ExecWithTerminal calls 'docker exec -it' with the arguments provided and -// attaches a pty to stdio. -func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) { - a := []string{"exec", "-it", d.Name} - a = append(a, args...) - return doWithPty(a...) -} - -// Pause calls 'docker pause'. -func (d *Docker) Pause() error { - if _, err := do("pause", d.Name); err != nil { - return fmt.Errorf("error pausing container %q: %v", d.Name, err) - } - return nil -} - -// Unpause calls 'docker pause'. -func (d *Docker) Unpause() error { - if _, err := do("unpause", d.Name); err != nil { - return fmt.Errorf("error unpausing container %q: %v", d.Name, err) - } - return nil -} - -// Checkpoint calls 'docker checkpoint'. -func (d *Docker) Checkpoint(name string) error { - if _, err := do("checkpoint", "create", d.Name, name); err != nil { - return fmt.Errorf("error pausing container %q: %v", d.Name, err) - } - return nil -} - -// Restore calls 'docker start --checkname [name]'. -func (d *Docker) Restore(name string) error { - if _, err := do("start", "--checkpoint", name, d.Name); err != nil { - return fmt.Errorf("error starting container %q: %v", d.Name, err) - } - return nil -} - -// Remove calls 'docker rm'. -func (d *Docker) Remove() error { - if _, err := do("rm", d.Name); err != nil { - return fmt.Errorf("error deleting container %q: %v", d.Name, err) - } - return nil -} - -// CleanUp kills and deletes the container (best effort). -func (d *Docker) CleanUp() { - d.logDockerID() - if _, err := do("kill", d.Name); err != nil { - if strings.Contains(err.Error(), "is not running") { - // Nothing to kill. Don't log the error in this case. - } else { - log.Printf("error killing container %q: %v", d.Name, err) - } - } - if err := d.Remove(); err != nil { - log.Print(err) - } -} - -// FindPort returns the host port that is mapped to 'sandboxPort'. This calls -// docker to allocate a free port in the host and prevent conflicts. -func (d *Docker) FindPort(sandboxPort int) (int, error) { - format := fmt.Sprintf(`{{ (index (index .NetworkSettings.Ports "%d/tcp") 0).HostPort }}`, sandboxPort) - out, err := do("inspect", "-f", format, d.Name) - if err != nil { - return -1, fmt.Errorf("error retrieving port: %v", err) - } - port, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - return -1, fmt.Errorf("error parsing port %q: %v", out, err) - } - return port, nil -} - -// FindIP returns the IP address of the container as a string. -func (d *Docker) FindIP() (string, error) { - const format = `{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}` - out, err := do("inspect", "-f", format, d.Name) - if err != nil { - return "", fmt.Errorf("error retrieving IP: %v", err) - } - return strings.TrimSpace(out), nil -} - -// SandboxPid returns the PID to the sandbox process. -func (d *Docker) SandboxPid() (int, error) { - out, err := do("inspect", "-f={{.State.Pid}}", d.Name) - if err != nil { - return -1, fmt.Errorf("error retrieving pid: %v", err) - } - pid, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - return -1, fmt.Errorf("error parsing pid %q: %v", out, err) - } - return pid, nil -} - -// ID returns the container ID. -func (d *Docker) ID() (string, error) { - out, err := do("inspect", "-f={{.Id}}", d.Name) - if err != nil { - return "", fmt.Errorf("error retrieving ID: %v", err) - } - return strings.TrimSpace(string(out)), nil -} - -// Wait waits for container to exit, up to the given timeout. Returns error if -// wait fails or timeout is hit. Returns the application return code otherwise. -// Note that the application may have failed even if err == nil, always check -// the exit code. -func (d *Docker) Wait(timeout time.Duration) (syscall.WaitStatus, error) { - timeoutChan := time.After(timeout) - waitChan := make(chan (syscall.WaitStatus)) - errChan := make(chan (error)) - - go func() { - out, err := do("wait", d.Name) - if err != nil { - errChan <- fmt.Errorf("error waiting for container %q: %v", d.Name, err) - } - exit, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - errChan <- fmt.Errorf("error parsing exit code %q: %v", out, err) - } - waitChan <- syscall.WaitStatus(uint32(exit)) - }() - - select { - case ws := <-waitChan: - return ws, nil - case err := <-errChan: - return syscall.WaitStatus(1), err - case <-timeoutChan: - return syscall.WaitStatus(1), fmt.Errorf("timeout waiting for container %q", d.Name) - } -} - -// WaitForOutput calls 'docker logs' to retrieve containers output and searches -// for the given pattern. -func (d *Docker) WaitForOutput(pattern string, timeout time.Duration) (string, error) { - matches, err := d.WaitForOutputSubmatch(pattern, timeout) - if err != nil { - return "", err - } - if len(matches) == 0 { - return "", nil - } - return matches[0], nil -} - -// WaitForOutputSubmatch calls 'docker logs' to retrieve containers output and -// searches for the given pattern. It returns any regexp submatches as well. -func (d *Docker) WaitForOutputSubmatch(pattern string, timeout time.Duration) ([]string, error) { - re := regexp.MustCompile(pattern) - var out string - for exp := time.Now().Add(timeout); time.Now().Before(exp); { - var err error - out, err = d.Logs() - if err != nil { - return nil, err - } - if matches := re.FindStringSubmatch(out); matches != nil { - // Success! - return matches, nil - } - time.Sleep(100 * time.Millisecond) - } - return nil, fmt.Errorf("timeout waiting for output %q: %s", re.String(), out) -} diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index 64a406ae2..1036b0630 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -13,12 +13,12 @@ go_library( visibility = ["//runsc:__subpackages__"], deps = [ "//pkg/abi/linux", + "//pkg/cleanup", "//pkg/fd", "//pkg/log", "//pkg/p9", "//pkg/sync", "//pkg/syserr", - "//runsc/specutils", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index cadd83273..edc239013 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -33,11 +33,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -439,7 +439,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid if err != nil { return nil, nil, p9.QID{}, 0, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { child.Close() // Best effort attempt to remove the file in case of failure. if err := syscall.Unlinkat(l.file.FD(), name); err != nil { @@ -480,7 +480,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID) if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil { return p9.QID{}, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { // Best effort attempt to remove the dir in case of failure. if err := unix.Unlinkat(l.file.FD(), name, unix.AT_REMOVEDIR); err != nil { log.Warningf("error unlinking dir %q after failure: %v", path.Join(l.hostPath, name), err) @@ -767,22 +767,18 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { return err } -// TODO(b/127675828): support getxattr. func (*localFile) GetXattr(string, uint64) (string, error) { return "", syscall.EOPNOTSUPP } -// TODO(b/127675828): support setxattr. func (*localFile) SetXattr(string, string, uint32) error { return syscall.EOPNOTSUPP } -// TODO(b/148303075): support listxattr. func (*localFile) ListXattr(uint64) (map[string]struct{}, error) { return nil, syscall.EOPNOTSUPP } -// TODO(b/148303075): support removexattr. func (*localFile) RemoveXattr(string) error { return syscall.EOPNOTSUPP } @@ -868,7 +864,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9. if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil { return p9.QID{}, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { // Best effort attempt to remove the symlink in case of failure. if err := syscall.Unlinkat(l.file.FD(), newName); err != nil { log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err) diff --git a/runsc/main.go b/runsc/main.go index 762b0f801..920ed84a5 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -54,9 +54,11 @@ var ( // Debugging flags. debugLog = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.") + panicLog = flag.String("panic-log", "", "file path were panic reports and other Go's runtime messages are written.") logPackets = flag.Bool("log-packets", false, "enable network packet logging.") logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.") debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.") + panicLogFD = flag.Int("panic-log-fd", -1, "file descriptor to write Go's runtime messages.") debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.") alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr.") @@ -70,10 +72,11 @@ var ( network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.") hardwareGSO = flag.Bool("gso", true, "enable hardware segmentation offload if it is supported by a network device.") softwareGSO = flag.Bool("software-gso", true, "enable software segmentation offload when hardware ofload can't be enabled.") + qDisc = flag.String("qdisc", "fifo", "specifies which queueing discipline to apply by default to the non loopback nics used by the sandbox.") fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") - overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.") + overlayfsStaleRead = flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") @@ -82,6 +85,7 @@ var ( rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.") referenceLeakMode = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.") cpuNumFromQuota = flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)") + vfs2Enabled = flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. This uses the new experimental VFS layer.") // Test flags, not to be used outside tests, ever. testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.") @@ -116,8 +120,8 @@ func main() { subcommands.Register(new(cmd.Resume), "") subcommands.Register(new(cmd.Run), "") subcommands.Register(new(cmd.Spec), "") - subcommands.Register(new(cmd.Start), "") subcommands.Register(new(cmd.State), "") + subcommands.Register(new(cmd.Start), "") subcommands.Register(new(cmd.Wait), "") // Register internal commands with the internal group name. This causes @@ -127,6 +131,7 @@ func main() { subcommands.Register(new(cmd.Boot), internalGroup) subcommands.Register(new(cmd.Debug), internalGroup) subcommands.Register(new(cmd.Gofer), internalGroup) + subcommands.Register(new(cmd.Statefile), internalGroup) // All subcommands must be registered before flag parsing. flag.Parse() @@ -194,6 +199,11 @@ func main() { cmd.Fatalf("%v", err) } + queueingDiscipline, err := boot.MakeQueueingDiscipline(*qDisc) + if err != nil { + cmd.Fatalf("%s", err) + } + // Sets the reference leak check mode. Also set it in config below to // propagate it to child processes. refs.SetLeakMode(refsLeakMode) @@ -205,6 +215,7 @@ func main() { LogFilename: *logFilename, LogFormat: *logFormat, DebugLog: *debugLog, + PanicLog: *panicLog, DebugLogFormat: *debugLogFormat, FileAccess: fsAccess, FSGoferHostUDS: *fsGoferHostUDS, @@ -226,7 +237,8 @@ func main() { ReferenceLeakMode: refsLeakMode, OverlayfsStaleRead: *overlayfsStaleRead, CPUNumFromQuota: *cpuNumFromQuota, - + VFS2: *vfs2Enabled, + QDisc: queueingDiscipline, TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot, TestOnlyTestNameEnv: *testOnlyTestNameEnv, } @@ -257,20 +269,6 @@ func main() { if *debugLogFD > -1 { f := os.NewFile(uintptr(*debugLogFD), "debug log file") - // Quick sanity check to make sure no other commands get passed - // a log fd (they should use log dir instead). - if subcommand != "boot" && subcommand != "gofer" { - cmd.Fatalf("flag --debug-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand) - } - - // If we are the boot process, then we own our stdio FDs and can do what we - // want with them. Since Docker and Containerd both eat boot's stderr, we - // dup our stderr to the provided log FD so that panics will appear in the - // logs, rather than just disappear. - if err := syscall.Dup3(int(f.Fd()), int(os.Stderr.Fd()), 0); err != nil { - cmd.Fatalf("error dup'ing fd %d to stderr: %v", f.Fd(), err) - } - e = newEmitter(*debugLogFormat, f) } else if *debugLog != "" { @@ -286,7 +284,25 @@ func main() { e = newEmitter("text", ioutil.Discard) } - if *alsoLogToStderr { + if *panicLogFD > -1 || *debugLogFD > -1 { + fd := *panicLogFD + if fd < 0 { + fd = *debugLogFD + } + // Quick sanity check to make sure no other commands get passed + // a log fd (they should use log dir instead). + if subcommand != "boot" && subcommand != "gofer" { + cmd.Fatalf("flags --debug-log-fd and --panic-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand) + } + + // If we are the boot process, then we own our stdio FDs and can do what we + // want with them. Since Docker and Containerd both eat boot's stderr, we + // dup our stderr to the provided log FD so that panics will appear in the + // logs, rather than just disappear. + if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { + cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err) + } + } else if *alsoLogToStderr { e = &log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)} } @@ -303,6 +319,7 @@ func main() { log.Infof("\t\tFileAccess: %v, overlay: %t", conf.FileAccess, conf.Overlay) log.Infof("\t\tNetwork: %v, logging: %t", conf.Network, conf.LogPackets) log.Infof("\t\tStrace: %t, max size: %d, syscalls: %s", conf.Strace, conf.StraceLogSize, conf.StraceSyscalls) + log.Infof("\t\tVFS2 enabled: %v", conf.VFS2) log.Infof("***************************") if *testOnlyAllowRunAsCurrentUserWithoutChroot { @@ -319,7 +336,7 @@ func main() { log.Infof("Exiting with status: %v", ws) if ws.Signaled() { // No good way to return it, emulate what the shell does. Maybe raise - // signall to self? + // signal to self? os.Exit(128 + int(ws.Signal())) } os.Exit(ws.ExitStatus()) @@ -332,11 +349,11 @@ func main() { func newEmitter(format string, logFile io.Writer) log.Emitter { switch format { case "text": - return &log.GoogleEmitter{log.Writer{Next: logFile}} + return log.GoogleEmitter{&log.Writer{Next: logFile}} case "json": - return &log.JSONEmitter{log.Writer{Next: logFile}} + return log.JSONEmitter{&log.Writer{Next: logFile}} case "json-k8s": - return &log.K8sJSONEmitter{log.Writer{Next: logFile}} + return log.K8sJSONEmitter{&log.Writer{Next: logFile}} } cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format) panic("unreachable") diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index c95d50294..035dcd3e3 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -13,6 +13,7 @@ go_library( "//runsc:__subpackages__", ], deps = [ + "//pkg/cleanup", "//pkg/control/client", "//pkg/control/server", "//pkg/log", diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index 99e143696..209bfdb20 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -21,7 +21,6 @@ import ( "path/filepath" "runtime" "strconv" - "strings" "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -63,7 +62,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi // Build the path to the net namespace of the sandbox process. // This is what we will copy. nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net") - if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.NumNetworkChannels); err != nil { + if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.NumNetworkChannels, conf.QDisc); err != nil { return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err) } case boot.NetworkHost: @@ -75,30 +74,8 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi } func createDefaultLoopbackInterface(conn *urpc.Client) error { - link := boot.LoopbackLink{ - Name: "lo", - Addresses: []net.IP{ - net.IP("\x7f\x00\x00\x01"), - net.IPv6loopback, - }, - Routes: []boot.Route{ - { - Destination: net.IPNet{ - - IP: net.IPv4(0x7f, 0, 0, 0), - Mask: net.IPv4Mask(0xff, 0, 0, 0), - }, - }, - { - Destination: net.IPNet{ - IP: net.IPv6loopback, - Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)), - }, - }, - }, - } if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{ - LoopbackLinks: []boot.LoopbackLink{link}, + LoopbackLinks: []boot.LoopbackLink{boot.DefaultLoopbackLink}, }, nil); err != nil { return fmt.Errorf("creating loopback link and routes: %v", err) } @@ -138,7 +115,7 @@ func isRootNS() (bool, error) { // createInterfacesAndRoutesFromNS scrapes the interface and routes from the // net namespace with the given path, creates them in the sandbox, and removes // them from the host. -func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, numNetworkChannels int) error { +func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, numNetworkChannels int, qDisc boot.QueueingDiscipline) error { // Join the network namespace that we will be copying. restore, err := joinNetNS(nsPath) if err != nil { @@ -224,6 +201,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG MTU: iface.MTU, Routes: routes, NumChannels: numNetworkChannels, + QDisc: qDisc, } // Get the link for the interface. diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index ec72bdbfd..6e1a2af25 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -18,16 +18,19 @@ package sandbox import ( "context" "fmt" + "io" "math" "os" "os/exec" "strconv" + "strings" "syscall" "time" "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/control/client" "gvisor.dev/gvisor/pkg/control/server" "gvisor.dev/gvisor/pkg/log" @@ -117,7 +120,7 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) { s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup} // The Cleanup object cleans up partially created sandboxes when an error // occurs. Any errors occurring during cleanup itself are ignored. - c := specutils.MakeCleanup(func() { + c := cleanup.Make(func() { err := s.destroy() log.Warningf("error destroying sandbox: %v", err) }) @@ -142,7 +145,19 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) { // Wait until the sandbox has booted. b := make([]byte, 1) if l, err := clientSyncFile.Read(b); err != nil || l != 1 { - return nil, fmt.Errorf("waiting for sandbox to start: %v", err) + err := fmt.Errorf("waiting for sandbox to start: %v", err) + // If the sandbox failed to start, it may be because the binary + // permissions were incorrect. Check the bits and return a more helpful + // error message. + // + // NOTE: The error message is checked because error types are lost over + // rpc calls. + if strings.Contains(err.Error(), io.EOF.Error()) { + if permsErr := checkBinaryPermissions(conf); permsErr != nil { + return nil, fmt.Errorf("%v: %v", err, permsErr) + } + } + return nil, err } c.Release() @@ -369,8 +384,24 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF cmd.Args = append(cmd.Args, "--debug-log-fd="+strconv.Itoa(nextFD)) nextFD++ } + if conf.PanicLog != "" { + test := "" + if len(conf.TestOnlyTestNameEnv) != 0 { + // Fetch test name if one is provided and the test only flag was set. + if t, ok := specutils.EnvVar(args.Spec.Process.Env, conf.TestOnlyTestNameEnv); ok { + test = t + } + } - cmd.Args = append(cmd.Args, "--panic-signal="+strconv.Itoa(int(syscall.SIGTERM))) + panicLogFile, err := specutils.DebugLogFile(conf.PanicLog, "panic", test) + if err != nil { + return fmt.Errorf("opening debug log file in %q: %v", conf.PanicLog, err) + } + defer panicLogFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, panicLogFile) + cmd.Args = append(cmd.Args, "--panic-log-fd="+strconv.Itoa(nextFD)) + nextFD++ + } // Add the "boot" command to the args. // @@ -416,9 +447,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } - // If the platform needs a device FD we must pass it in. - if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil { + gPlatform, err := platform.Lookup(conf.Platform) + if err != nil { return err + } + + if deviceFile, err := gPlatform.OpenDevice(); err != nil { + return fmt.Errorf("opening device file for platform %q: %v", gPlatform, err) } else if deviceFile != nil { defer deviceFile.Close() cmd.ExtraFiles = append(cmd.ExtraFiles, deviceFile) @@ -426,6 +461,12 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } + // TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff + // isn't set. + if conf.Platform == "kvm" { + cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1") + } + // The current process' stdio must be passed to the application via the // --stdio-fds flag. The stdio of the sandbox process itself must not // be connected to the same FDs, otherwise we risk leaking sandbox @@ -503,7 +544,7 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF {Type: specs.UTSNamespace}, } - if conf.Platform == platforms.Ptrace { + if gPlatform.Requirements().RequiresCurrentPIDNS { // TODO(b/75837838): Also set a new PID namespace so that we limit // access to other host processes. log.Infof("Sandbox will be started in the current PID namespace") @@ -564,45 +605,32 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace}) cmd.Args = append(cmd.Args, "--setup-root") + const nobody = 65534 if conf.Rootless { - log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid()) + log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid()) cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ { - ContainerID: 0, + ContainerID: nobody, HostID: os.Getuid(), Size: 1, }, } cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ { - ContainerID: 0, + ContainerID: nobody, HostID: os.Getgid(), Size: 1, }, } - cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0} } else { // Map nobody in the new namespace to nobody in the parent namespace. // // A sandbox process will construct an empty - // root for itself, so it has to have the CAP_SYS_ADMIN - // capability. - // - // FIXME(b/122554829): The current implementations of - // os/exec doesn't allow to set ambient capabilities if - // a process is started in a new user namespace. As a - // workaround, we start the sandbox process with the 0 - // UID and then it constructs a chroot and sets UID to - // nobody. https://github.com/golang/go/issues/2315 - const nobody = 65534 + // root for itself, so it has to have + // CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities. cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ { - ContainerID: 0, - HostID: nobody - 1, - Size: 1, - }, - { ContainerID: nobody, HostID: nobody, Size: 1, @@ -615,11 +643,11 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF Size: 1, }, } - - // Set credentials to run as user and group nobody. - cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody} } + // Set credentials to run as user and group nobody. + cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody} + cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT)) } else { return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID") } @@ -677,6 +705,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } + if args.Attached { + // Kill sandbox if parent process exits in attached mode. + cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + // Tells boot that any process it creates must have pdeathsig set. + cmd.Args = append(cmd.Args, "--attached") + } + // Add container as the last argument. cmd.Args = append(cmd.Args, s.ID) @@ -685,15 +720,22 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF log.Debugf("Donating FD %d: %q", i+3, f.Name()) } - if args.Attached { - // Kill sandbox if parent process exits in attached mode. - cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL - } - log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr) if err := specutils.StartInNS(cmd, nss); err != nil { - return fmt.Errorf("Sandbox: %v", err) + err := fmt.Errorf("starting sandbox: %v", err) + // If the sandbox failed to start, it may be because the binary + // permissions were incorrect. Check the bits and return a more helpful + // error message. + // + // NOTE: The error message is checked because error types are lost over + // rpc calls. + if strings.Contains(err.Error(), syscall.EACCES.Error()) { + if permsErr := checkBinaryPermissions(conf); permsErr != nil { + return fmt.Errorf("%v: %v", err, permsErr) + } + } + return err } s.child = true s.Pid = cmd.Process.Pid @@ -972,6 +1014,66 @@ func (s *Sandbox) StopCPUProfile() error { return nil } +// GoroutineProfile writes a goroutine profile to the given file. +func (s *Sandbox) GoroutineProfile(f *os.File) error { + log.Debugf("Goroutine profile %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.GoroutineProfile, &opts, nil); err != nil { + return fmt.Errorf("getting sandbox %q goroutine profile: %v", s.ID, err) + } + return nil +} + +// BlockProfile writes a block profile to the given file. +func (s *Sandbox) BlockProfile(f *os.File) error { + log.Debugf("Block profile %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.BlockProfile, &opts, nil); err != nil { + return fmt.Errorf("getting sandbox %q block profile: %v", s.ID, err) + } + return nil +} + +// MutexProfile writes a mutex profile to the given file. +func (s *Sandbox) MutexProfile(f *os.File) error { + log.Debugf("Mutex profile %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.MutexProfile, &opts, nil); err != nil { + return fmt.Errorf("getting sandbox %q mutex profile: %v", s.ID, err) + } + return nil +} + // StartTrace start trace writing to the given file. func (s *Sandbox) StartTrace(f *os.File) error { log.Debugf("Trace start %q", s.ID) @@ -1096,3 +1198,31 @@ func deviceFileForPlatform(name string) (*os.File, error) { } return f, nil } + +// checkBinaryPermissions verifies that the required binary bits are set on +// the runsc executable. +func checkBinaryPermissions(conf *boot.Config) error { + // All platforms need the other exe bit + neededBits := os.FileMode(0001) + if conf.Platform == platforms.Ptrace { + // Ptrace needs the other read bit + neededBits |= os.FileMode(0004) + } + + exePath, err := os.Executable() + if err != nil { + return fmt.Errorf("getting exe path: %v", err) + } + + // Check the permissions of the runsc binary and print an error if it + // doesn't match expectations. + info, err := os.Stat(exePath) + if err != nil { + return fmt.Errorf("stat file: %v", err) + } + + if info.Mode().Perm()&neededBits != neededBits { + return fmt.Errorf(specutils.FaqErrorMsg("runsc-perms", fmt.Sprintf("%s does not have the correct permissions", exePath))) + } + return nil +} diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index c7dd3051c..23001d67c 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -18,6 +18,7 @@ import ( "fmt" "os" "os/exec" + "os/signal" "path/filepath" "runtime" "syscall" @@ -252,13 +253,27 @@ func MaybeRunAsRoot() error { }, Credential: &syscall.Credential{Uid: 0, Gid: 0}, GidMappingsEnableSetgroups: false, + + // Make sure child is killed when the parent terminates. + Pdeathsig: syscall.SIGKILL, } cmd.Env = os.Environ() cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { + if err := cmd.Start(); err != nil { + return fmt.Errorf("re-executing self: %w", err) + } + ch := make(chan os.Signal, 1) + signal.Notify(ch) + go func() { + for { + // Forward all signals to child process. + cmd.Process.Signal(<-ch) + } + }() + if err := cmd.Wait(); err != nil { if exit, ok := err.(*exec.ExitError); ok { if ws, ok := exit.Sys().(syscall.WaitStatus); ok { os.Exit(ws.ExitStatus()) @@ -266,7 +281,7 @@ func MaybeRunAsRoot() error { log.Warningf("No wait status provided, exiting with -1: %v", err) os.Exit(-1) } - return fmt.Errorf("re-executing self: %v", err) + return err } // Child completed with success. os.Exit(0) diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index d3c2e4e78..f1fa573c5 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -92,6 +92,12 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile) } + // PR_SET_NO_NEW_PRIVS is assumed to always be set. + // See kernel.Task.updateCredsForExecLocked. + if !spec.Process.NoNewPrivileges { + log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.") + } + // TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox. if spec.Linux != nil && spec.Linux.Seccomp != nil { log.Warningf("Seccomp spec is being ignored") @@ -438,36 +444,6 @@ func ContainsStr(strs []string, str string) bool { return false } -// Cleanup allows defers to be aborted when cleanup needs to happen -// conditionally. Usage: -// c := MakeCleanup(func() { f.Close() }) -// defer c.Clean() // any failure before release is called will close the file. -// ... -// c.Release() // on success, aborts closing the file and return it. -// return f -type Cleanup struct { - clean func() -} - -// MakeCleanup creates a new Cleanup object. -func MakeCleanup(f func()) Cleanup { - return Cleanup{clean: f} -} - -// Clean calls the cleanup function. -func (c *Cleanup) Clean() { - if c.clean != nil { - c.clean() - c.clean = nil - } -} - -// Release releases the cleanup from its duties, i.e. cleanup function is not -// called after this point. -func (c *Cleanup) Release() { - c.clean = nil -} - // RetryEintr retries the function until an error different than EINTR is // returned. func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { @@ -528,3 +504,8 @@ func EnvVar(env []string, name string) (string, bool) { } return "", false } + +// FaqErrorMsg returns an error message pointing to the FAQ. +func FaqErrorMsg(anchor, msg string) string { + return fmt.Sprintf("%s; see https://gvisor.dev/faq#%s for more details", msg, anchor) +} diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD deleted file mode 100644 index 945405303..000000000 --- a/runsc/testutil/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "testutil", - testonly = 1, - srcs = [ - "testutil.go", - "testutil_runfiles.go", - ], - visibility = ["//:sandbox"], - deps = [ - "//pkg/log", - "//pkg/sync", - "//runsc/boot", - "//runsc/specutils", - "@com_github_cenkalti_backoff//:go_default_library", - "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", - ], -) diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go deleted file mode 100644 index 92d677e71..000000000 --- a/runsc/testutil/testutil.go +++ /dev/null @@ -1,431 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package testutil contains utility functions for runsc tests. -package testutil - -import ( - "bufio" - "context" - "debug/elf" - "encoding/base32" - "encoding/json" - "flag" - "fmt" - "io" - "io/ioutil" - "math" - "math/rand" - "net/http" - "os" - "os/exec" - "os/signal" - "path/filepath" - "strconv" - "strings" - "sync/atomic" - "syscall" - "time" - - "github.com/cenkalti/backoff" - specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/runsc/boot" - "gvisor.dev/gvisor/runsc/specutils" -) - -var ( - checkpoint = flag.Bool("checkpoint", true, "control checkpoint/restore support") -) - -func init() { - rand.Seed(time.Now().UnixNano()) -} - -// IsCheckpointSupported returns the relevant command line flag. -func IsCheckpointSupported() bool { - return *checkpoint -} - -// TmpDir returns the absolute path to a writable directory that can be used as -// scratch by the test. -func TmpDir() string { - dir := os.Getenv("TEST_TMPDIR") - if dir == "" { - dir = "/tmp" - } - return dir -} - -// ConfigureExePath configures the executable for runsc in the test environment. -func ConfigureExePath() error { - path, err := FindFile("runsc/runsc") - if err != nil { - return err - } - specutils.ExePath = path - return nil -} - -// TestConfig returns the default configuration to use in tests. Note that -// 'RootDir' must be set by caller if required. -func TestConfig() *boot.Config { - logDir := "" - if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok { - logDir = dir + "/" - } - return &boot.Config{ - Debug: true, - DebugLog: logDir, - LogFormat: "text", - DebugLogFormat: "text", - AlsoLogToStderr: true, - LogPackets: true, - Network: boot.NetworkNone, - Strace: true, - Platform: "ptrace", - FileAccess: boot.FileAccessExclusive, - TestOnlyAllowRunAsCurrentUserWithoutChroot: true, - NumNetworkChannels: 1, - } -} - -// NewSpecWithArgs creates a simple spec with the given args suitable for use -// in tests. -func NewSpecWithArgs(args ...string) *specs.Spec { - return &specs.Spec{ - // The host filesystem root is the container root. - Root: &specs.Root{ - Path: "/", - Readonly: true, - }, - Process: &specs.Process{ - Args: args, - Env: []string{ - "PATH=" + os.Getenv("PATH"), - }, - Capabilities: specutils.AllCapabilities(), - }, - Mounts: []specs.Mount{ - // Hide the host /etc to avoid any side-effects. - // For example, bash reads /etc/passwd and if it is - // very big, tests can fail by timeout. - { - Type: "tmpfs", - Destination: "/etc", - }, - // Root is readonly, but many tests want to write to tmpdir. - // This creates a writable mount inside the root. Also, when tmpdir points - // to "/tmp", it makes the the actual /tmp to be mounted and not a tmpfs - // inside the sentry. - { - Type: "bind", - Destination: TmpDir(), - Source: TmpDir(), - }, - }, - Hostname: "runsc-test-hostname", - } -} - -// SetupRootDir creates a root directory for containers. -func SetupRootDir() (string, error) { - rootDir, err := ioutil.TempDir(TmpDir(), "containers") - if err != nil { - return "", fmt.Errorf("error creating root dir: %v", err) - } - return rootDir, nil -} - -// SetupContainer creates a bundle and root dir for the container, generates a -// test config, and writes the spec to config.json in the bundle dir. -func SetupContainer(spec *specs.Spec, conf *boot.Config) (rootDir, bundleDir string, err error) { - rootDir, err = SetupRootDir() - if err != nil { - return "", "", err - } - conf.RootDir = rootDir - bundleDir, err = SetupBundleDir(spec) - return rootDir, bundleDir, err -} - -// SetupBundleDir creates a bundle dir and writes the spec to config.json. -func SetupBundleDir(spec *specs.Spec) (bundleDir string, err error) { - bundleDir, err = ioutil.TempDir(TmpDir(), "bundle") - if err != nil { - return "", fmt.Errorf("error creating bundle dir: %v", err) - } - - if err = writeSpec(bundleDir, spec); err != nil { - return "", fmt.Errorf("error writing spec: %v", err) - } - return bundleDir, nil -} - -// writeSpec writes the spec to disk in the given directory. -func writeSpec(dir string, spec *specs.Spec) error { - b, err := json.Marshal(spec) - if err != nil { - return err - } - return ioutil.WriteFile(filepath.Join(dir, "config.json"), b, 0755) -} - -// UniqueContainerID generates a unique container id for each test. -// -// The container id is used to create an abstract unix domain socket, which must -// be unique. While the container forbids creating two containers with the same -// name, sometimes between test runs the socket does not get cleaned up quickly -// enough, causing container creation to fail. -func UniqueContainerID() string { - // Read 20 random bytes. - b := make([]byte, 20) - // "[Read] always returns len(p) and a nil error." --godoc - if _, err := rand.Read(b); err != nil { - panic("rand.Read failed: " + err.Error()) - } - // base32 encode the random bytes, so that the name is a valid - // container id and can be used as a socket name in the filesystem. - return fmt.Sprintf("test-container-%s", base32.StdEncoding.EncodeToString(b)) -} - -// Copy copies file from src to dst. -func Copy(src, dst string) error { - in, err := os.Open(src) - if err != nil { - return err - } - defer in.Close() - - out, err := os.Create(dst) - if err != nil { - return err - } - defer out.Close() - - _, err = io.Copy(out, in) - return err -} - -// Poll is a shorthand function to poll for something with given timeout. -func Poll(cb func() error, timeout time.Duration) error { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) - return backoff.Retry(cb, b) -} - -// WaitForHTTP tries GET requests on a port until the call succeeds or timeout. -func WaitForHTTP(port int, timeout time.Duration) error { - cb := func() error { - c := &http.Client{ - // Calculate timeout to be able to do minimum 5 attempts. - Timeout: timeout / 5, - } - url := fmt.Sprintf("http://localhost:%d/", port) - resp, err := c.Get(url) - if err != nil { - log.Infof("Waiting %s: %v", url, err) - return err - } - resp.Body.Close() - return nil - } - return Poll(cb, timeout) -} - -// Reaper reaps child processes. -type Reaper struct { - // mu protects ch, which will be nil if the reaper is not running. - mu sync.Mutex - ch chan os.Signal -} - -// Start starts reaping child processes. -func (r *Reaper) Start() { - r.mu.Lock() - defer r.mu.Unlock() - - if r.ch != nil { - panic("reaper.Start called on a running reaper") - } - - r.ch = make(chan os.Signal, 1) - signal.Notify(r.ch, syscall.SIGCHLD) - - go func() { - for { - r.mu.Lock() - ch := r.ch - r.mu.Unlock() - if ch == nil { - return - } - - _, ok := <-ch - if !ok { - // Channel closed. - return - } - for { - cpid, _ := syscall.Wait4(-1, nil, syscall.WNOHANG, nil) - if cpid < 1 { - break - } - } - } - }() -} - -// Stop stops reaping child processes. -func (r *Reaper) Stop() { - r.mu.Lock() - defer r.mu.Unlock() - - if r.ch == nil { - panic("reaper.Stop called on a stopped reaper") - } - - signal.Stop(r.ch) - close(r.ch) - r.ch = nil -} - -// StartReaper is a helper that starts a new Reaper and returns a function to -// stop it. -func StartReaper() func() { - r := &Reaper{} - r.Start() - return r.Stop -} - -// WaitUntilRead reads from the given reader until the wanted string is found -// or until timeout. -func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error { - sc := bufio.NewScanner(r) - if split != nil { - sc.Split(split) - } - // done must be accessed atomically. A value greater than 0 indicates - // that the read loop can exit. - var done uint32 - doneCh := make(chan struct{}) - go func() { - for sc.Scan() { - t := sc.Text() - if strings.Contains(t, want) { - atomic.StoreUint32(&done, 1) - close(doneCh) - break - } - if atomic.LoadUint32(&done) > 0 { - break - } - } - }() - select { - case <-time.After(timeout): - atomic.StoreUint32(&done, 1) - return fmt.Errorf("timeout waiting to read %q", want) - case <-doneCh: - return nil - } -} - -// KillCommand kills the process running cmd unless it hasn't been started. It -// returns an error if it cannot kill the process unless the reason is that the -// process has already exited. -func KillCommand(cmd *exec.Cmd) error { - if cmd.Process == nil { - return nil - } - if err := cmd.Process.Kill(); err != nil { - if !strings.Contains(err.Error(), "process already finished") { - return fmt.Errorf("failed to kill process %v: %v", cmd, err) - } - } - return nil -} - -// WriteTmpFile writes text to a temporary file, closes the file, and returns -// the name of the file. -func WriteTmpFile(pattern, text string) (string, error) { - file, err := ioutil.TempFile(TmpDir(), pattern) - if err != nil { - return "", err - } - defer file.Close() - if _, err := file.Write([]byte(text)); err != nil { - return "", err - } - return file.Name(), nil -} - -// RandomName create a name with a 6 digit random number appended to it. -func RandomName(prefix string) string { - return fmt.Sprintf("%s-%06d", prefix, rand.Int31n(1000000)) -} - -// IsStatic returns true iff the given file is a static binary. -func IsStatic(filename string) (bool, error) { - f, err := elf.Open(filename) - if err != nil { - return false, err - } - for _, prog := range f.Progs { - if prog.Type == elf.PT_INTERP { - return false, nil // Has interpreter. - } - } - return true, nil -} - -// TestIndicesForShard returns indices for this test shard based on the -// TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. -// -// If either of the env vars are not present, then the function will return all -// tests. If there are more shards than there are tests, then the returned list -// may be empty. -func TestIndicesForShard(numTests int) ([]int, error) { - var ( - shardIndex = 0 - shardTotal = 1 - ) - - indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS") - if indexStr != "" && totalStr != "" { - // Parse index and total to ints. - var err error - shardIndex, err = strconv.Atoi(indexStr) - if err != nil { - return nil, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err) - } - shardTotal, err = strconv.Atoi(totalStr) - if err != nil { - return nil, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err) - } - } - - // Calculate! - var indices []int - numBlocks := int(math.Ceil(float64(numTests) / float64(shardTotal))) - for i := 0; i < numBlocks; i++ { - pick := i*shardTotal + shardIndex - if pick < numTests { - indices = append(indices, pick) - } - } - return indices, nil -} diff --git a/runsc/testutil/testutil_runfiles.go b/runsc/testutil/testutil_runfiles.go deleted file mode 100644 index ece9ea9a1..000000000 --- a/runsc/testutil/testutil_runfiles.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package testutil - -import ( - "fmt" - "os" - "path/filepath" -) - -// FindFile searchs for a file inside the test run environment. It returns the -// full path to the file. It fails if none or more than one file is found. -func FindFile(path string) (string, error) { - wd, err := os.Getwd() - if err != nil { - return "", err - } - - // The test root is demarcated by a path element called "__main__". Search for - // it backwards from the working directory. - root := wd - for { - dir, name := filepath.Split(root) - if name == "__main__" { - break - } - if len(dir) == 0 { - return "", fmt.Errorf("directory __main__ not found in %q", wd) - } - // Remove ending slash to loop around. - root = dir[:len(dir)-1] - } - - // Annoyingly, bazel adds the build type to the directory path for go - // binaries, but not for c++ binaries. We use two different patterns to - // to find our file. - patterns := []string{ - // Try the obvious path first. - filepath.Join(root, path), - // If it was a go binary, use a wildcard to match the build - // type. The pattern is: /test-path/__main__/directories/*/file. - filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)), - } - - for _, p := range patterns { - matches, err := filepath.Glob(p) - if err != nil { - // "The only possible returned error is ErrBadPattern, - // when pattern is malformed." -godoc - return "", fmt.Errorf("error globbing %q: %v", p, err) - } - switch len(matches) { - case 0: - // Try the next pattern. - case 1: - // We found it. - return matches[0], nil - default: - return "", fmt.Errorf("more than one match found for %q: %s", path, matches) - } - } - return "", fmt.Errorf("file %q not found", path) -} |