summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/BUILD52
-rw-r--r--runsc/boot/BUILD50
-rw-r--r--runsc/boot/compat.go73
-rw-r--r--runsc/boot/compat_amd64.go93
-rw-r--r--runsc/boot/compat_arm64.go95
-rw-r--r--runsc/boot/compat_test.go45
-rw-r--r--runsc/boot/config.go47
-rw-r--r--runsc/boot/controller.go67
-rw-r--r--runsc/boot/fds.go81
-rw-r--r--runsc/boot/filter/BUILD6
-rw-r--r--runsc/boot/filter/config.go122
-rw-r--r--runsc/boot/filter/config_amd64.go31
-rw-r--r--runsc/boot/filter/config_arm64.go21
-rw-r--r--runsc/boot/filter/config_profile.go34
-rw-r--r--runsc/boot/filter/extra_filters_msan.go2
-rw-r--r--runsc/boot/fs.go172
-rw-r--r--runsc/boot/fs_test.go131
-rw-r--r--runsc/boot/limits.go2
-rw-r--r--runsc/boot/loader.go694
-rw-r--r--runsc/boot/loader_test.go186
-rw-r--r--runsc/boot/network.go130
-rw-r--r--runsc/boot/platforms/BUILD3
-rw-r--r--runsc/boot/pprof/BUILD11
-rw-r--r--runsc/boot/pprof/pprof.go (renamed from runsc/boot/pprof.go)6
-rw-r--r--runsc/boot/user.go170
-rw-r--r--runsc/boot/user_test.go254
-rw-r--r--runsc/boot/vfs.go519
-rw-r--r--runsc/cgroup/BUILD13
-rw-r--r--runsc/cgroup/cgroup.go153
-rw-r--r--runsc/cgroup/cgroup_test.go582
-rw-r--r--runsc/cmd/BUILD22
-rw-r--r--runsc/cmd/boot.go94
-rw-r--r--runsc/cmd/capability_test.go11
-rw-r--r--runsc/cmd/checkpoint.go2
-rw-r--r--runsc/cmd/chroot.go2
-rw-r--r--runsc/cmd/create.go3
-rw-r--r--runsc/cmd/debug.go54
-rw-r--r--runsc/cmd/delete.go2
-rw-r--r--runsc/cmd/do.go85
-rw-r--r--runsc/cmd/events.go2
-rw-r--r--runsc/cmd/exec.go2
-rw-r--r--runsc/cmd/gofer.go26
-rw-r--r--runsc/cmd/help.go16
-rw-r--r--runsc/cmd/install.go2
-rw-r--r--runsc/cmd/kill.go2
-rw-r--r--runsc/cmd/list.go2
-rw-r--r--runsc/cmd/pause.go2
-rw-r--r--runsc/cmd/ps.go2
-rw-r--r--runsc/cmd/restore.go2
-rw-r--r--runsc/cmd/resume.go2
-rw-r--r--runsc/cmd/run.go2
-rw-r--r--runsc/cmd/spec.go224
-rw-r--r--runsc/cmd/start.go3
-rw-r--r--runsc/cmd/state.go2
-rw-r--r--runsc/cmd/statefile.go149
-rw-r--r--runsc/cmd/syscalls.go25
-rw-r--r--runsc/cmd/wait.go2
-rw-r--r--runsc/console/BUILD3
-rw-r--r--runsc/container/BUILD26
-rw-r--r--runsc/container/console_test.go142
-rw-r--r--runsc/container/container.go405
-rw-r--r--runsc/container/container_norace_test.go20
-rw-r--r--runsc/container/container_race_test.go20
-rw-r--r--runsc/container/container_test.go2578
-rw-r--r--runsc/container/multi_container_test.go1391
-rw-r--r--runsc/container/shared_volume_test.go22
-rw-r--r--runsc/container/state_file.go185
-rw-r--r--runsc/container/test_app/BUILD19
-rw-r--r--runsc/container/test_app/fds.go185
-rw-r--r--runsc/container/test_app/test_app.go354
-rw-r--r--runsc/criutil/BUILD12
-rw-r--r--runsc/criutil/criutil.go277
-rw-r--r--runsc/debian/description6
-rwxr-xr-xrunsc/debian/postinst.sh9
-rw-r--r--runsc/dockerutil/BUILD15
-rw-r--r--runsc/dockerutil/dockerutil.go467
-rw-r--r--runsc/flag/BUILD9
-rw-r--r--runsc/flag/flag.go33
-rw-r--r--runsc/fsgofer/BUILD15
-rw-r--r--runsc/fsgofer/filter/BUILD5
-rw-r--r--runsc/fsgofer/filter/config.go20
-rw-r--r--runsc/fsgofer/filter/config_amd64.go33
-rw-r--r--runsc/fsgofer/filter/config_arm64.go27
-rw-r--r--runsc/fsgofer/fsgofer.go343
-rw-r--r--runsc/fsgofer/fsgofer_amd64_unsafe.go49
-rw-r--r--runsc/fsgofer/fsgofer_arm64_unsafe.go49
-rw-r--r--runsc/fsgofer/fsgofer_test.go169
-rw-r--r--runsc/fsgofer/fsgofer_unsafe.go25
-rw-r--r--runsc/main.go97
-rw-r--r--runsc/sandbox/BUILD8
-rw-r--r--runsc/sandbox/network.go187
-rw-r--r--runsc/sandbox/sandbox.go221
-rw-r--r--runsc/specutils/BUILD10
-rw-r--r--runsc/specutils/namespace.go19
-rw-r--r--runsc/specutils/specutils.go81
-rw-r--r--runsc/testutil/BUILD18
-rw-r--r--runsc/testutil/testutil.go476
-rwxr-xr-xrunsc/version_test.sh2
98 files changed, 6781 insertions, 5838 deletions
diff --git a/runsc/BUILD b/runsc/BUILD
index e4e8e64a3..96f697a5f 100644
--- a/runsc/BUILD
+++ b/runsc/BUILD
@@ -1,7 +1,6 @@
-package(licenses = ["notice"]) # Apache 2.0
+load("//tools:defs.bzl", "go_binary", "pkg_deb", "pkg_tar")
-load("@io_bazel_rules_go//go:def.bzl", "go_binary")
-load("@rules_pkg//:pkg.bzl", "pkg_deb", "pkg_tar")
+package(licenses = ["notice"])
go_binary(
name = "runsc",
@@ -9,7 +8,7 @@ go_binary(
"main.go",
"version.go",
],
- pure = "on",
+ pure = True,
visibility = [
"//visibility:public",
],
@@ -20,16 +19,19 @@ go_binary(
"//pkg/sentry/platform",
"//runsc/boot",
"//runsc/cmd",
+ "//runsc/flag",
"//runsc/specutils",
"@com_github_google_subcommands//:go_default_library",
],
)
# The runsc-race target is a race-compatible BUILD target. This must be built
-# via "bazel build --features=race //runsc:runsc-race", since the race feature
-# must apply to all dependencies due a bug in gazelle file selection. The pure
-# attribute must be off because the race detector requires linking with non-Go
-# components, although we still require a static binary.
+# via: bazel build --features=race :runsc-race
+#
+# This is necessary because the race feature must apply to all dependencies
+# due to a bug in gazelle file selection. The pure attribute must be off because
+# the race detector requires linking with non-Go components, although we still
+# require a static binary.
#
# Note that in the future this might be convertible to a compatible target by
# using the pure and static attributes within a select function, but select is
@@ -42,7 +44,7 @@ go_binary(
"main.go",
"version.go",
],
- static = "on",
+ static = True,
visibility = [
"//visibility:public",
],
@@ -53,39 +55,57 @@ go_binary(
"//pkg/sentry/platform",
"//runsc/boot",
"//runsc/cmd",
+ "//runsc/flag",
"//runsc/specutils",
"@com_github_google_subcommands//:go_default_library",
],
)
pkg_tar(
- name = "runsc-bin",
- srcs = [":runsc"],
+ name = "debian-bin",
+ srcs = [
+ ":runsc",
+ "//shim/v1:gvisor-containerd-shim",
+ "//shim/v2:containerd-shim-runsc-v1",
+ ],
mode = "0755",
package_dir = "/usr/bin",
- strip_prefix = "/runsc/linux_amd64_pure_stripped",
)
pkg_tar(
name = "debian-data",
extension = "tar.gz",
deps = [
- ":runsc-bin",
+ ":debian-bin",
+ "//shim:config",
],
)
genrule(
name = "deb-version",
+ # Note that runsc must appear in the srcs parameter and not the tools
+ # parameter, otherwise it will not be stamped. This is reasonable, as tools
+ # may be encoded differently in the build graph (cached more aggressively
+ # because they are assumed to be hermetic).
+ srcs = [":runsc"],
outs = ["version.txt"],
- cmd = "$(location :runsc) -version | grep 'runsc version' | sed 's/^[^0-9]*//' > $@",
+ # Note that the little dance here is necessary because files in the $(SRCS)
+ # attribute are not executable by default, and we can't touch in place.
+ cmd = "cp $(location :runsc) $(@D)/runsc && \
+ chmod a+x $(@D)/runsc && \
+ $(@D)/runsc -version | grep version | sed 's/^[^0-9]*//' > $@ && \
+ rm -f $(@D)/runsc",
stamp = 1,
- tools = [":runsc"],
)
pkg_deb(
name = "runsc-debian",
architecture = "amd64",
data = ":debian-data",
+ # Note that the description_file will be flattened (all newlines removed),
+ # and therefore it is kept to a simple one-line description. The expected
+ # format for debian packages is "short summary\nLonger explanation of
+ # tool." and this is impossible with the flattening.
description_file = "debian/description",
homepage = "https://gvisor.dev/",
maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>",
@@ -101,5 +121,7 @@ sh_test(
name = "version_test",
size = "small",
srcs = ["version_test.sh"],
+ args = ["$(location :runsc)"],
data = [":runsc"],
+ tags = ["noguitar"],
)
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 6fe2b57de..9f52438c2 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -7,38 +7,42 @@ go_library(
srcs = [
"compat.go",
"compat_amd64.go",
+ "compat_arm64.go",
"config.go",
"controller.go",
"debug.go",
"events.go",
- "fds.go",
"fs.go",
"limits.go",
"loader.go",
"network.go",
- "pprof.go",
"strace.go",
- "user.go",
+ "vfs.go",
],
- importpath = "gvisor.dev/gvisor/runsc/boot",
visibility = [
+ "//pkg/test:__subpackages__",
"//runsc:__subpackages__",
"//test:__subpackages__",
],
deps = [
"//pkg/abi",
"//pkg/abi/linux",
+ "//pkg/context",
"//pkg/control/server",
"//pkg/cpuid",
"//pkg/eventchannel",
+ "//pkg/fspath",
"//pkg/log",
"//pkg/memutil",
"//pkg/rand",
"//pkg/refs",
"//pkg/sentry/arch",
"//pkg/sentry/arch:registers_go_proto",
- "//pkg/sentry/context",
"//pkg/sentry/control",
+ "//pkg/sentry/devices/memdev",
+ "//pkg/sentry/devices/ttydev",
+ "//pkg/sentry/devices/tundev",
+ "//pkg/sentry/fdimport",
"//pkg/sentry/fs",
"//pkg/sentry/fs/dev",
"//pkg/sentry/fs/gofer",
@@ -48,6 +52,16 @@ go_library(
"//pkg/sentry/fs/sys",
"//pkg/sentry/fs/tmpfs",
"//pkg/sentry/fs/tty",
+ "//pkg/sentry/fs/user",
+ "//pkg/sentry/fsimpl/devpts",
+ "//pkg/sentry/fsimpl/devtmpfs",
+ "//pkg/sentry/fsimpl/fuse",
+ "//pkg/sentry/fsimpl/gofer",
+ "//pkg/sentry/fsimpl/host",
+ "//pkg/sentry/fsimpl/overlay",
+ "//pkg/sentry/fsimpl/proc",
+ "//pkg/sentry/fsimpl/sys",
+ "//pkg/sentry/fsimpl/tmpfs",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel:uncaught_signal_go_proto",
@@ -60,20 +74,24 @@ go_library(
"//pkg/sentry/socket/hostinet",
"//pkg/sentry/socket/netlink",
"//pkg/sentry/socket/netlink/route",
+ "//pkg/sentry/socket/netlink/uevent",
"//pkg/sentry/socket/netstack",
"//pkg/sentry/socket/unix",
"//pkg/sentry/state",
"//pkg/sentry/strace",
- "//pkg/sentry/syscalls/linux",
+ "//pkg/sentry/syscalls/linux/vfs2",
"//pkg/sentry/time",
"//pkg/sentry/unimpl:unimplemented_syscall_go_proto",
"//pkg/sentry/usage",
- "//pkg/sentry/usermem",
+ "//pkg/sentry/vfs",
"//pkg/sentry/watchdog",
+ "//pkg/sync",
"//pkg/syserror",
"//pkg/tcpip",
"//pkg/tcpip/link/fdbased",
"//pkg/tcpip/link/loopback",
+ "//pkg/tcpip/link/packetsocket",
+ "//pkg/tcpip/link/qdisc/fifo",
"//pkg/tcpip/link/sniffer",
"//pkg/tcpip/network/arp",
"//pkg/tcpip/network/ipv4",
@@ -86,9 +104,10 @@ go_library(
"//pkg/urpc",
"//runsc/boot/filter",
"//runsc/boot/platforms",
+ "//runsc/boot/pprof",
"//runsc/specutils",
"@com_github_golang_protobuf//proto:go_default_library",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
],
)
@@ -100,19 +119,20 @@ go_test(
"compat_test.go",
"fs_test.go",
"loader_test.go",
- "user_test.go",
],
- embed = [":boot"],
+ library = ":boot",
deps = [
"//pkg/control/server",
+ "//pkg/fspath",
"//pkg/log",
"//pkg/p9",
- "//pkg/sentry/arch:registers_go_proto",
- "//pkg/sentry/context/contexttest",
+ "//pkg/sentry/contexttest",
"//pkg/sentry/fs",
- "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/vfs",
+ "//pkg/sync",
"//pkg/unet",
"//runsc/fsgofer",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index 07e35ab10..84c67cbc2 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -17,18 +17,16 @@ package boot
import (
"fmt"
"os"
- "sync"
"syscall"
"github.com/golang/protobuf/proto"
- "gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/eventchannel"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/arch"
rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
"gvisor.dev/gvisor/pkg/sentry/strace"
spb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
+ "gvisor.dev/gvisor/pkg/sync"
)
func initCompatLogs(fd int) error {
@@ -53,9 +51,9 @@ type compatEmitter struct {
}
func newCompatEmitter(logFD int) (*compatEmitter, error) {
- nameMap, ok := strace.Lookup(abi.Linux, arch.AMD64)
+ nameMap, ok := getSyscallNameMap()
if !ok {
- return nil, fmt.Errorf("amd64 Linux syscall table not found")
+ return nil, fmt.Errorf("Linux syscall table not found")
}
c := &compatEmitter{
@@ -67,7 +65,7 @@ func newCompatEmitter(logFD int) (*compatEmitter, error) {
if logFD > 0 {
f := os.NewFile(uintptr(logFD), "user log file")
- target := log.MultiEmitter{c.sink, log.K8sJSONEmitter{log.Writer{Next: f}}}
+ target := &log.MultiEmitter{c.sink, log.K8sJSONEmitter{&log.Writer{Next: f}}}
c.sink = &log.BasicLogger{Level: log.Info, Emitter: target}
}
return c, nil
@@ -86,16 +84,16 @@ func (c *compatEmitter) Emit(msg proto.Message) (bool, error) {
}
func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
- regs := us.Registers.GetArch().(*rpb.Registers_Amd64).Amd64
+ regs := us.Registers
c.mu.Lock()
defer c.mu.Unlock()
- sysnr := regs.OrigRax
+ sysnr := syscallNum(regs)
tr := c.trackers[sysnr]
if tr == nil {
switch sysnr {
- case syscall.SYS_PRCTL, syscall.SYS_ARCH_PRCTL:
+ case syscall.SYS_PRCTL:
// args: cmd, ...
tr = newArgsTracker(0)
@@ -112,12 +110,22 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
tr = newArgsTracker(2)
default:
- tr = &onceTracker{}
+ tr = newArchArgsTracker(sysnr)
+ if tr == nil {
+ tr = &onceTracker{}
+ }
}
c.trackers[sysnr] = tr
}
+
if tr.shouldReport(regs) {
- c.sink.Infof("Unsupported syscall: %s, regs: %+v", c.nameMap.Name(uintptr(sysnr)), regs)
+ name := c.nameMap.Name(uintptr(sysnr))
+ c.sink.Infof("Unsupported syscall %s(%#x,%#x,%#x,%#x,%#x,%#x). It is "+
+ "likely that you can safely ignore this message and that this is not "+
+ "the cause of any error. Please, refer to %s/%s for more information.",
+ name, argVal(0, regs), argVal(1, regs), argVal(2, regs), argVal(3, regs),
+ argVal(4, regs), argVal(5, regs), syscallLink, name)
+
tr.onReported(regs)
}
}
@@ -139,10 +147,10 @@ func (c *compatEmitter) Close() error {
// the syscall and arguments.
type syscallTracker interface {
// shouldReport returns true is the syscall should be reported.
- shouldReport(regs *rpb.AMD64Registers) bool
+ shouldReport(regs *rpb.Registers) bool
// onReported marks the syscall as reported.
- onReported(regs *rpb.AMD64Registers)
+ onReported(regs *rpb.Registers)
}
// onceTracker reports only a single time, used for most syscalls.
@@ -150,10 +158,45 @@ type onceTracker struct {
reported bool
}
-func (o *onceTracker) shouldReport(_ *rpb.AMD64Registers) bool {
+func (o *onceTracker) shouldReport(_ *rpb.Registers) bool {
return !o.reported
}
-func (o *onceTracker) onReported(_ *rpb.AMD64Registers) {
+func (o *onceTracker) onReported(_ *rpb.Registers) {
o.reported = true
}
+
+// argsTracker reports only once for each different combination of arguments.
+// It's used for generic syscalls like ioctl to report once per 'cmd'.
+type argsTracker struct {
+ // argsIdx is the syscall arguments to use as unique ID.
+ argsIdx []int
+ reported map[string]struct{}
+ count int
+}
+
+func newArgsTracker(argIdx ...int) *argsTracker {
+ return &argsTracker{argsIdx: argIdx, reported: make(map[string]struct{})}
+}
+
+// key returns the command based on the syscall argument index.
+func (a *argsTracker) key(regs *rpb.Registers) string {
+ var rv string
+ for _, idx := range a.argsIdx {
+ rv += fmt.Sprintf("%d|", argVal(idx, regs))
+ }
+ return rv
+}
+
+func (a *argsTracker) shouldReport(regs *rpb.Registers) bool {
+ if a.count >= reportLimit {
+ return false
+ }
+ _, ok := a.reported[a.key(regs)]
+ return !ok
+}
+
+func (a *argsTracker) onReported(regs *rpb.Registers) {
+ a.count++
+ a.reported[a.key(regs)] = struct{}{}
+}
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 43cd0db94..8eb76b2ba 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -16,62 +16,85 @@ package boot
import (
"fmt"
+ "syscall"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
)
-// reportLimit is the max number of events that should be reported per tracker.
-const reportLimit = 100
+const (
+ // reportLimit is the max number of events that should be reported per
+ // tracker.
+ reportLimit = 100
+ syscallLink = "https://gvisor.dev/c/linux/amd64"
+)
-// argsTracker reports only once for each different combination of arguments.
-// It's used for generic syscalls like ioctl to report once per 'cmd'.
-type argsTracker struct {
- // argsIdx is the syscall arguments to use as unique ID.
- argsIdx []int
- reported map[string]struct{}
- count int
+// newRegs creates an empty Registers instance.
+func newRegs() *rpb.Registers {
+ return &rpb.Registers{
+ Arch: &rpb.Registers_Amd64{
+ Amd64: &rpb.AMD64Registers{},
+ },
+ }
}
-func newArgsTracker(argIdx ...int) *argsTracker {
- return &argsTracker{argsIdx: argIdx, reported: make(map[string]struct{})}
-}
+func argVal(argIdx int, regs *rpb.Registers) uint64 {
+ amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
-// cmd returns the command based on the syscall argument index.
-func (a *argsTracker) key(regs *rpb.AMD64Registers) string {
- var rv string
- for _, idx := range a.argsIdx {
- rv += fmt.Sprintf("%d|", argVal(idx, regs))
+ switch argIdx {
+ case 0:
+ return amd64Regs.Rdi
+ case 1:
+ return amd64Regs.Rsi
+ case 2:
+ return amd64Regs.Rdx
+ case 3:
+ return amd64Regs.R10
+ case 4:
+ return amd64Regs.R8
+ case 5:
+ return amd64Regs.R9
}
- return rv
+ panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
}
-func argVal(argIdx int, regs *rpb.AMD64Registers) uint32 {
+func setArgVal(argIdx int, argVal uint64, regs *rpb.Registers) {
+ amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
+
switch argIdx {
case 0:
- return uint32(regs.Rdi)
+ amd64Regs.Rdi = argVal
case 1:
- return uint32(regs.Rsi)
+ amd64Regs.Rsi = argVal
case 2:
- return uint32(regs.Rdx)
+ amd64Regs.Rdx = argVal
case 3:
- return uint32(regs.R10)
+ amd64Regs.R10 = argVal
case 4:
- return uint32(regs.R8)
+ amd64Regs.R8 = argVal
case 5:
- return uint32(regs.R9)
+ amd64Regs.R9 = argVal
+ default:
+ panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
}
- panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
}
-func (a *argsTracker) shouldReport(regs *rpb.AMD64Registers) bool {
- if a.count >= reportLimit {
- return false
- }
- _, ok := a.reported[a.key(regs)]
- return !ok
+func getSyscallNameMap() (strace.SyscallMap, bool) {
+ return strace.Lookup(abi.Linux, arch.AMD64)
+}
+
+func syscallNum(regs *rpb.Registers) uint64 {
+ amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
+ return amd64Regs.OrigRax
}
-func (a *argsTracker) onReported(regs *rpb.AMD64Registers) {
- a.count++
- a.reported[a.key(regs)] = struct{}{}
+func newArchArgsTracker(sysnr uint64) syscallTracker {
+ switch sysnr {
+ case syscall.SYS_ARCH_PRCTL:
+ // args: cmd, ...
+ return newArgsTracker(0)
+ }
+ return nil
}
diff --git a/runsc/boot/compat_arm64.go b/runsc/boot/compat_arm64.go
new file mode 100644
index 000000000..bce9d95b3
--- /dev/null
+++ b/runsc/boot/compat_arm64.go
@@ -0,0 +1,95 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
+)
+
+const (
+ // reportLimit is the max number of events that should be reported per
+ // tracker.
+ reportLimit = 100
+ syscallLink = "https://gvisor.dev/c/linux/arm64"
+)
+
+// newRegs creates an empty Registers instance.
+func newRegs() *rpb.Registers {
+ return &rpb.Registers{
+ Arch: &rpb.Registers_Arm64{
+ Arm64: &rpb.ARM64Registers{},
+ },
+ }
+}
+
+func argVal(argIdx int, regs *rpb.Registers) uint64 {
+ arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
+
+ switch argIdx {
+ case 0:
+ return arm64Regs.R0
+ case 1:
+ return arm64Regs.R1
+ case 2:
+ return arm64Regs.R2
+ case 3:
+ return arm64Regs.R3
+ case 4:
+ return arm64Regs.R4
+ case 5:
+ return arm64Regs.R5
+ }
+ panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
+}
+
+func setArgVal(argIdx int, argVal uint64, regs *rpb.Registers) {
+ arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
+
+ switch argIdx {
+ case 0:
+ arm64Regs.R0 = argVal
+ case 1:
+ arm64Regs.R1 = argVal
+ case 2:
+ arm64Regs.R2 = argVal
+ case 3:
+ arm64Regs.R3 = argVal
+ case 4:
+ arm64Regs.R4 = argVal
+ case 5:
+ arm64Regs.R5 = argVal
+ default:
+ panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
+ }
+}
+
+func getSyscallNameMap() (strace.SyscallMap, bool) {
+ return strace.Lookup(abi.Linux, arch.ARM64)
+}
+
+func syscallNum(regs *rpb.Registers) uint64 {
+ arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
+ return arm64Regs.R8
+}
+
+func newArchArgsTracker(sysnr uint64) syscallTracker {
+ // currently, no arch specific syscalls need to be handled here.
+ return nil
+}
diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go
index 388298d8d..839c5303b 100644
--- a/runsc/boot/compat_test.go
+++ b/runsc/boot/compat_test.go
@@ -16,8 +16,6 @@ package boot
import (
"testing"
-
- rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
)
func TestOnceTracker(t *testing.T) {
@@ -35,31 +33,34 @@ func TestOnceTracker(t *testing.T) {
func TestArgsTracker(t *testing.T) {
for _, tc := range []struct {
- name string
- idx []int
- rdi1 uint64
- rdi2 uint64
- rsi1 uint64
- rsi2 uint64
- want bool
+ name string
+ idx []int
+ arg1_1 uint64
+ arg1_2 uint64
+ arg2_1 uint64
+ arg2_2 uint64
+ want bool
}{
- {name: "same rdi", idx: []int{0}, rdi1: 123, rdi2: 123, want: false},
- {name: "same rsi", idx: []int{1}, rsi1: 123, rsi2: 123, want: false},
- {name: "diff rdi", idx: []int{0}, rdi1: 123, rdi2: 321, want: true},
- {name: "diff rsi", idx: []int{1}, rsi1: 123, rsi2: 321, want: true},
- {name: "cmd is uint32", idx: []int{0}, rsi1: 0xdead00000123, rsi2: 0xbeef00000123, want: false},
- {name: "same 2 args", idx: []int{0, 1}, rsi1: 123, rdi1: 321, rsi2: 123, rdi2: 321, want: false},
- {name: "diff 2 args", idx: []int{0, 1}, rsi1: 123, rdi1: 321, rsi2: 789, rdi2: 987, want: true},
+ {name: "same arg1", idx: []int{0}, arg1_1: 123, arg1_2: 123, want: false},
+ {name: "same arg2", idx: []int{1}, arg2_1: 123, arg2_2: 123, want: false},
+ {name: "diff arg1", idx: []int{0}, arg1_1: 123, arg1_2: 321, want: true},
+ {name: "diff arg2", idx: []int{1}, arg2_1: 123, arg2_2: 321, want: true},
+ {name: "cmd is uint32", idx: []int{0}, arg2_1: 0xdead00000123, arg2_2: 0xbeef00000123, want: false},
+ {name: "same 2 args", idx: []int{0, 1}, arg2_1: 123, arg1_1: 321, arg2_2: 123, arg1_2: 321, want: false},
+ {name: "diff 2 args", idx: []int{0, 1}, arg2_1: 123, arg1_1: 321, arg2_2: 789, arg1_2: 987, want: true},
} {
t.Run(tc.name, func(t *testing.T) {
c := newArgsTracker(tc.idx...)
- regs := &rpb.AMD64Registers{Rdi: tc.rdi1, Rsi: tc.rsi1}
+ regs := newRegs()
+ setArgVal(0, tc.arg1_1, regs)
+ setArgVal(1, tc.arg2_1, regs)
if !c.shouldReport(regs) {
t.Error("first call to shouldReport, got: false, want: true")
}
c.onReported(regs)
- regs.Rdi, regs.Rsi = tc.rdi2, tc.rsi2
+ setArgVal(0, tc.arg1_2, regs)
+ setArgVal(1, tc.arg2_2, regs)
if got := c.shouldReport(regs); tc.want != got {
t.Errorf("second call to shouldReport, got: %t, want: %t", got, tc.want)
}
@@ -70,7 +71,9 @@ func TestArgsTracker(t *testing.T) {
func TestArgsTrackerLimit(t *testing.T) {
c := newArgsTracker(0, 1)
for i := 0; i < reportLimit; i++ {
- regs := &rpb.AMD64Registers{Rdi: 123, Rsi: uint64(i)}
+ regs := newRegs()
+ setArgVal(0, 123, regs)
+ setArgVal(1, uint64(i), regs)
if !c.shouldReport(regs) {
t.Error("shouldReport before limit was reached, got: false, want: true")
}
@@ -78,7 +81,9 @@ func TestArgsTrackerLimit(t *testing.T) {
}
// Should hit the count limit now.
- regs := &rpb.AMD64Registers{Rdi: 123, Rsi: 123456}
+ regs := newRegs()
+ setArgVal(0, 123, regs)
+ setArgVal(1, 123456, regs)
if c.shouldReport(regs) {
t.Error("shouldReport after limit was reached, got: true, want: false")
}
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 72a33534f..80da8b3e6 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -158,6 +158,9 @@ type Config struct {
// DebugLog is the path to log debug information to, if not empty.
DebugLog string
+ // PanicLog is the path to log Go's runtime messages, if not empty.
+ PanicLog string
+
// DebugLogFormat is the log format for debug.
DebugLogFormat string
@@ -184,6 +187,16 @@ type Config struct {
// SoftwareGSO indicates that software segmentation offload is enabled.
SoftwareGSO bool
+ // TXChecksumOffload indicates that TX Checksum Offload is enabled.
+ TXChecksumOffload bool
+
+ // RXChecksumOffload indicates that RX Checksum Offload is enabled.
+ RXChecksumOffload bool
+
+ // QDisc indicates the type of queueing discipline to use by default
+ // for non-loopback interfaces.
+ QDisc QueueingDiscipline
+
// LogPackets indicates that all network packets should be logged.
LogPackets bool
@@ -234,8 +247,10 @@ type Config struct {
// ReferenceLeakMode sets reference leak check mode
ReferenceLeakMode refs.LeakMode
- // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for
- // write to workaround overlayfs limitation on kernels before 4.19.
+ // OverlayfsStaleRead instructs the sandbox to assume that the root mount
+ // is on a Linux overlayfs mount, which does not necessarily preserve
+ // coherence between read-only and subsequent writable file descriptors
+ // representing the "same" file.
OverlayfsStaleRead bool
// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
@@ -250,6 +265,18 @@ type Config struct {
// multiple tests are run in parallel, since there is no way to pass
// parameters to the runtime from docker.
TestOnlyTestNameEnv string
+
+ // CPUNumFromQuota sets CPU number count to available CPU quota, using
+ // least integer value greater than or equal to quota.
+ //
+ // E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
+ CPUNumFromQuota bool
+
+ // Enables VFS2 (not plumbed through yet).
+ VFS2 bool
+
+ // Enables FUSE usage (not plumbed through yet).
+ FUSE bool
}
// ToFlags returns a slice of flags that correspond to the given Config.
@@ -260,6 +287,7 @@ func (c *Config) ToFlags() []string {
"--log=" + c.LogFilename,
"--log-format=" + c.LogFormat,
"--debug-log=" + c.DebugLog,
+ "--panic-log=" + c.PanicLog,
"--debug-log-format=" + c.DebugLogFormat,
"--file-access=" + c.FileAccess.String(),
"--overlay=" + strconv.FormatBool(c.Overlay),
@@ -280,7 +308,13 @@ func (c *Config) ToFlags() []string {
"--ref-leak-mode=" + refsLeakModeToString(c.ReferenceLeakMode),
"--gso=" + strconv.FormatBool(c.HardwareGSO),
"--software-gso=" + strconv.FormatBool(c.SoftwareGSO),
+ "--rx-checksum-offload=" + strconv.FormatBool(c.RXChecksumOffload),
+ "--tx-checksum-offload=" + strconv.FormatBool(c.TXChecksumOffload),
"--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead),
+ "--qdisc=" + c.QDisc.String(),
+ }
+ if c.CPUNumFromQuota {
+ f = append(f, "--cpu-num-from-quota")
}
// Only include these if set since it is never to be used by users.
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
@@ -289,5 +323,14 @@ func (c *Config) ToFlags() []string {
if len(c.TestOnlyTestNameEnv) != 0 {
f = append(f, "--TESTONLY-test-name-env="+c.TestOnlyTestNameEnv)
}
+
+ if c.VFS2 {
+ f = append(f, "--vfs2=true")
+ }
+
+ if c.FUSE {
+ f = append(f, "--fuse=true")
+ }
+
return f
}
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 5f644b57e..626a3816e 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -32,6 +32,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/watchdog"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot/pprof"
"gvisor.dev/gvisor/runsc/specutils"
)
@@ -51,7 +52,7 @@ const (
ContainerEvent = "containerManager.Event"
// ContainerExecuteAsync is the URPC endpoint for executing a command in a
- // container..
+ // container.
ContainerExecuteAsync = "containerManager.ExecuteAsync"
// ContainerPause pauses the container.
@@ -103,6 +104,8 @@ const (
StartCPUProfile = "Profile.StartCPUProfile"
StopCPUProfile = "Profile.StopCPUProfile"
HeapProfile = "Profile.HeapProfile"
+ BlockProfile = "Profile.BlockProfile"
+ MutexProfile = "Profile.MutexProfile"
StartTrace = "Profile.StartTrace"
StopTrace = "Profile.StopTrace"
)
@@ -125,43 +128,55 @@ type controller struct {
// manager holds the containerManager methods.
manager *containerManager
+
+ // pprof holds the profile instance if enabled. It may be nil.
+ pprof *control.Profile
}
// newController creates a new controller. The caller must call
// controller.srv.StartServing() to start the controller.
func newController(fd int, l *Loader) (*controller, error) {
- srv, err := server.CreateFromFD(fd)
+ ctrl := &controller{}
+ var err error
+ ctrl.srv, err = server.CreateFromFD(fd)
if err != nil {
return nil, err
}
- manager := &containerManager{
+ ctrl.manager = &containerManager{
startChan: make(chan struct{}),
startResultChan: make(chan error),
l: l,
}
- srv.Register(manager)
+ ctrl.srv.Register(ctrl.manager)
- if eps, ok := l.k.NetworkStack().(*netstack.Stack); ok {
+ if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok {
net := &Network{
Stack: eps.Stack,
}
- srv.Register(net)
+ ctrl.srv.Register(net)
}
- srv.Register(&debug{})
- srv.Register(&control.Logging{})
- if l.conf.ProfileEnable {
- srv.Register(&control.Profile{})
+ ctrl.srv.Register(&debug{})
+ ctrl.srv.Register(&control.Logging{})
+
+ if l.root.conf.ProfileEnable {
+ ctrl.pprof = &control.Profile{Kernel: l.k}
+ ctrl.srv.Register(ctrl.pprof)
}
- return &controller{
- srv: srv,
- manager: manager,
- }, nil
+ return ctrl, nil
+}
+
+func (c *controller) stop() {
+ if c.pprof != nil {
+ // These are noop if there is nothing being profiled.
+ _ = c.pprof.StopCPUProfile(nil, nil)
+ _ = c.pprof.StopTrace(nil, nil)
+ }
}
-// containerManager manages sandboes containers.
+// containerManager manages sandbox containers.
type containerManager struct {
// startChan is used to signal when the root container process should
// be started.
@@ -327,7 +342,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
// Pause the kernel while we build a new one.
cm.l.k.Pause()
- p, err := createPlatform(cm.l.conf, deviceFile)
+ p, err := createPlatform(cm.l.root.conf, deviceFile)
if err != nil {
return fmt.Errorf("creating platform: %v", err)
}
@@ -339,12 +354,12 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
return fmt.Errorf("creating memory file: %v", err)
}
k.SetMemoryFile(mf)
- networkStack := cm.l.k.NetworkStack()
+ networkStack := cm.l.k.RootNetworkNamespace().Stack()
cm.l.k = k
// Set up the restore environment.
- mntr := newContainerMounter(cm.l.spec, cm.l.goferFDs, cm.l.k, cm.l.mountHints)
- renv, err := mntr.createRestoreEnvironment(cm.l.conf)
+ mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints)
+ renv, err := mntr.createRestoreEnvironment(cm.l.root.conf)
if err != nil {
return fmt.Errorf("creating RestoreEnvironment: %v", err)
}
@@ -362,10 +377,10 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
return fmt.Errorf("file cannot be empty")
}
- if cm.l.conf.ProfileEnable {
- // initializePProf opens /proc/self/maps, so has to be
- // called before installing seccomp filters.
- initializePProf()
+ if cm.l.root.conf.ProfileEnable {
+ // pprof.Initialize opens /proc/self/maps, so has to be called before
+ // installing seccomp filters.
+ pprof.Initialize()
}
// Seccomp filters have to be applied before parsing the state file.
@@ -380,12 +395,14 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
}
// Since we have a new kernel we also must make a new watchdog.
- dog := watchdog.New(k, watchdog.DefaultTimeout, cm.l.conf.WatchdogAction)
+ dogOpts := watchdog.DefaultOpts
+ dogOpts.TaskTimeoutAction = cm.l.root.conf.WatchdogAction
+ dog := watchdog.New(k, dogOpts)
// Change the loader fields to reflect the changes made when restoring.
cm.l.k = k
cm.l.watchdog = dog
- cm.l.rootProcArgs = kernel.CreateProcessArgs{}
+ cm.l.root.procArgs = kernel.CreateProcessArgs{}
cm.l.restore = true
// Reinitialize the sandbox ID and processes map. Note that it doesn't
diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go
deleted file mode 100644
index e5de1f3d7..000000000
--- a/runsc/boot/fds.go
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package boot
-
-import (
- "fmt"
-
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/fs/host"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
-)
-
-// createFDTable creates an FD table that contains stdin, stdout, and stderr.
-// If console is true, then ioctl calls will be passed through to the host FD.
-// Upon success, createFDMap dups then closes stdioFDs.
-func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) {
- if len(stdioFDs) != 3 {
- return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
- }
-
- k := kernel.KernelFromContext(ctx)
- fdTable := k.NewFDTable()
- defer fdTable.DecRef()
- mounter := fs.FileOwnerFromContext(ctx)
-
- var ttyFile *fs.File
- for appFD, hostFD := range stdioFDs {
- var appFile *fs.File
-
- if console && appFD < 3 {
- // Import the file as a host TTY file.
- if ttyFile == nil {
- var err error
- appFile, err = host.ImportFile(ctx, hostFD, mounter, true /* isTTY */)
- if err != nil {
- return nil, err
- }
- defer appFile.DecRef()
-
- // Remember this in the TTY file, as we will
- // use it for the other stdio FDs.
- ttyFile = appFile
- } else {
- // Re-use the existing TTY file, as all three
- // stdio FDs must point to the same fs.File in
- // order to share TTY state, specifically the
- // foreground process group id.
- appFile = ttyFile
- }
- } else {
- // Import the file as a regular host file.
- var err error
- appFile, err = host.ImportFile(ctx, hostFD, mounter, false /* isTTY */)
- if err != nil {
- return nil, err
- }
- defer appFile.DecRef()
- }
-
- // Add the file to the FD map.
- if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
- return nil, err
- }
- }
-
- fdTable.IncRef()
- return fdTable, nil
-}
diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD
index f5509b6b7..ed18f0047 100644
--- a/runsc/boot/filter/BUILD
+++ b/runsc/boot/filter/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
@@ -6,12 +6,14 @@ go_library(
name = "filter",
srcs = [
"config.go",
+ "config_amd64.go",
+ "config_arm64.go",
+ "config_profile.go",
"extra_filters.go",
"extra_filters_msan.go",
"extra_filters_race.go",
"filter.go",
],
- importpath = "gvisor.dev/gvisor/runsc/boot/filter",
visibility = [
"//runsc/boot:__subpackages__",
],
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 5ad108261..149eb0b1b 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -26,10 +26,6 @@ import (
// allowedSyscalls is the set of syscalls executed by the Sentry to the host OS.
var allowedSyscalls = seccomp.SyscallRules{
- syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
- {seccomp.AllowValue(linux.ARCH_GET_FS)},
- {seccomp.AllowValue(linux.ARCH_SET_FS)},
- },
syscall.SYS_CLOCK_GETTIME: {},
syscall.SYS_CLONE: []seccomp.Rule{
{
@@ -42,9 +38,15 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.CLONE_THREAD),
},
},
- syscall.SYS_CLOSE: {},
- syscall.SYS_DUP: {},
- syscall.SYS_DUP2: {},
+ syscall.SYS_CLOSE: {},
+ syscall.SYS_DUP: {},
+ syscall.SYS_DUP3: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.O_CLOEXEC),
+ },
+ },
syscall.SYS_EPOLL_CREATE1: {},
syscall.SYS_EPOLL_CTL: {},
syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
@@ -132,11 +134,6 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.AllowValue(syscall.SOL_SOCKET),
seccomp.AllowValue(syscall.SO_SNDBUF),
},
- {
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_REUSEADDR),
- },
},
syscall.SYS_GETTID: {},
syscall.SYS_GETTIMEOFDAY: {},
@@ -177,6 +174,18 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_LSEEK: {},
syscall.SYS_MADVISE: {},
syscall.SYS_MINCORE: {},
+ // Used by the Go runtime as a temporary workaround for a Linux
+ // 5.2-5.4 bug.
+ //
+ // See src/runtime/os_linux_x86.go.
+ //
+ // TODO(b/148688965): Remove once this is gone from Go.
+ syscall.SYS_MLOCK: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4096),
+ },
+ },
syscall.SYS_MMAP: []seccomp.Rule{
{
seccomp.AllowAny{},
@@ -220,7 +229,11 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_NANOSLEEP: {},
syscall.SYS_PPOLL: {},
syscall.SYS_PREAD64: {},
+ syscall.SYS_PREADV: {},
+ unix.SYS_PREADV2: {},
syscall.SYS_PWRITE64: {},
+ syscall.SYS_PWRITEV: {},
+ unix.SYS_PWRITEV2: {},
syscall.SYS_READ: {},
syscall.SYS_RECVMSG: []seccomp.Rule{
{
@@ -273,26 +286,36 @@ var allowedSyscalls = seccomp.SyscallRules{
{seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
},
syscall.SYS_SIGALTSTACK: {},
+ unix.SYS_STATX: {},
syscall.SYS_SYNC_FILE_RANGE: {},
+ syscall.SYS_TEE: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(1), /* len */
+ seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */
+ },
+ },
syscall.SYS_TGKILL: []seccomp.Rule{
{
seccomp.AllowValue(uint64(os.Getpid())),
},
},
- syscall.SYS_WRITE: {},
- // The only user in rawfile.NonBlockingWrite3 always passes iovcnt with
- // values 2 or 3. Three iovec-s are passed, when the PACKET_VNET_HDR
- // option is enabled for a packet socket.
- syscall.SYS_WRITEV: []seccomp.Rule{
+ syscall.SYS_UTIMENSAT: []seccomp.Rule{
{
seccomp.AllowAny{},
+ seccomp.AllowValue(0), /* null pathname */
seccomp.AllowAny{},
- seccomp.AllowValue(2),
+ seccomp.AllowValue(0), /* flags */
},
+ },
+ syscall.SYS_WRITE: {},
+ // For rawfile.NonBlockingWriteIovec.
+ syscall.SYS_WRITEV: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowAny{},
- seccomp.AllowValue(3),
+ seccomp.GreaterThan(0),
},
},
}
@@ -315,6 +338,26 @@ func hostInetFilters() seccomp.SyscallRules {
syscall.SYS_GETSOCKOPT: []seccomp.Rule{
{
seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IP),
+ seccomp.AllowValue(syscall.IP_TOS),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IP),
+ seccomp.AllowValue(syscall.IP_RECVTOS),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_TCLASS),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_RECVTCLASS),
+ },
+ {
+ seccomp.AllowAny{},
seccomp.AllowValue(syscall.SOL_IPV6),
seccomp.AllowValue(syscall.IPV6_V6ONLY),
},
@@ -416,6 +459,34 @@ func hostInetFilters() seccomp.SyscallRules {
seccomp.AllowAny{},
seccomp.AllowValue(4),
},
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IP),
+ seccomp.AllowValue(syscall.IP_TOS),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IP),
+ seccomp.AllowValue(syscall.IP_RECVTOS),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_TCLASS),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_RECVTCLASS),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
},
syscall.SYS_SHUTDOWN: []seccomp.Rule{
{
@@ -479,16 +550,3 @@ func controlServerFilters(fd int) seccomp.SyscallRules {
},
}
}
-
-// profileFilters returns extra syscalls made by runtime/pprof package.
-func profileFilters() seccomp.SyscallRules {
- return seccomp.SyscallRules{
- syscall.SYS_OPENAT: []seccomp.Rule{
- {
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC),
- },
- },
- }
-}
diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go
new file mode 100644
index 000000000..5335ff82c
--- /dev/null
+++ b/runsc/boot/filter/config_amd64.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package filter
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+func init() {
+ allowedSyscalls[syscall.SYS_ARCH_PRCTL] = append(allowedSyscalls[syscall.SYS_ARCH_PRCTL],
+ seccomp.Rule{seccomp.AllowValue(linux.ARCH_GET_FS)},
+ seccomp.Rule{seccomp.AllowValue(linux.ARCH_SET_FS)},
+ )
+}
diff --git a/runsc/boot/filter/config_arm64.go b/runsc/boot/filter/config_arm64.go
new file mode 100644
index 000000000..7fa9bbda3
--- /dev/null
+++ b/runsc/boot/filter/config_arm64.go
@@ -0,0 +1,21 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package filter
+
+// Reserved for future customization.
+func init() {
+}
diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go
new file mode 100644
index 000000000..194952a7b
--- /dev/null
+++ b/runsc/boot/filter/config_profile.go
@@ -0,0 +1,34 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+// profileFilters returns extra syscalls made by runtime/pprof package.
+func profileFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ syscall.SYS_OPENAT: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC),
+ },
+ },
+ }
+}
diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go
index 5e5a3c998..209e646a7 100644
--- a/runsc/boot/filter/extra_filters_msan.go
+++ b/runsc/boot/filter/extra_filters_msan.go
@@ -26,6 +26,8 @@ import (
func instrumentationFilters() seccomp.SyscallRules {
Report("MSAN is enabled: syscall filters less restrictive!")
return seccomp.SyscallRules{
+ syscall.SYS_CLONE: {},
+ syscall.SYS_MMAP: {},
syscall.SYS_SCHED_GETAFFINITY: {},
syscall.SYS_SET_ROBUST_LIST: {},
}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 76036c147..9dd5b0184 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -16,7 +16,6 @@ package boot
import (
"fmt"
- "path"
"path/filepath"
"sort"
"strconv"
@@ -30,14 +29,22 @@ import (
_ "gvisor.dev/gvisor/pkg/sentry/fs/sys"
_ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
_ "gvisor.dev/gvisor/pkg/sentry/fs/tty"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
+ procvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
+ sysvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
+ tmpfsvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/syserror"
@@ -45,27 +52,19 @@ import (
)
const (
- // Filesystem name for 9p gofer mounts.
- rootFsName = "9p"
-
// Device name for root mount.
rootDevice = "9pfs-/"
// MountPrefix is the annotation prefix for mount hints.
- MountPrefix = "gvisor.dev/spec/mount"
-
- // Filesystems that runsc supports.
- bind = "bind"
- devpts = "devpts"
- devtmpfs = "devtmpfs"
- proc = "proc"
- sysfs = "sysfs"
- tmpfs = "tmpfs"
- nonefs = "none"
+ MountPrefix = "dev.gvisor.spec.mount."
+
+ // Supported filesystems that map to different internal filesystem.
+ bind = "bind"
+ nonefs = "none"
)
// tmpfs has some extra supported options that we must pass through.
-var tmpfsAllowedOptions = []string{"mode", "uid", "gid"}
+var tmpfsAllowedData = []string{"mode", "uid", "gid"}
func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
// Upper layer uses the same flags as lower, but it must be read-write.
@@ -109,12 +108,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
// Always mount /dev.
mounts = append(mounts, specs.Mount{
- Type: devtmpfs,
+ Type: devtmpfs.Name,
Destination: "/dev",
})
mounts = append(mounts, specs.Mount{
- Type: devpts,
+ Type: devpts.Name,
Destination: "/dev/pts",
})
@@ -138,13 +137,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
var mandatoryMounts []specs.Mount
if !procMounted {
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: proc,
+ Type: procvfs2.Name,
Destination: "/proc",
})
}
if !sysMounted {
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: sysfs,
+ Type: sysvfs2.Name,
Destination: "/sys",
})
}
@@ -156,13 +155,17 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
return mounts
}
-// p9MountOptions creates a slice of options for a p9 mount.
-func p9MountOptions(fd int, fa FileAccessType) []string {
+// p9MountData creates a slice of p9 mount data.
+func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string {
opts := []string{
"trans=fd",
"rfdno=" + strconv.Itoa(fd),
"wfdno=" + strconv.Itoa(fd),
- "privateunixsocket=true",
+ }
+ if !vfs2 {
+ // privateunixsocket is always enabled in VFS2. VFS1 requires explicit
+ // enablement.
+ opts = append(opts, "privateunixsocket=true")
}
if fa == FileAccessShared {
opts = append(opts, "cache=remote_revalidating")
@@ -232,8 +235,8 @@ func isSupportedMountFlag(fstype, opt string) bool {
case "rw", "ro", "noatime", "noexec":
return true
}
- if fstype == tmpfs {
- ok, err := parseMountOption(opt, tmpfsAllowedOptions...)
+ if fstype == tmpfsvfs2.Name {
+ ok, err := parseMountOption(opt, tmpfsAllowedData...)
return ok && err == nil
}
return false
@@ -279,6 +282,9 @@ func subtargets(root string, mnts []specs.Mount) []string {
}
func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+ if conf.VFS2 {
+ return setupContainerVFS2(ctx, conf, mntr, procArgs)
+ }
mns, err := mntr.setupFS(conf, procArgs)
if err != nil {
return err
@@ -287,19 +293,12 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter,
// Set namespace here so that it can be found in ctx.
procArgs.MountNamespace = mns
- return setExecutablePath(ctx, procArgs)
-}
-
-// setExecutablePath sets the procArgs.Filename by searching the PATH for an
-// executable matching the procArgs.Argv[0].
-func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
- paths := fs.GetPath(procArgs.Envv)
- exe := procArgs.Argv[0]
- f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths)
+ // Resolve the executable path from working dir and environment.
+ resolved, err := user.ResolveExecutablePath(ctx, procArgs)
if err != nil {
- return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err)
+ return err
}
- procArgs.Filename = f
+ procArgs.Filename = resolved
return nil
}
@@ -392,6 +391,10 @@ type mountHint struct {
// root is the inode where the volume is mounted. For mounts with 'pod' share
// the volume is mounted once and then bind mounted inside the containers.
root *fs.Inode
+
+ // vfsMount is the master mount for the volume. For mounts with 'pod' share
+ // the master volume is bind mounted inside the containers.
+ vfsMount *vfs.Mount
}
func (m *mountHint) setField(key, val string) error {
@@ -439,7 +442,7 @@ func (m *mountHint) setOptions(val string) error {
}
func (m *mountHint) isSupported() bool {
- return m.mount.Type == tmpfs && m.share == pod
+ return m.mount.Type == tmpfsvfs2.Name && m.share == pod
}
// checkCompatible verifies that shared mount is compatible with master.
@@ -465,6 +468,13 @@ func (m *mountHint) checkCompatible(mount specs.Mount) error {
return nil
}
+func (m *mountHint) fileAccessType() FileAccessType {
+ if m.share == container {
+ return FileAccessExclusive
+ }
+ return FileAccessShared
+}
+
func filterUnsupportedOptions(mount specs.Mount) []string {
rv := make([]string, 0, len(mount.Options))
for _, o := range mount.Options {
@@ -483,14 +493,15 @@ type podMountHints struct {
func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
mnts := make(map[string]*mountHint)
for k, v := range spec.Annotations {
- // Look for 'gvisor.dev/spec/mount' annotations and parse them.
+ // Look for 'dev.gvisor.spec.mount' annotations and parse them.
if strings.HasPrefix(k, MountPrefix) {
- parts := strings.Split(k, "/")
- if len(parts) != 5 {
+ // Remove the prefix and split the rest.
+ parts := strings.Split(k[len(MountPrefix):], ".")
+ if len(parts) != 2 {
return nil, fmt.Errorf("invalid mount annotation: %s=%s", k, v)
}
- name := parts[3]
- if len(name) == 0 || path.Clean(name) != name {
+ name := parts[0]
+ if len(name) == 0 {
return nil, fmt.Errorf("invalid mount name: %s", name)
}
mnt := mnts[name]
@@ -498,7 +509,7 @@ func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
mnt = &mountHint{name: name}
mnts[name] = mnt
}
- if err := mnt.setField(parts[4], v); err != nil {
+ if err := mnt.setField(parts[1], v); err != nil {
return nil, err
}
}
@@ -565,9 +576,17 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin
// processHints processes annotations that container hints about how volumes
// should be mounted (e.g. a volume shared between containers). It must be
// called for the root container only.
-func (c *containerMounter) processHints(conf *Config) error {
+func (c *containerMounter) processHints(conf *Config, creds *auth.Credentials) error {
+ if conf.VFS2 {
+ return c.processHintsVFS2(conf, creds)
+ }
ctx := c.k.SupervisorContext()
for _, hint := range c.hints.mounts {
+ // TODO(b/142076984): Only support tmpfs for now. Bind mounts require a
+ // common gofer to mount all shared volumes.
+ if hint.mount.Type != tmpfsvfs2.Name {
+ continue
+ }
log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
inode, err := c.mountSharedMaster(ctx, conf, hint)
if err != nil {
@@ -621,7 +640,7 @@ func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Confi
func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace) error {
root := mns.Root()
- defer root.DecRef()
+ defer root.DecRef(ctx)
for _, m := range c.mounts {
log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options)
@@ -702,7 +721,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
fd := c.fds.remove()
log.Infof("Mounting root over 9P, ioFD: %d", fd)
p9FS := mustFindFilesystem("9p")
- opts := p9MountOptions(fd, conf.FileAccess)
+ opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
if conf.OverlayfsStaleRead {
// We can't check for overlayfs here because sandbox is chroot'ed and gofer
@@ -748,36 +767,40 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
)
switch m.Type {
- case devpts, devtmpfs, proc, sysfs:
+ case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name:
fsName = m.Type
case nonefs:
- fsName = sysfs
- case tmpfs:
+ fsName = sysvfs2.Name
+ case tmpfsvfs2.Name:
fsName = m.Type
var err error
- opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+ opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
if err != nil {
return "", nil, false, err
}
case bind:
fd := c.fds.remove()
- fsName = "9p"
- // Non-root bind mounts are always shared.
- opts = p9MountOptions(fd, FileAccessShared)
+ fsName = gofervfs2.Name
+ opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2)
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
default:
- // TODO(nlacasse): Support all the mount types and make this a fatal error.
- // Most applications will "just work" without them, so this is a warning
- // for now.
log.Warningf("ignoring unknown filesystem type %q", m.Type)
}
return fsName, opts, useOverlay, nil
}
+func (c *containerMounter) getMountAccessType(mount specs.Mount) FileAccessType {
+ if hint := c.hints.findMount(mount); hint != nil {
+ return hint.fileAccessType()
+ }
+ // Non-root bind mounts are always shared if no hints were provided.
+ return FileAccessShared
+}
+
// mountSubmount mounts volumes inside the container's root. Because mounts may
// be readonly, a lower ramfs overlay is added to create the mount point dir.
// Another overlay is added with tmpfs on top if Config.Overlay is true.
@@ -805,7 +828,20 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil)
if err != nil {
- return fmt.Errorf("creating mount with source %q: %v", m.Source, err)
+ err := fmt.Errorf("creating mount with source %q: %v", m.Source, err)
+ // Check to see if this is a common error due to a Linux bug.
+ // This error is generated here in order to cause it to be
+ // printed to the user using Docker via 'runsc create' etc. rather
+ // than simply printed to the logs for the 'runsc boot' command.
+ //
+ // We check the error message string rather than type because the
+ // actual error types (syscall.EIO, syscall.EPIPE) are lost by file system
+ // implementation (e.g. p9).
+ // TODO(gvisor.dev/issue/1765): Remove message when bug is resolved.
+ if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) {
+ return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug"))
+ }
+ return err
}
// If there are submounts, we need to overlay the mount on top of a ramfs
@@ -832,12 +868,12 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
if err != nil {
return fmt.Errorf("can't find mount destination %q: %v", m.Destination, err)
}
- defer dirent.DecRef()
+ defer dirent.DecRef(ctx)
if err := mns.Mount(ctx, dirent, inode); err != nil {
return fmt.Errorf("mount %q error: %v", m.Destination, err)
}
- log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type)
+ log.Infof("Mounted %q to %q type: %s, internal-options: %q", m.Source, m.Destination, m.Type, opts)
return nil
}
@@ -853,12 +889,12 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun
if err != nil {
return fmt.Errorf("can't find mount destination %q: %v", mount.Destination, err)
}
- defer target.DecRef()
+ defer target.DecRef(ctx)
// Take a ref on the inode that is about to be (re)-mounted.
source.root.IncRef()
if err := mns.Mount(ctx, target, source.root); err != nil {
- source.root.DecRef()
+ source.root.DecRef(ctx)
return fmt.Errorf("bind mount %q error: %v", mount.Destination, err)
}
@@ -900,7 +936,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
// Add root mount.
fd := c.fds.remove()
- opts := p9MountOptions(fd, conf.FileAccess)
+ opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
mf := fs.MountSourceFlags{}
if c.root.Readonly || conf.Overlay {
@@ -912,7 +948,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
Flags: mf,
DataString: strings.Join(opts, ","),
}
- renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount)
+ renv.MountSources[gofervfs2.Name] = append(renv.MountSources[gofervfs2.Name], rootMount)
// Add submounts.
var tmpMounted bool
@@ -928,7 +964,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
// TODO(b/67958150): handle '/tmp' properly (see mountTmp()).
if !tmpMounted {
tmpMount := specs.Mount{
- Type: tmpfs,
+ Type: tmpfsvfs2.Name,
Destination: "/tmp",
}
if err := c.addRestoreMount(conf, renv, tmpMount); err != nil {
@@ -961,12 +997,12 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M
switch err {
case nil:
// Found '/tmp' in filesystem, check if it's empty.
- defer tmp.DecRef()
+ defer tmp.DecRef(ctx)
f, err := tmp.Inode.GetFile(ctx, tmp, fs.FileFlags{Read: true, Directory: true})
if err != nil {
return err
}
- defer f.DecRef()
+ defer f.DecRef(ctx)
serializer := &fs.CollectEntriesSerializer{}
if err := f.Readdir(ctx, serializer); err != nil {
return err
@@ -984,11 +1020,11 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M
// No '/tmp' found (or fallthrough from above). Safe to mount internal
// tmpfs.
tmpMount := specs.Mount{
- Type: tmpfs,
+ Type: tmpfsvfs2.Name,
Destination: "/tmp",
// Sticky bit is added to prevent accidental deletion of files from
// another user. This is normally done for /tmp.
- Options: []string{"mode=1777"},
+ Options: []string{"mode=01777"},
}
return c.mountSubmount(ctx, conf, mns, root, tmpMount)
diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go
index 49ab34b33..912037075 100644
--- a/runsc/boot/fs_test.go
+++ b/runsc/boot/fs_test.go
@@ -15,7 +15,6 @@
package boot
import (
- "path"
"reflect"
"strings"
"testing"
@@ -26,19 +25,19 @@ import (
func TestPodMountHintsHappy(t *testing.T) {
spec := &specs.Spec{
Annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
- path.Join(MountPrefix, "mount2", "source"): "bar",
- path.Join(MountPrefix, "mount2", "type"): "bind",
- path.Join(MountPrefix, "mount2", "share"): "container",
- path.Join(MountPrefix, "mount2", "options"): "rw,private",
+ MountPrefix + "mount2.source": "bar",
+ MountPrefix + "mount2.type": "bind",
+ MountPrefix + "mount2.share": "container",
+ MountPrefix + "mount2.options": "rw,private",
},
}
podHints, err := newPodMountHints(spec)
if err != nil {
- t.Errorf("newPodMountHints failed: %v", err)
+ t.Fatalf("newPodMountHints failed: %v", err)
}
// Check that fields were set correctly.
@@ -86,95 +85,95 @@ func TestPodMountHintsErrors(t *testing.T) {
{
name: "too short",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1"): "foo",
+ MountPrefix + "mount1": "foo",
},
error: "invalid mount annotation",
},
{
name: "no name",
annotations: map[string]string{
- MountPrefix + "//source": "foo",
+ MountPrefix + ".source": "foo",
},
error: "invalid mount name",
},
{
name: "missing source",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
},
error: "source field",
},
{
name: "missing type",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.share": "pod",
},
error: "type field",
},
{
name: "missing share",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
},
error: "share field",
},
{
name: "invalid field name",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "invalid"): "foo",
+ MountPrefix + "mount1.invalid": "foo",
},
error: "invalid mount annotation",
},
{
name: "invalid source",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
},
error: "source cannot be empty",
},
{
name: "invalid type",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "invalid-type",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "invalid-type",
+ MountPrefix + "mount1.share": "pod",
},
error: "invalid type",
},
{
name: "invalid share",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "invalid-share",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "invalid-share",
},
error: "invalid share",
},
{
name: "invalid options",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
- path.Join(MountPrefix, "mount1", "options"): "invalid-option",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
+ MountPrefix + "mount1.options": "invalid-option",
},
error: "unknown mount option",
},
{
name: "duplicate source",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
- path.Join(MountPrefix, "mount2", "source"): "foo",
- path.Join(MountPrefix, "mount2", "type"): "bind",
- path.Join(MountPrefix, "mount2", "share"): "container",
+ MountPrefix + "mount2.source": "foo",
+ MountPrefix + "mount2.type": "bind",
+ MountPrefix + "mount2.share": "container",
},
error: "have the same mount source",
},
@@ -191,3 +190,61 @@ func TestPodMountHintsErrors(t *testing.T) {
})
}
}
+
+func TestGetMountAccessType(t *testing.T) {
+ const source = "foo"
+ for _, tst := range []struct {
+ name string
+ annotations map[string]string
+ want FileAccessType
+ }{
+ {
+ name: "container=exclusive",
+ annotations: map[string]string{
+ MountPrefix + "mount1.source": source,
+ MountPrefix + "mount1.type": "bind",
+ MountPrefix + "mount1.share": "container",
+ },
+ want: FileAccessExclusive,
+ },
+ {
+ name: "pod=shared",
+ annotations: map[string]string{
+ MountPrefix + "mount1.source": source,
+ MountPrefix + "mount1.type": "bind",
+ MountPrefix + "mount1.share": "pod",
+ },
+ want: FileAccessShared,
+ },
+ {
+ name: "shared=shared",
+ annotations: map[string]string{
+ MountPrefix + "mount1.source": source,
+ MountPrefix + "mount1.type": "bind",
+ MountPrefix + "mount1.share": "shared",
+ },
+ want: FileAccessShared,
+ },
+ {
+ name: "default=shared",
+ annotations: map[string]string{
+ MountPrefix + "mount1.source": source + "mismatch",
+ MountPrefix + "mount1.type": "bind",
+ MountPrefix + "mount1.share": "container",
+ },
+ want: FileAccessShared,
+ },
+ } {
+ t.Run(tst.name, func(t *testing.T) {
+ spec := &specs.Spec{Annotations: tst.annotations}
+ podHints, err := newPodMountHints(spec)
+ if err != nil {
+ t.Fatalf("newPodMountHints failed: %v", err)
+ }
+ mounter := containerMounter{hints: podHints}
+ if got := mounter.getMountAccessType(specs.Mount{Source: source}); got != tst.want {
+ t.Errorf("getMountAccessType(), want: %v, got: %v", tst.want, got)
+ }
+ })
+ }
+}
diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go
index d1c0bb9b5..ce62236e5 100644
--- a/runsc/boot/limits.go
+++ b/runsc/boot/limits.go
@@ -16,12 +16,12 @@ package boot
import (
"fmt"
- "sync"
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sync"
)
// Mapping from linux resource names to limits.LimitType.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 0c0eba99e..40c6f99fd 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -16,26 +16,30 @@
package boot
import (
+ "errors"
"fmt"
mrand "math/rand"
"os"
"runtime"
- "sync"
"sync/atomic"
- "syscall"
gtime "time"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/memutil"
"gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/fdimport"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
+ hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -43,11 +47,14 @@ import (
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/sighandling"
- slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/sentry/syscalls/linux/vfs2"
"gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
"gvisor.dev/gvisor/pkg/tcpip/network/arp"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -59,43 +66,46 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
"gvisor.dev/gvisor/runsc/boot/filter"
_ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms.
+ "gvisor.dev/gvisor/runsc/boot/pprof"
"gvisor.dev/gvisor/runsc/specutils"
// Include supported socket providers.
"gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
_ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
_ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/uevent"
"gvisor.dev/gvisor/pkg/sentry/socket/netstack"
_ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
)
-// Loader keeps state needed to start the kernel and run the container..
-type Loader struct {
- // k is the kernel.
- k *kernel.Kernel
-
- // ctrl is the control server.
- ctrl *controller
-
+type containerInfo struct {
conf *Config
- // console is set to true if terminal is enabled.
- console bool
+ // spec is the base configuration for the root container.
+ spec *specs.Spec
- watchdog *watchdog.Watchdog
+ // procArgs refers to the container's init task.
+ procArgs kernel.CreateProcessArgs
// stdioFDs contains stdin, stdout, and stderr.
stdioFDs []int
// goferFDs are the FDs that attach the sandbox to the gofers.
goferFDs []int
+}
- // spec is the base configuration for the root container.
- spec *specs.Spec
+// Loader keeps state needed to start the kernel and run the container.
+type Loader struct {
+ // k is the kernel.
+ k *kernel.Kernel
+
+ // ctrl is the control server.
+ ctrl *controller
- // startSignalForwarding enables forwarding of signals to the sandboxed
- // container. It should be called after the init process is loaded.
- startSignalForwarding func() func()
+ // root contains information about the root container in the sandbox.
+ root containerInfo
+
+ watchdog *watchdog.Watchdog
// stopSignalForwarding disables forwarding of signals to the sandboxed
// container. It should be called when a sandbox is destroyed.
@@ -104,9 +114,6 @@ type Loader struct {
// restore is set to true if we are restoring a container.
restore bool
- // rootProcArgs refers to the root sandbox init task.
- rootProcArgs kernel.CreateProcessArgs
-
// sandboxID is the ID for the whole sandbox.
sandboxID string
@@ -139,6 +146,9 @@ type execProcess struct {
// tty will be nil if the process is not attached to a terminal.
tty *host.TTYFileOperations
+ // ttyVFS2 will be nil if the process is not attached to a terminal.
+ ttyVFS2 *hostvfs2.TTYFileDescription
+
// pidnsPath is the pid namespace path in spec
pidnsPath string
}
@@ -146,9 +156,6 @@ type execProcess struct {
func init() {
// Initialize the random number generator.
mrand.Seed(gtime.Now().UnixNano())
-
- // Register the global syscall table.
- kernel.RegisterSyscallTable(slinux.AMD64)
}
// Args are the arguments for New().
@@ -159,16 +166,18 @@ type Args struct {
Spec *specs.Spec
// Conf is the system configuration.
Conf *Config
- // ControllerFD is the FD to the URPC controller.
+ // ControllerFD is the FD to the URPC controller. The Loader takes ownership
+ // of this FD and may close it at any time.
ControllerFD int
- // Device is an optional argument that is passed to the platform.
+ // Device is an optional argument that is passed to the platform. The Loader
+ // takes ownership of this file and may close it at any time.
Device *os.File
- // GoferFDs is an array of FDs used to connect with the Gofer.
+ // GoferFDs is an array of FDs used to connect with the Gofer. The Loader
+ // takes ownership of these FDs and may close them at any time.
GoferFDs []int
- // StdioFDs is the stdio for the application.
+ // StdioFDs is the stdio for the application. The Loader takes ownership of
+ // these FDs and may close them at any time.
StdioFDs []int
- // Console is set to true if using TTY.
- Console bool
// NumCPU is the number of CPUs to create inside the sandbox.
NumCPU int
// TotalMem is the initial amount of total memory to report back to the
@@ -178,6 +187,9 @@ type Args struct {
UserLogFD int
}
+// startingStdioFD is the first host FD number that stdio FDs are remapped to,
+// so that stdioFDs are always the same on initial start and on restore.
+const startingStdioFD = 256
+
// New initializes a new kernel loader configured by spec.
// New also handles setting up a kernel for restoring a container.
func New(args Args) (*Loader, error) {
@@ -191,6 +203,16 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("setting up memory usage: %v", err)
}
+ // Is this a VFSv2 kernel?
+ if args.Conf.VFS2 {
+ kernel.VFS2Enabled = true
+ if args.Conf.FUSE {
+ kernel.FUSEEnabled = true
+ }
+
+ vfs2.Override()
+ }
+
// Create kernel and platform.
p, err := createPlatform(args.Conf, args.Device)
if err != nil {
@@ -210,9 +232,7 @@ func New(args Args) (*Loader, error) {
// Create VDSO.
//
// Pass k as the platform since it is savable, unlike the actual platform.
- //
- // FIXME(b/109889800): Use non-nil context.
- vdso, err := loader.PrepareVDSO(nil, k)
+ vdso, err := loader.PrepareVDSO(k)
if err != nil {
return nil, fmt.Errorf("creating vdso: %v", err)
}
@@ -228,11 +248,8 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("enabling strace: %v", err)
}
- // Create an empty network stack because the network namespace may be empty at
- // this point. Netns is configured before Run() is called. Netstack is
- // configured using a control uRPC message. Host network is configured inside
- // Run().
- networkStack, err := newEmptyNetworkStack(args.Conf, k)
+ // Create root network namespace/stack.
+ netns, err := newRootNetworkNamespace(args.Conf, k, k)
if err != nil {
return nil, fmt.Errorf("creating network: %v", err)
}
@@ -275,7 +292,7 @@ func New(args Args) (*Loader, error) {
FeatureSet: cpuid.HostFeatureSet(),
Timekeeper: tk,
RootUserNamespace: creds.UserNamespace,
- NetworkStack: networkStack,
+ RootNetworkNamespace: netns,
ApplicationCores: uint(args.NumCPU),
Vdso: vdso,
RootUTSNamespace: kernel.NewUTSNamespace(args.Spec.Hostname, args.Spec.Hostname, creds.UserNamespace),
@@ -286,6 +303,12 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("initializing kernel: %v", err)
}
+ if kernel.VFS2Enabled {
+ if err := registerFilesystems(k); err != nil {
+ return nil, fmt.Errorf("registering filesystems: %w", err)
+ }
+ }
+
if err := adjustDirentCache(k); err != nil {
return nil, err
}
@@ -300,9 +323,11 @@ func New(args Args) (*Loader, error) {
}
// Create a watchdog.
- dog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction)
+ dogOpts := watchdog.DefaultOpts
+ dogOpts.TaskTimeoutAction = args.Conf.WatchdogAction
+ dog := watchdog.New(k, dogOpts)
- procArgs, err := newProcess(args.ID, args.Spec, creds, k, k.RootPIDNamespace())
+ procArgs, err := createProcessArgs(args.ID, args.Spec, creds, k, k.RootPIDNamespace())
if err != nil {
return nil, fmt.Errorf("creating init process for root container: %v", err)
}
@@ -316,19 +341,57 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("creating pod mount hints: %v", err)
}
+ if kernel.VFS2Enabled {
+ // Set up host mount that will be used for imported fds.
+ hostFilesystem, err := hostvfs2.NewFilesystem(k.VFS())
+ if err != nil {
+ return nil, fmt.Errorf("failed to create hostfs filesystem: %v", err)
+ }
+ defer hostFilesystem.DecRef(k.SupervisorContext())
+ hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{})
+ if err != nil {
+ return nil, fmt.Errorf("failed to create hostfs mount: %v", err)
+ }
+ k.SetHostMount(hostMount)
+ }
+
+ // Make host FDs stable between invocations. Host FDs must map to the exact
+ // same number when the sandbox is restored. Otherwise the wrong FD will be
+ // used.
+ var stdioFDs []int
+ newfd := startingStdioFD
+ for _, fd := range args.StdioFDs {
+ // Check that newfd is unused to avoid clobbering over it.
+ if _, err := unix.FcntlInt(uintptr(newfd), unix.F_GETFD, 0); !errors.Is(err, unix.EBADF) {
+ if err != nil {
+ return nil, fmt.Errorf("error checking for FD (%d) conflict: %w", newfd, err)
+ }
+ return nil, fmt.Errorf("unable to remap stdios, FD %d is already in use", newfd)
+ }
+
+ err := unix.Dup3(fd, newfd, unix.O_CLOEXEC)
+ if err != nil {
+ return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err)
+ }
+ stdioFDs = append(stdioFDs, newfd)
+ _ = unix.Close(fd)
+ newfd++
+ }
+
eid := execID{cid: args.ID}
l := &Loader{
- k: k,
- conf: args.Conf,
- console: args.Console,
- watchdog: dog,
- spec: args.Spec,
- goferFDs: args.GoferFDs,
- stdioFDs: args.StdioFDs,
- rootProcArgs: procArgs,
- sandboxID: args.ID,
- processes: map[execID]*execProcess{eid: {}},
- mountHints: mountHints,
+ k: k,
+ watchdog: dog,
+ sandboxID: args.ID,
+ processes: map[execID]*execProcess{eid: {}},
+ mountHints: mountHints,
+ root: containerInfo{
+ conf: args.Conf,
+ stdioFDs: stdioFDs,
+ goferFDs: args.GoferFDs,
+ spec: args.Spec,
+ procArgs: procArgs,
+ },
}
// We don't care about child signals; some platforms can generate a
@@ -337,29 +400,6 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("ignore child stop signals failed: %v", err)
}
- // Handle signals by forwarding them to the root container process
- // (except for panic signal, which should cause a panic).
- l.startSignalForwarding = sighandling.PrepareHandler(func(sig linux.Signal) {
- // Panic signal should cause a panic.
- if args.Conf.PanicSignal != -1 && sig == linux.Signal(args.Conf.PanicSignal) {
- panic("Signal-induced panic")
- }
-
- // Otherwise forward to root container.
- deliveryMode := DeliverToProcess
- if args.Console {
- // Since we are running with a console, we should
- // forward the signal to the foreground process group
- // so that job control signals like ^C can be handled
- // properly.
- deliveryMode = DeliverToForegroundProcessGroup
- }
- log.Infof("Received external signal %d, mode: %v", sig, deliveryMode)
- if err := l.signal(args.ID, 0, int32(sig), deliveryMode); err != nil {
- log.Warningf("error sending signal %v to container %q: %v", sig, args.ID, err)
- }
- })
-
// Create the control server using the provided FD.
//
// This must be done *after* we have initialized the kernel since the
@@ -379,19 +419,24 @@ func New(args Args) (*Loader, error) {
return l, nil
}
-// newProcess creates a process that can be run with kernel.CreateProcess.
-func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel, pidns *kernel.PIDNamespace) (kernel.CreateProcessArgs, error) {
+// createProcessArgs creates args that can be used with kernel.CreateProcess.
+func createProcessArgs(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel, pidns *kernel.PIDNamespace) (kernel.CreateProcessArgs, error) {
// Create initial limits.
ls, err := createLimitSet(spec)
if err != nil {
return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %v", err)
}
+ wd := spec.Process.Cwd
+ if wd == "" {
+ wd = "/"
+ }
+
// Create the process arguments.
procArgs := kernel.CreateProcessArgs{
Argv: spec.Process.Args,
Envv: spec.Process.Env,
- WorkingDirectory: spec.Process.Cwd, // Defaults to '/' if empty.
+ WorkingDirectory: wd,
Credentials: creds,
Umask: 0022,
Limits: ls,
@@ -419,6 +464,11 @@ func (l *Loader) Destroy() {
l.stopSignalForwarding()
}
l.watchdog.Stop()
+
+ for i, fd := range l.root.stdioFDs {
+ _ = unix.Close(fd)
+ l.root.stdioFDs[i] = -1
+ }
}
func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) {
@@ -449,13 +499,13 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
}
func (l *Loader) installSeccompFilters() error {
- if l.conf.DisableSeccomp {
+ if l.root.conf.DisableSeccomp {
filter.Report("syscall filter is DISABLED. Running in less secure mode.")
} else {
opts := filter.Options{
Platform: l.k.Platform,
- HostNetwork: l.conf.Network == NetworkHost,
- ProfileEnable: l.conf.ProfileEnable,
+ HostNetwork: l.root.conf.Network == NetworkHost,
+ ProfileEnable: l.root.conf.ProfileEnable,
ControllerFD: l.ctrl.srv.FD(),
}
if err := filter.Install(opts); err != nil {
@@ -481,11 +531,11 @@ func (l *Loader) Run() error {
}
func (l *Loader) run() error {
- if l.conf.Network == NetworkHost {
+ if l.root.conf.Network == NetworkHost {
// Delay host network configuration to this point because network namespace
// is configured after the loader is created and before Run() is called.
log.Debugf("Configuring host network")
- stack := l.k.NetworkStack().(*hostinet.Stack)
+ stack := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack)
if err := stack.Configure(); err != nil {
return err
}
@@ -503,8 +553,8 @@ func (l *Loader) run() error {
// If we are restoring, we do not want to create a process.
// l.restore is set by the container manager when a restore call is made.
if !l.restore {
- if l.conf.ProfileEnable {
- initializePProf()
+ if l.root.conf.ProfileEnable {
+ pprof.Initialize()
}
// Finally done with all configuration. Setup filters before user code
@@ -513,62 +563,50 @@ func (l *Loader) run() error {
return err
}
- // Create the FD map, which will set stdin, stdout, and stderr. If console
- // is true, then ioctl calls will be passed through to the host fd.
- ctx := l.rootProcArgs.NewContext(l.k)
- fdTable, err := createFDTable(ctx, l.console, l.stdioFDs)
- if err != nil {
- return fmt.Errorf("importing fds: %v", err)
- }
- // CreateProcess takes a reference on FDMap if successful. We won't need
- // ours either way.
- l.rootProcArgs.FDTable = fdTable
-
- // Setup the root container file system.
- l.startGoferMonitor(l.sandboxID, l.goferFDs)
-
- mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints)
- if err := mntr.processHints(l.conf); err != nil {
- return err
- }
- if err := setupContainerFS(ctx, l.conf, mntr, &l.rootProcArgs); err != nil {
- return err
- }
-
- // Add the HOME enviroment variable if it is not already set.
- envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
- if err != nil {
- return err
- }
- l.rootProcArgs.Envv = envv
-
// Create the root container init task. It will begin running
// when the kernel is started.
- if _, _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
- return fmt.Errorf("creating init process: %v", err)
+ if _, err := l.createContainerProcess(true, l.sandboxID, &l.root, ep); err != nil {
+ return err
}
-
- // CreateProcess takes a reference on FDTable if successful.
- l.rootProcArgs.FDTable.DecRef()
}
ep.tg = l.k.GlobalInit()
- if ns, ok := specutils.GetNS(specs.PIDNamespace, l.spec); ok {
+ if ns, ok := specutils.GetNS(specs.PIDNamespace, l.root.spec); ok {
ep.pidnsPath = ns.Path
}
- if l.console {
- ttyFile, _ := l.rootProcArgs.FDTable.Get(0)
- defer ttyFile.DecRef()
- ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations)
- // Set the foreground process group on the TTY to the global
- // init process group, since that is what we are about to
- // start running.
- ep.tty.InitForegroundProcessGroup(ep.tg.ProcessGroup())
- }
+ // Handle signals by forwarding them to the root container process
+ // (except for panic signal, which should cause a panic).
+ l.stopSignalForwarding = sighandling.StartSignalForwarding(func(sig linux.Signal) {
+ // Panic signal should cause a panic.
+ if l.root.conf.PanicSignal != -1 && sig == linux.Signal(l.root.conf.PanicSignal) {
+ panic("Signal-induced panic")
+ }
- // Start signal forwarding only after an init process is created.
- l.stopSignalForwarding = l.startSignalForwarding()
+ // Otherwise forward to root container.
+ deliveryMode := DeliverToProcess
+ if l.root.spec.Process.Terminal {
+ // Since we are running with a console, we should forward the signal to
+ // the foreground process group so that job control signals like ^C can
+ // be handled properly.
+ deliveryMode = DeliverToForegroundProcessGroup
+ }
+ log.Infof("Received external signal %d, mode: %v", sig, deliveryMode)
+ if err := l.signal(l.sandboxID, 0, int32(sig), deliveryMode); err != nil {
+ log.Warningf("error sending signal %v to container %q: %v", sig, l.sandboxID, err)
+ }
+ })
+
+ // l.root.stdioFDs are derived from dup() in boot.New() and they are now
+ // dup()ed again either in createFDTable() during initial start or in
+ // descriptor.initAfterLoad() during restore, so we can release
+ // l.root.stdioFDs now. VFS2 takes ownership of the passed FDs, so only close
+ // for VFS1.
+ if !kernel.VFS2Enabled {
+ for i, fd := range l.root.stdioFDs {
+ _ = unix.Close(fd)
+ l.root.stdioFDs[i] = -1
+ }
+ }
log.Infof("Process should have started...")
l.watchdog.Start()
@@ -601,8 +639,8 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
l.mu.Lock()
defer l.mu.Unlock()
- eid := execID{cid: cid}
- if _, ok := l.processes[eid]; !ok {
+ ep := l.processes[execID{cid: cid}]
+ if ep == nil {
return fmt.Errorf("trying to start a deleted container %q", cid)
}
@@ -636,61 +674,112 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
if pidns == nil {
pidns = l.k.RootPIDNamespace().NewChild(l.k.RootUserNamespace())
}
- l.processes[eid].pidnsPath = ns.Path
+ ep.pidnsPath = ns.Path
} else {
pidns = l.k.RootPIDNamespace()
}
- procArgs, err := newProcess(cid, spec, creds, l.k, pidns)
+
+ info := &containerInfo{
+ conf: conf,
+ spec: spec,
+ }
+ info.procArgs, err = createProcessArgs(cid, spec, creds, l.k, pidns)
if err != nil {
return fmt.Errorf("creating new process: %v", err)
}
// setupContainerFS() dups stdioFDs, so we don't need to dup them here.
- var stdioFDs []int
for _, f := range files[:3] {
- stdioFDs = append(stdioFDs, int(f.Fd()))
- }
-
- // Create the FD map, which will set stdin, stdout, and stderr.
- ctx := procArgs.NewContext(l.k)
- fdTable, err := createFDTable(ctx, false, stdioFDs)
- if err != nil {
- return fmt.Errorf("importing fds: %v", err)
+ info.stdioFDs = append(info.stdioFDs, int(f.Fd()))
}
- // CreateProcess takes a reference on fdTable if successful. We won't
- // need ours either way.
- procArgs.FDTable = fdTable
// Can't take ownership away from os.File. dup them to get a new FDs.
- var goferFDs []int
for _, f := range files[3:] {
- fd, err := syscall.Dup(int(f.Fd()))
+ fd, err := unix.Dup(int(f.Fd()))
if err != nil {
return fmt.Errorf("failed to dup file: %v", err)
}
- goferFDs = append(goferFDs, fd)
+ info.goferFDs = append(info.goferFDs, fd)
}
+ tg, err := l.createContainerProcess(false, cid, info, ep)
+ if err != nil {
+ return err
+ }
+
+ // Success!
+ l.k.StartProcess(tg)
+ ep.tg = tg
+ return nil
+}
+
+func (l *Loader) createContainerProcess(root bool, cid string, info *containerInfo, ep *execProcess) (*kernel.ThreadGroup, error) {
+ console := false
+ if root {
+ // Only root container supports terminal for now.
+ console = info.spec.Process.Terminal
+ }
+
+ // Create the FD map, which will set stdin, stdout, and stderr.
+ ctx := info.procArgs.NewContext(l.k)
+ fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, console, info.stdioFDs)
+ if err != nil {
+ return nil, fmt.Errorf("importing fds: %v", err)
+ }
+ // CreateProcess takes a reference on fdTable if successful. We won't need
+ // ours either way.
+ info.procArgs.FDTable = fdTable
+
// Setup the child container file system.
- l.startGoferMonitor(cid, goferFDs)
+ l.startGoferMonitor(cid, info.goferFDs)
- mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints)
- if err := setupContainerFS(ctx, conf, mntr, &procArgs); err != nil {
- return err
+ mntr := newContainerMounter(info.spec, info.goferFDs, l.k, l.mountHints)
+ if root {
+ if err := mntr.processHints(info.conf, info.procArgs.Credentials); err != nil {
+ return nil, err
+ }
+ }
+ if err := setupContainerFS(ctx, info.conf, mntr, &info.procArgs); err != nil {
+ return nil, err
}
- // Create and start the new process.
- tg, _, err := l.k.CreateProcess(procArgs)
+ // Add the HOME environment variable if it is not already set.
+ var envv []string
+ if kernel.VFS2Enabled {
+ envv, err = user.MaybeAddExecUserHomeVFS2(ctx, info.procArgs.MountNamespaceVFS2,
+ info.procArgs.Credentials.RealKUID, info.procArgs.Envv)
+
+ } else {
+ envv, err = user.MaybeAddExecUserHome(ctx, info.procArgs.MountNamespace,
+ info.procArgs.Credentials.RealKUID, info.procArgs.Envv)
+ }
if err != nil {
- return fmt.Errorf("creating process: %v", err)
+ return nil, err
}
- l.k.StartProcess(tg)
+ info.procArgs.Envv = envv
+ // Create the new process. It is not started here.
+ tg, _, err := l.k.CreateProcess(info.procArgs)
+ if err != nil {
+ return nil, fmt.Errorf("creating process: %v", err)
+ }
// CreateProcess takes a reference on FDTable if successful.
- procArgs.FDTable.DecRef()
+ info.procArgs.FDTable.DecRef(ctx)
+
+ // Set the foreground process group on the TTY to the global init process
+ // group, since that is what we are about to start running.
+ if root {
+ switch {
+ case ttyFileVFS2 != nil:
+ ep.ttyVFS2 = ttyFileVFS2
+ ttyFileVFS2.InitForegroundProcessGroup(tg.ProcessGroup())
+ case ttyFile != nil:
+ ep.tty = ttyFile
+ ttyFile.InitForegroundProcessGroup(tg.ProcessGroup())
+ }
+ }
- l.processes[eid].tg = tg
- return nil
+ return tg, nil
}
// startGoferMonitor runs a goroutine to monitor gofer's health. It polls on
@@ -738,14 +827,14 @@ func (l *Loader) destroyContainer(cid string) error {
l.mu.Lock()
defer l.mu.Unlock()
- _, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid})
+ tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid})
if err != nil {
// Container doesn't exist.
return err
}
- // The container exists, has it been started?
- if started {
+ // The container exists, but has it been started?
+ if tg != nil {
if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
}
@@ -787,45 +876,63 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
l.mu.Lock()
defer l.mu.Unlock()
- tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
+ tg, err := l.tryThreadGroupFromIDLocked(execID{cid: args.ContainerID})
if err != nil {
return 0, err
}
- if !started {
+ if tg == nil {
return 0, fmt.Errorf("container %q not started", args.ContainerID)
}
// Get the container MountNamespace from the Task.
- tg.Leader().WithMuLocked(func(t *kernel.Task) {
- // task.MountNamespace() does not take a ref, so we must do so
- // ourselves.
- args.MountNamespace = t.MountNamespace()
- args.MountNamespace.IncRef()
- })
- defer args.MountNamespace.DecRef()
+ if kernel.VFS2Enabled {
+ // task.MountNamespaceVFS2() does not take a ref, so we must do so ourselves.
+ args.MountNamespaceVFS2 = tg.Leader().MountNamespaceVFS2()
+ args.MountNamespaceVFS2.IncRef()
+ } else {
+ tg.Leader().WithMuLocked(func(t *kernel.Task) {
+ // task.MountNamespace() does not take a ref, so we must do so ourselves.
+ args.MountNamespace = t.MountNamespace()
+ args.MountNamespace.IncRef()
+ })
+ }
- // Add the HOME enviroment varible if it is not already set.
- root := args.MountNamespace.Root()
- defer root.DecRef()
- ctx := fs.WithRoot(l.k.SupervisorContext(), root)
- envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
- if err != nil {
- return 0, err
+ // Add the HOME environment variable if it is not already set.
+ if kernel.VFS2Enabled {
+ root := args.MountNamespaceVFS2.Root()
+ ctx := vfs.WithRoot(l.k.SupervisorContext(), root)
+ defer args.MountNamespaceVFS2.DecRef(ctx)
+ defer root.DecRef(ctx)
+ envv, err := user.MaybeAddExecUserHomeVFS2(ctx, args.MountNamespaceVFS2, args.KUID, args.Envv)
+ if err != nil {
+ return 0, err
+ }
+ args.Envv = envv
+ } else {
+ root := args.MountNamespace.Root()
+ ctx := fs.WithRoot(l.k.SupervisorContext(), root)
+ defer args.MountNamespace.DecRef(ctx)
+ defer root.DecRef(ctx)
+ envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
+ if err != nil {
+ return 0, err
+ }
+ args.Envv = envv
}
- args.Envv = envv
// Start the process.
proc := control.Proc{Kernel: l.k}
args.PIDNamespace = tg.PIDNamespace()
- newTG, tgid, ttyFile, err := control.ExecAsync(&proc, args)
+ newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args)
if err != nil {
return 0, err
}
eid := execID{cid: args.ContainerID, pid: tgid}
l.processes[eid] = &execProcess{
- tg: newTG,
- tty: ttyFile,
+ tg: newTG,
+ tty: ttyFile,
+ ttyVFS2: ttyFileVFS2,
}
log.Debugf("updated processes: %v", l.processes)
@@ -836,7 +943,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
// Don't defer unlock, as doing so would make it impossible for
// multiple clients to wait on the same container.
- tg, _, err := l.threadGroupFromID(execID{cid: cid})
+ tg, err := l.threadGroupFromID(execID{cid: cid})
if err != nil {
return fmt.Errorf("can't wait for container %q: %v", cid, err)
}
@@ -855,7 +962,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
// Try to find a process that was exec'd
eid := execID{cid: cid, pid: tgid}
- execTG, _, err := l.threadGroupFromID(eid)
+ execTG, err := l.threadGroupFromID(eid)
if err == nil {
ws := l.wait(execTG)
*waitStatus = ws
@@ -869,7 +976,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
// The caller may be waiting on a process not started directly via exec.
// In this case, find the process in the container's PID namespace.
- initTG, _, err := l.threadGroupFromID(execID{cid: cid})
+ initTG, err := l.threadGroupFromID(execID{cid: cid})
if err != nil {
return fmt.Errorf("waiting for PID %d: %v", tgid, err)
}
@@ -902,50 +1009,98 @@ func (l *Loader) WaitExit() kernel.ExitStatus {
// Wait for container.
l.k.WaitExited()
+ // Cleanup
+ l.ctrl.stop()
+
+ refs.OnExit()
+
return l.k.GlobalInit().ExitStatus()
}
-func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
+func newRootNetworkNamespace(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (*inet.Namespace, error) {
+ // Create an empty network stack because the network namespace may be empty at
+ // this point. Netns is configured before Run() is called. Netstack is
+ // configured using a control uRPC message. Host network is configured inside
+ // Run().
switch conf.Network {
case NetworkHost:
- return hostinet.NewStack(), nil
+ // No network namespacing support for hostinet yet, hence creator is nil.
+ return inet.NewRootNamespace(hostinet.NewStack(), nil), nil
case NetworkNone, NetworkSandbox:
- // NetworkNone sets up loopback using netstack.
- netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()}
- transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()}
- s := netstack.Stack{stack.New(stack.Options{
- NetworkProtocols: netProtos,
- TransportProtocols: transProtos,
- Clock: clock,
- Stats: netstack.Metrics,
- HandleLocal: true,
- // Enable raw sockets for users with sufficient
- // privileges.
- RawFactory: raw.EndpointFactory{},
- })}
-
- // Enable SACK Recovery.
- if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
- return nil, fmt.Errorf("failed to enable SACK: %v", err)
+ s, err := newEmptySandboxNetworkStack(clock, uniqueID)
+ if err != nil {
+ return nil, err
}
+ creator := &sandboxNetstackCreator{
+ clock: clock,
+ uniqueID: uniqueID,
+ }
+ return inet.NewRootNamespace(s, creator), nil
- // Set default TTLs as required by socket/netstack.
- s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
- s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+ default:
+ panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
+ }
- // Enable Receive Buffer Auto-Tuning.
- if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
- }
+}
- s.FillDefaultIPTables()
+func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) {
+ netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()}
+ transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()}
+ s := netstack.Stack{stack.New(stack.Options{
+ NetworkProtocols: netProtos,
+ TransportProtocols: transProtos,
+ Clock: clock,
+ Stats: netstack.Metrics,
+ HandleLocal: true,
+ // Enable raw sockets for users with sufficient
+ // privileges.
+ RawFactory: raw.EndpointFactory{},
+ UniqueID: uniqueID,
+ })}
- return &s, nil
+ // Enable SACK Recovery.
+ if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
+ return nil, fmt.Errorf("failed to enable SACK: %s", err)
+ }
- default:
- panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
+ // Set default TTLs as required by socket/netstack.
+ s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+ s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+
+ // Enable Receive Buffer Auto-Tuning.
+ if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
+ return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err)
+ }
+
+ return &s, nil
+}
+
+// sandboxNetstackCreator implements kernel.NetworkStackCreator.
+//
+// +stateify savable
+type sandboxNetstackCreator struct {
+ clock tcpip.Clock
+ uniqueID stack.UniqueID
+}
+
+// CreateStack implements kernel.NetworkStackCreator.CreateStack.
+func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) {
+ s, err := newEmptySandboxNetworkStack(f.clock, f.uniqueID)
+ if err != nil {
+ return nil, err
+ }
+
+ // Setup loopback.
+ n := &Network{Stack: s.(*netstack.Stack).Stack}
+ nicID := tcpip.NICID(f.uniqueID.UniqueID())
+ link := DefaultLoopbackLink
+ linkEP := loopback.New()
+ if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
+ return nil, err
}
+
+ return s, nil
}
// signal sends a signal to one or more processes in a container. If PID is 0,
@@ -975,8 +1130,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
return fmt.Errorf("PID (%d) cannot be set when signaling all processes", pid)
}
// Check that the container has actually started before signaling it.
- _, _, err := l.threadGroupFromID(execID{cid: cid})
- if err != nil {
+ if _, err := l.threadGroupFromID(execID{cid: cid}); err != nil {
return err
}
if err := l.signalAllProcesses(cid, signo); err != nil {
@@ -990,16 +1144,16 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
}
func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) error {
- execTG, _, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
+ execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
if err == nil {
// Send signal directly to the identified process.
- return execTG.SendSignal(&arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo})
}
// The caller may be signaling a process not started directly via exec.
// In this case, find the process in the container's PID namespace and
// signal it.
- initTG, _, err := l.threadGroupFromID(execID{cid: cid})
+ initTG, err := l.threadGroupFromID(execID{cid: cid})
if err != nil {
return fmt.Errorf("no thread group found: %v", err)
}
@@ -1010,25 +1164,43 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
if tg.Leader().ContainerID() != cid {
return fmt.Errorf("process %d is part of a different container: %q", tgid, tg.Leader().ContainerID())
}
- return tg.SendSignal(&arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
}
+// signalForegrondProcessGroup looks up the foreground process group from the
+// TTY for the given "tgid" inside container "cid", and sends the signal to it.
func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, signo int32) error {
- // Lookup foreground process group from the TTY for the given process,
- // and send the signal to it.
- tg, tty, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
+ l.mu.Lock()
+ tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid, pid: tgid})
if err != nil {
+ l.mu.Unlock()
return fmt.Errorf("no thread group found: %v", err)
}
- if tty == nil {
+ if tg == nil {
+ l.mu.Unlock()
+ return fmt.Errorf("container %q not started", cid)
+ }
+
+ tty, ttyVFS2, err := l.ttyFromIDLocked(execID{cid: cid, pid: tgid})
+ l.mu.Unlock()
+ if err != nil {
+ return fmt.Errorf("no thread group found: %v", err)
+ }
+
+ var pg *kernel.ProcessGroup
+ switch {
+ case ttyVFS2 != nil:
+ pg = ttyVFS2.ForegroundProcessGroup()
+ case tty != nil:
+ pg = tty.ForegroundProcessGroup()
+ default:
return fmt.Errorf("no TTY attached")
}
- pg := tty.ForegroundProcessGroup()
if pg == nil {
// No foreground process group has been set. Signal the
// original thread group.
log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid)
- return tg.SendSignal(&arch.SignalInfo{Signo: signo})
+ return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
}
// Send the signal to all processes in the process group.
var lastErr error
@@ -1036,7 +1208,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
if tg.ProcessGroup() != pg {
continue
}
- if err := tg.SendSignal(&arch.SignalInfo{Signo: signo}); err != nil {
+ if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil {
lastErr = err
}
}
@@ -1054,33 +1226,57 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo})
}
-// threadGroupFromID same as threadGroupFromIDLocked except that it acquires
-// mutex before calling it.
-func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
+// threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it
+// acquires mutex before calling it and fails in case container hasn't started
+// yet.
+func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, error) {
l.mu.Lock()
defer l.mu.Unlock()
- tg, tty, ok, err := l.threadGroupFromIDLocked(key)
+ tg, err := l.tryThreadGroupFromIDLocked(key)
if err != nil {
- return nil, nil, err
+ return nil, err
}
- if !ok {
- return nil, nil, fmt.Errorf("container %q not started", key.cid)
+ if tg == nil {
+ return nil, fmt.Errorf("container %q not started", key.cid)
}
- return tg, tty, nil
+ return tg, nil
}
-// threadGroupFromIDLocked returns the thread group and TTY for the given
-// execution ID. TTY may be nil if the process is not attached to a terminal.
-// Also returns a boolean indicating whether the container has already started.
-// Returns error if execution ID is invalid or if the container cannot be
-// found (maybe it has been deleted). Caller must hold 'mu'.
-func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) {
+// tryThreadGroupFromIDLocked returns the thread group for the given execution
+// ID. It may return nil in case the container has not started yet. Returns
+// error if execution ID is invalid or if the container cannot be found (maybe
+// it has been deleted). Caller must hold 'mu'.
+func (l *Loader) tryThreadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, error) {
ep := l.processes[key]
if ep == nil {
- return nil, nil, false, fmt.Errorf("container %q not found", key.cid)
+ return nil, fmt.Errorf("container %q not found", key.cid)
}
- if ep.tg == nil {
- return nil, nil, false, nil
+ return ep.tg, nil
+}
+
+// ttyFromIDLocked returns the TTY files for the given execution ID. It may
+// return nil in case the container has not started yet. Returns error if
+// execution ID is invalid or if the container cannot be found (maybe it has
+// been deleted). Caller must hold 'mu'.
+func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+ ep := l.processes[key]
+ if ep == nil {
+ return nil, nil, fmt.Errorf("container %q not found", key.cid)
+ }
+ return ep.tty, ep.ttyVFS2, nil
+}
+
+func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+ if len(stdioFDs) != 3 {
+ return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
+ }
+
+ k := kernel.KernelFromContext(ctx)
+ fdTable := k.NewFDTable()
+ ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs)
+ if err != nil {
+ fdTable.DecRef(ctx)
+ return nil, nil, nil, err
}
- return ep.tg, ep.tty, true, nil
+ return fdTable, ttyFile, ttyFileVFS2, nil
}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 147ff7703..aa3fdf96c 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -19,17 +19,20 @@ import (
"math/rand"
"os"
"reflect"
- "sync"
"syscall"
"testing"
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/unet"
"gvisor.dev/gvisor/runsc/fsgofer"
)
@@ -100,20 +103,29 @@ func startGofer(root string) (int, func(), error) {
return sandboxEnd, cleanup, nil
}
-func createLoader() (*Loader, func(), error) {
+func createLoader(vfsEnabled bool, spec *specs.Spec) (*Loader, func(), error) {
fd, err := server.CreateSocket(ControlSocketAddr(fmt.Sprintf("%010d", rand.Int())[:10]))
if err != nil {
return nil, nil, err
}
conf := testConfig()
- spec := testSpec()
+ conf.VFS2 = vfsEnabled
sandEnd, cleanup, err := startGofer(spec.Root.Path)
if err != nil {
return nil, nil, err
}
- stdio := []int{int(os.Stdin.Fd()), int(os.Stdout.Fd()), int(os.Stderr.Fd())}
+ // Loader takes ownership of stdio.
+ var stdio []int
+ for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
+ newFd, err := unix.Dup(int(f.Fd()))
+ if err != nil {
+ return nil, nil, err
+ }
+ stdio = append(stdio, newFd)
+ }
+
args := Args{
ID: "foo",
Spec: spec,
@@ -132,10 +144,20 @@ func createLoader() (*Loader, func(), error) {
// TestRun runs a simple application in a sandbox and checks that it succeeds.
func TestRun(t *testing.T) {
- l, cleanup, err := createLoader()
+ doRun(t, false)
+}
+
+// TestRunVFS2 runs TestRun in VFSv2.
+func TestRunVFS2(t *testing.T) {
+ doRun(t, true)
+}
+
+func doRun(t *testing.T, vfsEnabled bool) {
+ l, cleanup, err := createLoader(vfsEnabled, testSpec())
if err != nil {
t.Fatalf("error creating loader: %v", err)
}
+
defer l.Destroy()
defer cleanup()
@@ -169,7 +191,16 @@ func TestRun(t *testing.T) {
// TestStartSignal tests that the controller Start message will cause
// WaitForStartSignal to return.
func TestStartSignal(t *testing.T) {
- l, cleanup, err := createLoader()
+ doStartSignal(t, false)
+}
+
+// TestStartSignalVFS2 does TestStartSignal with VFS2.
+func TestStartSignalVFS2(t *testing.T) {
+ doStartSignal(t, true)
+}
+
+func doStartSignal(t *testing.T, vfsEnabled bool) {
+ l, cleanup, err := createLoader(vfsEnabled, testSpec())
if err != nil {
t.Fatalf("error creating loader: %v", err)
}
@@ -217,18 +248,19 @@ func TestStartSignal(t *testing.T) {
}
-// Test that MountNamespace can be created with various specs.
-func TestCreateMountNamespace(t *testing.T) {
- testCases := []struct {
- name string
- // Spec that will be used to create the mount manager. Note
- // that we can't mount procfs without a kernel, so each spec
- // MUST contain something other than procfs mounted at /proc.
- spec specs.Spec
- // Paths that are expected to exist in the resulting fs.
- expectedPaths []string
- }{
- {
+type CreateMountTestcase struct {
+ name string
+ // Spec that will be used to create the mount manager. Note
+ // that we can't mount procfs without a kernel, so each spec
+ // MUST contain something other than procfs mounted at /proc.
+ spec specs.Spec
+ // Paths that are expected to exist in the resulting fs.
+ expectedPaths []string
+}
+
+func createMountTestcases(vfs2 bool) []*CreateMountTestcase {
+ testCases := []*CreateMountTestcase{
+ &CreateMountTestcase{
// Only proc.
name: "only proc mount",
spec: specs.Spec{
@@ -270,7 +302,7 @@ func TestCreateMountNamespace(t *testing.T) {
// /dev, and /sys.
expectedPaths: []string{"/some/very/very/deep/path", "/proc", "/dev", "/sys"},
},
- {
+ &CreateMountTestcase{
// Mounts are nested inside each other.
name: "nested mounts",
spec: specs.Spec{
@@ -314,7 +346,7 @@ func TestCreateMountNamespace(t *testing.T) {
expectedPaths: []string{"/foo", "/foo/bar", "/foo/bar/baz", "/foo/qux",
"/foo/qux-quz", "/foo/some/very/very/deep/path", "/proc", "/dev", "/sys"},
},
- {
+ &CreateMountTestcase{
name: "mount inside /dev",
spec: specs.Spec{
Root: &specs.Root{
@@ -357,40 +389,46 @@ func TestCreateMountNamespace(t *testing.T) {
},
expectedPaths: []string{"/proc", "/dev", "/dev/fd-foo", "/dev/foo", "/dev/bar", "/sys"},
},
- {
- name: "mounts inside mandatory mounts",
- spec: specs.Spec{
- Root: &specs.Root{
- Path: os.TempDir(),
- Readonly: true,
+ }
+
+ vfsCase := &CreateMountTestcase{
+ name: "mounts inside mandatory mounts",
+ spec: specs.Spec{
+ Root: &specs.Root{
+ Path: os.TempDir(),
+ Readonly: true,
+ },
+ Mounts: []specs.Mount{
+ {
+ Destination: "/proc",
+ Type: "tmpfs",
},
- Mounts: []specs.Mount{
- {
- Destination: "/proc",
- Type: "tmpfs",
- },
- // We don't include /sys, and /tmp in
- // the spec, since they will be added
- // automatically.
- //
- // Instead, add submounts inside these
- // directories and make sure they are
- // visible under the mandatory mounts.
- {
- Destination: "/sys/bar",
- Type: "tmpfs",
- },
- {
- Destination: "/tmp/baz",
- Type: "tmpfs",
- },
+ // TODO (gvisor.dev/issue/1487): Re-add this case when sysfs supports
+ // MkDirAt in VFS2 (and remove the redundant append).
+ // {
+ // Destination: "/sys/bar",
+ // Type: "tmpfs",
+ // },
+ //
+ {
+ Destination: "/tmp/baz",
+ Type: "tmpfs",
},
},
- expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"},
},
+ expectedPaths: []string{"/proc", "/sys" /* "/sys/bar" ,*/, "/tmp", "/tmp/baz"},
}
- for _, tc := range testCases {
+ if !vfs2 {
+ vfsCase.spec.Mounts = append(vfsCase.spec.Mounts, specs.Mount{Destination: "/sys/bar", Type: "tmpfs"})
+ vfsCase.expectedPaths = append(vfsCase.expectedPaths, "/sys/bar")
+ }
+ return append(testCases, vfsCase)
+}
+
+// Test that MountNamespace can be created with various specs.
+func TestCreateMountNamespace(t *testing.T) {
+ for _, tc := range createMountTestcases(false /* vfs2 */) {
t.Run(tc.name, func(t *testing.T) {
conf := testConfig()
ctx := contexttest.Context(t)
@@ -412,13 +450,59 @@ func TestCreateMountNamespace(t *testing.T) {
}
root := mns.Root()
- defer root.DecRef()
+ defer root.DecRef(ctx)
for _, p := range tc.expectedPaths {
maxTraversals := uint(0)
if d, err := mns.FindInode(ctx, root, root, p, &maxTraversals); err != nil {
t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err)
} else {
- d.DecRef()
+ d.DecRef(ctx)
+ }
+ }
+ })
+ }
+}
+
+// Test that MountNamespace can be created with various specs in VFS2.
+func TestCreateMountNamespaceVFS2(t *testing.T) {
+ for _, tc := range createMountTestcases(true /* vfs2 */) {
+ t.Run(tc.name, func(t *testing.T) {
+ spec := testSpec()
+ spec.Mounts = tc.spec.Mounts
+ spec.Root = tc.spec.Root
+
+ t.Logf("Using root: %q", spec.Root.Path)
+ l, loaderCleanup, err := createLoader(true /* VFS2 Enabled */, spec)
+ if err != nil {
+ t.Fatalf("failed to create loader: %v", err)
+ }
+ defer l.Destroy()
+ defer loaderCleanup()
+
+ mntr := newContainerMounter(l.root.spec, l.root.goferFDs, l.k, l.mountHints)
+ if err := mntr.processHints(l.root.conf, l.root.procArgs.Credentials); err != nil {
+ t.Fatalf("failed process hints: %v", err)
+ }
+
+ ctx := l.k.SupervisorContext()
+ mns, err := mntr.setupVFS2(ctx, l.root.conf, &l.root.procArgs)
+ if err != nil {
+ t.Fatalf("failed to setupVFS2: %v", err)
+ }
+
+ root := mns.Root()
+ defer root.DecRef(ctx)
+ for _, p := range tc.expectedPaths {
+ target := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(p),
+ }
+
+ if d, err := l.k.VFS().GetDentryAt(ctx, l.root.procArgs.Credentials, target, &vfs.GetDentryOptions{}); err != nil {
+ t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err)
+ } else {
+ d.DecRef(ctx)
}
}
})
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index f98c5fd36..4e1fa7665 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -17,12 +17,16 @@ package boot
import (
"fmt"
"net"
+ "runtime"
+ "strings"
"syscall"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/link/packetsocket"
+ "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo"
"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
"gvisor.dev/gvisor/pkg/tcpip/network/arp"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -31,6 +35,32 @@ import (
"gvisor.dev/gvisor/pkg/urpc"
)
+var (
+ // DefaultLoopbackLink contains the IP addresses "127.0.0.1" and "::1" and
+ // the routes "127.0.0.0/8" and "::1/128" on the "lo" interface.
+ DefaultLoopbackLink = LoopbackLink{
+ Name: "lo",
+ Addresses: []net.IP{
+ net.IP("\x7f\x00\x00\x01"),
+ net.IPv6loopback,
+ },
+ Routes: []Route{
+ {
+ Destination: net.IPNet{
+ IP: net.IPv4(0x7f, 0, 0, 0),
+ Mask: net.IPv4Mask(0xff, 0, 0, 0),
+ },
+ },
+ {
+ Destination: net.IPNet{
+ IP: net.IPv6loopback,
+ Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)),
+ },
+ },
+ },
+ }
+)
+
// Network exposes methods that can be used to configure a network stack.
type Network struct {
Stack *stack.Stack
@@ -48,6 +78,44 @@ type DefaultRoute struct {
Name string
}
+// QueueingDiscipline is used to specify the kind of Queueing Discipline to
+// apply for a given FDBasedLink.
+type QueueingDiscipline int
+
+const (
+ // QDiscNone disables any queueing for the underlying FD.
+ QDiscNone QueueingDiscipline = iota
+
+ // QDiscFIFO applies a simple fifo based queue to the underlying
+ // FD.
+ QDiscFIFO
+)
+
+// MakeQueueingDiscipline returns the QueueingDiscipline equivalent to s if
+// possible, else returns an error.
+func MakeQueueingDiscipline(s string) (QueueingDiscipline, error) {
+ switch s {
+ case "none":
+ return QDiscNone, nil
+ case "fifo":
+ return QDiscFIFO, nil
+ default:
+ return 0, fmt.Errorf("unsupported qdisc specified: %q", s)
+ }
+}
+
+// String implements fmt.Stringer.
+func (q QueueingDiscipline) String() string {
+ switch q {
+ case QDiscNone:
+ return "none"
+ case QDiscFIFO:
+ return "fifo"
+ default:
+ panic(fmt.Sprintf("Invalid queueing discipline: %d", q))
+ }
+}
+
// FDBasedLink configures an fd-based link.
type FDBasedLink struct {
Name string
@@ -56,7 +124,10 @@ type FDBasedLink struct {
Routes []Route
GSOMaxSize uint32
SoftwareGSOEnabled bool
+ TXChecksumOffload bool
+ RXChecksumOffload bool
LinkAddress net.HardwareAddr
+ QDisc QueueingDiscipline
// NumChannels controls how many underlying FD's are to be used to
// create this endpoint.
@@ -80,7 +151,8 @@ type CreateLinksAndRoutesArgs struct {
LoopbackLinks []LoopbackLink
FDBasedLinks []FDBasedLink
- DefaultGateway DefaultRoute
+ Defaultv4Gateway DefaultRoute
+ Defaultv6Gateway DefaultRoute
}
// Empty returns true if route hasn't been set.
@@ -122,10 +194,10 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
nicID++
nicids[link.Name] = nicID
- ep := loopback.New()
+ linkEP := loopback.New()
log.Infof("Enabling loopback interface %q with id %d on addresses %+v", link.Name, nicID, link.Addresses)
- if err := n.createNICWithAddrs(nicID, link.Name, ep, link.Addresses, true /* loopback */); err != nil {
+ if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
return err
}
@@ -157,7 +229,9 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
}
mac := tcpip.LinkAddress(link.LinkAddress)
- ep, err := fdbased.New(&fdbased.Options{
+ log.Infof("gso max size is: %d", link.GSOMaxSize)
+
+ linkEP, err := fdbased.New(&fdbased.Options{
FDs: FDs,
MTU: uint32(link.MTU),
EthernetHeader: true,
@@ -165,14 +239,25 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
PacketDispatchMode: fdbased.RecvMMsg,
GSOMaxSize: link.GSOMaxSize,
SoftwareGSOEnabled: link.SoftwareGSOEnabled,
- RXChecksumOffload: true,
+ TXChecksumOffload: link.TXChecksumOffload,
+ RXChecksumOffload: link.RXChecksumOffload,
})
if err != nil {
return err
}
+ switch link.QDisc {
+ case QDiscNone:
+ case QDiscFIFO:
+ log.Infof("Enabling FIFO QDisc on %q", link.Name)
+ linkEP = fifo.New(linkEP, runtime.GOMAXPROCS(0), 1000)
+ }
+
+ // Enable support for AF_PACKET sockets to receive outgoing packets.
+ linkEP = packetsocket.New(linkEP)
+
log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
- if err := n.createNICWithAddrs(nicID, link.Name, ep, link.Addresses, false /* loopback */); err != nil {
+ if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
return err
}
@@ -186,12 +271,24 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
}
}
- if !args.DefaultGateway.Route.Empty() {
- nicID, ok := nicids[args.DefaultGateway.Name]
+ if !args.Defaultv4Gateway.Route.Empty() {
+ nicID, ok := nicids[args.Defaultv4Gateway.Name]
if !ok {
- return fmt.Errorf("invalid interface name %q for default route", args.DefaultGateway.Name)
+ return fmt.Errorf("invalid interface name %q for default route", args.Defaultv4Gateway.Name)
}
- route, err := args.DefaultGateway.Route.toTcpipRoute(nicID)
+ route, err := args.Defaultv4Gateway.Route.toTcpipRoute(nicID)
+ if err != nil {
+ return err
+ }
+ routes = append(routes, route)
+ }
+
+ if !args.Defaultv6Gateway.Route.Empty() {
+ nicID, ok := nicids[args.Defaultv6Gateway.Name]
+ if !ok {
+ return fmt.Errorf("invalid interface name %q for default route", args.Defaultv6Gateway.Name)
+ }
+ route, err := args.Defaultv6Gateway.Route.toTcpipRoute(nicID)
if err != nil {
return err
}
@@ -205,15 +302,10 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
// createNICWithAddrs creates a NIC in the network stack and adds the given
// addresses.
-func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkEndpoint, addrs []net.IP, loopback bool) error {
- if loopback {
- if err := n.Stack.CreateNamedLoopbackNIC(id, name, sniffer.New(ep)); err != nil {
- return fmt.Errorf("CreateNamedLoopbackNIC(%v, %v) failed: %v", id, name, err)
- }
- } else {
- if err := n.Stack.CreateNamedNIC(id, name, sniffer.New(ep)); err != nil {
- return fmt.Errorf("CreateNamedNIC(%v, %v) failed: %v", id, name, err)
- }
+func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkEndpoint, addrs []net.IP) error {
+ opts := stack.NICOptions{Name: name}
+ if err := n.Stack.CreateNICWithOptions(id, sniffer.New(ep), opts); err != nil {
+ return fmt.Errorf("CreateNICWithOptions(%d, _, %+v) failed: %v", id, opts, err)
}
// Always start with an arp address for the NIC.
diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD
index 03391cdca..77774f43c 100644
--- a/runsc/boot/platforms/BUILD
+++ b/runsc/boot/platforms/BUILD
@@ -1,11 +1,10 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
go_library(
name = "platforms",
srcs = ["platforms.go"],
- importpath = "gvisor.dev/gvisor/runsc/boot/platforms",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/boot/pprof/BUILD b/runsc/boot/pprof/BUILD
new file mode 100644
index 000000000..29cb42b2f
--- /dev/null
+++ b/runsc/boot/pprof/BUILD
@@ -0,0 +1,11 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "pprof",
+ srcs = ["pprof.go"],
+ visibility = [
+ "//runsc:__subpackages__",
+ ],
+)
diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof/pprof.go
index 463362f02..1ded20dee 100644
--- a/runsc/boot/pprof.go
+++ b/runsc/boot/pprof/pprof.go
@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package boot
+// Package pprof provides a stub to initialize custom profilers.
+package pprof
-func initializePProf() {
+// Initialize will be called at boot for initializing custom profilers.
+func Initialize() {
}
diff --git a/runsc/boot/user.go b/runsc/boot/user.go
deleted file mode 100644
index 56cc12ee0..000000000
--- a/runsc/boot/user.go
+++ /dev/null
@@ -1,170 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package boot
-
-import (
- "bufio"
- "fmt"
- "io"
- "strconv"
- "strings"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
-)
-
-type fileReader struct {
- // Ctx is the context for the file reader.
- Ctx context.Context
-
- // File is the file to read from.
- File *fs.File
-}
-
-// Read implements io.Reader.Read.
-func (r *fileReader) Read(buf []byte) (int, error) {
- n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf))
- return int(n), err
-}
-
-// getExecUserHome returns the home directory of the executing user read from
-// /etc/passwd as read from the container filesystem.
-func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) {
- // The default user home directory to return if no user matching the user
- // if found in the /etc/passwd found in the image.
- const defaultHome = "/"
-
- // Open the /etc/passwd file from the dirent via the root mount namespace.
- mnsRoot := rootMns.Root()
- maxTraversals := uint(linux.MaxSymlinkTraversals)
- dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals)
- if err != nil {
- // NOTE: Ignore errors opening the passwd file. If the passwd file
- // doesn't exist we will return the default home directory.
- return defaultHome, nil
- }
- defer dirent.DecRef()
-
- // Check read permissions on the file.
- if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil {
- // NOTE: Ignore permissions errors here and return default root dir.
- return defaultHome, nil
- }
-
- // Only open regular files. We don't open other files like named pipes as
- // they may block and might present some attack surface to the container.
- // Note that runc does not seem to do this kind of checking.
- if !fs.IsRegular(dirent.Inode.StableAttr) {
- return defaultHome, nil
- }
-
- f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false})
- if err != nil {
- return "", err
- }
- defer f.DecRef()
-
- r := &fileReader{
- Ctx: ctx,
- File: f,
- }
-
- homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome)
- if err != nil {
- return "", err
- }
-
- return homeDir, nil
-}
-
-// maybeAddExecUserHome returns a new slice with the HOME enviroment variable
-// set if the slice does not already contain it, otherwise it returns the
-// original slice unmodified.
-func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) {
- // Check if the envv already contains HOME.
- for _, env := range envv {
- if strings.HasPrefix(env, "HOME=") {
- // We have it. Return the original slice unmodified.
- return envv, nil
- }
- }
-
- // Read /etc/passwd for the user's HOME directory and set the HOME
- // environment variable as required by POSIX if it is not overridden by
- // the user.
- homeDir, err := getExecUserHome(ctx, mns, uid)
- if err != nil {
- return nil, fmt.Errorf("error reading exec user: %v", err)
- }
- return append(envv, "HOME="+homeDir), nil
-}
-
-// findHomeInPasswd parses a passwd file and returns the given user's home
-// directory. This function does it's best to replicate the runc's behavior.
-func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) {
- s := bufio.NewScanner(passwd)
-
- for s.Scan() {
- if err := s.Err(); err != nil {
- return "", err
- }
-
- line := strings.TrimSpace(s.Text())
- if line == "" {
- continue
- }
-
- // Pull out part of passwd entry. Loosely parse the passwd entry as some
- // passwd files could be poorly written and for compatibility with runc.
- //
- // Per 'man 5 passwd'
- // /etc/passwd contains one line for each user account, with seven
- // fields delimited by colons (“:”). These fields are:
- //
- // - login name
- // - optional encrypted password
- // - numerical user ID
- // - numerical group ID
- // - user name or comment field
- // - user home directory
- // - optional user command interpreter
- parts := strings.Split(line, ":")
-
- found := false
- homeDir := ""
- for i, p := range parts {
- switch i {
- case 2:
- parsedUID, err := strconv.ParseUint(p, 10, 32)
- if err == nil && parsedUID == uint64(uid) {
- found = true
- }
- case 5:
- homeDir = p
- }
- }
- if found {
- // NOTE: If the uid is present but the home directory is not
- // present in the /etc/passwd entry we return an empty string. This
- // is, for better or worse, what runc does.
- return homeDir, nil
- }
- }
-
- return defaultHome, nil
-}
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
deleted file mode 100644
index 9aee2ad07..000000000
--- a/runsc/boot/user_test.go
+++ /dev/null
@@ -1,254 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package boot
-
-import (
- "io/ioutil"
- "os"
- "path/filepath"
- "strings"
- "syscall"
- "testing"
-
- specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-)
-
-func setupTempDir() (string, error) {
- tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test")
- if err != nil {
- return "", err
- }
- return tmpDir, nil
-}
-
-func setupPasswd(contents string, perms os.FileMode) func() (string, error) {
- return func() (string, error) {
- tmpDir, err := setupTempDir()
- if err != nil {
- return "", err
- }
-
- if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
- return "", err
- }
-
- f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd"))
- if err != nil {
- return "", err
- }
- defer f.Close()
-
- _, err = f.WriteString(contents)
- if err != nil {
- return "", err
- }
-
- err = f.Chmod(perms)
- if err != nil {
- return "", err
- }
- return tmpDir, nil
- }
-}
-
-// TestGetExecUserHome tests the getExecUserHome function.
-func TestGetExecUserHome(t *testing.T) {
- tests := map[string]struct {
- uid auth.KUID
- createRoot func() (string, error)
- expected string
- }{
- "success": {
- uid: 1000,
- createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666),
- expected: "/home/adin",
- },
- "no_passwd": {
- uid: 1000,
- createRoot: setupTempDir,
- expected: "/",
- },
- "no_perms": {
- uid: 1000,
- createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000),
- expected: "/",
- },
- "directory": {
- uid: 1000,
- createRoot: func() (string, error) {
- tmpDir, err := setupTempDir()
- if err != nil {
- return "", err
- }
-
- if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
- return "", err
- }
-
- if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
- return "", err
- }
-
- return tmpDir, nil
- },
- expected: "/",
- },
- // Currently we don't allow named pipes.
- "named_pipe": {
- uid: 1000,
- createRoot: func() (string, error) {
- tmpDir, err := setupTempDir()
- if err != nil {
- return "", err
- }
-
- if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
- return "", err
- }
-
- if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
- return "", err
- }
-
- return tmpDir, nil
- },
- expected: "/",
- },
- }
-
- for name, tc := range tests {
- t.Run(name, func(t *testing.T) {
- tmpDir, err := tc.createRoot()
- if err != nil {
- t.Fatalf("failed to create root dir: %v", err)
- }
-
- sandEnd, cleanup, err := startGofer(tmpDir)
- if err != nil {
- t.Fatalf("failed to create gofer: %v", err)
- }
- defer cleanup()
-
- ctx := contexttest.Context(t)
- conf := &Config{
- RootDir: "unused_root_dir",
- Network: NetworkNone,
- DisableSeccomp: true,
- }
-
- spec := &specs.Spec{
- Root: &specs.Root{
- Path: tmpDir,
- Readonly: true,
- },
- // Add /proc mount as tmpfs to avoid needing a kernel.
- Mounts: []specs.Mount{
- {
- Destination: "/proc",
- Type: "tmpfs",
- },
- },
- }
-
- mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{})
- mns, err := mntr.createMountNamespace(ctx, conf)
- if err != nil {
- t.Fatalf("failed to create mount namespace: %v", err)
- }
- ctx = fs.WithRoot(ctx, mns.Root())
- if err := mntr.mountSubmounts(ctx, conf, mns); err != nil {
- t.Fatalf("failed to create mount namespace: %v", err)
- }
-
- got, err := getExecUserHome(ctx, mns, tc.uid)
- if err != nil {
- t.Fatalf("failed to get user home: %v", err)
- }
-
- if got != tc.expected {
- t.Fatalf("expected %v, got: %v", tc.expected, got)
- }
- })
- }
-}
-
-// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing.
-func TestFindHomeInPasswd(t *testing.T) {
- tests := map[string]struct {
- uid uint32
- passwd string
- expected string
- def string
- }{
- "empty": {
- uid: 1000,
- passwd: "",
- expected: "/",
- def: "/",
- },
- "whitespace": {
- uid: 1000,
- passwd: " ",
- expected: "/",
- def: "/",
- },
- "full": {
- uid: 1000,
- passwd: "adin::1000:1111::/home/adin:/bin/sh",
- expected: "/home/adin",
- def: "/",
- },
- // For better or worse, this is how runc works.
- "partial": {
- uid: 1000,
- passwd: "adin::1000:1111:",
- expected: "",
- def: "/",
- },
- "multiple": {
- uid: 1001,
- passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh",
- expected: "/home/ian",
- def: "/",
- },
- "duplicate": {
- uid: 1000,
- passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh",
- expected: "/home/adin",
- def: "/",
- },
- "empty_lines": {
- uid: 1001,
- passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh",
- expected: "/home/ian",
- def: "/",
- },
- }
-
- for name, tc := range tests {
- t.Run(name, func(t *testing.T) {
- got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def)
- if err != nil {
- t.Fatalf("error parsing passwd: %v", err)
- }
- if tc.expected != got {
- t.Fatalf("expected %v, got: %v", tc.expected, got)
- }
- })
- }
-}
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
new file mode 100644
index 000000000..08dce8b6c
--- /dev/null
+++ b/runsc/boot/vfs.go
@@ -0,0 +1,519 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "fmt"
+ "path"
+ "sort"
+ "strings"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/devices/memdev"
+ "gvisor.dev/gvisor/pkg/sentry/devices/ttydev"
+ "gvisor.dev/gvisor/pkg/sentry/devices/tundev"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/fuse"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/overlay"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// registerFilesystems registers the filesystem types used by the sandbox with
+// the kernel's VFS, registers the memdev and ttydev devices (plus tundev and
+// FUSE when they are supported/enabled), and creates the corresponding device
+// files in devtmpfs.
+//
+// All failures are wrapped with %w so callers can inspect the underlying
+// error.
+func registerFilesystems(k *kernel.Kernel) error {
+ ctx := k.SupervisorContext()
+ creds := auth.NewRootCredentials(k.RootUserNamespace())
+ vfsObj := k.VFS()
+
+ vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserList: true,
+ // TODO(b/29356795): Users may mount this once the terminals are in a
+ // usable state.
+ AllowUserMount: false,
+ })
+ vfsObj.MustRegisterFilesystemType(devtmpfs.Name, &devtmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(overlay.Name, &overlay.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(proc.Name, &proc.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(sys.Name, &sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+ vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
+
+ // Setup files in devtmpfs.
+ if err := memdev.Register(vfsObj); err != nil {
+ return fmt.Errorf("registering memdev: %w", err)
+ }
+ if err := ttydev.Register(vfsObj); err != nil {
+ return fmt.Errorf("registering ttydev: %w", err)
+ }
+ // The TUN device is only registered (and its devtmpfs file only created
+ // below) when the network stack reports support for it.
+ tunSupported := tundev.IsNetTunSupported(inet.StackFromContext(ctx))
+ if tunSupported {
+ if err := tundev.Register(vfsObj); err != nil {
+ return fmt.Errorf("registering tundev: %w", err)
+ }
+ }
+
+ if kernel.FUSEEnabled {
+ if err := fuse.Register(vfsObj); err != nil {
+ return fmt.Errorf("registering fusedev: %w", err)
+ }
+ }
+
+ a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name)
+ if err != nil {
+ return fmt.Errorf("creating devtmpfs accessor: %w", err)
+ }
+ defer a.Release(ctx)
+
+ if err := a.UserspaceInit(ctx); err != nil {
+ return fmt.Errorf("initializing userspace: %w", err)
+ }
+ if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil {
+ return fmt.Errorf("creating memdev devtmpfs files: %w", err)
+ }
+ if err := ttydev.CreateDevtmpfsFiles(ctx, a); err != nil {
+ return fmt.Errorf("creating ttydev devtmpfs files: %w", err)
+ }
+ if tunSupported {
+ if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil {
+ return fmt.Errorf("creating tundev devtmpfs files: %w", err)
+ }
+ }
+
+ if kernel.FUSEEnabled {
+ if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil {
+ return fmt.Errorf("creating fusedev devtmpfs files: %w", err)
+ }
+ }
+
+ return nil
+}
+
+// setupContainerVFS2 builds the container's VFS2 mount namespace through mntr,
+// stores it in procArgs.MountNamespaceVFS2, and then replaces
+// procArgs.Filename with the path returned by user.ResolveExecutablePath.
+func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+ mns, err := mntr.setupVFS2(ctx, conf, procArgs)
+ if err != nil {
+ return fmt.Errorf("failed to setupFS: %w", err)
+ }
+ procArgs.MountNamespaceVFS2 = mns
+
+ // Resolve the executable path from working dir and environment.
+ resolved, err := user.ResolveExecutablePath(ctx, procArgs)
+ if err != nil {
+ return err
+ }
+ procArgs.Filename = resolved
+ return nil
+}
+
+// setupVFS2 creates the container's VFS2 mount namespace (root mount plus all
+// submounts) and returns it. Mounting is done with root credentials derived
+// from the user namespace in procArgs; procArgs itself is not modified.
+func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
+ log.Infof("Configuring container's file system with VFS2")
+
+ // Create context with root credentials to mount the filesystem (the current
+ // user may not be privileged enough).
+ rootCreds := auth.NewRootCredentials(procArgs.Credentials.UserNamespace)
+ rootProcArgs := *procArgs
+ rootProcArgs.WorkingDirectory = "/"
+ rootProcArgs.Credentials = rootCreds
+ rootProcArgs.Umask = 0022
+ rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals
+ // The context must be derived from the root copy, not from procArgs;
+ // otherwise none of the overrides above (nor the mount namespace assigned
+ // below) would be visible through it.
+ rootCtx := rootProcArgs.NewContext(c.k)
+
+ mns, err := c.createMountNamespaceVFS2(rootCtx, conf, rootCreds)
+ if err != nil {
+ return nil, fmt.Errorf("creating mount namespace: %w", err)
+ }
+ rootProcArgs.MountNamespaceVFS2 = mns
+
+ // Mount submounts.
+ if err := c.mountSubmountsVFS2(rootCtx, conf, mns, rootCreds); err != nil {
+ return nil, fmt.Errorf("mounting submounts vfs2: %w", err)
+ }
+ return mns, nil
+}
+
+// createMountNamespaceVFS2 creates a new mount namespace whose root is a gofer
+// filesystem. The connection FD is taken from c.fds and the mount data is
+// built by p9MountData from that FD and conf.FileAccess.
+func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
+ fd := c.fds.remove()
+ opts := p9MountData(fd, conf.FileAccess, true /* vfs2 */)
+
+ if conf.OverlayfsStaleRead {
+ // We can't check for overlayfs here because sandbox is chroot'ed and gofer
+ // can only send mount options for specs.Mounts (specs.Root is missing
+ // Options field). So assume root is always on top of overlayfs.
+ opts = append(opts, "overlayfs_stale_read")
+ }
+
+ log.Infof("Mounting root over 9P, ioFD: %d", fd)
+ mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{
+ Data: strings.Join(opts, ","),
+ })
+ if err != nil {
+ return nil, fmt.Errorf("setting up mount namespace: %w", err)
+ }
+ return mns, nil
+}
+
+// mountSubmountsVFS2 mounts every entry returned by prepareMountsVFS2 into
+// mns, then lets mountTmpVFS2 decide whether to add an internal tmpfs at
+// "/tmp". Mounts that match a supported hint are attached as shared submounts;
+// all others go through mountSubmountVFS2. Errors are wrapped with %w.
+func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error {
+ mounts, err := c.prepareMountsVFS2()
+ if err != nil {
+ return err
+ }
+
+ for i := range mounts {
+ submount := &mounts[i]
+ log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options)
+ if hint := c.hints.findMount(submount.Mount); hint != nil && hint.isSupported() {
+ if err := c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.Mount, hint); err != nil {
+ return fmt.Errorf("mount shared mount %q to %q: %w", hint.name, submount.Destination, err)
+ }
+ } else {
+ if err := c.mountSubmountVFS2(ctx, conf, mns, creds, submount); err != nil {
+ return fmt.Errorf("mount submount %q: %w", submount.Destination, err)
+ }
+ }
+ }
+
+ if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil {
+ return fmt.Errorf(`mount submount "/tmp": %w`, err)
+ }
+ return nil
+}
+
+// mountAndFD pairs an OCI mount with the host FD assigned to it.
+// prepareMountsVFS2 sets fd to -1 for mounts that are not bind mounts.
+type mountAndFD struct {
+ specs.Mount
+ fd int
+}
+
+// prepareMountsVFS2 returns c.mounts paired with their host FDs, ordered so
+// that parents come before children. FDs are taken from c.fds in the original
+// mount order, before sorting. An error is returned if checkDispenser fails
+// once all FDs have been handed out.
+func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
+ // Associate bind mounts with their FDs before sorting since there is an
+ // undocumented assumption that FDs are dispensed in the order in which
+ // they are required by mounts.
+ var mounts []mountAndFD
+ for _, m := range c.mounts {
+ fd := -1
+ // Only bind mounts use host FDs; see
+ // containerMounter.getMountNameAndOptionsVFS2.
+ if m.Type == bind {
+ fd = c.fds.remove()
+ }
+ mounts = append(mounts, mountAndFD{
+ Mount: m,
+ fd: fd,
+ })
+ }
+ if err := c.checkDispenser(); err != nil {
+ return nil, err
+ }
+
+ // Sort the mounts so that we don't place children before parents. The sort
+ // is stable so that mounts whose destinations have the same length (in
+ // particular, several mounts on the same destination) keep the order in
+ // which they were specified.
+ sort.SliceStable(mounts, func(i, j int) bool {
+ return len(mounts[i].Destination) < len(mounts[j].Destination)
+ })
+
+ return mounts, nil
+}
+
+// mountSubmountVFS2 mounts submount at submount.Destination inside mns. The
+// mount point is created first if it does not exist. Mount types for which
+// getMountNameAndOptionsVFS2 returns an empty filesystem name are skipped
+// without error.
+func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error {
+ root := mns.Root()
+ defer root.DecRef(ctx)
+ target := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(submount.Destination),
+ }
+ fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount)
+ if err != nil {
+ return fmt.Errorf("mountOptions failed: %w", err)
+ }
+ if len(fsName) == 0 {
+ // Filesystem is not supported (e.g. cgroup), just skip it.
+ return nil
+ }
+
+ // Make sure the destination exists before mounting on top of it.
+ if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil {
+ return err
+ }
+ if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil {
+ return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
+ }
+ log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts.GetFilesystemOptions.Data)
+ return nil
+}
+
+// getMountNameAndOptionsVFS2 returns the filesystem name and the mount options
+// to use for m. For an unknown mount type it logs a warning and returns an
+// empty name, nil options and a nil error; callers treat that as "skip this
+// mount".
+func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, *vfs.MountOptions, error) {
+ fsName := m.Type
+ var data []string
+
+ // Find filesystem name and FS specific data field.
+ switch m.Type {
+ case devpts.Name, devtmpfs.Name, proc.Name, sys.Name:
+ // Nothing to do.
+
+ case nonefs:
+ fsName = sys.Name
+
+ case tmpfs.Name:
+ var err error
+ data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
+ if err != nil {
+ return "", nil, err
+ }
+
+ case bind:
+ fsName = gofer.Name
+ if m.fd == 0 {
+ // Check that an FD was provided to fail fast. Technically FD=0 is valid,
+ // but unlikely to be correct in this context.
+ return "", nil, fmt.Errorf("9P mount requires a connection FD")
+ }
+ data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
+
+ default:
+ log.Warningf("ignoring unknown filesystem type %q", m.Type)
+ return "", nil, nil
+ }
+
+ opts := &vfs.MountOptions{
+ GetFilesystemOptions: vfs.GetFilesystemOptions{
+ Data: strings.Join(data, ","),
+ },
+ InternalMount: true,
+ }
+
+ // Translate the generic mount options; anything not listed here is only
+ // logged and otherwise ignored.
+ for _, o := range m.Options {
+ switch o {
+ case "rw":
+ opts.ReadOnly = false
+ case "ro":
+ opts.ReadOnly = true
+ case "noatime":
+ opts.Flags.NoATime = true
+ case "noexec":
+ opts.Flags.NoExec = true
+ default:
+ log.Warningf("ignoring unknown mount option %q", o)
+ }
+ }
+
+ if conf.Overlay {
+ // All writes go to upper, be paranoid and make lower readonly.
+ opts.ReadOnly = true
+ }
+ return fsName, opts, nil
+}
+
+// makeSyntheticMount makes sure currentPath exists under root. If a stat of
+// the path succeeds nothing is done. If it fails with ENOENT, the parent is
+// created recursively and then currentPath is created as a directory with mode
+// 0777 and ForSyntheticMountpoint set. Any other stat error is returned.
+func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error {
+ target := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(currentPath),
+ }
+ _, err := c.k.VFS().StatAt(ctx, creds, target, &vfs.StatOptions{})
+ if err == nil {
+ log.Debugf("Mount point %q already exists", currentPath)
+ return nil
+ }
+ if err != syserror.ENOENT {
+ return fmt.Errorf("stat failed for %q during mount point creation: %w", currentPath, err)
+ }
+
+ // Recurse to ensure parent is created and then create the mount point.
+ if err := c.makeSyntheticMount(ctx, path.Dir(currentPath), root, creds); err != nil {
+ return err
+ }
+ log.Debugf("Creating dir %q for mount point", currentPath)
+ mkdirOpts := &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}
+ if err := c.k.VFS().MkdirAt(ctx, creds, target, mkdirOpts); err != nil {
+ return fmt.Errorf("failed to create directory %q for mount: %w", currentPath, err)
+ }
+ return nil
+}
+
+// mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so.
+// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
+// the host /tmp, but this is a nice optimization, and fixes some apps that call
+// mknod in /tmp. It's unsafe to mount tmpfs if:
+// 1. /tmp is mounted explicitly: we should not override user's wish
+// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp
+//
+// Note that when there are submounts inside of '/tmp', directories for the
+// mount points must be present, making '/tmp' not empty anymore.
+func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds *auth.Credentials, mns *vfs.MountNamespace) error {
+ for _, m := range c.mounts {
+ // m.Destination has been cleaned, so it's safe to use equality here.
+ if m.Destination == "/tmp" {
+ log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m)
+ return nil
+ }
+ }
+
+ root := mns.Root()
+ defer root.DecRef(ctx)
+ pop := vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse("/tmp"),
+ }
+ // TODO(gvisor.dev/issue/2782): Use O_PATH when available.
+ fd, err := c.k.VFS().OpenAt(ctx, creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY})
+ switch err {
+ case nil:
+ defer fd.DecRef(ctx)
+
+ // The callback aborts the iteration with ENOTEMPTY as soon as it sees any
+ // entry other than "." and "..".
+ err := fd.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error {
+ if dirent.Name != "." && dirent.Name != ".." {
+ return syserror.ENOTEMPTY
+ }
+ return nil
+ }))
+ switch err {
+ case nil:
+ log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`)
+ case syserror.ENOTEMPTY:
+ // If more than "." and ".." is found, skip internal tmpfs to prevent
+ // hiding existing files.
+ log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`)
+ return nil
+ default:
+ return err
+ }
+ fallthrough
+
+ case syserror.ENOENT:
+ // No '/tmp' found (or fallthrough from above). It's safe to mount internal
+ // tmpfs.
+ tmpMount := specs.Mount{
+ Type: tmpfs.Name,
+ Destination: "/tmp",
+ // Sticky bit is added to prevent accidental deletion of files from
+ // another user. This is normally done for /tmp.
+ Options: []string{"mode=01777"},
+ }
+ return c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount})
+
+ case syserror.ENOTDIR:
+ // Not a dir?! Let it be.
+ return nil
+
+ default:
+ return fmt.Errorf(`opening "/tmp" inside container: %w`, err)
+ }
+}
+
+// processHintsVFS2 processes annotations that contain hints about how volumes
+// should be mounted (e.g. a volume shared between containers). It must be
+// called for the root container only.
+//
+// For every tmpfs hint it mounts the shared master and records the resulting
+// mount in hint.vfsMount; hints of any other type are skipped. The first
+// failure is returned, wrapped with %w.
+func (c *containerMounter) processHintsVFS2(conf *Config, creds *auth.Credentials) error {
+ ctx := c.k.SupervisorContext()
+ for _, hint := range c.hints.mounts {
+ // TODO(b/142076984): Only support tmpfs for now. Bind mounts require a
+ // common gofer to mount all shared volumes.
+ if hint.mount.Type != tmpfs.Name {
+ continue
+ }
+
+ log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
+ mnt, err := c.mountSharedMasterVFS2(ctx, conf, hint, creds)
+ if err != nil {
+ return fmt.Errorf("mounting shared master %q: %w", hint.name, err)
+ }
+ hint.vfsMount = mnt
+ }
+ return nil
+}
+
+// mountSharedMasterVFS2 mounts the master of a volume that is shared among
+// containers in a pod. The mount is created disconnected (not attached to any
+// mount point) and returned to the caller.
+func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *Config, hint *mountHint, creds *auth.Credentials) (*vfs.Mount, error) {
+ // Map mount type to filesystem name, and parse out the options that we are
+ // capable of dealing with.
+ mntFD := &mountAndFD{Mount: hint.mount}
+ fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, mntFD)
+ if err != nil {
+ return nil, err
+ }
+ // An empty name means the mount type was not recognized; unlike regular
+ // submounts, that is an error here.
+ if len(fsName) == 0 {
+ return nil, fmt.Errorf("mount type not supported %q", hint.mount.Type)
+ }
+ return c.k.VFS().MountDisconnected(ctx, creds, "", fsName, opts)
+}
+
+// mountSharedSubmountVFS2 bind mounts a previously mounted volume that is
+// shared among containers in the same pod. It creates a new disconnected mount
+// from the filesystem and root of source.vfsMount and connects it at
+// mount.Destination inside mns, creating the mount point if needed.
+func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) error {
+ if err := source.checkCompatible(mount); err != nil {
+ return err
+ }
+
+ // Only the mount options are needed here; the filesystem comes from the
+ // already mounted master.
+ _, opts, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{Mount: mount})
+ if err != nil {
+ return err
+ }
+ newMnt, err := c.k.VFS().NewDisconnectedMount(source.vfsMount.Filesystem(), source.vfsMount.Root(), opts)
+ if err != nil {
+ return err
+ }
+ defer newMnt.DecRef(ctx)
+
+ root := mns.Root()
+ defer root.DecRef(ctx)
+ if err := c.makeSyntheticMount(ctx, mount.Destination, root, creds); err != nil {
+ return err
+ }
+
+ target := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(mount.Destination),
+ }
+ if err := c.k.VFS().ConnectMountAt(ctx, creds, newMnt, target); err != nil {
+ return err
+ }
+ log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name)
+ return nil
+}
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD
index d6165f9e5..37f4253ba 100644
--- a/runsc/cgroup/BUILD
+++ b/runsc/cgroup/BUILD
@@ -1,17 +1,16 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
go_library(
name = "cgroup",
srcs = ["cgroup.go"],
- importpath = "gvisor.dev/gvisor/runsc/cgroup",
visibility = ["//:sandbox"],
deps = [
+ "//pkg/cleanup",
"//pkg/log",
- "//runsc/specutils",
"@com_github_cenkalti_backoff//:go_default_library",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
],
)
@@ -19,6 +18,10 @@ go_test(
name = "cgroup_test",
size = "small",
srcs = ["cgroup_test.go"],
- embed = [":cgroup"],
+ library = ":cgroup",
tags = ["local"],
+ deps = [
+ "//pkg/test/testutil",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
+ ],
)
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index ab3a25b9b..8fbc3887a 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -19,6 +19,7 @@ package cgroup
import (
"bufio"
"context"
+ "errors"
"fmt"
"io/ioutil"
"os"
@@ -30,29 +31,31 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/runsc/specutils"
)
const (
cgroupRoot = "/sys/fs/cgroup"
)
-var controllers = map[string]controller{
- "blkio": &blockIO{},
- "cpu": &cpu{},
- "cpuset": &cpuSet{},
- "memory": &memory{},
- "net_cls": &networkClass{},
- "net_prio": &networkPrio{},
+var controllers = map[string]config{
+ "blkio": config{ctrlr: &blockIO{}},
+ "cpu": config{ctrlr: &cpu{}},
+ "cpuset": config{ctrlr: &cpuSet{}},
+ "hugetlb": config{ctrlr: &hugeTLB{}, optional: true},
+ "memory": config{ctrlr: &memory{}},
+ "net_cls": config{ctrlr: &networkClass{}},
+ "net_prio": config{ctrlr: &networkPrio{}},
+ "pids": config{ctrlr: &pids{}},
// These controllers either don't have anything in the OCI spec or is
- // irrevalant for a sandbox, e.g. pids.
- "devices": &noop{},
- "freezer": &noop{},
- "perf_event": &noop{},
- "pids": &noop{},
- "systemd": &noop{},
+ // irrelevant for a sandbox.
+ "devices": config{ctrlr: &noop{}},
+ "freezer": config{ctrlr: &noop{}},
+ "perf_event": config{ctrlr: &noop{}},
+ "rdma": config{ctrlr: &noop{}, optional: true},
+ "systemd": config{ctrlr: &noop{}},
}
func setOptionalValueInt(path, name string, val *int64) error {
@@ -89,7 +92,17 @@ func setOptionalValueUint16(path, name string, val *uint16) error {
func setValue(path, name, data string) error {
fullpath := filepath.Join(path, name)
- return ioutil.WriteFile(fullpath, []byte(data), 0700)
+
+ // Retry writes on EINTR; see:
+ // https://github.com/golang/go/issues/38033
+ for {
+ err := ioutil.WriteFile(fullpath, []byte(data), 0700)
+ if err == nil {
+ return nil
+ } else if !errors.Is(err, syscall.EINTR) {
+ return err
+ }
+ }
}
func getValue(path, name string) (string, error) {
@@ -101,6 +114,14 @@ func getValue(path, name string) (string, error) {
return string(out), nil
}
+// getInt reads the file 'name' under 'path' with getValue and parses its
+// content, with surrounding whitespace trimmed, as a decimal integer.
+func getInt(path, name string) (int, error) {
+ s, err := getValue(path, name)
+ if err != nil {
+ return 0, err
+ }
+ return strconv.Atoi(strings.TrimSpace(s))
+}
+
// fillFromAncestor sets the value of a cgroup file from the first ancestor
// that has content. It does nothing if the file in 'path' has already been set.
func fillFromAncestor(path string) (string, error) {
@@ -114,15 +135,23 @@ func fillFromAncestor(path string) (string, error) {
return val, nil
}
- // File is not set, recurse to parent and then set here.
+ // File is not set, recurse to parent and then set here.
name := filepath.Base(path)
parent := filepath.Dir(filepath.Dir(path))
val, err = fillFromAncestor(filepath.Join(parent, name))
if err != nil {
return "", err
}
- if err := ioutil.WriteFile(path, []byte(val), 0700); err != nil {
- return "", err
+
+ // Retry writes on EINTR; see:
+ // https://github.com/golang/go/issues/38033
+ for {
+ err := ioutil.WriteFile(path, []byte(val), 0700)
+ if err == nil {
+ break
+ } else if !errors.Is(err, syscall.EINTR) {
+ return "", err
+ }
}
return val, nil
}
@@ -188,8 +217,9 @@ func LoadPaths(pid string) (map[string]string, error) {
return paths, nil
}
-// Cgroup represents a group inside all controllers. For example: Name='/foo/bar'
-// maps to /sys/fs/cgroup/<controller>/foo/bar on all controllers.
+// Cgroup represents a group inside all controllers. For example:
+// Name='/foo/bar' maps to /sys/fs/cgroup/<controller>/foo/bar on
+// all controllers.
type Cgroup struct {
Name string `json:"name"`
Parents map[string]string `json:"parents"`
@@ -234,16 +264,20 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error {
// The Cleanup object cleans up partially created cgroups when an error occurs.
// Errors occuring during cleanup itself are ignored.
- clean := specutils.MakeCleanup(func() { _ = c.Uninstall() })
+ clean := cleanup.Make(func() { _ = c.Uninstall() })
defer clean.Clean()
- for key, ctrl := range controllers {
+ for key, cfg := range controllers {
path := c.makePath(key)
if err := os.MkdirAll(path, 0755); err != nil {
+ if cfg.optional && errors.Is(err, syscall.EROFS) {
+ log.Infof("Skipping cgroup %q", key)
+ continue
+ }
return err
}
if res != nil {
- if err := ctrl.set(res, path); err != nil {
+ if err := cfg.ctrlr.set(res, path); err != nil {
return err
}
}
@@ -313,16 +347,35 @@ func (c *Cgroup) Join() (func(), error) {
}
// Now join the cgroups.
- for key := range controllers {
+ for key, cfg := range controllers {
path := c.makePath(key)
log.Debugf("Joining cgroup %q", path)
if err := setValue(path, "cgroup.procs", "0"); err != nil {
+ if cfg.optional && os.IsNotExist(err) {
+ continue
+ }
return undo, err
}
}
return undo, nil
}
+// CPUQuota returns the CFS CPU quota of the 'cpu' cgroup as the ratio
+// cpu.cfs_quota_us / cpu.cfs_period_us. It returns -1 with a nil error when
+// either value is not positive, and -1 with the read/parse error when one of
+// the files cannot be read.
+func (c *Cgroup) CPUQuota() (float64, error) {
+ path := c.makePath("cpu")
+ quota, err := getInt(path, "cpu.cfs_quota_us")
+ if err != nil {
+ return -1, err
+ }
+ period, err := getInt(path, "cpu.cfs_period_us")
+ if err != nil {
+ return -1, err
+ }
+ if quota <= 0 || period <= 0 {
+ // Not an error: there is simply no usable quota to report.
+ return -1, nil
+ }
+ return float64(quota) / float64(period), nil
+}
+
// NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'.
func (c *Cgroup) NumCPU() (int, error) {
path := c.makePath("cpuset")
@@ -351,6 +404,11 @@ func (c *Cgroup) makePath(controllerName string) string {
return filepath.Join(cgroupRoot, controllerName, path)
}
+// config describes one entry of the controllers map.
+type config struct {
+ // ctrlr applies the resource settings for this controller.
+ ctrlr controller
+ // optional marks a controller that Install and Join may skip instead of
+ // failing (on EROFS when creating its directory, or when its cgroup.procs
+ // file does not exist, respectively).
+ optional bool
+}
+
type controller interface {
set(*specs.LinuxResources, string) error
}
@@ -406,7 +464,13 @@ func (*cpu) set(spec *specs.LinuxResources, path string) error {
if err := setOptionalValueInt(path, "cpu.cfs_quota_us", spec.CPU.Quota); err != nil {
return err
}
- return setOptionalValueUint(path, "cpu.cfs_period_us", spec.CPU.Period)
+ if err := setOptionalValueUint(path, "cpu.cfs_period_us", spec.CPU.Period); err != nil {
+ return err
+ }
+ if err := setOptionalValueUint(path, "cpu.rt_period_us", spec.CPU.RealtimePeriod); err != nil {
+ return err
+ }
+ return setOptionalValueInt(path, "cpu.rt_runtime_us", spec.CPU.RealtimeRuntime)
}
type cpuSet struct{}
@@ -447,13 +511,17 @@ func (*blockIO) set(spec *specs.LinuxResources, path string) error {
}
for _, dev := range spec.BlockIO.WeightDevice {
- val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, dev.Weight)
- if err := setValue(path, "blkio.weight_device", val); err != nil {
- return err
+ if dev.Weight != nil {
+ val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, *dev.Weight)
+ if err := setValue(path, "blkio.weight_device", val); err != nil {
+ return err
+ }
}
- val = fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, dev.LeafWeight)
- if err := setValue(path, "blkio.leaf_weight_device", val); err != nil {
- return err
+ if dev.LeafWeight != nil {
+ val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, *dev.LeafWeight)
+ if err := setValue(path, "blkio.leaf_weight_device", val); err != nil {
+ return err
+ }
}
}
if err := setThrottle(path, "blkio.throttle.read_bps_device", spec.BlockIO.ThrottleReadBpsDevice); err != nil {
@@ -501,3 +569,26 @@ func (*networkPrio) set(spec *specs.LinuxResources, path string) error {
}
return nil
}
+
+// pids is the controller for the 'pids' cgroup.
+type pids struct{}
+
+// set writes spec.Pids.Limit to 'pids.max' under path. It does nothing when no
+// pids section is present or the limit is not positive.
+func (*pids) set(spec *specs.LinuxResources, path string) error {
+ if spec.Pids == nil || spec.Pids.Limit <= 0 {
+ return nil
+ }
+ val := strconv.FormatInt(spec.Pids.Limit, 10)
+ return setValue(path, "pids.max", val)
+}
+
+// hugeTLB is the controller for the 'hugetlb' cgroup.
+type hugeTLB struct{}
+
+// set writes one 'hugetlb.<pagesize>.limit_in_bytes' file under path for each
+// entry in spec.HugepageLimits, stopping at the first write error.
+func (*hugeTLB) set(spec *specs.LinuxResources, path string) error {
+ for _, limit := range spec.HugepageLimits {
+ name := fmt.Sprintf("hugetlb.%s.limit_in_bytes", limit.Pagesize)
+ val := strconv.FormatUint(limit.Limit, 10)
+ if err := setValue(path, name, val); err != nil {
+ return err
+ }
+ }
+ return nil
+}
diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go
index 548c80e9a..4db5ee5c3 100644
--- a/runsc/cgroup/cgroup_test.go
+++ b/runsc/cgroup/cgroup_test.go
@@ -15,7 +15,14 @@
package cgroup
import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
"testing"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/test/testutil"
)
func TestUninstallEnoent(t *testing.T) {
@@ -65,3 +72,578 @@ func TestCountCpuset(t *testing.T) {
})
}
}
+
+// uint16Ptr returns a pointer to a copy of v.
+func uint16Ptr(v uint16) *uint16 {
+ return &v
+}
+
+// uint32Ptr returns a pointer to a copy of v.
+func uint32Ptr(v uint32) *uint32 {
+ return &v
+}
+
+// int64Ptr returns a pointer to a copy of v.
+func int64Ptr(v int64) *int64 {
+ return &v
+}
+
+// uint64Ptr returns a pointer to a copy of v.
+func uint64Ptr(v uint64) *uint64 {
+ return &v
+}
+
+// boolPtr returns a pointer to a copy of v.
+func boolPtr(v bool) *bool {
+ return &v
+}
+
+// checkDir verifies that the regular files in dir are exactly those named in
+// contents and that each file's content, with one trailing newline removed,
+// equals the mapped value. Subdirectories are ignored.
+func checkDir(t *testing.T, dir string, contents map[string]string) {
+ all, err := ioutil.ReadDir(dir)
+ if err != nil {
+ t.Fatalf("ReadDir(%q): %v", dir, err)
+ }
+ fileCount := 0
+ for _, file := range all {
+ if file.IsDir() {
+ // Only want to compare files.
+ continue
+ }
+ fileCount++
+
+ want, ok := contents[file.Name()]
+ if !ok {
+ t.Errorf("file not expected: %q", file.Name())
+ continue
+ }
+ gotBytes, err := ioutil.ReadFile(filepath.Join(dir, file.Name()))
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+ got := strings.TrimSuffix(string(gotBytes), "\n")
+ if got != want {
+ t.Errorf("wrong file content, file: %q, want: %q, got: %q", file.Name(), want, got)
+ }
+ }
+ // Unexpected files were reported above, so a count mismatch here also
+ // covers expected files that were never written.
+ if fileCount != len(contents) {
+ t.Errorf("file is missing, want: %v, got: %v", contents, all)
+ }
+}
+
+// makeLinuxWeightDevice builds a specs.LinuxWeightDevice with the given device
+// numbers and weights.
+func makeLinuxWeightDevice(major, minor int64, weight, leafWeight *uint16) specs.LinuxWeightDevice {
+ rv := specs.LinuxWeightDevice{
+ Weight: weight,
+ LeafWeight: leafWeight,
+ }
+ // Major and Minor are assigned after construction rather than in the
+ // literal above.
+ rv.Major = major
+ rv.Minor = minor
+ return rv
+}
+
+// makeLinuxThrottleDevice builds a specs.LinuxThrottleDevice with the given
+// device numbers and rate.
+func makeLinuxThrottleDevice(major, minor int64, rate uint64) specs.LinuxThrottleDevice {
+ rv := specs.LinuxThrottleDevice{
+ Rate: rate,
+ }
+ rv.Major = major
+ rv.Minor = minor
+ return rv
+}
+
+// TestBlockIO checks that blockIO.set writes exactly the expected 'blkio.*'
+// files, with the expected content, into a temporary directory for each
+// BlockIO spec. Cases without 'wants' expect no files at all.
+func TestBlockIO(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ spec *specs.LinuxBlockIO
+ wants map[string]string
+ }{
+ {
+ name: "simple",
+ spec: &specs.LinuxBlockIO{
+ Weight: uint16Ptr(1),
+ LeafWeight: uint16Ptr(2),
+ },
+ wants: map[string]string{
+ "blkio.weight": "1",
+ "blkio.leaf_weight": "2",
+ },
+ },
+ {
+ name: "weight_device",
+ spec: &specs.LinuxBlockIO{
+ WeightDevice: []specs.LinuxWeightDevice{
+ makeLinuxWeightDevice(1, 2, uint16Ptr(3), uint16Ptr(4)),
+ },
+ },
+ wants: map[string]string{
+ "blkio.weight_device": "1:2 3",
+ "blkio.leaf_weight_device": "1:2 4",
+ },
+ },
+ {
+ name: "weight_device_nil_values",
+ spec: &specs.LinuxBlockIO{
+ WeightDevice: []specs.LinuxWeightDevice{
+ makeLinuxWeightDevice(1, 2, nil, nil),
+ },
+ },
+ },
+ {
+ name: "throttle",
+ spec: &specs.LinuxBlockIO{
+ ThrottleReadBpsDevice: []specs.LinuxThrottleDevice{
+ makeLinuxThrottleDevice(1, 2, 3),
+ },
+ ThrottleReadIOPSDevice: []specs.LinuxThrottleDevice{
+ makeLinuxThrottleDevice(4, 5, 6),
+ },
+ ThrottleWriteBpsDevice: []specs.LinuxThrottleDevice{
+ makeLinuxThrottleDevice(7, 8, 9),
+ },
+ ThrottleWriteIOPSDevice: []specs.LinuxThrottleDevice{
+ makeLinuxThrottleDevice(10, 11, 12),
+ },
+ },
+ wants: map[string]string{
+ "blkio.throttle.read_bps_device": "1:2 3",
+ "blkio.throttle.read_iops_device": "4:5 6",
+ "blkio.throttle.write_bps_device": "7:8 9",
+ "blkio.throttle.write_iops_device": "10:11 12",
+ },
+ },
+ {
+ name: "nil_values",
+ spec: &specs.LinuxBlockIO{},
+ },
+ {
+ name: "nil",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ spec := &specs.LinuxResources{
+ BlockIO: tc.spec,
+ }
+ ctrlr := blockIO{}
+ if err := ctrlr.set(spec, dir); err != nil {
+ t.Fatalf("ctrlr.set(): %v", err)
+ }
+ checkDir(t, dir, tc.wants)
+ })
+ }
+}
+
+// TestCPU checks that cpu.set writes exactly the expected 'cpu.*' files, with
+// the expected content, into a temporary directory for each CPU spec. Cases
+// without 'wants' expect no files at all.
+func TestCPU(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ spec *specs.LinuxCPU
+ wants map[string]string
+ }{
+ {
+ name: "all",
+ spec: &specs.LinuxCPU{
+ Shares: uint64Ptr(1),
+ Quota: int64Ptr(2),
+ Period: uint64Ptr(3),
+ RealtimeRuntime: int64Ptr(4),
+ RealtimePeriod: uint64Ptr(5),
+ },
+ wants: map[string]string{
+ "cpu.shares": "1",
+ "cpu.cfs_quota_us": "2",
+ "cpu.cfs_period_us": "3",
+ "cpu.rt_runtime_us": "4",
+ "cpu.rt_period_us": "5",
+ },
+ },
+ {
+ name: "nil_values",
+ spec: &specs.LinuxCPU{},
+ },
+ {
+ name: "nil",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ spec := &specs.LinuxResources{
+ CPU: tc.spec,
+ }
+ ctrlr := cpu{}
+ if err := ctrlr.set(spec, dir); err != nil {
+ t.Fatalf("ctrlr.set(): %v", err)
+ }
+ checkDir(t, dir, tc.wants)
+ })
+ }
+}
+
+// TestCPUSet checks that cpuSet.set writes 'cpuset.cpus' and 'cpuset.mems'
+// with the values from the spec into a temporary directory.
+func TestCPUSet(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ spec *specs.LinuxCPU
+ wants map[string]string
+ }{
+ {
+ name: "all",
+ spec: &specs.LinuxCPU{
+ Cpus: "foo",
+ Mems: "bar",
+ },
+ wants: map[string]string{
+ "cpuset.cpus": "foo",
+ "cpuset.mems": "bar",
+ },
+ },
+ // Don't test nil values because they are copied from the parent.
+ // See TestCPUSetAncestor().
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ spec := &specs.LinuxResources{
+ CPU: tc.spec,
+ }
+ ctrlr := cpuSet{}
+ if err := ctrlr.set(spec, dir); err != nil {
+ t.Fatalf("ctrlr.set(): %v", err)
+ }
+ checkDir(t, dir, tc.wants)
+ })
+ }
+}
+
+// TestCPUSetAncestor checks that, when not available, value is read from
+// parent directory.
+//
+// The layout built here is grandpa/parent/child: grandpa holds populated
+// cpuset files, while parent and child hold empty ones. After set() runs on
+// the child with no Cpus/Mems in the spec, both parent and child are passed to
+// checkDir with grandpa's values as the expectation.
+func TestCPUSetAncestor(t *testing.T) {
+ // Prepare master directory with cgroup files that will be propagated to
+ // children.
+ grandpa, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(grandpa)
+
+ if err := ioutil.WriteFile(filepath.Join(grandpa, "cpuset.cpus"), []byte("parent-cpus"), 0666); err != nil {
+ t.Fatalf("ioutil.WriteFile(): %v", err)
+ }
+ if err := ioutil.WriteFile(filepath.Join(grandpa, "cpuset.mems"), []byte("parent-mems"), 0666); err != nil {
+ t.Fatalf("ioutil.WriteFile(): %v", err)
+ }
+
+ for _, tc := range []struct {
+ name string
+ spec *specs.LinuxCPU
+ }{
+ {
+ name: "nil_values",
+ spec: &specs.LinuxCPU{},
+ },
+ {
+ name: "nil",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ // Create empty files in intermediate directory. They should be ignored
+ // when reading, and then populated from parent.
+ parent, err := ioutil.TempDir(grandpa, "parent")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(parent)
+ if _, err := os.Create(filepath.Join(parent, "cpuset.cpus")); err != nil {
+ t.Fatalf("os.Create(): %v", err)
+ }
+ if _, err := os.Create(filepath.Join(parent, "cpuset.mems")); err != nil {
+ t.Fatalf("os.Create(): %v", err)
+ }
+
+ // cgroup files must exist. The child lives inside parent, so it is
+ // removed together with parent by the deferred RemoveAll above.
+ dir, err := ioutil.TempDir(parent, "child")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ if _, err := os.Create(filepath.Join(dir, "cpuset.cpus")); err != nil {
+ t.Fatalf("os.Create(): %v", err)
+ }
+ if _, err := os.Create(filepath.Join(dir, "cpuset.mems")); err != nil {
+ t.Fatalf("os.Create(): %v", err)
+ }
+
+ spec := &specs.LinuxResources{
+ CPU: tc.spec,
+ }
+ ctrlr := cpuSet{}
+ if err := ctrlr.set(spec, dir); err != nil {
+ t.Fatalf("ctrlr.set(): %v", err)
+ }
+ want := map[string]string{
+ "cpuset.cpus": "parent-cpus",
+ "cpuset.mems": "parent-mems",
+ }
+ // Both parent and dir must have been populated from grandpa.
+ checkDir(t, parent, want)
+ checkDir(t, dir, want)
+ })
+ }
+}
+
+// TestHugeTlb runs the hugeTLB controller's set() against a fresh temporary
+// directory for each case and hands that directory and the expected
+// "hugetlb.<Pagesize>.limit_in_bytes" contents to checkDir. The cases cover a
+// single limit, several limits, and a nil limit list.
+func TestHugeTlb(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ spec []specs.LinuxHugepageLimit
+ wants map[string]string
+ }{
+ {
+ name: "single",
+ spec: []specs.LinuxHugepageLimit{
+ {
+ Pagesize: "1G",
+ Limit: 123,
+ },
+ },
+ wants: map[string]string{
+ "hugetlb.1G.limit_in_bytes": "123",
+ },
+ },
+ {
+ name: "multiple",
+ spec: []specs.LinuxHugepageLimit{
+ {
+ Pagesize: "1G",
+ Limit: 123,
+ },
+ {
+ Pagesize: "2G",
+ Limit: 456,
+ },
+ {
+ Pagesize: "1P",
+ Limit: 789,
+ },
+ },
+ wants: map[string]string{
+ "hugetlb.1G.limit_in_bytes": "123",
+ "hugetlb.2G.limit_in_bytes": "456",
+ "hugetlb.1P.limit_in_bytes": "789",
+ },
+ },
+ {
+ name: "nil",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ spec := &specs.LinuxResources{
+ HugepageLimits: tc.spec,
+ }
+ ctrlr := hugeTLB{}
+ if err := ctrlr.set(spec, dir); err != nil {
+ t.Fatalf("ctrlr.set(): %v", err)
+ }
+ checkDir(t, dir, tc.wants)
+ })
+ }
+}
+
+// TestMemory runs the memory controller's set() against a fresh temporary
+// directory for each case and hands that directory and the expected
+// "memory.*" file contents to checkDir. The "oomkiller", "nil_values" and
+// "nil" cases carry no expected entries.
+func TestMemory(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ spec *specs.LinuxMemory
+ wants map[string]string
+ }{
+ {
+ name: "all",
+ spec: &specs.LinuxMemory{
+ Limit: int64Ptr(1),
+ Reservation: int64Ptr(2),
+ Swap: int64Ptr(3),
+ Kernel: int64Ptr(4),
+ KernelTCP: int64Ptr(5),
+ Swappiness: uint64Ptr(6),
+ DisableOOMKiller: boolPtr(true),
+ },
+ wants: map[string]string{
+ "memory.limit_in_bytes": "1",
+ "memory.soft_limit_in_bytes": "2",
+ "memory.memsw.limit_in_bytes": "3",
+ "memory.kmem.limit_in_bytes": "4",
+ "memory.kmem.tcp.limit_in_bytes": "5",
+ "memory.swappiness": "6",
+ "memory.oom_control": "1",
+ },
+ },
+ {
+ // Disable OOM killer should only write when set to true.
+ name: "oomkiller",
+ spec: &specs.LinuxMemory{
+ DisableOOMKiller: boolPtr(false),
+ },
+ },
+ {
+ name: "nil_values",
+ spec: &specs.LinuxMemory{},
+ },
+ {
+ name: "nil",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ spec := &specs.LinuxResources{
+ Memory: tc.spec,
+ }
+ ctrlr := memory{}
+ if err := ctrlr.set(spec, dir); err != nil {
+ t.Fatalf("ctrlr.set(): %v", err)
+ }
+ checkDir(t, dir, tc.wants)
+ })
+ }
+}
+
+// TestNetworkClass runs the networkClass controller's set() against a fresh
+// temporary directory for each case and hands that directory and the expected
+// "net_cls.classid" contents to checkDir. The "nil_values" and "nil" cases
+// carry no expected entries.
+func TestNetworkClass(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ spec *specs.LinuxNetwork
+ wants map[string]string
+ }{
+ {
+ name: "all",
+ spec: &specs.LinuxNetwork{
+ ClassID: uint32Ptr(1),
+ },
+ wants: map[string]string{
+ "net_cls.classid": "1",
+ },
+ },
+ {
+ name: "nil_values",
+ spec: &specs.LinuxNetwork{},
+ },
+ {
+ name: "nil",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ spec := &specs.LinuxResources{
+ Network: tc.spec,
+ }
+ ctrlr := networkClass{}
+ if err := ctrlr.set(spec, dir); err != nil {
+ t.Fatalf("ctrlr.set(): %v", err)
+ }
+ checkDir(t, dir, tc.wants)
+ })
+ }
+}
+
+// TestNetworkPriority runs the networkPrio controller's set() against a fresh
+// temporary directory for each case and hands that directory and the expected
+// "net_prio.ifpriomap" contents ("<name> <priority>") to checkDir. The
+// "nil_values" and "nil" cases carry no expected entries.
+func TestNetworkPriority(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ spec *specs.LinuxNetwork
+ wants map[string]string
+ }{
+ {
+ name: "all",
+ spec: &specs.LinuxNetwork{
+ Priorities: []specs.LinuxInterfacePriority{
+ {
+ Name: "foo",
+ Priority: 1,
+ },
+ },
+ },
+ wants: map[string]string{
+ "net_prio.ifpriomap": "foo 1",
+ },
+ },
+ {
+ name: "nil_values",
+ spec: &specs.LinuxNetwork{},
+ },
+ {
+ name: "nil",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ spec := &specs.LinuxResources{
+ Network: tc.spec,
+ }
+ ctrlr := networkPrio{}
+ if err := ctrlr.set(spec, dir); err != nil {
+ t.Fatalf("ctrlr.set(): %v", err)
+ }
+ checkDir(t, dir, tc.wants)
+ })
+ }
+}
+
+// TestPids runs the pids controller's set() against a fresh temporary
+// directory for each case and hands that directory and the expected
+// "pids.max" contents to checkDir. The "nil_values" and "nil" cases carry no
+// expected entries.
+func TestPids(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ spec *specs.LinuxPids
+ wants map[string]string
+ }{
+ {
+ name: "all",
+ spec: &specs.LinuxPids{Limit: 1},
+ wants: map[string]string{
+ "pids.max": "1",
+ },
+ },
+ {
+ name: "nil_values",
+ spec: &specs.LinuxPids{},
+ },
+ {
+ name: "nil",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ spec := &specs.LinuxResources{
+ Pids: tc.spec,
+ }
+ ctrlr := pids{}
+ if err := ctrlr.set(spec, dir); err != nil {
+ t.Fatalf("ctrlr.set(): %v", err)
+ }
+ checkDir(t, dir, tc.wants)
+ })
+ }
+}
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index 250845ad7..1b5178dd5 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -31,10 +31,10 @@ go_library(
"spec.go",
"start.go",
"state.go",
+ "statefile.go",
"syscalls.go",
"wait.go",
],
- importpath = "gvisor.dev/gvisor/runsc/cmd",
visibility = [
"//runsc:__subpackages__",
],
@@ -44,17 +44,21 @@ go_library(
"//pkg/sentry/control",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
+ "//pkg/sentry/platform",
+ "//pkg/state/pretty",
+ "//pkg/state/statefile",
+ "//pkg/sync",
"//pkg/unet",
"//pkg/urpc",
"//runsc/boot",
- "//runsc/boot/platforms",
"//runsc/console",
"//runsc/container",
+ "//runsc/flag",
"//runsc/fsgofer",
"//runsc/fsgofer/filter",
"//runsc/specutils",
"@com_github_google_subcommands//:go_default_library",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
"@com_github_syndtr_gocapability//capability:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
],
@@ -72,20 +76,20 @@ go_test(
data = [
"//runsc",
],
- embed = [":cmd"],
+ library = ":cmd",
deps = [
"//pkg/abi/linux",
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/kernel/auth",
+ "//pkg/test/testutil",
"//pkg/urpc",
"//runsc/boot",
"//runsc/container",
"//runsc/specutils",
- "//runsc/testutil",
- "@com_github_google_go-cmp//cmp:go_default_library",
- "@com_github_google_go-cmp//cmp/cmpopts:go_default_library",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_google_go_cmp//cmp:go_default_library",
+ "@com_github_google_go_cmp//cmp/cmpopts:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
"@com_github_syndtr_gocapability//capability:go_default_library",
],
)
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index b40fded5b..f4f247721 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -21,12 +21,13 @@ import (
"strings"
"syscall"
- "flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/runsc/boot"
- "gvisor.dev/gvisor/runsc/boot/platforms"
+ "gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/specutils"
)
@@ -53,10 +54,6 @@ type Boot struct {
// provided in that order.
stdioFDs intFlags
- // console is set to true if the sandbox should allow terminal ioctl(2)
- // syscalls.
- console bool
-
// applyCaps determines if capabilities defined in the spec should be applied
// to the process.
applyCaps bool
@@ -82,8 +79,13 @@ type Boot struct {
// sandbox (e.g. gofer) and sent through this FD.
mountsFD int
- // pidns is set if the sanadbox is in its own pid namespace.
+ // pidns is set if the sandbox is in its own pid namespace.
pidns bool
+
+ // attached is set to true to kill the sandbox process when the parent process
+ // terminates. This flag is set when the command execve's itself because
+ // parent death signal doesn't propagate through execve when uid/gid changes.
+ attached bool
}
// Name implements subcommands.Command.Name.
@@ -109,7 +111,6 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
f.IntVar(&b.deviceFD, "device-fd", -1, "FD for the platform device file")
f.Var(&b.ioFDs, "io-fds", "list of FDs to connect 9P clients. They must follow this order: root first, then mounts as defined in the spec")
f.Var(&b.stdioFDs, "stdio-fds", "list of FDs containing sandbox stdin, stdout, and stderr in that order")
- f.BoolVar(&b.console, "console", false, "set to true if the sandbox should allow terminal ioctl(2) syscalls")
f.BoolVar(&b.applyCaps, "apply-caps", false, "if true, apply capabilities defined in the spec to the process")
f.BoolVar(&b.setUpRoot, "setup-root", false, "if true, set up an empty root for the process")
f.BoolVar(&b.pidns, "pidns", false, "if true, the sandbox is in its own PID namespace")
@@ -118,6 +119,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
+ f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates")
}
// Execute implements subcommands.Command.Execute. It starts a sandbox in a
@@ -129,33 +131,36 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
// Ensure that if there is a panic, all goroutine stacks are printed.
- debug.SetTraceback("all")
+ debug.SetTraceback("system")
conf := args[0].(*boot.Config)
+ if b.attached {
+ // Ensure this process is killed after parent process terminates when
+ // attached mode is enabled. In the unfortunate event that the parent
+ // terminates before this point, this process leaks.
+ if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil {
+ Fatalf("error setting parent death signal: %v", err)
+ }
+ }
+
if b.setUpRoot {
if err := setUpChroot(b.pidns); err != nil {
Fatalf("error setting up chroot: %v", err)
}
- if !b.applyCaps {
- // Remove --setup-root arg to call myself.
- var args []string
- for _, arg := range os.Args {
- if !strings.Contains(arg, "setup-root") {
- args = append(args, arg)
- }
- }
- if !conf.Rootless {
- // Note that we've already read the spec from the spec FD, and
- // we will read it again after the exec call. This works
- // because the ReadSpecFromFile function seeks to the beginning
- // of the file before reading.
- if err := callSelfAsNobody(args); err != nil {
- Fatalf("%v", err)
- }
- panic("callSelfAsNobody must never return success")
+ if !b.applyCaps && !conf.Rootless {
+ // Remove --setup-root arg to call myself. It has already been done.
+ args := prepareArgs(b.attached, "setup-root")
+
+ // Note that we've already read the spec from the spec FD, and
+ // we will read it again after the exec call. This works
+ // because the ReadSpecFromFile function seeks to the beginning
+ // of the file before reading.
+ if err := callSelfAsNobody(args); err != nil {
+ Fatalf("%v", err)
}
+ panic("callSelfAsNobody must never return success")
}
}
@@ -173,7 +178,12 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
if caps == nil {
caps = &specs.LinuxCapabilities{}
}
- if conf.Platform == platforms.Ptrace {
+
+ gPlatform, err := platform.Lookup(conf.Platform)
+ if err != nil {
+ Fatalf("loading platform: %v", err)
+ }
+ if gPlatform.Requirements().RequiresCapSysPtrace {
// Ptrace platform requires extra capabilities.
const c = "CAP_SYS_PTRACE"
caps.Bounding = append(caps.Bounding, c)
@@ -181,13 +191,9 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
caps.Permitted = append(caps.Permitted, c)
}
- // Remove --apply-caps arg to call myself.
- var args []string
- for _, arg := range os.Args {
- if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") {
- args = append(args, arg)
- }
- }
+ // Remove --apply-caps and --setup-root arg to call myself. Both have
+ // already been done.
+ args := prepareArgs(b.attached, "setup-root", "apply-caps")
// Note that we've already read the spec from the spec FD, and
// we will read it again after the exec call. This works
@@ -218,7 +224,6 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Device: os.NewFile(uintptr(b.deviceFD), "platform device"),
GoferFDs: b.ioFDs.GetArray(),
StdioFDs: b.stdioFDs.GetArray(),
- Console: b.console,
NumCPU: b.cpuNum,
TotalMem: b.totalMem,
UserLogFD: b.userLogFD,
@@ -258,3 +263,22 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
l.Destroy()
return subcommands.ExitSuccess
}
+
+// prepareArgs builds the argument list used to re-execute this binary from
+// os.Args.
+//
+// Any argument that contains one of the exclude strings as a substring is
+// dropped. When attached is true, "--attached" is inserted immediately after
+// every kept argument that is exactly "boot".
+func prepareArgs(attached bool, exclude ...string) []string {
+ var args []string
+ for _, arg := range os.Args {
+ for _, excl := range exclude {
+ if strings.Contains(arg, excl) {
+ // Substring match: jump past the append below to drop this argument.
+ goto skip
+ }
+ }
+ args = append(args, arg)
+ if attached && arg == "boot" {
+ // Strategically place "--attached" after the command. This is needed
+ // to ensure the new process is killed when the parent process terminates.
+ args = append(args, "--attached")
+ }
+ skip:
+ }
+ return args
+}
diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go
index 0c27f7313..a84067112 100644
--- a/runsc/cmd/capability_test.go
+++ b/runsc/cmd/capability_test.go
@@ -23,10 +23,10 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/test/testutil"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
"gvisor.dev/gvisor/runsc/specutils"
- "gvisor.dev/gvisor/runsc/testutil"
)
func init() {
@@ -85,21 +85,20 @@ func TestCapabilities(t *testing.T) {
Inheritable: caps,
}
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
// Use --network=host to make sandbox use spec's capabilities.
conf.Network = boot.NetworkHost
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create and start the container.
args := container.Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
}
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go
index d8b3a8573..8a29e521e 100644
--- a/runsc/cmd/checkpoint.go
+++ b/runsc/cmd/checkpoint.go
@@ -20,11 +20,11 @@ import (
"path/filepath"
"syscall"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/specutils"
)
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
index b5a0ce17d..189244765 100644
--- a/runsc/cmd/chroot.go
+++ b/runsc/cmd/chroot.go
@@ -50,7 +50,7 @@ func pivotRoot(root string) error {
// new_root, so after umounting the old_root, we will see only
// the new_root in "/".
if err := syscall.PivotRoot(".", "."); err != nil {
- return fmt.Errorf("error changing root filesystem: %v", err)
+ return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err)
}
if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go
index a4e3071b3..910e97577 100644
--- a/runsc/cmd/create.go
+++ b/runsc/cmd/create.go
@@ -16,10 +16,11 @@ package cmd
import (
"context"
- "flag"
+
"github.com/google/subcommands"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/specutils"
)
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 7313e473f..742f8c344 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -22,12 +22,12 @@ import (
"syscall"
"time"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// Debug implements subcommands.Command for the "debug" command.
@@ -37,11 +37,14 @@ type Debug struct {
signal int
profileHeap string
profileCPU string
- profileDelay int
+ profileBlock string
+ profileMutex string
trace string
strace string
logLevel string
logPackets string
+ duration time.Duration
+ ps bool
}
// Name implements subcommands.Command.
@@ -65,12 +68,15 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log")
f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.")
f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
- f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile")
+ f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
+ f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
+ f.DurationVar(&d.duration, "duration", time.Second, "amount of time to wait for CPU and trace profiles")
f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all`)
f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).")
f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.")
+ f.BoolVar(&d.ps, "ps", false, "lists processes")
}
// Execute implements subcommands.Command.Execute.
@@ -145,6 +151,30 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
log.Infof("Heap profile written to %q", d.profileHeap)
}
+ if d.profileBlock != "" {
+ f, err := os.Create(d.profileBlock)
+ if err != nil {
+ return Errorf(err.Error())
+ }
+ defer f.Close()
+
+ if err := c.Sandbox.BlockProfile(f); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Block profile written to %q", d.profileBlock)
+ }
+ if d.profileMutex != "" {
+ f, err := os.Create(d.profileMutex)
+ if err != nil {
+ return Errorf(err.Error())
+ }
+ defer f.Close()
+
+ if err := c.Sandbox.MutexProfile(f); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Mutex profile written to %q", d.profileMutex)
+ }
delay := false
if d.profileCPU != "" {
@@ -163,7 +193,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
if err := c.Sandbox.StartCPUProfile(f); err != nil {
return Errorf(err.Error())
}
- log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU)
+ log.Infof("CPU profile started for %v, writing to %q", d.duration, d.profileCPU)
}
if d.trace != "" {
delay = true
@@ -181,8 +211,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
if err := c.Sandbox.StartTrace(f); err != nil {
return Errorf(err.Error())
}
- log.Infof("Tracing started for %d sec, writing to %q", d.profileDelay, d.trace)
-
+ log.Infof("Tracing started for %v, writing to %q", d.duration, d.trace)
}
if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
@@ -241,9 +270,20 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
log.Infof("Logging options changed")
}
+ if d.ps {
+ pList, err := c.Processes()
+ if err != nil {
+ Fatalf("getting processes for container: %v", err)
+ }
+ o, err := control.ProcessListToJSON(pList)
+ if err != nil {
+ Fatalf("generating JSON: %v", err)
+ }
+ log.Infof(o)
+ }
if delay {
- time.Sleep(time.Duration(d.profileDelay) * time.Second)
+ time.Sleep(d.duration)
}
return subcommands.ExitSuccess
diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go
index 30d8164b1..0e4863f50 100644
--- a/runsc/cmd/delete.go
+++ b/runsc/cmd/delete.go
@@ -19,11 +19,11 @@ import (
"fmt"
"os"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// Delete implements subcommands.Command for the "delete" command.
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index 9a8a49054..7d1310c96 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -27,12 +27,12 @@ import (
"strings"
"syscall"
- "flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/specutils"
)
@@ -166,15 +166,33 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
return Errorf("Error write spec: %v", err)
}
- runArgs := container.Args{
+ containerArgs := container.Args{
ID: cid,
Spec: spec,
BundleDir: tmpDir,
Attached: true,
}
- ws, err := container.Run(conf, runArgs)
+ ct, err := container.New(conf, containerArgs)
if err != nil {
- return Errorf("running container: %v", err)
+ return Errorf("creating container: %v", err)
+ }
+ defer ct.Destroy()
+
+ if err := ct.Start(conf); err != nil {
+ return Errorf("starting container: %v", err)
+ }
+
+ // Forward signals to init in the container. Thus if we get SIGINT from
+ // ^C, the container gracefully exits, and we can clean up.
+ //
+ // N.B. There is still a window before this where a signal may kill
+ // this process, skipping cleanup.
+ stopForwarding := ct.ForwardSignals(0 /* pid */, false /* fgProcess */)
+ defer stopForwarding()
+
+ ws, err := ct.Wait()
+ if err != nil {
+ return Errorf("waiting for container: %v", err)
}
*waitStatus = ws
@@ -237,20 +255,27 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) {
for _, cmd := range cmds {
log.Debugf("Run %q", cmd)
args := strings.Split(cmd, " ")
- c := exec.Command(args[0], args[1:]...)
- if err := c.Run(); err != nil {
+ cmd := exec.Command(args[0], args[1:]...)
+ if err := cmd.Run(); err != nil {
+ c.cleanupNet(cid, dev, "", "", "")
return nil, fmt.Errorf("failed to run %q: %v", cmd, err)
}
}
- if err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec); err != nil {
+ resolvPath, err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec)
+ if err != nil {
+ c.cleanupNet(cid, dev, "", "", "")
return nil, err
}
- if err := makeFile("/etc/hostname", cid+"\n", spec); err != nil {
+ hostnamePath, err := makeFile("/etc/hostname", cid+"\n", spec)
+ if err != nil {
+ c.cleanupNet(cid, dev, resolvPath, "", "")
return nil, err
}
hosts := fmt.Sprintf("127.0.0.1\tlocalhost\n%s\t%s\n", c.ip, cid)
- if err := makeFile("/etc/hosts", hosts, spec); err != nil {
+ hostsPath, err := makeFile("/etc/hosts", hosts, spec)
+ if err != nil {
+ c.cleanupNet(cid, dev, resolvPath, hostnamePath, "")
return nil, err
}
@@ -263,19 +288,22 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) {
}
spec.Linux.Namespaces = append(spec.Linux.Namespaces, netns)
- return func() { c.cleanNet(cid, dev) }, nil
+ return func() { c.cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath) }, nil
}
-func (c *Do) cleanNet(cid, dev string) {
- veth, peer := deviceNames(cid)
+// cleanupNet tries to clean up the network setup in setupNet.
+//
+// It may be called when setupNet is only partially complete, in which case it
+// will clean up as much as possible, logging warnings for the rest.
+//
+// Unfortunately none of this can be automatically cleaned up on process exit,
+// we must do so explicitly.
+func (c *Do) cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath string) {
+ _, peer := deviceNames(cid)
cmds := []string{
fmt.Sprintf("ip link delete %s", peer),
fmt.Sprintf("ip netns delete %s", cid),
-
- fmt.Sprintf("iptables -t nat -D POSTROUTING -s %s/24 -o %s -j MASQUERADE", c.ip, dev),
- fmt.Sprintf("iptables -D FORWARD -i %s -o %s -j ACCEPT", dev, veth),
- fmt.Sprintf("iptables -D FORWARD -o %s -i %s -j ACCEPT", dev, veth),
}
for _, cmd := range cmds {
@@ -286,6 +314,10 @@ func (c *Do) cleanNet(cid, dev string) {
log.Warningf("Failed to run %q: %v", cmd, err)
}
}
+
+ tryRemove(resolvPath)
+ tryRemove(hostnamePath)
+ tryRemove(hostsPath)
}
func deviceNames(cid string) (string, string) {
@@ -306,13 +338,16 @@ func defaultDevice() (string, error) {
return parts[4], nil
}
-func makeFile(dest, content string, spec *specs.Spec) error {
+func makeFile(dest, content string, spec *specs.Spec) (string, error) {
tmpFile, err := ioutil.TempFile("", filepath.Base(dest))
if err != nil {
- return err
+ return "", err
}
if _, err := tmpFile.WriteString(content); err != nil {
- return err
+ if err := os.Remove(tmpFile.Name()); err != nil {
+ log.Warningf("Failed to remove %q: %v", tmpFile, err)
+ }
+ return "", err
}
spec.Mounts = append(spec.Mounts, specs.Mount{
Source: tmpFile.Name(),
@@ -320,7 +355,17 @@ func makeFile(dest, content string, spec *specs.Spec) error {
Type: "bind",
Options: []string{"ro"},
})
- return nil
+ return tmpFile.Name(), nil
+}
+
+// tryRemove removes the file at path, logging a warning if removal fails. An
+// empty path is a no-op.
+func tryRemove(path string) {
+ if path == "" {
+ return
+ }
+
+ if err := os.Remove(path); err != nil {
+ log.Warningf("Failed to remove %q: %v", path, err)
+ }
 }
func calculatePeerIP(ip string) (string, error) {
diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go
index 3972e9224..51f6a98ed 100644
--- a/runsc/cmd/events.go
+++ b/runsc/cmd/events.go
@@ -20,11 +20,11 @@ import (
"os"
"time"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// Events implements subcommands.Command for the "events" command.
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index d1e99243b..d9a94903e 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -27,7 +27,6 @@ import (
"syscall"
"time"
- "flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/log"
@@ -37,6 +36,7 @@ import (
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/console"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/specutils"
)
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 4831210c0..3966e2d21 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -21,17 +21,17 @@ import (
"os"
"path/filepath"
"strings"
- "sync"
"syscall"
- "flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/unet"
"gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/fsgofer"
"gvisor.dev/gvisor/runsc/fsgofer/filter"
"gvisor.dev/gvisor/runsc/specutils"
@@ -168,7 +168,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Start with root mount, then add any other additional mount as needed.
ats := make([]p9.Attacher, 0, len(spec.Mounts)+1)
ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{
- ROMount: spec.Root.Readonly,
+ ROMount: spec.Root.Readonly || conf.Overlay,
PanicOnWrite: g.panicOnWrite,
})
if err != nil {
@@ -181,7 +181,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
for _, m := range spec.Mounts {
if specutils.Is9PMount(m) {
cfg := fsgofer.Config{
- ROMount: isReadonlyMount(m.Options),
+ ROMount: isReadonlyMount(m.Options) || conf.Overlay,
PanicOnWrite: g.panicOnWrite,
HostUDS: conf.FSGoferHostUDS,
}
@@ -272,9 +272,8 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
root := spec.Root.Path
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
- // FIXME: runsc can't be re-executed without
- // /proc, so we create a tmpfs mount, mount ./proc and ./root
- // there, then move this mount to the root and after
+ // runsc can't be re-executed without /proc, so we create a tmpfs mount,
+ // mount ./proc and ./root there, then move this mount to the root and after
// setCapsAndCallSelf, runsc will chroot into /root.
//
// We need a directory to construct a new root and we know that
@@ -307,7 +306,7 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
}
// Replace the current spec, with the clean spec with symlinks resolved.
- if err := setupMounts(spec.Mounts, root); err != nil {
+ if err := setupMounts(conf, spec.Mounts, root); err != nil {
Fatalf("error setting up FS: %v", err)
}
@@ -323,7 +322,7 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
}
// Check if root needs to be remounted as readonly.
- if spec.Root.Readonly {
+ if spec.Root.Readonly || conf.Overlay {
// If root is a mount point but not read-only, we can change mount options
// to make it read-only for extra safety.
log.Infof("Remounting root as readonly: %q", root)
@@ -335,7 +334,7 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
if err := pivotRoot("/proc"); err != nil {
- Fatalf("faild to change the root file system: %v", err)
+ Fatalf("failed to change the root file system: %v", err)
}
if err := os.Chdir("/"); err != nil {
Fatalf("failed to change working directory")
@@ -347,7 +346,7 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
// setupMounts binds mount all mounts specified in the spec in their correct
// location inside root. It will resolve relative paths and symlinks. It also
// creates directories as needed.
-func setupMounts(mounts []specs.Mount, root string) error {
+func setupMounts(conf *boot.Config, mounts []specs.Mount, root string) error {
for _, m := range mounts {
if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
continue
@@ -359,6 +358,11 @@ func setupMounts(mounts []specs.Mount, root string) error {
}
flags := specutils.OptionsToFlags(m.Options) | syscall.MS_BIND
+ if conf.Overlay {
+ // Force mount read-only if writes are not going to be sent to it.
+ flags |= syscall.MS_RDONLY
+ }
+
log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags)
if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil {
return fmt.Errorf("mounting %v: %v", m, err)
diff --git a/runsc/cmd/help.go b/runsc/cmd/help.go
index ff4f901cb..cd85dabbb 100644
--- a/runsc/cmd/help.go
+++ b/runsc/cmd/help.go
@@ -1,4 +1,4 @@
-// Copyright 2018 Google LLC
+// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -18,8 +18,8 @@ import (
"context"
"fmt"
- "flag"
"github.com/google/subcommands"
+ "gvisor.dev/gvisor/runsc/flag"
)
// NewHelp returns a help command for the given commander.
@@ -65,16 +65,10 @@ func (h *Help) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}
switch f.NArg() {
case 0:
fmt.Fprintf(h.cdr.Output, "Usage: %s <flags> <subcommand> <subcommand args>\n\n", h.cdr.Name())
- fmt.Fprintf(h.cdr.Output, `runsc is a command line client for running applications packaged in the Open
-Container Initiative (OCI) format. Applications run by runsc are run in an
-isolated gVisor sandbox that emulates a Linux environment.
+ fmt.Fprintf(h.cdr.Output, `runsc is the gVisor container runtime.
-gVisor is a user-space kernel, written in Go, that implements a substantial
-portion of the Linux system call interface. It provides an additional layer
-of isolation between running applications and the host operating system.
-
-Functionality is provided by subcommands. For additonal help on individual
-subcommands use "%s %s <subcommand>".
+Functionality is provided by subcommands. For help with a specific subcommand,
+use "%s %s <subcommand>".
`, h.cdr.Name(), h.Name())
h.cdr.VisitGroups(func(g *subcommands.CommandGroup) {
diff --git a/runsc/cmd/install.go b/runsc/cmd/install.go
index 441c1db0d..2e223e3be 100644
--- a/runsc/cmd/install.go
+++ b/runsc/cmd/install.go
@@ -23,8 +23,8 @@ import (
"os"
"path"
- "flag"
"github.com/google/subcommands"
+ "gvisor.dev/gvisor/runsc/flag"
)
// Install implements subcommands.Command.
diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go
index 6c1f197a6..8282ea0e0 100644
--- a/runsc/cmd/kill.go
+++ b/runsc/cmd/kill.go
@@ -21,11 +21,11 @@ import (
"strings"
"syscall"
- "flag"
"github.com/google/subcommands"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// Kill implements subcommands.Command for the "kill" command.
diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go
index dd2d99a6b..d8d906fe3 100644
--- a/runsc/cmd/list.go
+++ b/runsc/cmd/list.go
@@ -22,11 +22,11 @@ import (
"text/tabwriter"
"time"
- "flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// List implements subcommands.Command for the "list" command.
diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go
index 9c0e92001..6f95a9837 100644
--- a/runsc/cmd/pause.go
+++ b/runsc/cmd/pause.go
@@ -17,10 +17,10 @@ package cmd
import (
"context"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// Pause implements subcommands.Command for the "pause" command.
diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go
index 45c644f3f..7fb8041af 100644
--- a/runsc/cmd/ps.go
+++ b/runsc/cmd/ps.go
@@ -18,11 +18,11 @@ import (
"context"
"fmt"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// PS implements subcommands.Command for the "ps" command.
diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go
index 7be60cd7d..72584b326 100644
--- a/runsc/cmd/restore.go
+++ b/runsc/cmd/restore.go
@@ -19,10 +19,10 @@ import (
"path/filepath"
"syscall"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/specutils"
)
diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go
index b2df5c640..61a55a554 100644
--- a/runsc/cmd/resume.go
+++ b/runsc/cmd/resume.go
@@ -17,10 +17,10 @@ package cmd
import (
"context"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// Resume implements subcommands.Command for the "resume" command.
diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go
index 33f4bc12b..cf41581ad 100644
--- a/runsc/cmd/run.go
+++ b/runsc/cmd/run.go
@@ -18,10 +18,10 @@ import (
"context"
"syscall"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/specutils"
)
diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go
index 344da13ba..55194e641 100644
--- a/runsc/cmd/spec.go
+++ b/runsc/cmd/spec.go
@@ -16,118 +16,122 @@ package cmd
import (
"context"
- "io/ioutil"
+ "encoding/json"
+ "io"
"os"
"path/filepath"
- "flag"
"github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/runsc/flag"
)
-var specTemplate = []byte(`{
- "ociVersion": "1.0.0",
- "process": {
- "terminal": true,
- "user": {
- "uid": 0,
- "gid": 0
- },
- "args": [
- "sh"
- ],
- "env": [
- "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
- "TERM=xterm"
- ],
- "cwd": "/",
- "capabilities": {
- "bounding": [
- "CAP_AUDIT_WRITE",
- "CAP_KILL",
- "CAP_NET_BIND_SERVICE"
- ],
- "effective": [
- "CAP_AUDIT_WRITE",
- "CAP_KILL",
- "CAP_NET_BIND_SERVICE"
- ],
- "inheritable": [
- "CAP_AUDIT_WRITE",
- "CAP_KILL",
- "CAP_NET_BIND_SERVICE"
- ],
- "permitted": [
- "CAP_AUDIT_WRITE",
- "CAP_KILL",
- "CAP_NET_BIND_SERVICE"
- ],
- "ambient": [
- "CAP_AUDIT_WRITE",
- "CAP_KILL",
- "CAP_NET_BIND_SERVICE"
- ]
- },
- "rlimits": [
- {
- "type": "RLIMIT_NOFILE",
- "hard": 1024,
- "soft": 1024
- }
- ]
- },
- "root": {
- "path": "rootfs",
- "readonly": true
- },
- "hostname": "runsc",
- "mounts": [
- {
- "destination": "/proc",
- "type": "proc",
- "source": "proc"
+func writeSpec(w io.Writer, cwd string, netns string, args []string) error {
+ spec := &specs.Spec{
+ Version: "1.0.0",
+ Process: &specs.Process{
+ Terminal: true,
+ User: specs.User{
+ UID: 0,
+ GID: 0,
+ },
+ Args: args,
+ Env: []string{
+ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+ "TERM=xterm",
+ },
+ Cwd: cwd,
+ Capabilities: &specs.LinuxCapabilities{
+ Bounding: []string{
+ "CAP_AUDIT_WRITE",
+ "CAP_KILL",
+ "CAP_NET_BIND_SERVICE",
+ },
+ Effective: []string{
+ "CAP_AUDIT_WRITE",
+ "CAP_KILL",
+ "CAP_NET_BIND_SERVICE",
+ },
+ Inheritable: []string{
+ "CAP_AUDIT_WRITE",
+ "CAP_KILL",
+ "CAP_NET_BIND_SERVICE",
+ },
+ Permitted: []string{
+ "CAP_AUDIT_WRITE",
+ "CAP_KILL",
+ "CAP_NET_BIND_SERVICE",
+ },
+ // TODO(gvisor.dev/issue/3166): support ambient capabilities
+ },
+ Rlimits: []specs.POSIXRlimit{
+ {
+ Type: "RLIMIT_NOFILE",
+ Hard: 1024,
+ Soft: 1024,
+ },
+ },
},
- {
- "destination": "/dev",
- "type": "tmpfs",
- "source": "tmpfs",
- "options": []
+ Root: &specs.Root{
+ Path: "rootfs",
+ Readonly: true,
},
- {
- "destination": "/sys",
- "type": "sysfs",
- "source": "sysfs",
- "options": [
- "nosuid",
- "noexec",
- "nodev",
- "ro"
- ]
- }
- ],
- "linux": {
- "namespaces": [
+ Hostname: "runsc",
+ Mounts: []specs.Mount{
{
- "type": "pid"
+ Destination: "/proc",
+ Type: "proc",
+ Source: "proc",
},
{
- "type": "network"
+ Destination: "/dev",
+ Type: "tmpfs",
+ Source: "tmpfs",
},
{
- "type": "ipc"
+ Destination: "/sys",
+ Type: "sysfs",
+ Source: "sysfs",
+ Options: []string{
+ "nosuid",
+ "noexec",
+ "nodev",
+ "ro",
+ },
},
- {
- "type": "uts"
+ },
+ Linux: &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {
+ Type: "pid",
+ },
+ {
+ Type: "network",
+ Path: netns,
+ },
+ {
+ Type: "ipc",
+ },
+ {
+ Type: "uts",
+ },
+ {
+ Type: "mount",
+ },
},
- {
- "type": "mount"
- }
- ]
+ },
}
-}`)
+
+ e := json.NewEncoder(w)
+ e.SetIndent("", " ")
+ return e.Encode(spec)
+}
// Spec implements subcommands.Command for the "spec" command.
type Spec struct {
bundle string
+ cwd string
+ netns string
}
// Name implements subcommands.Command.Name.
@@ -142,21 +146,26 @@ func (*Spec) Synopsis() string {
// Usage implements subcommands.Command.Usage.
func (*Spec) Usage() string {
- return `spec [options] - create a new OCI bundle specification file.
+ return `spec [options] [-- args...] - create a new OCI bundle specification file.
+
+The spec command creates a new specification file (config.json) for a new OCI
+bundle.
-The spec command creates a new specification file (config.json) for a new OCI bundle.
+The specification file is a starter file that runs the command specified by
+'args' in the container. If 'args' is not specified the default is to run the
+'sh' program.
-The specification file is a starter file that runs the "sh" command in the container. You
-should edit the file to suit your needs. You can find out more about the format of the
-specification file by visiting the OCI runtime spec repository:
+While a number of flags are provided to change values in the specification, you
+can examine the file and edit it to suit your needs after this command runs.
+You can find out more about the format of the specification file by visiting
+the OCI runtime spec repository:
https://github.com/opencontainers/runtime-spec/
EXAMPLE:
$ mkdir -p bundle/rootfs
$ cd bundle
- $ runsc spec
+ $ runsc spec -- /hello
$ docker export $(docker create hello-world) | tar -xf - -C rootfs
- $ sed -i 's;"sh";"/hello";' config.json
$ sudo runsc run hello
`
@@ -165,16 +174,31 @@ EXAMPLE:
// SetFlags implements subcommands.Command.SetFlags.
func (s *Spec) SetFlags(f *flag.FlagSet) {
f.StringVar(&s.bundle, "bundle", ".", "path to the root of the OCI bundle")
+ f.StringVar(&s.cwd, "cwd", "/", "working directory that will be set for the executable, "+
+ "this value MUST be an absolute path")
+ f.StringVar(&s.netns, "netns", "", "network namespace path")
}
// Execute implements subcommands.Command.Execute.
func (s *Spec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ // Grab the arguments.
+ containerArgs := f.Args()
+ if len(containerArgs) == 0 {
+ containerArgs = []string{"sh"}
+ }
+
confPath := filepath.Join(s.bundle, "config.json")
if _, err := os.Stat(confPath); !os.IsNotExist(err) {
Fatalf("file %q already exists", confPath)
}
- if err := ioutil.WriteFile(confPath, specTemplate, 0664); err != nil {
+ configFile, err := os.OpenFile(confPath, os.O_WRONLY|os.O_CREATE, 0664)
+ if err != nil {
+ Fatalf("opening file %q: %v", confPath, err)
+ }
+
+ err = writeSpec(configFile, s.cwd, s.netns, containerArgs)
+ if err != nil {
Fatalf("writing to %q: %v", confPath, err)
}
diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go
index de2115dff..0205fd9f7 100644
--- a/runsc/cmd/start.go
+++ b/runsc/cmd/start.go
@@ -16,10 +16,11 @@ package cmd
import (
"context"
- "flag"
+
"github.com/google/subcommands"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// Start implements subcommands.Command for the "start" command.
diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go
index e9f41cbd8..cf2413deb 100644
--- a/runsc/cmd/state.go
+++ b/runsc/cmd/state.go
@@ -19,11 +19,11 @@ import (
"encoding/json"
"os"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
// State implements subcommands.Command for the "state" command.
diff --git a/runsc/cmd/statefile.go b/runsc/cmd/statefile.go
new file mode 100644
index 000000000..daed9e728
--- /dev/null
+++ b/runsc/cmd/statefile.go
@@ -0,0 +1,149 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "fmt"
+ "os"
+
+ "github.com/google/subcommands"
+ "gvisor.dev/gvisor/pkg/state/pretty"
+ "gvisor.dev/gvisor/pkg/state/statefile"
+ "gvisor.dev/gvisor/runsc/flag"
+)
+
+// Statefile implements subcommands.Command for the "statefile" command.
+type Statefile struct {
+ list bool // -list: list the metadata keys in the statefile.
+ get string // -get: metadata key whose value should be extracted.
+ key string // -key: integrity key for the file.
+ output string // -output: target to write the result to; stdout when empty.
+ html bool // -html: emit HTML instead of plain text.
+}
+
+// Name implements subcommands.Command. It returns the same name that Usage prints.
+func (*Statefile) Name() string {
+	return "statefile"
+}
+
+// Synopsis implements subcommands.Command. It returns a one-line description.
+func (*Statefile) Synopsis() string {
+	return "shows information about a statefile"
+}
+
+// Usage implements subcommands.Command. It returns the invocation syntax.
+func (*Statefile) Usage() string {
+	return `statefile [flags] <statefile>`
+}
+
+// SetFlags implements subcommands.Command, binding each flag on f to a field of s.
+func (s *Statefile) SetFlags(f *flag.FlagSet) {
+	f.BoolVar(&s.list, "list", false, "lists the metadata in the statefile.")
+	f.StringVar(&s.get, "get", "", "extracts the given metadata key.")
+	f.StringVar(&s.key, "key", "", "the integrity key for the file.")
+	f.StringVar(&s.output, "output", "", "target to write the result.")
+	f.BoolVar(&s.html, "html", false, "outputs in HTML format.")
+}
+
+// Execute implements subcommands.Command.Execute. With neither -list nor -get it
+// pretty-prints the whole statefile; otherwise it reads only the metadata and
+// prints either the value stored under the -get key or every metadata key.
+func (s *Statefile) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+	// Check arguments before touching any file, so that a usage error can
+	// never create or truncate the -output target.
+	if s.list && s.get != "" {
+		Fatalf("error: can't specify -list and -get simultaneously.")
+	}
+	if f.NArg() != 1 {
+		f.Usage()
+		return subcommands.ExitUsageError
+	}
+
+	// Open the file.
+	input, err := os.Open(f.Arg(0))
+	if err != nil {
+		Fatalf("error opening input: %v\n", err)
+	}
+	defer input.Close() // Read-only handle; a close error is not actionable.
+
+	// Setup output.
+	var output = os.Stdout // Default.
+	if s.output != "" {
+		out, err := os.OpenFile(s.output, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
+		if err != nil {
+			Fatalf("error opening output: %v", err)
+		}
+		defer func() {
+			if err := out.Close(); err != nil {
+				Fatalf("error flushing output: %v", err)
+			}
+		}()
+		output = out
+	}
+
+	if s.html {
+		fmt.Fprintf(output, "<html><body>\n")
+		defer fmt.Fprintf(output, "</body></html>\n")
+	}
+
+	// Dump the full file?
+	if !s.list && s.get == "" {
+		var key []byte
+		if s.key != "" {
+			key = []byte(s.key)
+		}
+		rc, _, err := statefile.NewReader(input, key)
+		if err != nil {
+			Fatalf("error parsing statefile: %v", err)
+		}
+		if s.html {
+			if err := pretty.PrintHTML(output, rc); err != nil {
+				Fatalf("error printing state: %v", err)
+			}
+		} else {
+			if err := pretty.PrintText(output, rc); err != nil {
+				Fatalf("error printing state: %v", err)
+			}
+		}
+		return subcommands.ExitSuccess
+	}
+
+	// Load just the metadata.
+	metadata, err := statefile.MetadataUnsafe(input)
+	if err != nil {
+		Fatalf("error reading metadata: %v", err)
+	}
+
+	// Is it a single key?
+	if s.get != "" {
+		val, ok := metadata[s.get]
+		if !ok {
+			Fatalf("metadata key %s: not found", s.get)
+		}
+		fmt.Fprintf(output, "%s\n", val)
+		return subcommands.ExitSuccess
+	}
+
+	// List all keys.
+	if s.html {
+		fmt.Fprintf(output, " <ul>\n")
+		defer fmt.Fprintf(output, " </ul>\n")
+	}
+	for key := range metadata {
+		if s.html {
+			fmt.Fprintf(output, " <li>%s</li>\n", key)
+		} else {
+			fmt.Fprintf(output, "%s\n", key)
+		}
+	}
+	return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go
index fb6c1ab29..a37d66139 100644
--- a/runsc/cmd/syscalls.go
+++ b/runsc/cmd/syscalls.go
@@ -25,16 +25,17 @@ import (
"strconv"
"text/tabwriter"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/runsc/flag"
)
// Syscalls implements subcommands.Command for the "syscalls" command.
type Syscalls struct {
- output string
- os string
- arch string
+ format string
+ os string
+ arch string
+ filename string
}
// CompatibilityInfo is a map of system and architecture to compatibility doc.
@@ -95,16 +96,17 @@ func (*Syscalls) Usage() string {
// SetFlags implements subcommands.Command.SetFlags.
func (s *Syscalls) SetFlags(f *flag.FlagSet) {
- f.StringVar(&s.output, "o", "table", "Output format (table, csv, json).")
+ f.StringVar(&s.format, "format", "table", "Output format (table, csv, json).")
f.StringVar(&s.os, "os", osAll, "The OS (e.g. linux)")
f.StringVar(&s.arch, "arch", archAll, "The CPU architecture (e.g. amd64).")
+ f.StringVar(&s.filename, "filename", "", "Output filename (otherwise stdout).")
}
// Execute implements subcommands.Command.Execute.
func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- out, ok := outputMap[s.output]
+ out, ok := outputMap[s.format]
if !ok {
- Fatalf("Unsupported output format %q", s.output)
+ Fatalf("Unsupported output format %q", s.format)
}
// Build map of all supported architectures.
@@ -124,7 +126,14 @@ func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface
Fatalf("%v", err)
}
- if err := out(os.Stdout, info); err != nil {
+ w := os.Stdout // Default.
+ if s.filename != "" {
+ w, err = os.OpenFile(s.filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
+ if err != nil {
+ Fatalf("Error opening %q: %v", s.filename, err)
+ }
+ }
+ if err := out(w, info); err != nil {
Fatalf("Error writing output: %v", err)
}
diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go
index 046489687..29c0a15f0 100644
--- a/runsc/cmd/wait.go
+++ b/runsc/cmd/wait.go
@@ -20,10 +20,10 @@ import (
"os"
"syscall"
- "flag"
"github.com/google/subcommands"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
)
const (
diff --git a/runsc/console/BUILD b/runsc/console/BUILD
index e623c1a0f..06924bccd 100644
--- a/runsc/console/BUILD
+++ b/runsc/console/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
@@ -7,7 +7,6 @@ go_library(
srcs = [
"console.go",
],
- importpath = "gvisor.dev/gvisor/runsc/console",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 26d1cd5ab..9a9ee7e2a 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -7,60 +7,68 @@ go_library(
srcs = [
"container.go",
"hook.go",
+ "state_file.go",
"status.go",
],
- importpath = "gvisor.dev/gvisor/runsc/container",
visibility = [
"//runsc:__subpackages__",
"//test:__subpackages__",
],
deps = [
+ "//pkg/abi/linux",
+ "//pkg/cleanup",
"//pkg/log",
"//pkg/sentry/control",
+ "//pkg/sentry/sighandling",
+ "//pkg/sync",
"//runsc/boot",
"//runsc/cgroup",
"//runsc/sandbox",
"//runsc/specutils",
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_gofrs_flock//:go_default_library",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
],
)
go_test(
name = "container_test",
- size = "medium",
+ size = "large",
srcs = [
"console_test.go",
+ "container_norace_test.go",
+ "container_race_test.go",
"container_test.go",
"multi_container_test.go",
"shared_volume_test.go",
],
data = [
"//runsc",
- "//runsc/container/test_app",
+ "//test/cmd/test_app",
],
- embed = [":container"],
- shard_count = 5,
+ library = ":container",
+ shard_count = 10,
tags = [
"requires-kvm",
],
deps = [
"//pkg/abi/linux",
"//pkg/bits",
+ "//pkg/cleanup",
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
+ "//pkg/sync",
+ "//pkg/test/testutil",
"//pkg/unet",
"//pkg/urpc",
"//runsc/boot",
"//runsc/boot/platforms",
"//runsc/specutils",
- "//runsc/testutil",
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_kr_pty//:go_default_library",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index 7d67c3a75..995d4e267 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -20,7 +20,6 @@ import (
"io"
"os"
"path/filepath"
- "sync"
"syscall"
"testing"
"time"
@@ -28,9 +27,11 @@ import (
"github.com/kr/pty"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/test/testutil"
"gvisor.dev/gvisor/pkg/unet"
"gvisor.dev/gvisor/pkg/urpc"
- "gvisor.dev/gvisor/runsc/testutil"
)
// socketPath creates a path inside bundleDir and ensures that the returned
@@ -57,25 +58,26 @@ func socketPath(bundleDir string) (string, error) {
}
// createConsoleSocket creates a socket at the given path that will receive a
-// console fd from the sandbox. If no error occurs, it returns the server
-// socket and a cleanup function.
-func createConsoleSocket(path string) (*unet.ServerSocket, func() error, error) {
+// console fd from the sandbox. If an error occurs, t.Fatalf will be called.
+// The returned function should be deferred as cleanup.
+func createConsoleSocket(t *testing.T, path string) (*unet.ServerSocket, func()) {
+ t.Helper()
srv, err := unet.BindAndListen(path, false)
if err != nil {
- return nil, nil, fmt.Errorf("error binding and listening to socket %q: %v", path, err)
+ t.Fatalf("error binding and listening to socket %q: %v", path, err)
}
- cleanup := func() error {
+ cleanup := func() {
+ // Log errors; nothing can be done.
if err := srv.Close(); err != nil {
- return fmt.Errorf("error closing socket %q: %v", path, err)
+ t.Logf("error closing socket %q: %v", path, err)
}
if err := os.Remove(path); err != nil {
- return fmt.Errorf("error removing socket %q: %v", path, err)
+ t.Logf("error removing socket %q: %v", path, err)
}
- return nil
}
- return srv, cleanup, nil
+ return srv, cleanup
}
// receiveConsolePTY accepts a connection on the server socket and reads fds.
@@ -117,63 +119,60 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) {
// Test that a pty FD is sent over the console socket if one is provided.
func TestConsoleSocket(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
- spec := testutil.NewSpecWithArgs("true")
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ spec := testutil.NewSpecWithArgs("true")
+ spec.Process.Terminal = true
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- sock, err := socketPath(bundleDir)
- if err != nil {
- t.Fatalf("error getting socket path: %v", err)
- }
- srv, cleanup, err := createConsoleSocket(sock)
- if err != nil {
- t.Fatalf("error creating socket at %q: %v", sock, err)
- }
- defer cleanup()
-
- // Create the container and pass the socket name.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- ConsoleSocket: sock,
- }
- c, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer c.Destroy()
+ sock, err := socketPath(bundleDir)
+ if err != nil {
+ t.Fatalf("error getting socket path: %v", err)
+ }
+ srv, cleanup := createConsoleSocket(t, sock)
+ defer cleanup()
+
+ // Create the container and pass the socket name.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: sock,
+ }
+ c, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
- // Make sure we get a console PTY.
- ptyMaster, err := receiveConsolePTY(srv)
- if err != nil {
- t.Fatalf("error receiving console FD: %v", err)
- }
- ptyMaster.Close()
+ // Make sure we get a console PTY.
+ ptyMaster, err := receiveConsolePTY(srv)
+ if err != nil {
+ t.Fatalf("error receiving console FD: %v", err)
+ }
+ ptyMaster.Close()
+ })
}
}
// Test that job control signals work on a console created with "exec -ti".
func TestJobControlSignalExec(t *testing.T) {
spec := testutil.NewSpecWithArgs("/bin/sleep", "10000")
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create and start the container.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
}
@@ -195,7 +194,10 @@ func TestJobControlSignalExec(t *testing.T) {
defer ptyMaster.Close()
defer ptySlave.Close()
- // Exec bash and attach a terminal.
+ // Exec bash and attach a terminal. Note that occasionally /bin/sh
+ // may be a different shell or have a different configuration (such
+ // as disabling interactive mode and job control). Since we want to
+ // explicitly test interactive mode, use /bin/bash. See b/116981926.
execArgs := &control.ExecArgs{
Filename: "/bin/bash",
// Don't let bash execute from profile or rc files, otherwise
@@ -219,9 +221,9 @@ func TestJobControlSignalExec(t *testing.T) {
// Make sure all the processes are running.
expectedPL := []*control.Process{
// Root container process.
- {PID: 1, Cmd: "sleep"},
+ {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
// Bash from exec process.
- {PID: 2, Cmd: "bash"},
+ {PID: 2, Cmd: "bash", Threads: []kernel.ThreadID{2}},
}
if err := waitForProcessList(c, expectedPL); err != nil {
t.Error(err)
@@ -231,7 +233,7 @@ func TestJobControlSignalExec(t *testing.T) {
ptyMaster.Write([]byte("sleep 100\n"))
// Wait for it to start. Sleep's PPID is bash's PID.
- expectedPL = append(expectedPL, &control.Process{PID: 3, PPID: 2, Cmd: "sleep"})
+ expectedPL = append(expectedPL, &control.Process{PID: 3, PPID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{3}})
if err := waitForProcessList(c, expectedPL); err != nil {
t.Error(err)
}
@@ -282,32 +284,28 @@ func TestJobControlSignalExec(t *testing.T) {
// Test that job control signals work on a console created with "run -ti".
func TestJobControlSignalRootContainer(t *testing.T) {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
// Don't let bash execute from profile or rc files, otherwise our PID
// counts get messed up.
spec := testutil.NewSpecWithArgs("/bin/bash", "--noprofile", "--norc")
spec.Process.Terminal = true
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
sock, err := socketPath(bundleDir)
if err != nil {
t.Fatalf("error getting socket path: %v", err)
}
- srv, cleanup, err := createConsoleSocket(sock)
- if err != nil {
- t.Fatalf("error creating socket at %q: %v", sock, err)
- }
+ srv, cleanup := createConsoleSocket(t, sock)
defer cleanup()
// Create the container and pass the socket name.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
ConsoleSocket: sock,
@@ -329,13 +327,13 @@ func TestJobControlSignalRootContainer(t *testing.T) {
// file. Writes after a certain point will block unless we drain the
// PTY, so we must continually copy from it.
//
- // We log the output to stdout for debugabilitly, and also to a buffer,
+ // We log the output to stderr for debuggability, and also to a buffer,
// since we wait on particular output from bash below. We use a custom
// blockingBuffer which is thread-safe and also blocks on Read calls,
// which makes this a suitable Reader for WaitUntilRead.
ptyBuf := newBlockingBuffer()
tee := io.TeeReader(ptyMaster, ptyBuf)
- go io.Copy(os.Stdout, tee)
+ go io.Copy(os.Stderr, tee)
// Start the container.
if err := c.Start(conf); err != nil {
@@ -361,19 +359,19 @@ func TestJobControlSignalRootContainer(t *testing.T) {
// Wait for bash to start.
expectedPL := []*control.Process{
- {PID: 1, Cmd: "bash"},
+ {PID: 1, Cmd: "bash", Threads: []kernel.ThreadID{1}},
}
if err := waitForProcessList(c, expectedPL); err != nil {
- t.Fatal(err)
+ t.Fatalf("error waiting for processes: %v", err)
}
// Execute sleep via the terminal.
ptyMaster.Write([]byte("sleep 100\n"))
// Wait for sleep to start.
- expectedPL = append(expectedPL, &control.Process{PID: 2, PPID: 1, Cmd: "sleep"})
+ expectedPL = append(expectedPL, &control.Process{PID: 2, PPID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{2}})
if err := waitForProcessList(c, expectedPL); err != nil {
- t.Fatal(err)
+ t.Fatalf("error waiting for processes: %v", err)
}
// Reset the pty buffer, so there is less output for us to scan later.
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 32510d427..7ad09bf23 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -17,13 +17,11 @@ package container
import (
"context"
- "encoding/json"
+ "errors"
"fmt"
"io/ioutil"
"os"
"os/exec"
- "os/signal"
- "path/filepath"
"regexp"
"strconv"
"strings"
@@ -31,27 +29,18 @@ import (
"time"
"github.com/cenkalti/backoff"
- "github.com/gofrs/flock"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/sighandling"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/cgroup"
"gvisor.dev/gvisor/runsc/sandbox"
"gvisor.dev/gvisor/runsc/specutils"
)
-const (
- // metadataFilename is the name of the metadata file relative to the
- // container root directory that holds sandbox metadata.
- metadataFilename = "meta.json"
-
- // metadataLockFilename is the name of a lock file in the container
- // root directory that is used to prevent concurrent modifications to
- // the container state and metadata.
- metadataLockFilename = "meta.lock"
-)
-
// validateID validates the container id.
func validateID(id string) error {
// See libcontainer/factory_linux.go.
@@ -99,11 +88,6 @@ type Container struct {
// BundleDir is the directory containing the container bundle.
BundleDir string `json:"bundleDir"`
- // Root is the directory containing the container metadata file. If this
- // container is the root container, Root and RootContainerDir will be the
- // same.
- Root string `json:"root"`
-
// CreatedAt is the time the container was created.
CreatedAt time.Time `json:"createdAt"`
@@ -121,21 +105,24 @@ type Container struct {
// be 0 if the gofer has been killed.
GoferPid int `json:"goferPid"`
+ // Sandbox is the sandbox this container is running in. It's set when the
+ // container is created and reset when the sandbox is destroyed.
+ Sandbox *sandbox.Sandbox `json:"sandbox"`
+
+ // Saver handles load from/save to the state file safely from multiple
+ // processes.
+ Saver StateFile `json:"saver"`
+
+ //
+ // Fields below this line are not saved in the state file and will not
+ // be preserved across commands.
+ //
+
// goferIsChild is set if a gofer process is a child of the current process.
//
// This field isn't saved to json, because only a creator of a gofer
// process will have it as a child process.
goferIsChild bool
-
- // Sandbox is the sandbox this container is running in. It's set when the
- // container is created and reset when the sandbox is destroyed.
- Sandbox *sandbox.Sandbox `json:"sandbox"`
-
- // RootContainerDir is the root directory containing the metadata file of the
- // sandbox root container. It's used to lock in order to serialize creating
- // and deleting this Container's metadata directory. If this container is the
- // root container, this is the same as Root.
- RootContainerDir string
}
// loadSandbox loads all containers that belong to the sandbox with the given
@@ -166,43 +153,35 @@ func loadSandbox(rootDir, id string) ([]*Container, error) {
return containers, nil
}
-// Load loads a container with the given id from a metadata file. id may be an
-// abbreviation of the full container id, in which case Load loads the
-// container to which id unambiguously refers to.
-// Returns ErrNotExist if container doesn't exist.
-func Load(rootDir, id string) (*Container, error) {
- log.Debugf("Load container %q %q", rootDir, id)
- if err := validateID(id); err != nil {
+// Load loads a container with the given id from a metadata file. partialID may
+// be an abbreviation of the full container id, in which case Load loads the
+// container to which partialID unambiguously refers. Returns ErrNotExist if
+// the container doesn't exist.
+func Load(rootDir, partialID string) (*Container, error) {
+ log.Debugf("Load container %q %q", rootDir, partialID)
+ if err := validateID(partialID); err != nil {
return nil, fmt.Errorf("validating id: %v", err)
}
- cRoot, err := findContainerRoot(rootDir, id)
+ id, err := findContainerID(rootDir, partialID)
if err != nil {
// Preserve error so that callers can distinguish 'not found' errors.
return nil, err
}
- // Lock the container metadata to prevent other runsc instances from
- // writing to it while we are reading it.
- unlock, err := lockContainerMetadata(cRoot)
- if err != nil {
- return nil, err
+ state := StateFile{
+ RootDir: rootDir,
+ ID: id,
}
- defer unlock()
+ defer state.close()
- // Read the container metadata file and create a new Container from it.
- metaFile := filepath.Join(cRoot, metadataFilename)
- metaBytes, err := ioutil.ReadFile(metaFile)
- if err != nil {
+ c := &Container{}
+ if err := state.load(c); err != nil {
if os.IsNotExist(err) {
// Preserve error so that callers can distinguish 'not found' errors.
return nil, err
}
- return nil, fmt.Errorf("reading container metadata file %q: %v", metaFile, err)
- }
- var c Container
- if err := json.Unmarshal(metaBytes, &c); err != nil {
- return nil, fmt.Errorf("unmarshaling container metadata from %q: %v", metaFile, err)
+ return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err)
}
// If the status is "Running" or "Created", check that the sandbox
@@ -223,57 +202,37 @@ func Load(rootDir, id string) (*Container, error) {
}
}
- return &c, nil
+ return c, nil
}
-func findContainerRoot(rootDir, partialID string) (string, error) {
+func findContainerID(rootDir, partialID string) (string, error) {
// Check whether the id fully specifies an existing container.
- cRoot := filepath.Join(rootDir, partialID)
- if _, err := os.Stat(cRoot); err == nil {
- return cRoot, nil
+ stateFile := buildStatePath(rootDir, partialID)
+ if _, err := os.Stat(stateFile); err == nil {
+ return partialID, nil
}
// Now see whether id could be an abbreviation of exactly 1 of the
// container ids. If id is ambiguous (it could match more than 1
// container), it is an error.
- cRoot = ""
ids, err := List(rootDir)
if err != nil {
return "", err
}
+ rv := ""
for _, id := range ids {
if strings.HasPrefix(id, partialID) {
- if cRoot != "" {
- return "", fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, cRoot, id)
+ if rv != "" {
+ return "", fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id)
}
- cRoot = id
+ rv = id
}
}
- if cRoot == "" {
+ if rv == "" {
return "", os.ErrNotExist
}
- log.Debugf("abbreviated id %q resolves to full id %q", partialID, cRoot)
- return filepath.Join(rootDir, cRoot), nil
-}
-
-// List returns all container ids in the given root directory.
-func List(rootDir string) ([]string, error) {
- log.Debugf("List containers %q", rootDir)
- fs, err := ioutil.ReadDir(rootDir)
- if err != nil {
- return nil, fmt.Errorf("reading dir %q: %v", rootDir, err)
- }
- var out []string
- for _, f := range fs {
- // Filter out directories that do no belong to a container.
- cid := f.Name()
- if validateID(cid) == nil {
- if _, err := os.Stat(filepath.Join(rootDir, cid, metadataFilename)); err == nil {
- out = append(out, f.Name())
- }
- }
- }
- return out, nil
+ log.Debugf("abbreviated id %q resolves to full id %q", partialID, rv)
+ return rv, nil
}
// Args is used to configure a new container.
@@ -316,44 +275,34 @@ func New(conf *boot.Config, args Args) (*Container, error) {
return nil, err
}
- unlockRoot, err := maybeLockRootContainer(args.Spec, conf.RootDir)
- if err != nil {
- return nil, err
+ if err := os.MkdirAll(conf.RootDir, 0711); err != nil {
+ return nil, fmt.Errorf("creating container root directory %q: %v", conf.RootDir, err)
}
- defer unlockRoot()
+
+ c := &Container{
+ ID: args.ID,
+ Spec: args.Spec,
+ ConsoleSocket: args.ConsoleSocket,
+ BundleDir: args.BundleDir,
+ Status: Creating,
+ CreatedAt: time.Now(),
+ Owner: os.Getenv("USER"),
+ Saver: StateFile{
+ RootDir: conf.RootDir,
+ ID: args.ID,
+ },
+ }
+ // The Cleanup object cleans up partially created containers when an error
+ // occurs. Any errors occurring during cleanup itself are ignored.
+ cu := cleanup.Make(func() { _ = c.Destroy() })
+ defer cu.Clean()
// Lock the container metadata file to prevent concurrent creations of
// containers with the same id.
- containerRoot := filepath.Join(conf.RootDir, args.ID)
- unlock, err := lockContainerMetadata(containerRoot)
- if err != nil {
+ if err := c.Saver.lockForNew(); err != nil {
return nil, err
}
- defer unlock()
-
- // Check if the container already exists by looking for the metadata
- // file.
- if _, err := os.Stat(filepath.Join(containerRoot, metadataFilename)); err == nil {
- return nil, fmt.Errorf("container with id %q already exists", args.ID)
- } else if !os.IsNotExist(err) {
- return nil, fmt.Errorf("looking for existing container in %q: %v", containerRoot, err)
- }
-
- c := &Container{
- ID: args.ID,
- Spec: args.Spec,
- ConsoleSocket: args.ConsoleSocket,
- BundleDir: args.BundleDir,
- Root: containerRoot,
- Status: Creating,
- CreatedAt: time.Now(),
- Owner: os.Getenv("USER"),
- RootContainerDir: conf.RootDir,
- }
- // The Cleanup object cleans up partially created containers when an error occurs.
- // Any errors occuring during cleanup itself are ignored.
- cu := specutils.MakeCleanup(func() { _ = c.Destroy() })
- defer cu.Clean()
+ defer c.Saver.unlock()
// If the metadata annotations indicate that this container should be
// started in an existing sandbox, we must do so. The metadata will
@@ -375,7 +324,7 @@ func New(conf *boot.Config, args Args) (*Container, error) {
}
}
if err := runInCgroup(cg, func() error {
- ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir)
+ ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir, args.Attached)
if err != nil {
return err
}
@@ -431,7 +380,7 @@ func New(conf *boot.Config, args Args) (*Container, error) {
c.changeStatus(Created)
// Save the metadata file.
- if err := c.save(); err != nil {
+ if err := c.saveLocked(); err != nil {
return nil, err
}
@@ -451,17 +400,12 @@ func New(conf *boot.Config, args Args) (*Container, error) {
func (c *Container) Start(conf *boot.Config) error {
log.Debugf("Start container %q", c.ID)
- unlockRoot, err := maybeLockRootContainer(c.Spec, c.RootContainerDir)
- if err != nil {
+ if err := c.Saver.lock(); err != nil {
return err
}
- defer unlockRoot()
+ unlock := cleanup.Make(func() { c.Saver.unlock() })
+ defer unlock.Clean()
- unlock, err := c.lock()
- if err != nil {
- return err
- }
- defer unlock()
if err := c.requireStatus("start", Created); err != nil {
return err
}
@@ -479,11 +423,11 @@ func (c *Container) Start(conf *boot.Config) error {
return err
}
} else {
- // Join cgroup to strt gofer process to ensure it's part of the cgroup from
+ // Join cgroup to start gofer process to ensure it's part of the cgroup from
// the start (and all their children processes).
if err := runInCgroup(c.Sandbox.Cgroup, func() error {
// Create the gofer process.
- ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir)
+ ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir, false)
if err != nil {
return err
}
@@ -509,14 +453,15 @@ func (c *Container) Start(conf *boot.Config) error {
}
c.changeStatus(Running)
- if err := c.save(); err != nil {
+ if err := c.saveLocked(); err != nil {
return err
}
- // Adjust the oom_score_adj for sandbox. This must be done after
- // save().
- err = adjustSandboxOOMScoreAdj(c.Sandbox, c.RootContainerDir, false)
- if err != nil {
+ // Release lock before adjusting OOM score because the lock is acquired there.
+ unlock.Clean()
+
+ // Adjust the oom_score_adj for sandbox. This must be done after saveLocked().
+ if err := adjustSandboxOOMScoreAdj(c.Sandbox, c.Saver.RootDir, false); err != nil {
return err
}
@@ -529,11 +474,10 @@ func (c *Container) Start(conf *boot.Config) error {
// to restore a container from its state file.
func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile string) error {
log.Debugf("Restore container %q", c.ID)
- unlock, err := c.lock()
- if err != nil {
+ if err := c.Saver.lock(); err != nil {
return err
}
- defer unlock()
+ defer c.Saver.unlock()
if err := c.requireStatus("restore", Created); err != nil {
return err
@@ -551,7 +495,7 @@ func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile str
return err
}
c.changeStatus(Running)
- return c.save()
+ return c.saveLocked()
}
// Run is a helper that calls Create + Start + Wait.
@@ -563,7 +507,7 @@ func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) {
}
// Clean up partially created container if an error occurs.
// Any errors returned by Destroy() itself are ignored.
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
c.Destroy()
})
defer cu.Clean()
@@ -679,21 +623,15 @@ func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error {
// forwarding signals.
func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() {
log.Debugf("Forwarding all signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
- sigCh := make(chan os.Signal, 1)
- signal.Notify(sigCh)
- go func() {
- for s := range sigCh {
- log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", s, c.ID, pid, fgProcess)
- if err := c.Sandbox.SignalProcess(c.ID, pid, s.(syscall.Signal), fgProcess); err != nil {
- log.Warningf("error forwarding signal %d to container %q: %v", s, c.ID, err)
- }
+ stop := sighandling.StartSignalForwarding(func(sig linux.Signal) {
+ log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", sig, c.ID, pid, fgProcess)
+ if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.Signal(sig), fgProcess); err != nil {
+ log.Warningf("error forwarding signal %d to container %q: %v", sig, c.ID, err)
}
- log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
- }()
-
+ })
return func() {
- signal.Stop(sigCh)
- close(sigCh)
+ log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
+ stop()
}
}
@@ -711,11 +649,10 @@ func (c *Container) Checkpoint(f *os.File) error {
// The call only succeeds if the container's status is created or running.
func (c *Container) Pause() error {
log.Debugf("Pausing container %q", c.ID)
- unlock, err := c.lock()
- if err != nil {
+ if err := c.Saver.lock(); err != nil {
return err
}
- defer unlock()
+ defer c.Saver.unlock()
if c.Status != Created && c.Status != Running {
return fmt.Errorf("cannot pause container %q in state %v", c.ID, c.Status)
@@ -725,18 +662,17 @@ func (c *Container) Pause() error {
return fmt.Errorf("pausing container: %v", err)
}
c.changeStatus(Paused)
- return c.save()
+ return c.saveLocked()
}
// Resume unpauses the container and its kernel.
// The call only succeeds if the container's status is paused.
func (c *Container) Resume() error {
log.Debugf("Resuming container %q", c.ID)
- unlock, err := c.lock()
- if err != nil {
+ if err := c.Saver.lock(); err != nil {
return err
}
- defer unlock()
+ defer c.Saver.unlock()
if c.Status != Paused {
return fmt.Errorf("cannot resume container %q in state %v", c.ID, c.Status)
@@ -745,7 +681,7 @@ func (c *Container) Resume() error {
return fmt.Errorf("resuming container: %v", err)
}
c.changeStatus(Running)
- return c.save()
+ return c.saveLocked()
}
// State returns the metadata of the container.
@@ -773,6 +709,17 @@ func (c *Container) Processes() ([]*control.Process, error) {
func (c *Container) Destroy() error {
log.Debugf("Destroy container %q", c.ID)
+ if err := c.Saver.lock(); err != nil {
+ return err
+ }
+ defer func() {
+ c.Saver.unlock()
+ c.Saver.close()
+ }()
+
+ // Stored for later use as stop() sets c.Sandbox to nil.
+ sb := c.Sandbox
+
// We must perform the following cleanup steps:
// * stop the container and gofer processes,
// * remove the container filesystem on the host, and
@@ -782,48 +729,43 @@ func (c *Container) Destroy() error {
// do our best to perform all of the cleanups. Hence, we keep a slice
// of errors return their concatenation.
var errs []string
-
- unlock, err := maybeLockRootContainer(c.Spec, c.RootContainerDir)
- if err != nil {
- return err
- }
- defer unlock()
-
- // Stored for later use as stop() sets c.Sandbox to nil.
- sb := c.Sandbox
-
if err := c.stop(); err != nil {
err = fmt.Errorf("stopping container: %v", err)
log.Warningf("%v", err)
errs = append(errs, err.Error())
}
- if err := os.RemoveAll(c.Root); err != nil && !os.IsNotExist(err) {
- err = fmt.Errorf("deleting container root directory %q: %v", c.Root, err)
+ if err := c.Saver.destroy(); err != nil {
+ err = fmt.Errorf("deleting container state files: %v", err)
log.Warningf("%v", err)
errs = append(errs, err.Error())
}
c.changeStatus(Stopped)
- // Adjust oom_score_adj for the sandbox. This must be done after the
- // container is stopped and the directory at c.Root is removed.
- // We must test if the sandbox is nil because Destroy should be
- // idempotent.
- if sb != nil {
- if err := adjustSandboxOOMScoreAdj(sb, c.RootContainerDir, true); err != nil {
+ // Adjust oom_score_adj for the sandbox. This must be done after the container
+ // is stopped and its state files are removed. Adjustment can be
+ // skipped if the root container is exiting, because it brings down the entire
+ // sandbox.
+ //
+ // Use 'sb' to tell whether it has been executed before because Destroy must
+ // be idempotent.
+ if sb != nil && !isRoot(c.Spec) {
+ if err := adjustSandboxOOMScoreAdj(sb, c.Saver.RootDir, true); err != nil {
errs = append(errs, err.Error())
}
}
// "If any poststop hook fails, the runtime MUST log a warning, but the
- // remaining hooks and lifecycle continue as if the hook had succeeded" -OCI spec.
- // Based on the OCI, "The post-stop hooks MUST be called after the container is
- // deleted but before the delete operation returns"
+ // remaining hooks and lifecycle continue as if the hook had
+ // succeeded" - OCI spec.
+ //
+ // Based on the OCI, "The post-stop hooks MUST be called after the container
+ // is deleted but before the delete operation returns"
// Run it here to:
// 1) Conform to the OCI.
- // 2) Make sure it only runs once, because the root has been deleted, the container
- // can't be loaded again.
+ // 2) Make sure it only runs once, because the root has been deleted, the
+ // container can't be loaded again.
if c.Spec.Hooks != nil {
executeHooksBestEffort(c.Spec.Hooks.Poststop, c.State())
}
@@ -834,18 +776,13 @@ func (c *Container) Destroy() error {
return fmt.Errorf(strings.Join(errs, "\n"))
}
-// save saves the container metadata to a file.
+// saveLocked saves the container metadata to a file.
//
-// Precondition: container must be locked with container.lock().
+// Precondition: container must be locked with c.Saver.lock() or
+// c.Saver.lockForNew().
-func (c *Container) save() error {
+func (c *Container) saveLocked() error {
log.Debugf("Save container %q", c.ID)
- metaFile := filepath.Join(c.Root, metadataFilename)
- meta, err := json.Marshal(c)
- if err != nil {
- return fmt.Errorf("invalid container metadata: %v", err)
- }
- if err := ioutil.WriteFile(metaFile, meta, 0640); err != nil {
- return fmt.Errorf("writing container metadata: %v", err)
+ if err := c.Saver.saveLocked(c); err != nil {
+ return fmt.Errorf("saving container metadata: %v", err)
}
return nil
}
@@ -924,7 +861,7 @@ func (c *Container) waitForStopped() error {
return backoff.Retry(op, b)
}
-func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, *os.File, error) {
+func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string, attached bool) ([]*os.File, *os.File, error) {
// Start with the general config flags.
args := conf.ToFlags()
@@ -1018,6 +955,14 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
cmd.ExtraFiles = goferEnds
cmd.Args[0] = "runsc-gofer"
+ if attached {
+ // The gofer is attached to the lifetime of this process, so it
+ // should synchronously die when this process dies.
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Pdeathsig: syscall.SIGKILL,
+ }
+ }
+
// Enter new namespaces to isolate from the rest of the system. Don't unshare
// cgroup because gofer is added to a cgroup in the caller's namespace.
nss := []specs.LinuxNamespace{
@@ -1106,48 +1051,6 @@ func (c *Container) requireStatus(action string, statuses ...Status) error {
return fmt.Errorf("cannot %s container %q in state %s", action, c.ID, c.Status)
}
-// lock takes a file lock on the container metadata lock file.
-func (c *Container) lock() (func() error, error) {
- return lockContainerMetadata(filepath.Join(c.Root, c.ID))
-}
-
-// lockContainerMetadata takes a file lock on the metadata lock file in the
-// given container root directory.
-func lockContainerMetadata(containerRootDir string) (func() error, error) {
- if err := os.MkdirAll(containerRootDir, 0711); err != nil {
- return nil, fmt.Errorf("creating container root directory %q: %v", containerRootDir, err)
- }
- f := filepath.Join(containerRootDir, metadataLockFilename)
- l := flock.NewFlock(f)
- if err := l.Lock(); err != nil {
- return nil, fmt.Errorf("acquiring lock on container lock file %q: %v", f, err)
- }
- return l.Unlock, nil
-}
-
-// maybeLockRootContainer locks the sandbox root container. It is used to
-// prevent races to create and delete child container sandboxes.
-func maybeLockRootContainer(spec *specs.Spec, rootDir string) (func() error, error) {
- if isRoot(spec) {
- return func() error { return nil }, nil
- }
-
- sbid, ok := specutils.SandboxID(spec)
- if !ok {
- return nil, fmt.Errorf("no sandbox ID found when locking root container")
- }
- sb, err := Load(rootDir, sbid)
- if err != nil {
- return nil, err
- }
-
- unlock, err := sb.lock()
- if err != nil {
- return nil, err
- }
- return unlock, nil
-}
-
func isRoot(spec *specs.Spec) bool {
return specutils.SpecContainerType(spec) != specutils.ContainerTypeContainer
}
@@ -1168,22 +1071,19 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error {
-// adjustGoferOOMScoreAdj sets the oom_store_adj for the container's gofer.
+// adjustGoferOOMScoreAdj sets the oom_score_adj for the container's gofer.
func (c *Container) adjustGoferOOMScoreAdj() error {
- if c.GoferPid != 0 && c.Spec.Process.OOMScoreAdj != nil {
- if err := setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj); err != nil {
- return fmt.Errorf("setting gofer oom_score_adj for container %q: %v", c.ID, err)
- }
+ if c.GoferPid == 0 || c.Spec.Process.OOMScoreAdj == nil {
+ return nil
}
-
- return nil
+ return setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj)
}
// adjustSandboxOOMScoreAdj sets the oom_score_adj for the sandbox.
// oom_score_adj is set to the lowest oom_score_adj among the containers
// running in the sandbox.
//
-// TODO(gvisor.dev/issue/512): This call could race with other containers being
+// TODO(gvisor.dev/issue/238): This call could race with other containers being
// created at the same time and end up setting the wrong oom_score_adj to the
-// sandbox.
+// sandbox. Use rpc client to synchronize.
func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) error {
containers, err := loadSandbox(rootDir, s.ID)
if err != nil {
@@ -1251,24 +1151,29 @@ func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool)
}
// Set the lowest of all containers oom_score_adj to the sandbox.
- if err := setOOMScoreAdj(s.Pid, lowScore); err != nil {
- return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", s.ID, err)
- }
-
- return nil
+ return setOOMScoreAdj(s.Pid, lowScore)
}
// setOOMScoreAdj sets oom_score_adj to the given value for the given PID.
// /proc must be available and mounted read-write. scoreAdj should be between
-// -1000 and 1000.
+// -1000 and 1000. It's a noop if the process has already exited.
func setOOMScoreAdj(pid int, scoreAdj int) error {
f, err := os.OpenFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid), os.O_WRONLY, 0644)
if err != nil {
+ // Ignore NotExist errors because it can race with process exit.
+ if os.IsNotExist(err) {
+ log.Warningf("Process (%d) not found setting oom_score_adj", pid)
+ return nil
+ }
return err
}
defer f.Close()
if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil {
- return err
+ // ESRCH on write means the process exited after the file was opened.
+ if errors.Is(err, syscall.ESRCH) {
+ log.Warningf("Process (%d) exited while setting oom_score_adj", pid)
+ return nil
+ }
+ return fmt.Errorf("setting oom_score_adj to %d: %v", scoreAdj, err)
}
return nil
}
diff --git a/runsc/container/container_norace_test.go b/runsc/container/container_norace_test.go
new file mode 100644
index 000000000..838c1e20a
--- /dev/null
+++ b/runsc/container/container_norace_test.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !race
+
+package container
+
+// Allow both kvm and ptrace for non-race builds.
+var platformOptions = []configOption{ptrace, kvm}
diff --git a/runsc/container/container_race_test.go b/runsc/container/container_race_test.go
new file mode 100644
index 000000000..9fb4c4fc0
--- /dev/null
+++ b/runsc/container/container_race_test.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build race
+
+package container
+
+// Only enable ptrace for race builds.
+var platformOptions = []configOption{ptrace}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 07eacaac0..5e8247bc8 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -20,13 +20,13 @@ import (
"fmt"
"io"
"io/ioutil"
+ "math"
"os"
"path"
"path/filepath"
"reflect"
"strconv"
"strings"
- "sync"
"syscall"
"testing"
"time"
@@ -37,11 +37,13 @@ import (
"gvisor.dev/gvisor/pkg/bits"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/test/testutil"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/boot/platforms"
"gvisor.dev/gvisor/runsc/specutils"
- "gvisor.dev/gvisor/runsc/testutil"
)
// waitForProcessList waits for the given process list to show up in the container.
@@ -69,6 +71,7 @@ func waitForProcessCount(cont *Container, want int) error {
return &backoff.PermanentError{Err: err}
}
if got := len(pss); got != want {
+ log.Infof("Waiting for process count to reach %d. Current: %d", want, got)
return fmt.Errorf("wrong process count, got: %d, want: %d", got, want)
}
return nil
@@ -89,37 +92,72 @@ func blockUntilWaitable(pid int) error {
return err
}
-// procListsEqual is used to check whether 2 Process lists are equal for all
-// implemented fields.
-func procListsEqual(got, want []*control.Process) bool {
- if len(got) != len(want) {
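+// procListsEqual reports whether two Process lists match element by element.
+// Only UID, PID, PPID, TTY and Cmd are compared, and a field of wants is
+// skipped when it holds its wildcard value: math.MaxUint32 for UID, -1 for PID
+// and PPID, or the empty string for TTY and Cmd.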
+func procListsEqual(gots, wants []*control.Process) bool {
+ if len(gots) != len(wants) {
return false
}
- for i := range got {
- pd1 := got[i]
- pd2 := want[i]
- // Zero out unimplemented and timing dependant fields.
- pd1.Time = ""
- pd1.STime = ""
- pd1.C = 0
- if *pd1 != *pd2 {
+ for i := range gots {
+ got := gots[i]
+ want := wants[i]
+
+ if want.UID != math.MaxUint32 && want.UID != got.UID {
+ return false
+ }
+ if want.PID != -1 && want.PID != got.PID {
+ return false
+ }
+ if want.PPID != -1 && want.PPID != got.PPID {
+ return false
+ }
+ if len(want.TTY) != 0 && want.TTY != got.TTY {
+ return false
+ }
+ if len(want.Cmd) != 0 && want.Cmd != got.Cmd {
return false
}
}
return true
}
-// getAndCheckProcLists is similar to waitForProcessList, but does not wait and retry the
-// test for equality. This is because we already confirmed that exec occurred.
-func getAndCheckProcLists(cont *Container, want []*control.Process) error {
- got, err := cont.Processes()
- if err != nil {
- return fmt.Errorf("error getting process data from container: %v", err)
- }
- if procListsEqual(got, want) {
- return nil
+type processBuilder struct {
+ process control.Process
+}
+
+func newProcessBuilder() *processBuilder {
+ return &processBuilder{
+ process: control.Process{
+ UID: math.MaxUint32,
+ PID: -1,
+ PPID: -1,
+ },
}
- return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want))
+}
+
+func (p *processBuilder) Cmd(cmd string) *processBuilder {
+ p.process.Cmd = cmd
+ return p
+}
+
+func (p *processBuilder) PID(pid kernel.ThreadID) *processBuilder {
+ p.process.PID = pid
+ return p
+}
+
+func (p *processBuilder) PPID(ppid kernel.ThreadID) *processBuilder {
+ p.process.PPID = ppid
+ return p
+}
+
+func (p *processBuilder) UID(uid auth.KUID) *processBuilder {
+ p.process.UID = uid
+ return p
+}
+
+func (p *processBuilder) Process() *control.Process {
+ return &p.process
}
func procListToString(pl []*control.Process) string {
@@ -145,7 +183,7 @@ func createWriteableOutputFile(path string) (*os.File, error) {
return outputFile, nil
}
-func waitForFile(f *os.File) error {
+func waitForFileNotEmpty(f *os.File) error {
op := func() error {
fi, err := f.Stat()
if err != nil {
@@ -160,6 +198,17 @@ func waitForFile(f *os.File) error {
return testutil.Poll(op, 30*time.Second)
}
+// waitForFileExist polls with testutil.Poll, passing it a 30 second duration,
+// until os.Stat succeeds on path.
+func waitForFileExist(path string) error {
+ op := func() error {
+ // Report every Stat failure, not only "not exist": returning nil for
+ // errors such as EACCES would wrongly signal that the file is there.
+ _, err := os.Stat(path)
+ return err
+ }
+
+ return testutil.Poll(op, 30*time.Second)
+}
+
// readOutputNum reads a file at given filepath and returns the int at the
// requested position.
func readOutputNum(file string, position int) (int, error) {
@@ -169,7 +218,7 @@ func readOutputNum(file string, position int) (int, error) {
}
// Ensure that there is content in output file.
- if err := waitForFile(f); err != nil {
+ if err := waitForFileNotEmpty(f); err != nil {
return 0, fmt.Errorf("error waiting for output file: %v", err)
}
@@ -202,16 +251,15 @@ func readOutputNum(file string, position int) (int, error) {
// run starts the sandbox and waits for it to exit, checking that the
// application succeeded.
func run(spec *specs.Spec, conf *boot.Config) error {
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
return fmt.Errorf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create, start and wait for the container.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
Attached: true,
@@ -230,39 +278,64 @@ type configOption int
const (
overlay configOption = iota
+ ptrace
kvm
nonExclusiveFS
)
-var noOverlay = []configOption{kvm, nonExclusiveFS}
-var all = append(noOverlay, overlay)
+var (
+ noOverlay = append(platformOptions, nonExclusiveFS)
+ all = append(noOverlay, overlay)
+)
// configs generates different configurations to run tests.
-func configs(opts ...configOption) []*boot.Config {
+func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
- // Always load the default config.
+ // Build one named config per requested option.
- cs := []*boot.Config{testutil.TestConfig()}
-
+ cs := make(map[string]*boot.Config)
for _, o := range opts {
- c := testutil.TestConfig()
switch o {
case overlay:
+ c := testutil.TestConfig(t)
c.Overlay = true
+ cs["overlay"] = c
+ case ptrace:
+ c := testutil.TestConfig(t)
+ c.Platform = platforms.Ptrace
+ cs["ptrace"] = c
case kvm:
- // TODO(b/112165693): KVM tests are flaky. Disable until fixed.
- continue
-
+ c := testutil.TestConfig(t)
c.Platform = platforms.KVM
+ cs["kvm"] = c
case nonExclusiveFS:
+ c := testutil.TestConfig(t)
c.FileAccess = boot.FileAccessShared
+ cs["non-exclusive"] = c
default:
panic(fmt.Sprintf("unknown config option %v", o))
-
}
- cs = append(cs, c)
}
return cs
}
+func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*boot.Config {
+ vfs1 := configs(t, opts...)
+
+ var optsVFS2 []configOption
+ for _, opt := range opts {
+ // TODO(gvisor.dev/issue/1487): Enable overlay tests.
+ if opt != overlay {
+ optsVFS2 = append(optsVFS2, opt)
+ }
+ }
+
+ for key, value := range configs(t, optsVFS2...) {
+ value.VFS2 = true
+ vfs1[key+"VFS2"] = value
+ }
+
+ return vfs1
+}
+
// TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle.
// It verifies after each step that the container can be loaded from disk, and
// has the correct status.
@@ -272,132 +345,126 @@ func TestLifecycle(t *testing.T) {
childReaper.Start()
defer childReaper.Stop()
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
- // The container will just sleep for a long time. We will kill it before
- // it finishes sleeping.
- spec := testutil.NewSpecWithArgs("sleep", "100")
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ // The container will just sleep for a long time. We will kill it before
+ // it finishes sleeping.
+ spec := testutil.NewSpecWithArgs("sleep", "100")
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
-
- // expectedPL lists the expected process state of the container.
- expectedPL := []*control.Process{
- {
- UID: 0,
- PID: 1,
- PPID: 0,
- C: 0,
- Cmd: "sleep",
- },
- }
- // Create the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- c, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer c.Destroy()
-
- // Load the container from disk and check the status.
- c, err = Load(rootDir, args.ID)
- if err != nil {
- t.Fatalf("error loading container: %v", err)
- }
- if got, want := c.Status, Created; got != want {
- t.Errorf("container status got %v, want %v", got, want)
- }
+ rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // List should return the container id.
- ids, err := List(rootDir)
- if err != nil {
- t.Fatalf("error listing containers: %v", err)
- }
- if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) {
- t.Errorf("container list got %v, want %v", got, want)
- }
+ // expectedPL lists the expected process state of the container.
+ expectedPL := []*control.Process{
+ newProcessBuilder().Cmd("sleep").Process(),
+ }
+ // Create the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
- // Start the container.
- if err := c.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Load the container from disk and check the status.
+ c, err = Load(rootDir, args.ID)
+ if err != nil {
+ t.Fatalf("error loading container: %v", err)
+ }
+ if got, want := c.Status, Created; got != want {
+ t.Errorf("container status got %v, want %v", got, want)
+ }
- // Load the container from disk and check the status.
- c, err = Load(rootDir, args.ID)
- if err != nil {
- t.Fatalf("error loading container: %v", err)
- }
- if got, want := c.Status, Running; got != want {
- t.Errorf("container status got %v, want %v", got, want)
- }
+ // List should return the container id.
+ ids, err := List(rootDir)
+ if err != nil {
+ t.Fatalf("error listing containers: %v", err)
+ }
+ if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) {
+ t.Errorf("container list got %v, want %v", got, want)
+ }
- // Verify that "sleep 100" is running.
- if err := waitForProcessList(c, expectedPL); err != nil {
- t.Error(err)
- }
+ // Start the container.
+ if err := c.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- // Wait on the container.
- var wg sync.WaitGroup
- wg.Add(1)
- ch := make(chan struct{})
- go func() {
- ch <- struct{}{}
- ws, err := c.Wait()
+ // Load the container from disk and check the status.
+ c, err = Load(rootDir, args.ID)
if err != nil {
- t.Fatalf("error waiting on container: %v", err)
+ t.Fatalf("error loading container: %v", err)
}
- if got, want := ws.Signal(), syscall.SIGTERM; got != want {
- t.Fatalf("got signal %v, want %v", got, want)
+ if got, want := c.Status, Running; got != want {
+ t.Errorf("container status got %v, want %v", got, want)
}
- wg.Done()
- }()
- // Wait a bit to ensure that we've started waiting on the
- // container before we signal.
- <-ch
- time.Sleep(100 * time.Millisecond)
- // Send the container a SIGTERM which will cause it to stop.
- if err := c.SignalContainer(syscall.SIGTERM, false); err != nil {
- t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err)
- }
- // Wait for it to die.
- wg.Wait()
+ // Verify that "sleep 100" is running.
+ if err := waitForProcessList(c, expectedPL); err != nil {
+ t.Error(err)
+ }
- // Load the container from disk and check the status.
- c, err = Load(rootDir, args.ID)
- if err != nil {
- t.Fatalf("error loading container: %v", err)
- }
- if got, want := c.Status, Stopped; got != want {
- t.Errorf("container status got %v, want %v", got, want)
- }
+ // Wait on the container.
+ ch := make(chan error)
+ go func() {
+ ws, err := c.Wait()
+ if err != nil {
+ ch <- err
+ }
+ if got, want := ws.Signal(), syscall.SIGTERM; got != want {
+ ch <- fmt.Errorf("got signal %v, want %v", got, want)
+ }
+ ch <- nil
+ }()
- // Destroy the container.
- if err := c.Destroy(); err != nil {
- t.Fatalf("error destroying container: %v", err)
- }
+ // Wait a bit to ensure that we've started waiting on
+ // the container before we signal.
+ time.Sleep(time.Second)
- // List should not return the container id.
- ids, err = List(rootDir)
- if err != nil {
- t.Fatalf("error listing containers: %v", err)
- }
- if len(ids) != 0 {
- t.Errorf("expected container list to be empty, but got %v", ids)
- }
+ // Send the container a SIGTERM which will cause it to stop.
+ if err := c.SignalContainer(syscall.SIGTERM, false); err != nil {
+ t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err)
+ }
- // Loading the container by id should fail.
- if _, err = Load(rootDir, args.ID); err == nil {
- t.Errorf("expected loading destroyed container to fail, but it did not")
- }
+ // Wait for it to die.
+ if err := <-ch; err != nil {
+ t.Fatalf("error waiting for container: %v", err)
+ }
+
+ // Load the container from disk and check the status.
+ c, err = Load(rootDir, args.ID)
+ if err != nil {
+ t.Fatalf("error loading container: %v", err)
+ }
+ if got, want := c.Status, Stopped; got != want {
+ t.Errorf("container status got %v, want %v", got, want)
+ }
+
+ // Destroy the container.
+ if err := c.Destroy(); err != nil {
+ t.Fatalf("error destroying container: %v", err)
+ }
+
+ // List should not return the container id.
+ ids, err = List(rootDir)
+ if err != nil {
+ t.Fatalf("error listing containers: %v", err)
+ }
+ if len(ids) != 0 {
+ t.Errorf("expected container list to be empty, but got %v", ids)
+ }
+
+ // Loading the container by id should fail.
+ if _, err = Load(rootDir, args.ID); err == nil {
+ t.Errorf("expected loading destroyed container to fail, but it did not")
+ }
+ })
}
}
@@ -406,12 +473,14 @@ func TestExePath(t *testing.T) {
// Create two directories that will be prepended to PATH.
firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first")
if err != nil {
- t.Fatal(err)
+ t.Fatalf("error creating temporary directory: %v", err)
}
+ defer os.RemoveAll(firstPath)
secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second")
if err != nil {
- t.Fatal(err)
+ t.Fatalf("error creating temporary directory: %v", err)
}
+ defer os.RemoveAll(secondPath)
// Create two minimal executables in the second path, two of which
// will be masked by files in first path.
@@ -419,11 +488,11 @@ func TestExePath(t *testing.T) {
path := filepath.Join(secondPath, p)
f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777)
if err != nil {
- t.Fatal(err)
+ t.Fatalf("error opening path: %v", err)
}
defer f.Close()
if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil {
- t.Fatal(err)
+ t.Fatalf("error writing contents: %v", err)
}
}
@@ -432,7 +501,7 @@ func TestExePath(t *testing.T) {
nonExecutable := filepath.Join(firstPath, "masked1")
f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666)
if err != nil {
- t.Fatal(err)
+ t.Fatalf("error opening file: %v", err)
}
f2.Close()
@@ -440,85 +509,95 @@ func TestExePath(t *testing.T) {
// executable in the second.
nonRegular := filepath.Join(firstPath, "masked2")
if err := os.Mkdir(nonRegular, 0777); err != nil {
- t.Fatal(err)
- }
-
- for _, conf := range configs(overlay) {
- t.Logf("Running test with conf: %+v", conf)
- for _, test := range []struct {
- path string
- success bool
- }{
- {path: "true", success: true},
- {path: "bin/true", success: true},
- {path: "/bin/true", success: true},
- {path: "thisfiledoesntexit", success: false},
- {path: "bin/thisfiledoesntexit", success: false},
- {path: "/bin/thisfiledoesntexit", success: false},
-
- {path: "unmasked", success: true},
- {path: filepath.Join(firstPath, "unmasked"), success: false},
- {path: filepath.Join(secondPath, "unmasked"), success: true},
-
- {path: "masked1", success: true},
- {path: filepath.Join(firstPath, "masked1"), success: false},
- {path: filepath.Join(secondPath, "masked1"), success: true},
-
- {path: "masked2", success: true},
- {path: filepath.Join(firstPath, "masked2"), success: false},
- {path: filepath.Join(secondPath, "masked2"), success: true},
- } {
- spec := testutil.NewSpecWithArgs(test.path)
- spec.Process.Env = []string{
- fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")),
- }
-
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("exec: %s, error setting up container: %v", test.path, err)
- }
-
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- Attached: true,
- }
- ws, err := Run(conf, args)
-
- os.RemoveAll(rootDir)
- os.RemoveAll(bundleDir)
-
- if test.success {
- if err != nil {
- t.Errorf("exec: %s, error running container: %v", test.path, err)
- }
- if ws.ExitStatus() != 0 {
- t.Errorf("exec: %s, got exit status %v want %v", test.path, ws.ExitStatus(), 0)
- }
- } else {
- if err == nil {
- t.Errorf("exec: %s, got: no error, want: error", test.path)
- }
+ t.Fatalf("error making directory: %v", err)
+ }
+
+ for name, conf := range configsWithVFS2(t, overlay) {
+ t.Run(name, func(t *testing.T) {
+ for _, test := range []struct {
+ path string
+ success bool
+ }{
+ {path: "true", success: true},
+ {path: "bin/true", success: true},
+ {path: "/bin/true", success: true},
+ {path: "thisfiledoesntexit", success: false},
+ {path: "bin/thisfiledoesntexit", success: false},
+ {path: "/bin/thisfiledoesntexit", success: false},
+
+ {path: "unmasked", success: true},
+ {path: filepath.Join(firstPath, "unmasked"), success: false},
+ {path: filepath.Join(secondPath, "unmasked"), success: true},
+
+ {path: "masked1", success: true},
+ {path: filepath.Join(firstPath, "masked1"), success: false},
+ {path: filepath.Join(secondPath, "masked1"), success: true},
+
+ {path: "masked2", success: true},
+ {path: filepath.Join(firstPath, "masked2"), success: false},
+ {path: filepath.Join(secondPath, "masked2"), success: true},
+ } {
+ t.Run(fmt.Sprintf("path=%s,success=%t", test.path, test.success), func(t *testing.T) {
+ spec := testutil.NewSpecWithArgs(test.path)
+ spec.Process.Env = []string{
+ fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")),
+ }
+
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("exec: error setting up container: %v", err)
+ }
+ defer cleanup()
+
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ Attached: true,
+ }
+ ws, err := Run(conf, args)
+
+ if test.success {
+ if err != nil {
+ t.Errorf("exec: error running container: %v", err)
+ }
+ if ws.ExitStatus() != 0 {
+ t.Errorf("exec: got exit status %v want %v", ws.ExitStatus(), 0)
+ }
+ } else {
+ if err == nil {
+ t.Errorf("exec: got: no error, want: error")
+ }
+ }
+ })
}
- }
+ })
}
}
// Test that we can retrieve the application exit status from the container.
func TestAppExitStatus(t *testing.T) {
+ doAppExitStatus(t, false)
+}
+
+// This is TestAppExitStatus for VFSv2.
+func TestAppExitStatusVFS2(t *testing.T) {
+ doAppExitStatus(t, true)
+}
+
+func doAppExitStatus(t *testing.T, vfs2 bool) {
// First container will succeed.
succSpec := testutil.NewSpecWithArgs("true")
- conf := testutil.TestConfig()
- rootDir, bundleDir, err := testutil.SetupContainer(succSpec, conf)
+ conf := testutil.TestConfig(t)
+ conf.VFS2 = vfs2
+ _, bundleDir, cleanup, err := testutil.SetupContainer(succSpec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: succSpec,
BundleDir: bundleDir,
Attached: true,
@@ -535,15 +614,14 @@ func TestAppExitStatus(t *testing.T) {
wantStatus := 123
errSpec := testutil.NewSpecWithArgs("bash", "-c", fmt.Sprintf("exit %d", wantStatus))
- rootDir2, bundleDir2, err := testutil.SetupContainer(errSpec, conf)
+ _, bundleDir2, cleanup2, err := testutil.SetupContainer(errSpec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir2)
- defer os.RemoveAll(bundleDir2)
+ defer cleanup2()
args2 := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: errSpec,
BundleDir: bundleDir2,
Attached: true,
@@ -559,164 +637,271 @@ func TestAppExitStatus(t *testing.T) {
// TestExec verifies that a container can exec a new program.
func TestExec(t *testing.T) {
- for _, conf := range configs(overlay) {
- t.Logf("Running test with conf: %+v", conf)
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "exec-test")
+ if err != nil {
+ t.Fatalf("error creating temporary directory: %v", err)
+ }
+ // Note that some shells may exec the final command in a sequence as
+ // an optimization. We avoid this here by adding the exit 0.
+ cmd := fmt.Sprintf("ln -s /bin/true %q/symlink && sleep 100 && exit 0", dir)
+ spec := testutil.NewSpecWithArgs("sh", "-c", cmd)
- const uid = 343
- spec := testutil.NewSpecWithArgs("sleep", "100")
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ // Create and start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- // Create and start the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- cont, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer cont.Destroy()
- if err := cont.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Wait until sleep is running to ensure the symlink was created.
+ expectedPL := []*control.Process{
+ newProcessBuilder().Cmd("sh").Process(),
+ newProcessBuilder().Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(cont, expectedPL); err != nil {
+ t.Fatalf("waitForProcessList: %v", err)
+ }
- // expectedPL lists the expected process state of the container.
- expectedPL := []*control.Process{
- {
- UID: 0,
- PID: 1,
- PPID: 0,
- C: 0,
- Cmd: "sleep",
- },
- {
- UID: uid,
- PID: 2,
- PPID: 0,
- C: 0,
- Cmd: "sleep",
- },
- }
+ for _, tc := range []struct {
+ name string
+ args control.ExecArgs
+ }{
+ {
+ name: "complete",
+ args: control.ExecArgs{
+ Filename: "/bin/true",
+ Argv: []string{"/bin/true"},
+ },
+ },
+ {
+ name: "filename",
+ args: control.ExecArgs{
+ Filename: "/bin/true",
+ },
+ },
+ {
+ name: "argv",
+ args: control.ExecArgs{
+ Argv: []string{"/bin/true"},
+ },
+ },
+ {
+ name: "filename resolution",
+ args: control.ExecArgs{
+ Filename: "true",
+ Envv: []string{"PATH=/bin"},
+ },
+ },
+ {
+ name: "argv resolution",
+ args: control.ExecArgs{
+ Argv: []string{"true"},
+ Envv: []string{"PATH=/bin"},
+ },
+ },
+ {
+ name: "argv symlink",
+ args: control.ExecArgs{
+ Argv: []string{filepath.Join(dir, "symlink")},
+ },
+ },
+ {
+ name: "working dir",
+ args: control.ExecArgs{
+ Argv: []string{"/bin/sh", "-c", `if [[ "${PWD}" != "/tmp" ]]; then exit 1; fi`},
+ WorkingDirectory: "/tmp",
+ },
+ },
+ {
+ name: "user",
+ args: control.ExecArgs{
+ Argv: []string{"/bin/sh", "-c", `if [[ "$(id -u)" != "343" ]]; then exit 1; fi`},
+ KUID: 343,
+ },
+ },
+ {
+ name: "group",
+ args: control.ExecArgs{
+ Argv: []string{"/bin/sh", "-c", `if [[ "$(id -g)" != "343" ]]; then exit 1; fi`},
+ KGID: 343,
+ },
+ },
+ {
+ name: "env",
+ args: control.ExecArgs{
+ Argv: []string{"/bin/sh", "-c", `if [[ "${FOO}" != "123" ]]; then exit 1; fi`},
+ Envv: []string{"FOO=123"},
+ },
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ // t.Parallel()
+ if ws, err := cont.executeSync(&tc.args); err != nil {
+ t.Fatalf("executeAsync(%+v): %v", tc.args, err)
+ } else if ws != 0 {
+ t.Fatalf("executeAsync(%+v) failed with exit: %v", tc.args, ws)
+ }
+ })
+ }
+ })
+ }
+}
- // Verify that "sleep 100" is running.
- if err := waitForProcessList(cont, expectedPL[:1]); err != nil {
- t.Error(err)
- }
+// TestExecProcList verifies that a container can exec a new program and it
+// shows correctly in the process list.
+func TestExecProcList(t *testing.T) {
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ const uid = 343
+ spec := testutil.NewSpecWithArgs("sleep", "100")
- execArgs := &control.ExecArgs{
- Filename: "/bin/sleep",
- Argv: []string{"/bin/sleep", "5"},
- WorkingDirectory: "/",
- KUID: uid,
- }
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Verify that "sleep 100" and "sleep 5" are running after exec.
- // First, start running exec (whick blocks).
- status := make(chan error, 1)
- go func() {
- exitStatus, err := cont.executeSync(execArgs)
+ // Create and start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
- log.Debugf("error executing: %v", err)
- status <- err
- } else if exitStatus != 0 {
- log.Debugf("bad status: %d", exitStatus)
- status <- fmt.Errorf("failed with exit status: %v", exitStatus)
- } else {
- status <- nil
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
}
- }()
- if err := waitForProcessList(cont, expectedPL); err != nil {
- t.Fatal(err)
- }
+ execArgs := &control.ExecArgs{
+ Filename: "/bin/sleep",
+ Argv: []string{"/bin/sleep", "5"},
+ WorkingDirectory: "/",
+ KUID: uid,
+ }
+
+ // Verify that "sleep 100" and "sleep 5" are running after exec. First,
+ // start running exec (which blocks).
+ ch := make(chan error)
+ go func() {
+ exitStatus, err := cont.executeSync(execArgs)
+ if err != nil {
+ ch <- err
+ } else if exitStatus != 0 {
+ ch <- fmt.Errorf("failed with exit status: %v", exitStatus)
+ } else {
+ ch <- nil
+ }
+ }()
- // Ensure that exec finished without error.
- select {
- case <-time.After(10 * time.Second):
- t.Fatalf("container timed out waiting for exec to finish.")
- case st := <-status:
- if st != nil {
- t.Errorf("container failed to exec %v: %v", args, err)
+ // expectedPL lists the expected process state of the container.
+ expectedPL := []*control.Process{
+ newProcessBuilder().PID(1).PPID(0).Cmd("sleep").UID(0).Process(),
+ newProcessBuilder().PID(2).PPID(0).Cmd("sleep").UID(uid).Process(),
}
- }
+ if err := waitForProcessList(cont, expectedPL); err != nil {
+ t.Fatalf("error waiting for processes: %v", err)
+ }
+
+ // Ensure that exec finished without error.
+ select {
+ case <-time.After(10 * time.Second):
+ t.Fatalf("container timed out waiting for exec to finish.")
+ case err := <-ch:
+ if err != nil {
+ t.Errorf("container failed to exec %v: %v", args, err)
+ }
+ }
+ })
}
}
// TestKillPid verifies that we can signal individual exec'd processes.
func TestKillPid(t *testing.T) {
- for _, conf := range configs(overlay) {
- t.Logf("Running test with conf: %+v", conf)
-
- app, err := testutil.FindFile("runsc/container/test_app/test_app")
- if err != nil {
- t.Fatal("error finding test_app:", err)
- }
+ for name, conf := range configsWithVFS2(t, overlay) {
+ t.Run(name, func(t *testing.T) {
+ app, err := testutil.FindFile("test/cmd/test_app/test_app")
+ if err != nil {
+ t.Fatal("error finding test_app:", err)
+ }
- const nProcs = 4
- spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true")
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ const nProcs = 4
+ spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true")
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Create and start the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- cont, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer cont.Destroy()
- if err := cont.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Create and start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- // Verify that all processes are running.
- if err := waitForProcessCount(cont, nProcs); err != nil {
- t.Fatalf("timed out waiting for processes to start: %v", err)
- }
+ // Verify that all processes are running.
+ if err := waitForProcessCount(cont, nProcs); err != nil {
+ t.Fatalf("timed out waiting for processes to start: %v", err)
+ }
- // Kill the child process with the largest PID.
- procs, err := cont.Processes()
- if err != nil {
- t.Fatalf("failed to get process list: %v", err)
- }
- var pid int32
- for _, p := range procs {
- if pid < int32(p.PID) {
- pid = int32(p.PID)
+ // Kill the child process with the largest PID.
+ procs, err := cont.Processes()
+ if err != nil {
+ t.Fatalf("failed to get process list: %v", err)
+ }
+ var pid int32
+ for _, p := range procs {
+ if pid < int32(p.PID) {
+ pid = int32(p.PID)
+ }
+ }
+ if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil {
+ t.Fatalf("failed to signal process %d: %v", pid, err)
}
- }
- if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil {
- t.Fatalf("failed to signal process %d: %v", pid, err)
- }
- // Verify that one process is gone.
- if err := waitForProcessCount(cont, nProcs-1); err != nil {
- t.Fatal(err)
- }
+ // Verify that one process is gone.
+ if err := waitForProcessCount(cont, nProcs-1); err != nil {
+ t.Fatalf("error waiting for processes: %v", err)
+ }
- procs, err = cont.Processes()
- if err != nil {
- t.Fatalf("failed to get process list: %v", err)
- }
- for _, p := range procs {
- if pid == int32(p.PID) {
- t.Fatalf("pid %d is still alive, which should be killed", pid)
+ procs, err = cont.Processes()
+ if err != nil {
+ t.Fatalf("failed to get process list: %v", err)
}
- }
+ for _, p := range procs {
+ if pid == int32(p.PID) {
+ t.Fatalf("pid %d is still alive, which should be killed", pid)
+ }
+ }
+ })
}
}
@@ -727,160 +912,160 @@ func TestKillPid(t *testing.T) {
// be the next consecutive number after the last number from the checkpointed container.
func TestCheckpointRestore(t *testing.T) {
// Skip overlay because test requires writing to host file.
- for _, conf := range configs(noOverlay...) {
- t.Logf("Running test with conf: %+v", conf)
-
- dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test")
- if err != nil {
- t.Fatalf("ioutil.TempDir failed: %v", err)
- }
- if err := os.Chmod(dir, 0777); err != nil {
- t.Fatalf("error chmoding file: %q, %v", dir, err)
- }
+ for name, conf := range configs(t, noOverlay...) {
+ t.Run(name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test")
+ if err != nil {
+ t.Fatalf("ioutil.TempDir failed: %v", err)
+ }
+ defer os.RemoveAll(dir)
+ if err := os.Chmod(dir, 0777); err != nil {
+ t.Fatalf("error chmoding file: %q, %v", dir, err)
+ }
- outputPath := filepath.Join(dir, "output")
- outputFile, err := createWriteableOutputFile(outputPath)
- if err != nil {
- t.Fatalf("error creating output file: %v", err)
- }
- defer outputFile.Close()
+ outputPath := filepath.Join(dir, "output")
+ outputFile, err := createWriteableOutputFile(outputPath)
+ if err != nil {
+ t.Fatalf("error creating output file: %v", err)
+ }
+ defer outputFile.Close()
- script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath)
- spec := testutil.NewSpecWithArgs("bash", "-c", script)
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath)
+ spec := testutil.NewSpecWithArgs("bash", "-c", script)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Create and start the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- cont, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer cont.Destroy()
- if err := cont.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Create and start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- // Set the image path, which is where the checkpoint image will be saved.
- imagePath := filepath.Join(dir, "test-image-file")
+ // Set the image path, which is where the checkpoint image will be saved.
+ imagePath := filepath.Join(dir, "test-image-file")
- // Create the image file and open for writing.
- file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
- if err != nil {
- t.Fatalf("error opening new file at imagePath: %v", err)
- }
- defer file.Close()
+ // Create the image file and open for writing.
+ file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
+ if err != nil {
+ t.Fatalf("error opening new file at imagePath: %v", err)
+ }
+ defer file.Close()
- // Wait until application has ran.
- if err := waitForFile(outputFile); err != nil {
- t.Fatalf("Failed to wait for output file: %v", err)
- }
+ // Wait until application has run.
+ if err := waitForFileNotEmpty(outputFile); err != nil {
+ t.Fatalf("Failed to wait for output file: %v", err)
+ }
- // Checkpoint running container; save state into new file.
- if err := cont.Checkpoint(file); err != nil {
- t.Fatalf("error checkpointing container to empty file: %v", err)
- }
- defer os.RemoveAll(imagePath)
+ // Checkpoint running container; save state into new file.
+ if err := cont.Checkpoint(file); err != nil {
+ t.Fatalf("error checkpointing container to empty file: %v", err)
+ }
+ defer os.RemoveAll(imagePath)
- lastNum, err := readOutputNum(outputPath, -1)
- if err != nil {
- t.Fatalf("error with outputFile: %v", err)
- }
+ lastNum, err := readOutputNum(outputPath, -1)
+ if err != nil {
+ t.Fatalf("error with outputFile: %v", err)
+ }
- // Delete and recreate file before restoring.
- if err := os.Remove(outputPath); err != nil {
- t.Fatalf("error removing file")
- }
- outputFile2, err := createWriteableOutputFile(outputPath)
- if err != nil {
- t.Fatalf("error creating output file: %v", err)
- }
- defer outputFile2.Close()
+ // Delete and recreate file before restoring.
+ if err := os.Remove(outputPath); err != nil {
+ t.Fatalf("error removing file")
+ }
+ outputFile2, err := createWriteableOutputFile(outputPath)
+ if err != nil {
+ t.Fatalf("error creating output file: %v", err)
+ }
+ defer outputFile2.Close()
- // Restore into a new container.
- args2 := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- cont2, err := New(conf, args2)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer cont2.Destroy()
+ // Restore into a new container.
+ args2 := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont2, err := New(conf, args2)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont2.Destroy()
- if err := cont2.Restore(spec, conf, imagePath); err != nil {
- t.Fatalf("error restoring container: %v", err)
- }
+ if err := cont2.Restore(spec, conf, imagePath); err != nil {
+ t.Fatalf("error restoring container: %v", err)
+ }
- // Wait until application has ran.
- if err := waitForFile(outputFile2); err != nil {
- t.Fatalf("Failed to wait for output file: %v", err)
- }
+ // Wait until application has run.
+ if err := waitForFileNotEmpty(outputFile2); err != nil {
+ t.Fatalf("Failed to wait for output file: %v", err)
+ }
- firstNum, err := readOutputNum(outputPath, 0)
- if err != nil {
- t.Fatalf("error with outputFile: %v", err)
- }
+ firstNum, err := readOutputNum(outputPath, 0)
+ if err != nil {
+ t.Fatalf("error with outputFile: %v", err)
+ }
- // Check that lastNum is one less than firstNum and that the container picks
- // up from where it left off.
- if lastNum+1 != firstNum {
- t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum)
- }
- cont2.Destroy()
+ // Check that lastNum is one less than firstNum and that the container picks
+ // up from where it left off.
+ if lastNum+1 != firstNum {
+ t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum)
+ }
+ cont2.Destroy()
- // Restore into another container!
- // Delete and recreate file before restoring.
- if err := os.Remove(outputPath); err != nil {
- t.Fatalf("error removing file")
- }
- outputFile3, err := createWriteableOutputFile(outputPath)
- if err != nil {
- t.Fatalf("error creating output file: %v", err)
- }
- defer outputFile3.Close()
+ // Restore into another container!
+ // Delete and recreate file before restoring.
+ if err := os.Remove(outputPath); err != nil {
+ t.Fatalf("error removing file")
+ }
+ outputFile3, err := createWriteableOutputFile(outputPath)
+ if err != nil {
+ t.Fatalf("error creating output file: %v", err)
+ }
+ defer outputFile3.Close()
- // Restore into a new container.
- args3 := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- cont3, err := New(conf, args3)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer cont3.Destroy()
+ // Restore into a new container.
+ args3 := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont3, err := New(conf, args3)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont3.Destroy()
- if err := cont3.Restore(spec, conf, imagePath); err != nil {
- t.Fatalf("error restoring container: %v", err)
- }
+ if err := cont3.Restore(spec, conf, imagePath); err != nil {
+ t.Fatalf("error restoring container: %v", err)
+ }
- // Wait until application has ran.
- if err := waitForFile(outputFile3); err != nil {
- t.Fatalf("Failed to wait for output file: %v", err)
- }
+ // Wait until application has run.
+ if err := waitForFileNotEmpty(outputFile3); err != nil {
+ t.Fatalf("Failed to wait for output file: %v", err)
+ }
- firstNum2, err := readOutputNum(outputPath, 0)
- if err != nil {
- t.Fatalf("error with outputFile: %v", err)
- }
+ firstNum2, err := readOutputNum(outputPath, 0)
+ if err != nil {
+ t.Fatalf("error with outputFile: %v", err)
+ }
- // Check that lastNum is one less than firstNum and that the container picks
- // up from where it left off.
- if lastNum+1 != firstNum2 {
- t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2)
- }
- cont3.Destroy()
+ // Check that lastNum is one less than firstNum and that the container picks
+ // up from where it left off.
+ if lastNum+1 != firstNum2 {
+ t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2)
+ }
+ cont3.Destroy()
+ })
}
}
@@ -888,256 +1073,213 @@ func TestCheckpointRestore(t *testing.T) {
// with filesystem Unix Domain Socket use.
func TestUnixDomainSockets(t *testing.T) {
// Skip overlay because test requires writing to host file.
- for _, conf := range configs(noOverlay...) {
- t.Logf("Running test with conf: %+v", conf)
-
- // UDS path is limited to 108 chars for compatibility with older systems.
- // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is
- // not exceeded. Assumes '/tmp' exists in the system.
- dir, err := ioutil.TempDir("/tmp", "uds-test")
- if err != nil {
- t.Fatalf("ioutil.TempDir failed: %v", err)
- }
- defer os.RemoveAll(dir)
+ for name, conf := range configs(t, noOverlay...) {
+ t.Run(name, func(t *testing.T) {
+ // UDS path is limited to 108 chars for compatibility with older systems.
+ // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is
+ // not exceeded. Assumes '/tmp' exists in the system.
+ dir, err := ioutil.TempDir("/tmp", "uds-test")
+ if err != nil {
+ t.Fatalf("ioutil.TempDir failed: %v", err)
+ }
+ defer os.RemoveAll(dir)
- outputPath := filepath.Join(dir, "uds_output")
- outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666)
- if err != nil {
- t.Fatalf("error creating output file: %v", err)
- }
- defer outputFile.Close()
+ outputPath := filepath.Join(dir, "uds_output")
+ outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666)
+ if err != nil {
+ t.Fatalf("error creating output file: %v", err)
+ }
+ defer outputFile.Close()
- app, err := testutil.FindFile("runsc/container/test_app/test_app")
- if err != nil {
- t.Fatal("error finding test_app:", err)
- }
+ app, err := testutil.FindFile("test/cmd/test_app/test_app")
+ if err != nil {
+ t.Fatal("error finding test_app:", err)
+ }
- socketPath := filepath.Join(dir, "uds_socket")
- defer os.Remove(socketPath)
+ socketPath := filepath.Join(dir, "uds_socket")
+ defer os.Remove(socketPath)
- spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath)
- spec.Process.User = specs.User{
- UID: uint32(os.Getuid()),
- GID: uint32(os.Getgid()),
- }
- spec.Mounts = []specs.Mount{{
- Type: "bind",
- Destination: dir,
- Source: dir,
- }}
+ spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath)
+ spec.Process.User = specs.User{
+ UID: uint32(os.Getuid()),
+ GID: uint32(os.Getgid()),
+ }
+ spec.Mounts = []specs.Mount{{
+ Type: "bind",
+ Destination: dir,
+ Source: dir,
+ }}
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Create and start the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- cont, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer cont.Destroy()
- if err := cont.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Create and start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- // Set the image path, the location where the checkpoint image will be saved.
- imagePath := filepath.Join(dir, "test-image-file")
+ // Set the image path, the location where the checkpoint image will be saved.
+ imagePath := filepath.Join(dir, "test-image-file")
- // Create the image file and open for writing.
- file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
- if err != nil {
- t.Fatalf("error opening new file at imagePath: %v", err)
- }
- defer file.Close()
- defer os.RemoveAll(imagePath)
+ // Create the image file and open for writing.
+ file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
+ if err != nil {
+ t.Fatalf("error opening new file at imagePath: %v", err)
+ }
+ defer file.Close()
+ defer os.RemoveAll(imagePath)
- // Wait until application has ran.
- if err := waitForFile(outputFile); err != nil {
- t.Fatalf("Failed to wait for output file: %v", err)
- }
+ // Wait until application has run.
+ if err := waitForFileNotEmpty(outputFile); err != nil {
+ t.Fatalf("Failed to wait for output file: %v", err)
+ }
- // Checkpoint running container; save state into new file.
- if err := cont.Checkpoint(file); err != nil {
- t.Fatalf("error checkpointing container to empty file: %v", err)
- }
+ // Checkpoint running container; save state into new file.
+ if err := cont.Checkpoint(file); err != nil {
+ t.Fatalf("error checkpointing container to empty file: %v", err)
+ }
- // Read last number outputted before checkpoint.
- lastNum, err := readOutputNum(outputPath, -1)
- if err != nil {
- t.Fatalf("error with outputFile: %v", err)
- }
+ // Read last number outputted before checkpoint.
+ lastNum, err := readOutputNum(outputPath, -1)
+ if err != nil {
+ t.Fatalf("error with outputFile: %v", err)
+ }
- // Delete and recreate file before restoring.
- if err := os.Remove(outputPath); err != nil {
- t.Fatalf("error removing file")
- }
- outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666)
- if err != nil {
- t.Fatalf("error creating output file: %v", err)
- }
- defer outputFile2.Close()
+ // Delete and recreate file before restoring.
+ if err := os.Remove(outputPath); err != nil {
+ t.Fatalf("error removing file")
+ }
+ outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666)
+ if err != nil {
+ t.Fatalf("error creating output file: %v", err)
+ }
+ defer outputFile2.Close()
- // Restore into a new container.
- argsRestore := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- contRestore, err := New(conf, argsRestore)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer contRestore.Destroy()
+ // Restore into a new container.
+ argsRestore := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ contRestore, err := New(conf, argsRestore)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer contRestore.Destroy()
- if err := contRestore.Restore(spec, conf, imagePath); err != nil {
- t.Fatalf("error restoring container: %v", err)
- }
+ if err := contRestore.Restore(spec, conf, imagePath); err != nil {
+ t.Fatalf("error restoring container: %v", err)
+ }
- // Wait until application has ran.
- if err := waitForFile(outputFile2); err != nil {
- t.Fatalf("Failed to wait for output file: %v", err)
- }
+ // Wait until application has run.
+ if err := waitForFileNotEmpty(outputFile2); err != nil {
+ t.Fatalf("Failed to wait for output file: %v", err)
+ }
- // Read first number outputted after restore.
- firstNum, err := readOutputNum(outputPath, 0)
- if err != nil {
- t.Fatalf("error with outputFile: %v", err)
- }
+ // Read first number outputted after restore.
+ firstNum, err := readOutputNum(outputPath, 0)
+ if err != nil {
+ t.Fatalf("error with outputFile: %v", err)
+ }
- // Check that lastNum is one less than firstNum.
- if lastNum+1 != firstNum {
- t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum)
- }
- contRestore.Destroy()
+ // Check that lastNum is one less than firstNum.
+ if lastNum+1 != firstNum {
+ t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum)
+ }
+ contRestore.Destroy()
+ })
}
}
// TestPauseResume tests that we can successfully pause and resume a container.
-// It checks starts running sleep and executes another sleep. It pauses and checks
-// that both processes are still running: sleep will be paused and still exist.
-// It will then unpause and confirm that both processes are running. Then it will
-// wait until one sleep completes and check to make sure the other is running.
+// The container will keep touching a file to indicate it's running. The test
+// pauses the container, removes the file, and checks that it doesn't get
+// recreated. Then it resumes the container and verifies that the file gets
+// created again.
func TestPauseResume(t *testing.T) {
- for _, conf := range configs(noOverlay...) {
- t.Logf("Running test with conf: %+v", conf)
- const uid = 343
- spec := testutil.NewSpecWithArgs("sleep", "20")
-
- lock, err := ioutil.TempFile(testutil.TmpDir(), "lock")
- if err != nil {
- t.Fatalf("error creating output file: %v", err)
- }
- defer lock.Close()
-
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
-
- // Create and start the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- cont, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer cont.Destroy()
- if err := cont.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
-
- // expectedPL lists the expected process state of the container.
- expectedPL := []*control.Process{
- {
- UID: 0,
- PID: 1,
- PPID: 0,
- C: 0,
- Cmd: "sleep",
- },
- {
- UID: uid,
- PID: 2,
- PPID: 0,
- C: 0,
- Cmd: "bash",
- },
- }
-
- script := fmt.Sprintf("while [[ -f %q ]]; do sleep 0.1; done", lock.Name())
- execArgs := &control.ExecArgs{
- Filename: "/bin/bash",
- Argv: []string{"bash", "-c", script},
- WorkingDirectory: "/",
- KUID: uid,
- }
+ for name, conf := range configs(t, noOverlay...) {
+ t.Run(name, func(t *testing.T) {
+ tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "lock")
+ if err != nil {
+ t.Fatalf("error creating temp dir: %v", err)
+ }
+ defer os.RemoveAll(tmpDir)
- // First, start running exec.
- _, err = cont.Execute(execArgs)
- if err != nil {
- t.Fatalf("error executing: %v", err)
- }
+ running := path.Join(tmpDir, "running")
+ script := fmt.Sprintf("while [[ true ]]; do touch %q; sleep 0.1; done", running)
+ spec := testutil.NewSpecWithArgs("/bin/bash", "-c", script)
- // Verify that "sleep 5" is running.
- if err := waitForProcessList(cont, expectedPL); err != nil {
- t.Fatal(err)
- }
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Pause the running container.
- if err := cont.Pause(); err != nil {
- t.Errorf("error pausing container: %v", err)
- }
- if got, want := cont.Status, Paused; got != want {
- t.Errorf("container status got %v, want %v", got, want)
- }
+ // Create and start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- if err := os.Remove(lock.Name()); err != nil {
- t.Fatalf("os.Remove(lock) failed: %v", err)
- }
- // Script loops and sleeps for 100ms. Give a bit a time for it to exit in
- // case pause didn't work.
- time.Sleep(200 * time.Millisecond)
+ // Wait until container starts running, observed by the existence of running
+ // file.
+ if err := waitForFileExist(running); err != nil {
+ t.Errorf("error waiting for container to start: %v", err)
+ }
- // Verify that the two processes still exist.
- if err := getAndCheckProcLists(cont, expectedPL); err != nil {
- t.Fatal(err)
- }
+ // Pause the running container.
+ if err := cont.Pause(); err != nil {
+ t.Errorf("error pausing container: %v", err)
+ }
+ if got, want := cont.Status, Paused; got != want {
+ t.Errorf("container status got %v, want %v", got, want)
+ }
- // Resume the running container.
- if err := cont.Resume(); err != nil {
- t.Errorf("error pausing container: %v", err)
- }
- if got, want := cont.Status, Running; got != want {
- t.Errorf("container status got %v, want %v", got, want)
- }
+ if err := os.Remove(running); err != nil {
+ t.Fatalf("os.Remove(%q) failed: %v", running, err)
+ }
+ // Script touches the file every 100ms. Give it a bit of time to run, to
+ // catch the case where pause didn't work.
+ time.Sleep(200 * time.Millisecond)
+ if _, err := os.Stat(running); !os.IsNotExist(err) {
+ t.Fatalf("container did not pause: file exist check: %v", err)
+ }
- expectedPL2 := []*control.Process{
- {
- UID: 0,
- PID: 1,
- PPID: 0,
- C: 0,
- Cmd: "sleep",
- },
- }
+ // Resume the running container.
+ if err := cont.Resume(); err != nil {
+ t.Errorf("error pausing container: %v", err)
+ }
+ if got, want := cont.Status, Running; got != want {
+ t.Errorf("container status got %v, want %v", got, want)
+ }
- // Verify that deleting the file triggered the process to exit.
- if err := waitForProcessList(cont, expectedPL2); err != nil {
- t.Fatal(err)
- }
+ // Verify that the file is once again created by container.
+ if err := waitForFileExist(running); err != nil {
+ t.Fatalf("error resuming container: file exist check: %v", err)
+ }
+ })
}
}
@@ -1146,17 +1288,16 @@ func TestPauseResume(t *testing.T) {
// occurs given the correct state.
func TestPauseResumeStatus(t *testing.T) {
spec := testutil.NewSpecWithArgs("sleep", "20")
- conf := testutil.TestConfig()
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ conf := testutil.TestConfig(t)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create and start the container.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
}
@@ -1212,357 +1353,350 @@ func TestCapabilities(t *testing.T) {
uid := auth.KUID(os.Getuid() + 1)
gid := auth.KGID(os.Getgid() + 1)
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- spec := testutil.NewSpecWithArgs("sleep", "100")
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ spec := testutil.NewSpecWithArgs("sleep", "100")
+ rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Create and start the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- cont, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer cont.Destroy()
- if err := cont.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Create and start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- // expectedPL lists the expected process state of the container.
- expectedPL := []*control.Process{
- {
- UID: 0,
- PID: 1,
- PPID: 0,
- C: 0,
- Cmd: "sleep",
- },
- {
- UID: uid,
- PID: 2,
- PPID: 0,
- C: 0,
- Cmd: "exe",
- },
- }
- if err := waitForProcessList(cont, expectedPL[:1]); err != nil {
- t.Fatalf("Failed to wait for sleep to start, err: %v", err)
- }
+ // expectedPL lists the expected process state of the container.
+ expectedPL := []*control.Process{
+ newProcessBuilder().Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(cont, expectedPL); err != nil {
+ t.Fatalf("Failed to wait for sleep to start, err: %v", err)
+ }
- // Create an executable that can't be run with the specified UID:GID.
- // This shouldn't be callable within the container until we add the
- // CAP_DAC_OVERRIDE capability to skip the access check.
- exePath := filepath.Join(rootDir, "exe")
- if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil {
- t.Fatalf("couldn't create executable: %v", err)
- }
- defer os.Remove(exePath)
-
- // Need to traverse the intermediate directory.
- os.Chmod(rootDir, 0755)
-
- execArgs := &control.ExecArgs{
- Filename: exePath,
- Argv: []string{exePath},
- WorkingDirectory: "/",
- KUID: uid,
- KGID: gid,
- Capabilities: &auth.TaskCapabilities{},
- }
+ // Create an executable that can't be run with the specified UID:GID.
+ // This shouldn't be callable within the container until we add the
+ // CAP_DAC_OVERRIDE capability to skip the access check.
+ exePath := filepath.Join(rootDir, "exe")
+ if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil {
+ t.Fatalf("couldn't create executable: %v", err)
+ }
+ defer os.Remove(exePath)
+
+ // Need to traverse the intermediate directory.
+ os.Chmod(rootDir, 0755)
+
+ execArgs := &control.ExecArgs{
+ Filename: exePath,
+ Argv: []string{exePath},
+ WorkingDirectory: "/",
+ KUID: uid,
+ KGID: gid,
+ Capabilities: &auth.TaskCapabilities{},
+ }
- // "exe" should fail because we don't have the necessary permissions.
- if _, err := cont.executeSync(execArgs); err == nil {
- t.Fatalf("container executed without error, but an error was expected")
- }
+ // "exe" should fail because we don't have the necessary permissions.
+ if _, err := cont.executeSync(execArgs); err == nil {
+ t.Fatalf("container executed without error, but an error was expected")
+ }
- // Now we run with the capability enabled and should succeed.
- execArgs.Capabilities = &auth.TaskCapabilities{
- EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
- }
- // "exe" should not fail this time.
- if _, err := cont.executeSync(execArgs); err != nil {
- t.Fatalf("container failed to exec %v: %v", args, err)
- }
+ // Now we run with the capability enabled and should succeed.
+ execArgs.Capabilities = &auth.TaskCapabilities{
+ EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
+ }
+ // "exe" should not fail this time.
+ if _, err := cont.executeSync(execArgs); err != nil {
+ t.Fatalf("container failed to exec %v: %v", args, err)
+ }
+ })
}
}
// TestRunNonRoot checks that sandbox can be configured when running as
// non-privileged user.
func TestRunNonRoot(t *testing.T) {
- for _, conf := range configs(noOverlay...) {
- t.Logf("Running test with conf: %+v", conf)
-
- spec := testutil.NewSpecWithArgs("/bin/true")
-
- // Set a random user/group with no access to "blocked" dir.
- spec.Process.User.UID = 343
- spec.Process.User.GID = 2401
- spec.Process.Capabilities = nil
+ for name, conf := range configsWithVFS2(t, noOverlay...) {
+ t.Run(name, func(t *testing.T) {
+ spec := testutil.NewSpecWithArgs("/bin/true")
+
+ // Set a random user/group with no access to "blocked" dir.
+ spec.Process.User.UID = 343
+ spec.Process.User.GID = 2401
+ spec.Process.Capabilities = nil
+
+ // User running inside container can't list '$TMP/blocked' and would fail to
+ // mount it.
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked")
+ if err != nil {
+ t.Fatalf("ioutil.TempDir() failed: %v", err)
+ }
+ if err := os.Chmod(dir, 0700); err != nil {
+ t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
+ }
+ dir = path.Join(dir, "test")
+ if err := os.Mkdir(dir, 0755); err != nil {
+ t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
+ }
- // User running inside container can't list '$TMP/blocked' and would fail to
- // mount it.
- dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked")
- if err != nil {
- t.Fatalf("ioutil.TempDir() failed: %v", err)
- }
- if err := os.Chmod(dir, 0700); err != nil {
- t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
- }
- dir = path.Join(dir, "test")
- if err := os.Mkdir(dir, 0755); err != nil {
- t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
- }
+ src, err := ioutil.TempDir(testutil.TmpDir(), "src")
+ if err != nil {
+ t.Fatalf("ioutil.TempDir() failed: %v", err)
+ }
- src, err := ioutil.TempDir(testutil.TmpDir(), "src")
- if err != nil {
- t.Fatalf("ioutil.TempDir() failed: %v", err)
- }
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Destination: dir,
+ Source: src,
+ Type: "bind",
+ })
- spec.Mounts = append(spec.Mounts, specs.Mount{
- Destination: dir,
- Source: src,
- Type: "bind",
+ if err := run(spec, conf); err != nil {
+ t.Fatalf("error running sandbox: %v", err)
+ }
})
-
- if err := run(spec, conf); err != nil {
- t.Fatalf("error running sandbox: %v", err)
- }
}
}
// TestMountNewDir checks that runsc will create destination directory if it
// doesn't exist.
func TestMountNewDir(t *testing.T) {
- for _, conf := range configs(overlay) {
- t.Logf("Running test with conf: %+v", conf)
+ for name, conf := range configsWithVFS2(t, overlay) {
+ t.Run(name, func(t *testing.T) {
+ root, err := ioutil.TempDir(testutil.TmpDir(), "root")
+ if err != nil {
+ t.Fatal("ioutil.TempDir() failed:", err)
+ }
- root, err := ioutil.TempDir(testutil.TmpDir(), "root")
- if err != nil {
- t.Fatal("ioutil.TempDir() failed:", err)
- }
+ srcDir := path.Join(root, "src", "dir", "anotherdir")
+ if err := os.MkdirAll(srcDir, 0755); err != nil {
+ t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err)
+ }
- srcDir := path.Join(root, "src", "dir", "anotherdir")
- if err := os.MkdirAll(srcDir, 0755); err != nil {
- t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err)
- }
+ mountDir := path.Join(root, "dir", "anotherdir")
- mountDir := path.Join(root, "dir", "anotherdir")
+ spec := testutil.NewSpecWithArgs("/bin/ls", mountDir)
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Destination: mountDir,
+ Source: srcDir,
+ Type: "bind",
+ })
- spec := testutil.NewSpecWithArgs("/bin/ls", mountDir)
- spec.Mounts = append(spec.Mounts, specs.Mount{
- Destination: mountDir,
- Source: srcDir,
- Type: "bind",
+ if err := run(spec, conf); err != nil {
+ t.Fatalf("error running sandbox: %v", err)
+ }
})
-
- if err := run(spec, conf); err != nil {
- t.Fatalf("error running sandbox: %v", err)
- }
}
}
func TestReadonlyRoot(t *testing.T) {
- for _, conf := range configs(overlay) {
- t.Logf("Running test with conf: %+v", conf)
-
- spec := testutil.NewSpecWithArgs("/bin/touch", "/foo")
- spec.Root.Readonly = true
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ for name, conf := range configsWithVFS2(t, overlay) {
+ t.Run(name, func(t *testing.T) {
+ spec := testutil.NewSpecWithArgs("/bin/touch", "/foo")
+ spec.Root.Readonly = true
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Create, start and wait for the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- c, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer c.Destroy()
- if err := c.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Create, start and wait for the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
+ if err := c.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- ws, err := c.Wait()
- if err != nil {
- t.Fatalf("error waiting on container: %v", err)
- }
- if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
- t.Fatalf("container failed, waitStatus: %v", ws)
- }
+ ws, err := c.Wait()
+ if err != nil {
+ t.Fatalf("error waiting on container: %v", err)
+ }
+ if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
+ t.Fatalf("container failed, waitStatus: %v", ws)
+ }
+ })
}
}
func TestUIDMap(t *testing.T) {
- for _, conf := range configs(noOverlay...) {
- t.Logf("Running test with conf: %+v", conf)
- testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount")
- if err != nil {
- t.Fatal(err)
- }
- defer os.RemoveAll(testDir)
- testFile := path.Join(testDir, "testfile")
-
- spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile")
- uid := os.Getuid()
- gid := os.Getgid()
- spec.Linux = &specs.Linux{
- Namespaces: []specs.LinuxNamespace{
- {Type: specs.UserNamespace},
- {Type: specs.PIDNamespace},
- {Type: specs.MountNamespace},
- },
- UIDMappings: []specs.LinuxIDMapping{
- {
- ContainerID: 0,
- HostID: uint32(uid),
- Size: 1,
+ for name, conf := range configsWithVFS2(t, noOverlay...) {
+ t.Run(name, func(t *testing.T) {
+ testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount")
+ if err != nil {
+ t.Fatalf("ioutil.TempDir() failed: %v", err)
+ }
+ defer os.RemoveAll(testDir)
+ testFile := path.Join(testDir, "testfile")
+
+ spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile")
+ uid := os.Getuid()
+ gid := os.Getgid()
+ spec.Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {Type: specs.UserNamespace},
+ {Type: specs.PIDNamespace},
+ {Type: specs.MountNamespace},
},
- },
- GIDMappings: []specs.LinuxIDMapping{
- {
- ContainerID: 0,
- HostID: uint32(gid),
- Size: 1,
+ UIDMappings: []specs.LinuxIDMapping{
+ {
+ ContainerID: 0,
+ HostID: uint32(uid),
+ Size: 1,
+ },
},
- },
- }
+ GIDMappings: []specs.LinuxIDMapping{
+ {
+ ContainerID: 0,
+ HostID: uint32(gid),
+ Size: 1,
+ },
+ },
+ }
- spec.Mounts = append(spec.Mounts, specs.Mount{
- Destination: "/tmp",
- Source: testDir,
- Type: "bind",
- })
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Destination: "/tmp",
+ Source: testDir,
+ Type: "bind",
+ })
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Create, start and wait for the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- c, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer c.Destroy()
- if err := c.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Create, start and wait for the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
+ if err := c.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- ws, err := c.Wait()
- if err != nil {
- t.Fatalf("error waiting on container: %v", err)
- }
- if !ws.Exited() || ws.ExitStatus() != 0 {
- t.Fatalf("container failed, waitStatus: %v", ws)
- }
- st := syscall.Stat_t{}
- if err := syscall.Stat(testFile, &st); err != nil {
- t.Fatalf("error stat /testfile: %v", err)
- }
+ ws, err := c.Wait()
+ if err != nil {
+ t.Fatalf("error waiting on container: %v", err)
+ }
+ if !ws.Exited() || ws.ExitStatus() != 0 {
+ t.Fatalf("container failed, waitStatus: %v", ws)
+ }
+ st := syscall.Stat_t{}
+ if err := syscall.Stat(testFile, &st); err != nil {
+ t.Fatalf("error stat /testfile: %v", err)
+ }
- if st.Uid != uint32(uid) || st.Gid != uint32(gid) {
- t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid)
- }
+ if st.Uid != uint32(uid) || st.Gid != uint32(gid) {
+ t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid)
+ }
+ })
}
}
func TestReadonlyMount(t *testing.T) {
- for _, conf := range configs(overlay) {
- t.Logf("Running test with conf: %+v", conf)
-
- dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
- spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
- if err != nil {
- t.Fatalf("ioutil.TempDir() failed: %v", err)
- }
- spec.Mounts = append(spec.Mounts, specs.Mount{
- Destination: dir,
- Source: dir,
- Type: "bind",
- Options: []string{"ro"},
- })
- spec.Root.Readonly = false
-
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ for name, conf := range configsWithVFS2(t, overlay) {
+ t.Run(name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
+ spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
+ if err != nil {
+ t.Fatalf("ioutil.TempDir() failed: %v", err)
+ }
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Destination: dir,
+ Source: dir,
+ Type: "bind",
+ Options: []string{"ro"},
+ })
+ spec.Root.Readonly = false
+
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Create, start and wait for the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- c, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer c.Destroy()
- if err := c.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Create, start and wait for the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
+ if err := c.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- ws, err := c.Wait()
- if err != nil {
- t.Fatalf("error waiting on container: %v", err)
- }
- if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
- t.Fatalf("container failed, waitStatus: %v", ws)
- }
+ ws, err := c.Wait()
+ if err != nil {
+ t.Fatalf("error waiting on container: %v", err)
+ }
+ if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
+ t.Fatalf("container failed, waitStatus: %v", ws)
+ }
+ })
}
}
// TestAbbreviatedIDs checks that runsc supports using abbreviated container
// IDs in place of full IDs.
func TestAbbreviatedIDs(t *testing.T) {
- rootDir, err := testutil.SetupRootDir()
+ doAbbreviatedIDsTest(t, false)
+}
+
+func TestAbbreviatedIDsVFS2(t *testing.T) {
+ doAbbreviatedIDsTest(t, true)
+}
+
+func doAbbreviatedIDsTest(t *testing.T, vfs2 bool) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
+ conf.VFS2 = vfs2
cids := []string{
- "foo-" + testutil.UniqueContainerID(),
- "bar-" + testutil.UniqueContainerID(),
- "baz-" + testutil.UniqueContainerID(),
+ "foo-" + testutil.RandomContainerID(),
+ "bar-" + testutil.RandomContainerID(),
+ "baz-" + testutil.RandomContainerID(),
}
for _, cid := range cids {
spec := testutil.NewSpecWithArgs("sleep", "100")
- bundleDir, err := testutil.SetupBundleDir(spec)
+ bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create and start the container.
args := Args{
@@ -1605,18 +1739,27 @@ func TestAbbreviatedIDs(t *testing.T) {
}
func TestGoferExits(t *testing.T) {
+ doGoferExitTest(t, false)
+}
+
+func TestGoferExitsVFS2(t *testing.T) {
+ doGoferExitTest(t, true)
+}
+
+func doGoferExitTest(t *testing.T, vfs2 bool) {
spec := testutil.NewSpecWithArgs("/bin/sleep", "10000")
- conf := testutil.TestConfig()
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ conf := testutil.TestConfig(t)
+ conf.VFS2 = vfs2
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create and start the container.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
}
@@ -1645,7 +1788,7 @@ func TestGoferExits(t *testing.T) {
}
func TestRootNotMount(t *testing.T) {
- appSym, err := testutil.FindFile("runsc/container/test_app/test_app")
+ appSym, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
t.Fatal("error finding test_app:", err)
}
@@ -1675,27 +1818,26 @@ func TestRootNotMount(t *testing.T) {
spec.Root.Readonly = true
spec.Mounts = nil
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
if err := run(spec, conf); err != nil {
t.Fatalf("error running sandbox: %v", err)
}
}
func TestUserLog(t *testing.T) {
- app, err := testutil.FindFile("runsc/container/test_app/test_app")
+ app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
t.Fatal("error finding test_app:", err)
}
// sched_rr_get_interval = 148 - not implemented in gvisor.
spec := testutil.NewSpecWithArgs(app, "syscall", "--syscall=148")
- conf := testutil.TestConfig()
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ conf := testutil.TestConfig(t)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
dir, err := ioutil.TempDir(testutil.TmpDir(), "user_log_test")
if err != nil {
@@ -1705,7 +1847,7 @@ func TestUserLog(t *testing.T) {
// Create, start and wait for the container.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
UserLog: userLog,
@@ -1723,78 +1865,85 @@ func TestUserLog(t *testing.T) {
if err != nil {
t.Fatalf("error opening user log file %q: %v", userLog, err)
}
- if want := "Unsupported syscall: sched_rr_get_interval"; !strings.Contains(string(out), want) {
+ if want := "Unsupported syscall sched_rr_get_interval("; !strings.Contains(string(out), want) {
t.Errorf("user log file doesn't contain %q, out: %s", want, string(out))
}
}
func TestWaitOnExitedSandbox(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- // Run a shell that sleeps for 1 second and then exits with a
- // non-zero code.
- const wantExit = 17
- cmd := fmt.Sprintf("sleep 1; exit %d", wantExit)
- spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd)
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ // Run a shell that sleeps for 1 second and then exits with a
+ // non-zero code.
+ const wantExit = 17
+ cmd := fmt.Sprintf("sleep 1; exit %d", wantExit)
+ spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- // Create and Start the container.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- c, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer c.Destroy()
- if err := c.Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ // Create and Start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
+ if err := c.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- // Wait on the sandbox. This will make an RPC to the sandbox
- // and get the actual exit status of the application.
- ws, err := c.Wait()
- if err != nil {
- t.Fatalf("error waiting on container: %v", err)
- }
- if got := ws.ExitStatus(); got != wantExit {
- t.Errorf("got exit status %d, want %d", got, wantExit)
- }
+ // Wait on the sandbox. This will make an RPC to the sandbox
+ // and get the actual exit status of the application.
+ ws, err := c.Wait()
+ if err != nil {
+ t.Fatalf("error waiting on container: %v", err)
+ }
+ if got := ws.ExitStatus(); got != wantExit {
+ t.Errorf("got exit status %d, want %d", got, wantExit)
+ }
- // Now the sandbox has exited, but the zombie sandbox process
- // still exists. Calling Wait() now will return the sandbox
- // exit status.
- ws, err = c.Wait()
- if err != nil {
- t.Fatalf("error waiting on container: %v", err)
- }
- if got := ws.ExitStatus(); got != wantExit {
- t.Errorf("got exit status %d, want %d", got, wantExit)
- }
+ // Now the sandbox has exited, but the zombie sandbox process
+ // still exists. Calling Wait() now will return the sandbox
+ // exit status.
+ ws, err = c.Wait()
+ if err != nil {
+ t.Fatalf("error waiting on container: %v", err)
+ }
+ if got := ws.ExitStatus(); got != wantExit {
+ t.Errorf("got exit status %d, want %d", got, wantExit)
+ }
+ })
}
}
func TestDestroyNotStarted(t *testing.T) {
+ doDestroyNotStartedTest(t, false)
+}
+
+func TestDestroyNotStartedVFS2(t *testing.T) {
+ doDestroyNotStartedTest(t, true)
+}
+
+func doDestroyNotStartedTest(t *testing.T, vfs2 bool) {
spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
- conf := testutil.TestConfig()
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ conf := testutil.TestConfig(t)
+ conf.VFS2 = vfs2
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create the container and check that it can be destroyed.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
}
@@ -1809,19 +1958,27 @@ func TestDestroyNotStarted(t *testing.T) {
// TestDestroyStarting attempts to force a race between start and destroy.
func TestDestroyStarting(t *testing.T) {
+ doDestroyStartingTest(t, false)
+}
+
+func TestDestroyStartedVFS2(t *testing.T) {
+ doDestroyStartingTest(t, true)
+}
+
+func doDestroyStartingTest(t *testing.T, vfs2 bool) {
for i := 0; i < 10; i++ {
spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
- conf := testutil.TestConfig()
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ conf := testutil.TestConfig(t)
+ conf.VFS2 = vfs2
+ rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create the container and check that it can be destroyed.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
}
@@ -1856,23 +2013,23 @@ func TestDestroyStarting(t *testing.T) {
}
func TestCreateWorkingDir(t *testing.T) {
- for _, conf := range configs(overlay) {
- t.Logf("Running test with conf: %+v", conf)
-
- tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create")
- if err != nil {
- t.Fatalf("ioutil.TempDir() failed: %v", err)
- }
- dir := path.Join(tmpDir, "new/working/dir")
+ for name, conf := range configsWithVFS2(t, overlay) {
+ t.Run(name, func(t *testing.T) {
+ tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create")
+ if err != nil {
+ t.Fatalf("ioutil.TempDir() failed: %v", err)
+ }
+ dir := path.Join(tmpDir, "new/working/dir")
- // touch will fail if the directory doesn't exist.
- spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
- spec.Process.Cwd = dir
- spec.Root.Readonly = true
+ // touch will fail if the directory doesn't exist.
+ spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
+ spec.Process.Cwd = dir
+ spec.Root.Readonly = true
- if err := run(spec, conf); err != nil {
- t.Fatalf("Error running container: %v", err)
- }
+ if err := run(spec, conf); err != nil {
+ t.Fatalf("Error running container: %v", err)
+ }
+ })
}
}
@@ -1929,16 +2086,15 @@ func TestMountPropagation(t *testing.T) {
},
}
- conf := testutil.TestConfig()
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ conf := testutil.TestConfig(t)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
}
@@ -1980,87 +2136,87 @@ func TestMountPropagation(t *testing.T) {
}
func TestMountSymlink(t *testing.T) {
- for _, conf := range configs(overlay) {
- t.Logf("Running test with conf: %+v", conf)
-
- dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
- if err != nil {
- t.Fatalf("ioutil.TempDir() failed: %v", err)
- }
+ for name, conf := range configsWithVFS2(t, overlay) {
+ t.Run(name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
+ if err != nil {
+ t.Fatalf("ioutil.TempDir() failed: %v", err)
+ }
+ defer os.RemoveAll(dir)
- source := path.Join(dir, "source")
- target := path.Join(dir, "target")
- for _, path := range []string{source, target} {
- if err := os.MkdirAll(path, 0777); err != nil {
- t.Fatalf("os.MkdirAll(): %v", err)
+ source := path.Join(dir, "source")
+ target := path.Join(dir, "target")
+ for _, path := range []string{source, target} {
+ if err := os.MkdirAll(path, 0777); err != nil {
+ t.Fatalf("os.MkdirAll(): %v", err)
+ }
}
- }
- f, err := os.Create(path.Join(source, "file"))
- if err != nil {
- t.Fatalf("os.Create(): %v", err)
- }
- f.Close()
+ f, err := os.Create(path.Join(source, "file"))
+ if err != nil {
+ t.Fatalf("os.Create(): %v", err)
+ }
+ f.Close()
- link := path.Join(dir, "link")
- if err := os.Symlink(target, link); err != nil {
- t.Fatalf("os.Symlink(%q, %q): %v", target, link, err)
- }
+ link := path.Join(dir, "link")
+ if err := os.Symlink(target, link); err != nil {
+ t.Fatalf("os.Symlink(%q, %q): %v", target, link, err)
+ }
- spec := testutil.NewSpecWithArgs("/bin/sleep", "1000")
+ spec := testutil.NewSpecWithArgs("/bin/sleep", "1000")
- // Mount to a symlink to ensure the mount code will follow it and mount
- // at the symlink target.
- spec.Mounts = append(spec.Mounts, specs.Mount{
- Type: "bind",
- Destination: link,
- Source: source,
- })
+ // Mount to a symlink to ensure the mount code will follow it and mount
+ // at the symlink target.
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Type: "bind",
+ Destination: link,
+ Source: source,
+ })
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- }
- cont, err := New(conf, args)
- if err != nil {
- t.Fatalf("creating container: %v", err)
- }
- defer cont.Destroy()
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("creating container: %v", err)
+ }
+ defer cont.Destroy()
- if err := cont.Start(conf); err != nil {
- t.Fatalf("starting container: %v", err)
- }
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("starting container: %v", err)
+ }
- // Check that symlink was resolved and mount was created where the symlink
- // is pointing to.
- file := path.Join(target, "file")
- execArgs := &control.ExecArgs{
- Filename: "/usr/bin/test",
- Argv: []string{"test", "-f", file},
- }
- if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
- t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
- }
+ // Check that symlink was resolved and mount was created where the symlink
+ // is pointing to.
+ file := path.Join(target, "file")
+ execArgs := &control.ExecArgs{
+ Filename: "/usr/bin/test",
+ Argv: []string{"test", "-f", file},
+ }
+ if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
+ t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
+ }
+ })
}
}
// Check that --net-raw disables the CAP_NET_RAW capability.
func TestNetRaw(t *testing.T) {
capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10)
- app, err := testutil.FindFile("runsc/container/test_app/test_app")
+ app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
t.Fatal("error finding test_app:", err)
}
for _, enableRaw := range []bool{true, false} {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.EnableRaw = enableRaw
test := "--enabled"
@@ -2075,40 +2231,98 @@ func TestNetRaw(t *testing.T) {
}
}
-// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works.
-func TestOverlayfsStaleRead(t *testing.T) {
- conf := testutil.TestConfig()
- conf.OverlayfsStaleRead = true
+// TestTTYField checks TTY field returned by container.Processes().
+func TestTTYField(t *testing.T) {
+ stop := testutil.StartReaper()
+ defer stop()
- in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in")
+ testApp, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
- t.Fatalf("ioutil.TempFile() failed: %v", err)
- }
- defer in.Close()
- if _, err := in.WriteString("stale data"); err != nil {
- t.Fatalf("in.Write() failed: %v", err)
+ t.Fatal("error finding test_app:", err)
}
- out, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.out")
- if err != nil {
- t.Fatalf("ioutil.TempFile() failed: %v", err)
+ testCases := []struct {
+ name string
+ useTTY bool
+ wantTTYField string
+ }{
+ {
+ name: "no tty",
+ useTTY: false,
+ wantTTYField: "?",
+ },
+ {
+ name: "tty used",
+ useTTY: true,
+ wantTTYField: "pts/0",
+ },
}
- defer out.Close()
- const want = "foobar"
- cmd := fmt.Sprintf("cat %q && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name())
- spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd)
- if err := run(spec, conf); err != nil {
- t.Fatalf("Error running container: %v", err)
- }
+ for _, test := range testCases {
+ for _, vfs2 := range []bool{false, true} {
+ name := test.name
+ if vfs2 {
+ name += "-vfs2"
+ }
+ t.Run(name, func(t *testing.T) {
+ conf := testutil.TestConfig(t)
+ conf.VFS2 = vfs2
+
+ // We will run /bin/sleep, possibly with an open TTY.
+ cmd := []string{"/bin/sleep", "10000"}
+ if test.useTTY {
+ // Run inside the "pty-runner".
+ cmd = append([]string{testApp, "pty-runner"}, cmd...)
+ }
- gotBytes, err := ioutil.ReadAll(out)
- if err != nil {
- t.Fatalf("out.Read() failed: %v", err)
- }
- got := strings.TrimSpace(string(gotBytes))
- if want != got {
- t.Errorf("Wrong content in out file, got: %q. want: %q", got, want)
+ spec := testutil.NewSpecWithArgs(cmd...)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
+
+ // Create and start the container.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
+ if err := c.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
+
+ // Wait for sleep to be running, and check the TTY
+ // field.
+ var gotTTYField string
+ cb := func() error {
+ ps, err := c.Processes()
+ if err != nil {
+ err = fmt.Errorf("error getting process data from container: %v", err)
+ return &backoff.PermanentError{Err: err}
+ }
+ for _, p := range ps {
+ if strings.Contains(p.Cmd, "sleep") {
+ gotTTYField = p.TTY
+ return nil
+ }
+ }
+ return fmt.Errorf("sleep not running")
+ }
+ if err := testutil.Poll(cb, 30*time.Second); err != nil {
+ t.Fatalf("error waiting for sleep process: %v", err)
+ }
+
+ if gotTTYField != test.wantTTYField {
+ t.Errorf("tty field got %q, want %q", gotTTYField, test.wantTTYField)
+ }
+ })
+ }
}
}
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index a5a62378c..e189648f4 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -22,23 +22,24 @@ import (
"path"
"path/filepath"
"strings"
- "sync"
"syscall"
"testing"
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/test/testutil"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/specutils"
- "gvisor.dev/gvisor/runsc/testutil"
)
func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
var specs []*specs.Spec
var ids []string
- rootID := testutil.UniqueContainerID()
+ rootID := testutil.RandomContainerID()
for i, cmd := range cmds {
spec := testutil.NewSpecWithArgs(cmd...)
@@ -52,7 +53,7 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeContainer,
specutils.ContainerdSandboxIDAnnotation: rootID,
}
- ids = append(ids, testutil.UniqueContainerID())
+ ids = append(ids, testutil.RandomContainerID())
}
specs = append(specs, spec)
}
@@ -64,23 +65,16 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.")
}
+ cu := cleanup.Cleanup{}
+ defer cu.Clean()
+
var containers []*Container
- var bundles []string
- cleanup := func() {
- for _, c := range containers {
- c.Destroy()
- }
- for _, b := range bundles {
- os.RemoveAll(b)
- }
- }
for i, spec := range specs {
- bundleDir, err := testutil.SetupBundleDir(spec)
+ bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
if err != nil {
- cleanup()
return nil, nil, fmt.Errorf("error setting up container: %v", err)
}
- bundles = append(bundles, bundleDir)
+ cu.Add(cleanup)
args := Args{
ID: ids[i],
@@ -89,45 +83,46 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
}
cont, err := New(conf, args)
if err != nil {
- cleanup()
return nil, nil, fmt.Errorf("error creating container: %v", err)
}
+ cu.Add(func() { cont.Destroy() })
containers = append(containers, cont)
if err := cont.Start(conf); err != nil {
- cleanup()
return nil, nil, fmt.Errorf("error starting container: %v", err)
}
}
- return containers, cleanup, nil
+
+ return containers, cu.Release(), nil
}
type execDesc struct {
c *Container
cmd []string
want int
- desc string
+ name string
}
-func execMany(execs []execDesc) error {
+func execMany(t *testing.T, execs []execDesc) {
for _, exec := range execs {
- args := &control.ExecArgs{Argv: exec.cmd}
- if ws, err := exec.c.executeSync(args); err != nil {
- return fmt.Errorf("error executing %+v: %v", args, err)
- } else if ws.ExitStatus() != exec.want {
- return fmt.Errorf("%q: exec %q got exit status: %d, want: %d", exec.desc, exec.cmd, ws.ExitStatus(), exec.want)
- }
+ t.Run(exec.name, func(t *testing.T) {
+ args := &control.ExecArgs{Argv: exec.cmd}
+ if ws, err := exec.c.executeSync(args); err != nil {
+ t.Errorf("error executing %+v: %v", args, err)
+ } else if ws.ExitStatus() != exec.want {
+ t.Errorf("%q: exec %q got exit status: %d, want: %d", exec.name, exec.cmd, ws.ExitStatus(), exec.want)
+ }
+ })
}
- return nil
}
func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
for _, spec := range pod {
- spec.Annotations[path.Join(boot.MountPrefix, name, "source")] = mount.Source
- spec.Annotations[path.Join(boot.MountPrefix, name, "type")] = mount.Type
- spec.Annotations[path.Join(boot.MountPrefix, name, "share")] = "pod"
+ spec.Annotations[boot.MountPrefix+name+".source"] = mount.Source
+ spec.Annotations[boot.MountPrefix+name+".type"] = mount.Type
+ spec.Annotations[boot.MountPrefix+name+".share"] = "pod"
if len(mount.Options) > 0 {
- spec.Annotations[path.Join(boot.MountPrefix, name, "options")] = strings.Join(mount.Options, ",")
+ spec.Annotations[boot.MountPrefix+name+".options"] = strings.Join(mount.Options, ",")
}
}
}
@@ -135,161 +130,161 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
// TestMultiContainerSanity checks that it is possible to run 2 dead-simple
// containers in the same sandbox.
func TestMultiContainerSanity(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- rootDir, err := testutil.SetupRootDir()
- if err != nil {
- t.Fatalf("error creating root dir: %v", err)
- }
- defer os.RemoveAll(rootDir)
- conf.RootDir = rootDir
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("error creating root dir: %v", err)
+ }
+ defer cleanup()
+ conf.RootDir = rootDir
- // Setup the containers.
- sleep := []string{"sleep", "100"}
- specs, ids := createSpecs(sleep, sleep)
- containers, cleanup, err := startContainers(conf, specs, ids)
- if err != nil {
- t.Fatalf("error starting containers: %v", err)
- }
- defer cleanup()
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ specs, ids := createSpecs(sleep, sleep)
+ containers, cleanup, err := startContainers(conf, specs, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
- // Check via ps that multiple processes are running.
- expectedPL := []*control.Process{
- {PID: 1, Cmd: "sleep"},
- }
- if err := waitForProcessList(containers[0], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
- expectedPL = []*control.Process{
- {PID: 2, Cmd: "sleep"},
- }
- if err := waitForProcessList(containers[1], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
+ // Check via ps that multiple processes are running.
+ expectedPL := []*control.Process{
+ newProcessBuilder().PID(1).PPID(0).Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(containers[0], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ expectedPL = []*control.Process{
+ newProcessBuilder().PID(2).PPID(0).Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(containers[1], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ })
}
}
// TestMultiPIDNS checks that it is possible to run 2 dead-simple
// containers in the same sandbox with different pidns.
func TestMultiPIDNS(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- rootDir, err := testutil.SetupRootDir()
- if err != nil {
- t.Fatalf("error creating root dir: %v", err)
- }
- defer os.RemoveAll(rootDir)
- conf.RootDir = rootDir
-
- // Setup the containers.
- sleep := []string{"sleep", "100"}
- testSpecs, ids := createSpecs(sleep, sleep)
- testSpecs[1].Linux = &specs.Linux{
- Namespaces: []specs.LinuxNamespace{
- {
- Type: "pid",
+ for name, conf := range configs(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("error creating root dir: %v", err)
+ }
+ defer cleanup()
+ conf.RootDir = rootDir
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ testSpecs, ids := createSpecs(sleep, sleep)
+ testSpecs[1].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {
+ Type: "pid",
+ },
},
- },
- }
+ }
- containers, cleanup, err := startContainers(conf, testSpecs, ids)
- if err != nil {
- t.Fatalf("error starting containers: %v", err)
- }
- defer cleanup()
+ containers, cleanup, err := startContainers(conf, testSpecs, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
- // Check via ps that multiple processes are running.
- expectedPL := []*control.Process{
- {PID: 1, Cmd: "sleep"},
- }
- if err := waitForProcessList(containers[0], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
- expectedPL = []*control.Process{
- {PID: 1, Cmd: "sleep"},
- }
- if err := waitForProcessList(containers[1], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
+ // Check via ps that multiple processes are running.
+ expectedPL := []*control.Process{
+ newProcessBuilder().PID(1).Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(containers[0], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ expectedPL = []*control.Process{
+ newProcessBuilder().PID(1).Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(containers[1], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ })
}
}
// TestMultiPIDNSPath checks the pidns path.
func TestMultiPIDNSPath(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- rootDir, err := testutil.SetupRootDir()
- if err != nil {
- t.Fatalf("error creating root dir: %v", err)
- }
- defer os.RemoveAll(rootDir)
- conf.RootDir = rootDir
-
- // Setup the containers.
- sleep := []string{"sleep", "100"}
- testSpecs, ids := createSpecs(sleep, sleep, sleep)
- testSpecs[0].Linux = &specs.Linux{
- Namespaces: []specs.LinuxNamespace{
- {
- Type: "pid",
- Path: "/proc/1/ns/pid",
+ for name, conf := range configs(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("error creating root dir: %v", err)
+ }
+ defer cleanup()
+ conf.RootDir = rootDir
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ testSpecs, ids := createSpecs(sleep, sleep, sleep)
+ testSpecs[0].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {
+ Type: "pid",
+ Path: "/proc/1/ns/pid",
+ },
},
- },
- }
- testSpecs[1].Linux = &specs.Linux{
- Namespaces: []specs.LinuxNamespace{
- {
- Type: "pid",
- Path: "/proc/1/ns/pid",
+ }
+ testSpecs[1].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {
+ Type: "pid",
+ Path: "/proc/1/ns/pid",
+ },
},
- },
- }
- testSpecs[2].Linux = &specs.Linux{
- Namespaces: []specs.LinuxNamespace{
- {
- Type: "pid",
- Path: "/proc/2/ns/pid",
+ }
+ testSpecs[2].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {
+ Type: "pid",
+ Path: "/proc/2/ns/pid",
+ },
},
- },
- }
+ }
- containers, cleanup, err := startContainers(conf, testSpecs, ids)
- if err != nil {
- t.Fatalf("error starting containers: %v", err)
- }
- defer cleanup()
+ containers, cleanup, err := startContainers(conf, testSpecs, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
- // Check via ps that multiple processes are running.
- expectedPL := []*control.Process{
- {PID: 1, Cmd: "sleep"},
- }
- if err := waitForProcessList(containers[0], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
- if err := waitForProcessList(containers[2], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
+ // Check via ps that multiple processes are running.
+ expectedPL := []*control.Process{
+ newProcessBuilder().PID(1).PPID(0).Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(containers[0], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ if err := waitForProcessList(containers[2], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
- expectedPL = []*control.Process{
- {PID: 2, Cmd: "sleep"},
- }
- if err := waitForProcessList(containers[1], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
+ expectedPL = []*control.Process{
+ newProcessBuilder().PID(2).PPID(0).Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(containers[1], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ })
}
}
func TestMultiContainerWait(t *testing.T) {
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// The first container should run the entire duration of the test.
@@ -306,7 +301,7 @@ func TestMultiContainerWait(t *testing.T) {
// Check via ps that multiple processes are running.
expectedPL := []*control.Process{
- {PID: 2, Cmd: "sleep"},
+ newProcessBuilder().PID(2).PPID(0).Cmd("sleep").Process(),
}
if err := waitForProcessList(containers[1], expectedPL); err != nil {
t.Errorf("failed to wait for sleep to start: %v", err)
@@ -351,7 +346,7 @@ func TestMultiContainerWait(t *testing.T) {
// After Wait returns, ensure that the root container is running and
// the child has finished.
expectedPL = []*control.Process{
- {PID: 1, Cmd: "sleep"},
+ newProcessBuilder().Cmd("sleep").Process(),
}
if err := waitForProcessList(containers[0], expectedPL); err != nil {
t.Errorf("failed to wait for %q to start: %v", strings.Join(containers[0].Spec.Process.Args, " "), err)
@@ -361,13 +356,13 @@ func TestMultiContainerWait(t *testing.T) {
// TestExecWait ensures what we can wait containers and individual processes in the
// sandbox that have already exited.
func TestExecWait(t *testing.T) {
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// The first container should run the entire duration of the test.
@@ -383,7 +378,7 @@ func TestExecWait(t *testing.T) {
// Check via ps that process is running.
expectedPL := []*control.Process{
- {PID: 2, Cmd: "sleep"},
+ newProcessBuilder().Cmd("sleep").Process(),
}
if err := waitForProcessList(containers[1], expectedPL); err != nil {
t.Fatalf("failed to wait for sleep to start: %v", err)
@@ -418,7 +413,7 @@ func TestExecWait(t *testing.T) {
// Wait for the exec'd process to exit.
expectedPL = []*control.Process{
- {PID: 1, Cmd: "sleep"},
+ newProcessBuilder().PID(1).Cmd("sleep").Process(),
}
if err := waitForProcessList(containers[0], expectedPL); err != nil {
t.Fatalf("failed to wait for second container to stop: %v", err)
@@ -457,13 +452,13 @@ func TestMultiContainerMount(t *testing.T) {
})
// Setup the containers.
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
containers, cleanup, err := startContainers(conf, sps, ids)
@@ -484,175 +479,177 @@ func TestMultiContainerMount(t *testing.T) {
// TestMultiContainerSignal checks that it is possible to signal individual
// containers without killing the entire sandbox.
func TestMultiContainerSignal(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- rootDir, err := testutil.SetupRootDir()
- if err != nil {
- t.Fatalf("error creating root dir: %v", err)
- }
- defer os.RemoveAll(rootDir)
- conf.RootDir = rootDir
-
- // Setup the containers.
- sleep := []string{"sleep", "100"}
- specs, ids := createSpecs(sleep, sleep)
- containers, cleanup, err := startContainers(conf, specs, ids)
- if err != nil {
- t.Fatalf("error starting containers: %v", err)
- }
- defer cleanup()
+ for name, conf := range configs(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("error creating root dir: %v", err)
+ }
+ defer cleanup()
+ conf.RootDir = rootDir
- // Check via ps that container 1 process is running.
- expectedPL := []*control.Process{
- {PID: 2, Cmd: "sleep"},
- }
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ specs, ids := createSpecs(sleep, sleep)
+ containers, cleanup, err := startContainers(conf, specs, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
- if err := waitForProcessList(containers[1], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
+ // Check via ps that container 1 process is running.
+ expectedPL := []*control.Process{
+ newProcessBuilder().Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(containers[1], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
- // Kill process 2.
- if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil {
- t.Errorf("failed to kill process 2: %v", err)
- }
+ // Kill process 2.
+ if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil {
+ t.Errorf("failed to kill process 2: %v", err)
+ }
- // Make sure process 1 is still running.
- expectedPL = []*control.Process{
- {PID: 1, Cmd: "sleep"},
- }
- if err := waitForProcessList(containers[0], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
+ // Make sure process 1 is still running.
+ expectedPL = []*control.Process{
+ newProcessBuilder().PID(1).Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(containers[0], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
- // goferPid is reset when container is destroyed.
- goferPid := containers[1].GoferPid
+ // goferPid is reset when container is destroyed.
+ goferPid := containers[1].GoferPid
- // Destroy container and ensure container's gofer process has exited.
- if err := containers[1].Destroy(); err != nil {
- t.Errorf("failed to destroy container: %v", err)
- }
- _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) {
- cpid, err := syscall.Wait4(goferPid, nil, 0, nil)
- return uintptr(cpid), 0, err
- })
- if err != syscall.ECHILD {
- t.Errorf("error waiting for gofer to exit: %v", err)
- }
- // Make sure process 1 is still running.
- if err := waitForProcessList(containers[0], expectedPL); err != nil {
- t.Errorf("failed to wait for sleep to start: %v", err)
- }
+ // Destroy container and ensure container's gofer process has exited.
+ if err := containers[1].Destroy(); err != nil {
+ t.Errorf("failed to destroy container: %v", err)
+ }
+ _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) {
+ cpid, err := syscall.Wait4(goferPid, nil, 0, nil)
+ return uintptr(cpid), 0, err
+ })
+ if err != syscall.ECHILD {
+ t.Errorf("error waiting for gofer to exit: %v", err)
+ }
+ // Make sure process 1 is still running.
+ if err := waitForProcessList(containers[0], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
- // Now that process 2 is gone, ensure we get an error trying to
- // signal it again.
- if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil {
- t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID)
- }
+ // Now that process 2 is gone, ensure we get an error trying to
+ // signal it again.
+ if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil {
+ t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID)
+ }
- // Kill process 1.
- if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil {
- t.Errorf("failed to kill process 1: %v", err)
- }
+ // Kill process 1.
+ if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil {
+ t.Errorf("failed to kill process 1: %v", err)
+ }
- // Ensure that container's gofer and sandbox process are no more.
- err = blockUntilWaitable(containers[0].GoferPid)
- if err != nil && err != syscall.ECHILD {
- t.Errorf("error waiting for gofer to exit: %v", err)
- }
+ // Ensure that container's gofer and sandbox process are no more.
+ err = blockUntilWaitable(containers[0].GoferPid)
+ if err != nil && err != syscall.ECHILD {
+ t.Errorf("error waiting for gofer to exit: %v", err)
+ }
- err = blockUntilWaitable(containers[0].Sandbox.Pid)
- if err != nil && err != syscall.ECHILD {
- t.Errorf("error waiting for sandbox to exit: %v", err)
- }
+ err = blockUntilWaitable(containers[0].Sandbox.Pid)
+ if err != nil && err != syscall.ECHILD {
+ t.Errorf("error waiting for sandbox to exit: %v", err)
+ }
- // The sentry should be gone, so signaling should yield an error.
- if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil {
- t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID)
- }
+ // The sentry should be gone, so signaling should yield an error.
+ if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil {
+ t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID)
+ }
- if err := containers[0].Destroy(); err != nil {
- t.Errorf("failed to destroy container: %v", err)
- }
+ if err := containers[0].Destroy(); err != nil {
+ t.Errorf("failed to destroy container: %v", err)
+ }
+ })
}
}
// TestMultiContainerDestroy checks that container are properly cleaned-up when
// they are destroyed.
func TestMultiContainerDestroy(t *testing.T) {
- app, err := testutil.FindFile("runsc/container/test_app/test_app")
+ app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
t.Fatal("error finding test_app:", err)
}
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- rootDir, err := testutil.SetupRootDir()
- if err != nil {
- t.Fatalf("error creating root dir: %v", err)
- }
- defer os.RemoveAll(rootDir)
- conf.RootDir = rootDir
-
- // First container will remain intact while the second container is killed.
- podSpecs, ids := createSpecs(
- []string{"sleep", "100"},
- []string{app, "fork-bomb"})
-
- // Run the fork bomb in a PID namespace to prevent processes to be
- // re-parented to PID=1 in the root container.
- podSpecs[1].Linux = &specs.Linux{
- Namespaces: []specs.LinuxNamespace{{Type: "pid"}},
- }
- containers, cleanup, err := startContainers(conf, podSpecs, ids)
- if err != nil {
- t.Fatalf("error starting containers: %v", err)
- }
- defer cleanup()
+ for name, conf := range configs(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("error creating root dir: %v", err)
+ }
+ defer cleanup()
+ conf.RootDir = rootDir
+
+ // First container will remain intact while the second container is killed.
+ podSpecs, ids := createSpecs(
+ []string{"sleep", "100"},
+ []string{app, "fork-bomb"})
+
+ // Run the fork bomb in a PID namespace to prevent processes to be
+ // re-parented to PID=1 in the root container.
+ podSpecs[1].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{{Type: "pid"}},
+ }
+ containers, cleanup, err := startContainers(conf, podSpecs, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
- // Exec more processes to ensure signal all works for exec'd processes too.
- args := &control.ExecArgs{
- Filename: app,
- Argv: []string{app, "fork-bomb"},
- }
- if _, err := containers[1].Execute(args); err != nil {
- t.Fatalf("error exec'ing: %v", err)
- }
+ // Exec more processes to ensure signal all works for exec'd processes too.
+ args := &control.ExecArgs{
+ Filename: app,
+ Argv: []string{app, "fork-bomb"},
+ }
+ if _, err := containers[1].Execute(args); err != nil {
+ t.Fatalf("error exec'ing: %v", err)
+ }
- // Let it brew...
- time.Sleep(500 * time.Millisecond)
+ // Let it brew...
+ time.Sleep(500 * time.Millisecond)
- if err := containers[1].Destroy(); err != nil {
- t.Fatalf("error destroying container: %v", err)
- }
+ if err := containers[1].Destroy(); err != nil {
+ t.Fatalf("error destroying container: %v", err)
+ }
- // Check that destroy killed all processes belonging to the container and
- // waited for them to exit before returning.
- pss, err := containers[0].Sandbox.Processes("")
- if err != nil {
- t.Fatalf("error getting process data from sandbox: %v", err)
- }
- expectedPL := []*control.Process{{PID: 1, Cmd: "sleep"}}
- if !procListsEqual(pss, expectedPL) {
- t.Errorf("container got process list: %s, want: %s", procListToString(pss), procListToString(expectedPL))
- }
+ // Check that destroy killed all processes belonging to the container and
+ // waited for them to exit before returning.
+ pss, err := containers[0].Sandbox.Processes("")
+ if err != nil {
+ t.Fatalf("error getting process data from sandbox: %v", err)
+ }
+ expectedPL := []*control.Process{
+ newProcessBuilder().PID(1).Cmd("sleep").Process(),
+ }
+ if !procListsEqual(pss, expectedPL) {
+ t.Errorf("container got process list: %s, want: %s: error: %v",
+ procListToString(pss), procListToString(expectedPL), err)
+ }
- // Check that cont.Destroy is safe to call multiple times.
- if err := containers[1].Destroy(); err != nil {
- t.Errorf("error destroying container: %v", err)
- }
+ // Check that cont.Destroy is safe to call multiple times.
+ if err := containers[1].Destroy(); err != nil {
+ t.Errorf("error destroying container: %v", err)
+ }
+ })
}
}
func TestMultiContainerProcesses(t *testing.T) {
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// Note: use curly braces to keep 'sh' process around. Otherwise, shell
@@ -669,7 +666,7 @@ func TestMultiContainerProcesses(t *testing.T) {
// Check root's container process list doesn't include other containers.
expectedPL0 := []*control.Process{
- {PID: 1, Cmd: "sleep"},
+ newProcessBuilder().PID(1).Cmd("sleep").Process(),
}
if err := waitForProcessList(containers[0], expectedPL0); err != nil {
t.Errorf("failed to wait for process to start: %v", err)
@@ -677,8 +674,8 @@ func TestMultiContainerProcesses(t *testing.T) {
// Same for the other container.
expectedPL1 := []*control.Process{
- {PID: 2, Cmd: "sh"},
- {PID: 3, PPID: 2, Cmd: "sleep"},
+ newProcessBuilder().PID(2).Cmd("sh").Process(),
+ newProcessBuilder().PID(3).PPID(2).Cmd("sleep").Process(),
}
if err := waitForProcessList(containers[1], expectedPL1); err != nil {
t.Errorf("failed to wait for process to start: %v", err)
@@ -692,7 +689,7 @@ func TestMultiContainerProcesses(t *testing.T) {
if _, err := containers[1].Execute(args); err != nil {
t.Fatalf("error exec'ing: %v", err)
}
- expectedPL1 = append(expectedPL1, &control.Process{PID: 4, Cmd: "sleep"})
+ expectedPL1 = append(expectedPL1, newProcessBuilder().PID(4).Cmd("sleep").Process())
if err := waitForProcessList(containers[1], expectedPL1); err != nil {
t.Errorf("failed to wait for process to start: %v", err)
}
@@ -705,13 +702,13 @@ func TestMultiContainerProcesses(t *testing.T) {
// TestMultiContainerKillAll checks that all process that belong to a container
// are killed when SIGKILL is sent to *all* processes in that container.
func TestMultiContainerKillAll(t *testing.T) {
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
for _, tc := range []struct {
@@ -720,7 +717,7 @@ func TestMultiContainerKillAll(t *testing.T) {
{killContainer: true},
{killContainer: false},
} {
- app, err := testutil.FindFile("runsc/container/test_app/test_app")
+ app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
t.Fatal("error finding test_app:", err)
}
@@ -738,11 +735,11 @@ func TestMultiContainerKillAll(t *testing.T) {
// Wait until all processes are created.
rootProcCount := int(math.Pow(2, 3) - 1)
if err := waitForProcessCount(containers[0], rootProcCount); err != nil {
- t.Fatal(err)
+ t.Fatalf("error waitting for processes: %v", err)
}
procCount := int(math.Pow(2, 5) - 1)
if err := waitForProcessCount(containers[1], procCount); err != nil {
- t.Fatal(err)
+ t.Fatalf("error waiting for processes: %v", err)
}
// Exec more processes to ensure signal works for exec'd processes too.
@@ -756,7 +753,7 @@ func TestMultiContainerKillAll(t *testing.T) {
// Wait for these new processes to start.
procCount += int(math.Pow(2, 3) - 1)
if err := waitForProcessCount(containers[1], procCount); err != nil {
- t.Fatal(err)
+ t.Fatalf("error waiting for processes: %v", err)
}
if tc.killContainer {
@@ -789,11 +786,11 @@ func TestMultiContainerKillAll(t *testing.T) {
// Check that all processes are gone.
if err := waitForProcessCount(containers[1], 0); err != nil {
- t.Fatal(err)
+ t.Fatalf("error waiting for processes: %v", err)
}
// Check that root container was not affected.
if err := waitForProcessCount(containers[0], rootProcCount); err != nil {
- t.Fatal(err)
+ t.Fatalf("error waiting for processes: %v", err)
}
}
}
@@ -803,18 +800,17 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
[]string{"/bin/sleep", "100"},
[]string{"/bin/sleep", "100"})
- conf := testutil.TestConfig()
- rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf)
+ conf := testutil.TestConfig(t)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(rootBundleDir)
+ defer cleanup()
rootArgs := Args{
ID: ids[0],
Spec: specs[0],
- BundleDir: rootBundleDir,
+ BundleDir: bundleDir,
}
root, err := New(conf, rootArgs)
if err != nil {
@@ -826,11 +822,11 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
}
// Create and destroy sub-container.
- bundleDir, err := testutil.SetupBundleDir(specs[1])
+ bundleDir, cleanupSub, err := testutil.SetupBundleDir(specs[1])
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(bundleDir)
+ defer cleanupSub()
args := Args{
ID: ids[1],
@@ -857,18 +853,17 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
}
specs, ids := createSpecs(cmds...)
- conf := testutil.TestConfig()
- rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf)
+ conf := testutil.TestConfig(t)
+ rootDir, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(rootBundleDir)
+ defer cleanup()
rootArgs := Args{
ID: ids[0],
Spec: specs[0],
- BundleDir: rootBundleDir,
+ BundleDir: bundleDir,
}
root, err := New(conf, rootArgs)
if err != nil {
@@ -885,16 +880,16 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
continue // skip root container
}
- bundleDir, err := testutil.SetupBundleDir(specs[i])
+ bundleDir, cleanup, err := testutil.SetupBundleDir(specs[i])
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
rootArgs := Args{
ID: ids[i],
Spec: specs[i],
- BundleDir: rootBundleDir,
+ BundleDir: bundleDir,
}
cont, err := New(conf, rootArgs)
if err != nil {
@@ -936,13 +931,13 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) {
script := fmt.Sprintf("if [ -f %q ]; then exit 1; else touch %q; fi", filename, filename)
cmd := []string{"sh", "-c", script}
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// Make sure overlay is enabled, and none of the root filesystems are
@@ -976,7 +971,7 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) {
// TestMultiContainerContainerDestroyStress tests that IO operations continue
// to work after containers have been stopped and gofers killed.
func TestMultiContainerContainerDestroyStress(t *testing.T) {
- app, err := testutil.FindFile("runsc/container/test_app/test_app")
+ app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
t.Fatal("error finding test_app:", err)
}
@@ -1005,13 +1000,12 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) {
childrenSpecs := allSpecs[1:]
childrenIDs := allIDs[1:]
- conf := testutil.TestConfig()
- rootDir, bundleDir, err := testutil.SetupContainer(rootSpec, conf)
+ conf := testutil.TestConfig(t)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(rootSpec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Start root container.
rootArgs := Args{
@@ -1037,11 +1031,11 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) {
var children []*Container
for j, spec := range specs {
- bundleDir, err := testutil.SetupBundleDir(spec)
+ bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
args := Args{
ID: ids[j],
@@ -1079,355 +1073,348 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) {
// Test that pod shared mounts are properly mounted in 2 containers and that
// changes from one container is reflected in the other.
func TestMultiContainerSharedMount(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- rootDir, err := testutil.SetupRootDir()
- if err != nil {
- t.Fatalf("error creating root dir: %v", err)
- }
- defer os.RemoveAll(rootDir)
- conf.RootDir = rootDir
-
- // Setup the containers.
- sleep := []string{"sleep", "100"}
- podSpec, ids := createSpecs(sleep, sleep)
- mnt0 := specs.Mount{
- Destination: "/mydir/test",
- Source: "/some/dir",
- Type: "tmpfs",
- Options: nil,
- }
- podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("error creating root dir: %v", err)
+ }
+ defer cleanup()
+ conf.RootDir = rootDir
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: nil,
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
- mnt1 := mnt0
- mnt1.Destination = "/mydir2/test2"
- podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
- createSharedMount(mnt0, "test-mount", podSpec...)
+ createSharedMount(mnt0, "test-mount", podSpec...)
- containers, cleanup, err := startContainers(conf, podSpec, ids)
- if err != nil {
- t.Fatalf("error starting containers: %v", err)
- }
- defer cleanup()
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
- file0 := path.Join(mnt0.Destination, "abc")
- file1 := path.Join(mnt1.Destination, "abc")
- execs := []execDesc{
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
- desc: "directory is mounted in container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
- desc: "directory is mounted in container1",
- },
- {
- c: containers[0],
- cmd: []string{"/usr/bin/touch", file0},
- desc: "create file in container0",
- },
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "-f", file0},
- desc: "file appears in container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "-f", file1},
- desc: "file appears in container1",
- },
- {
- c: containers[1],
- cmd: []string{"/bin/rm", file1},
- desc: "file removed from container1",
- },
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "!", "-f", file0},
- desc: "file removed from container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "!", "-f", file1},
- desc: "file removed from container1",
- },
- {
- c: containers[1],
- cmd: []string{"/bin/mkdir", file1},
- desc: "create directory in container1",
- },
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "-d", file0},
- desc: "dir appears in container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "-d", file1},
- desc: "dir appears in container1",
- },
- {
- c: containers[0],
- cmd: []string{"/bin/rmdir", file0},
- desc: "create directory in container0",
- },
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "!", "-d", file0},
- desc: "dir removed from container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "!", "-d", file1},
- desc: "dir removed from container1",
- },
- }
- if err := execMany(execs); err != nil {
- t.Fatal(err.Error())
- }
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
+ name: "directory is mounted in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
+ name: "directory is mounted in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/bin/touch", file0},
+ name: "create file in container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ name: "file appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ name: "file appears in container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/rm", file1},
+ name: "remove file from container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-f", file0},
+ name: "file removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-f", file1},
+ name: "file removed from container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/mkdir", file1},
+ name: "create directory in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", file0},
+ name: "dir appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", file1},
+ name: "dir appears in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/bin/rmdir", file0},
+ name: "remove directory from container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-d", file0},
+ name: "dir removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-d", file1},
+ name: "dir removed from container1",
+ },
+ }
+ execMany(t, execs)
+ })
}
}
// Test that pod mounts are mounted as readonly when requested.
func TestMultiContainerSharedMountReadonly(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- rootDir, err := testutil.SetupRootDir()
- if err != nil {
- t.Fatalf("error creating root dir: %v", err)
- }
- defer os.RemoveAll(rootDir)
- conf.RootDir = rootDir
-
- // Setup the containers.
- sleep := []string{"sleep", "100"}
- podSpec, ids := createSpecs(sleep, sleep)
- mnt0 := specs.Mount{
- Destination: "/mydir/test",
- Source: "/some/dir",
- Type: "tmpfs",
- Options: []string{"ro"},
- }
- podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("error creating root dir: %v", err)
+ }
+ defer cleanup()
+ conf.RootDir = rootDir
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: []string{"ro"},
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
- mnt1 := mnt0
- mnt1.Destination = "/mydir2/test2"
- podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
- createSharedMount(mnt0, "test-mount", podSpec...)
+ createSharedMount(mnt0, "test-mount", podSpec...)
- containers, cleanup, err := startContainers(conf, podSpec, ids)
- if err != nil {
- t.Fatalf("error starting containers: %v", err)
- }
- defer cleanup()
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
- file0 := path.Join(mnt0.Destination, "abc")
- file1 := path.Join(mnt1.Destination, "abc")
- execs := []execDesc{
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
- desc: "directory is mounted in container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
- desc: "directory is mounted in container1",
- },
- {
- c: containers[0],
- cmd: []string{"/usr/bin/touch", file0},
- want: 1,
- desc: "fails to write to container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/touch", file1},
- want: 1,
- desc: "fails to write to container1",
- },
- }
- if err := execMany(execs); err != nil {
- t.Fatal(err.Error())
- }
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
+ name: "directory is mounted in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
+ name: "directory is mounted in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/bin/touch", file0},
+ want: 1,
+ name: "fails to write to container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/touch", file1},
+ want: 1,
+ name: "fails to write to container1",
+ },
+ }
+ execMany(t, execs)
+ })
}
}
// Test that shared pod mounts continue to work after container is restarted.
func TestMultiContainerSharedMountRestart(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
-
- rootDir, err := testutil.SetupRootDir()
- if err != nil {
- t.Fatalf("error creating root dir: %v", err)
- }
- defer os.RemoveAll(rootDir)
- conf.RootDir = rootDir
-
- // Setup the containers.
- sleep := []string{"sleep", "100"}
- podSpec, ids := createSpecs(sleep, sleep)
- mnt0 := specs.Mount{
- Destination: "/mydir/test",
- Source: "/some/dir",
- Type: "tmpfs",
- Options: nil,
- }
- podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+ //TODO(gvisor.dev/issue/1487): This is failing with VFS2.
+ for name, conf := range configs(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("error creating root dir: %v", err)
+ }
+ defer cleanup()
+ conf.RootDir = rootDir
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: nil,
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
- mnt1 := mnt0
- mnt1.Destination = "/mydir2/test2"
- podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
- createSharedMount(mnt0, "test-mount", podSpec...)
+ createSharedMount(mnt0, "test-mount", podSpec...)
- containers, cleanup, err := startContainers(conf, podSpec, ids)
- if err != nil {
- t.Fatalf("error starting containers: %v", err)
- }
- defer cleanup()
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
- file0 := path.Join(mnt0.Destination, "abc")
- file1 := path.Join(mnt1.Destination, "abc")
- execs := []execDesc{
- {
- c: containers[0],
- cmd: []string{"/usr/bin/touch", file0},
- desc: "create file in container0",
- },
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "-f", file0},
- desc: "file appears in container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "-f", file1},
- desc: "file appears in container1",
- },
- }
- if err := execMany(execs); err != nil {
- t.Fatal(err.Error())
- }
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/bin/touch", file0},
+ name: "create file in container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ name: "file appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ name: "file appears in container1",
+ },
+ }
+ execMany(t, execs)
- containers[1].Destroy()
+ containers[1].Destroy()
- bundleDir, err := testutil.SetupBundleDir(podSpec[1])
- if err != nil {
- t.Fatalf("error restarting container: %v", err)
- }
- defer os.RemoveAll(bundleDir)
+ bundleDir, cleanup, err := testutil.SetupBundleDir(podSpec[1])
+ if err != nil {
+ t.Fatalf("error restarting container: %v", err)
+ }
+ defer cleanup()
- args := Args{
- ID: ids[1],
- Spec: podSpec[1],
- BundleDir: bundleDir,
- }
- containers[1], err = New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- if err := containers[1].Start(conf); err != nil {
- t.Fatalf("error starting container: %v", err)
- }
+ args := Args{
+ ID: ids[1],
+ Spec: podSpec[1],
+ BundleDir: bundleDir,
+ }
+ containers[1], err = New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ if err := containers[1].Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
- execs = []execDesc{
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "-f", file0},
- desc: "file is still in container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "-f", file1},
- desc: "file is still in container1",
- },
- {
- c: containers[1],
- cmd: []string{"/bin/rm", file1},
- desc: "file removed from container1",
- },
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "!", "-f", file0},
- desc: "file removed from container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "!", "-f", file1},
- desc: "file removed from container1",
- },
- }
- if err := execMany(execs); err != nil {
- t.Fatal(err.Error())
- }
+ execs = []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ name: "file is still in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ name: "file is still in container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/rm", file1},
+ name: "file removed from container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-f", file0},
+ name: "file removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-f", file1},
+ name: "file removed from container1",
+ },
+ }
+ execMany(t, execs)
+ })
}
}
// Test that unsupported pod mounts options are ignored when matching master and
// slave mounts.
func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
- rootDir, err := testutil.SetupRootDir()
- if err != nil {
- t.Fatalf("error creating root dir: %v", err)
- }
- defer os.RemoveAll(rootDir)
-
- conf := testutil.TestConfig()
- conf.RootDir = rootDir
-
- // Setup the containers.
- sleep := []string{"/bin/sleep", "100"}
- podSpec, ids := createSpecs(sleep, sleep)
- mnt0 := specs.Mount{
- Destination: "/mydir/test",
- Source: "/some/dir",
- Type: "tmpfs",
- Options: []string{"rw", "rbind", "relatime"},
- }
- podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+ for name, conf := range configsWithVFS2(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ rootDir, cleanup, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("error creating root dir: %v", err)
+ }
+ defer cleanup()
+ conf.RootDir = rootDir
+
+ // Setup the containers.
+ sleep := []string{"/bin/sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: []string{"rw", "rbind", "relatime"},
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
- mnt1 := mnt0
- mnt1.Destination = "/mydir2/test2"
- mnt1.Options = []string{"rw", "nosuid"}
- podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ mnt1.Options = []string{"rw", "nosuid"}
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
- createSharedMount(mnt0, "test-mount", podSpec...)
+ createSharedMount(mnt0, "test-mount", podSpec...)
- containers, cleanup, err := startContainers(conf, podSpec, ids)
- if err != nil {
- t.Fatalf("error starting containers: %v", err)
- }
- defer cleanup()
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
- execs := []execDesc{
- {
- c: containers[0],
- cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
- desc: "directory is mounted in container0",
- },
- {
- c: containers[1],
- cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
- desc: "directory is mounted in container1",
- },
- }
- if err := execMany(execs); err != nil {
- t.Fatal(err.Error())
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
+ name: "directory is mounted in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
+ name: "directory is mounted in container1",
+ },
+ }
+ execMany(t, execs)
+ })
}
}
// Test that one container can send an FD to another container, even though
// they have distinct MountNamespaces.
func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
- app, err := testutil.FindFile("runsc/container/test_app/test_app")
+ app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
t.Fatal("error finding test_app:", err)
}
@@ -1456,13 +1443,13 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
Type: "tmpfs",
}
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// Create the specs.
@@ -1493,13 +1480,13 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
// Test that container is destroyed when Gofer is killed.
func TestMultiContainerGoferKilled(t *testing.T) {
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
sleep := []string{"sleep", "100"}
@@ -1513,7 +1500,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
// Ensure container is running
c := containers[2]
expectedPL := []*control.Process{
- {PID: 3, Cmd: "sleep"},
+ newProcessBuilder().PID(3).Cmd("sleep").Process(),
}
if err := waitForProcessList(c, expectedPL); err != nil {
t.Errorf("failed to wait for sleep to start: %v", err)
@@ -1541,7 +1528,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
continue // container[2] has been killed.
}
pl := []*control.Process{
- {PID: kernel.ThreadID(i + 1), Cmd: "sleep"},
+ newProcessBuilder().PID(kernel.ThreadID(i + 1)).Cmd("sleep").Process(),
}
if err := waitForProcessList(c, pl); err != nil {
t.Errorf("Container %q was affected by another container: %v", c.ID, err)
@@ -1561,7 +1548,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
// Wait until sandbox stops. waitForProcessList will loop until sandbox exits
// and RPC errors out.
impossiblePL := []*control.Process{
- {PID: 100, Cmd: "non-existent-process"},
+ newProcessBuilder().Cmd("non-existent-process").Process(),
}
if err := waitForProcessList(c, impossiblePL); err == nil {
t.Fatalf("Sandbox was not killed after gofer death")
@@ -1580,13 +1567,13 @@ func TestMultiContainerLoadSandbox(t *testing.T) {
sleep := []string{"sleep", "100"}
specs, ids := createSpecs(sleep, sleep, sleep)
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
// Create containers for the sandbox.
@@ -1613,7 +1600,7 @@ func TestMultiContainerLoadSandbox(t *testing.T) {
}
// Create a valid but empty container directory.
- randomCID := testutil.UniqueContainerID()
+ randomCID := testutil.RandomContainerID()
dir = filepath.Join(conf.RootDir, randomCID)
if err := os.MkdirAll(dir, 0755); err != nil {
t.Fatalf("os.MkdirAll(%q)=%v", dir, err)
@@ -1680,13 +1667,13 @@ func TestMultiContainerRunNonRoot(t *testing.T) {
Type: "bind",
})
- rootDir, err := testutil.SetupRootDir()
+ rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
t.Fatalf("error creating root dir: %v", err)
}
- defer os.RemoveAll(rootDir)
+ defer cleanup()
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.RootDir = rootDir
pod, cleanup, err := startContainers(conf, podSpecs, ids)
@@ -1705,3 +1692,83 @@ func TestMultiContainerRunNonRoot(t *testing.T) {
t.Fatalf("child container failed, waitStatus: %v", ws)
}
}
+
+// TestMultiContainerHomeEnvDir tests that the HOME environment variable is set
+// for root containers, sub-containers, and execed processes.
+//
+// Each of the three processes writes its $HOME into a temp file created
+// outside the sandbox; the test then reads the files back and expects "/" in
+// every one of them.
+func TestMultiContainerHomeEnvDir(t *testing.T) {
+	// TODO(gvisor.dev/issue/1487): VFSv2 configs failing.
+	// NOTE: Don't use overlay since we need changes to persist to the temp dir
+	// outside the sandbox.
+	for testName, conf := range configs(t, noOverlay...) {
+		t.Run(testName, func(t *testing.T) {
+
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// Create temp files we can write the value of $HOME to.
+			homeDirs := map[string]*os.File{}
+			for _, name := range []string{"root", "sub", "exec"} {
+				homeFile, err := ioutil.TempFile(testutil.TmpDir(), name)
+				if err != nil {
+					t.Fatalf("creating temp file: %v", err)
+				}
+				// Deferred on purpose: the files must outlive this loop and
+				// are closed and removed when the subtest returns, so that
+				// repeated runs do not leak descriptors or temp files.
+				defer os.Remove(homeFile.Name())
+				defer homeFile.Close()
+				homeDirs[name] = homeFile
+			}
+
+			// We will sleep in the root container in order to ensure that
+			// the root container doesn't terminate before sub containers can be
+			// created.
+			rootCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s; sleep 1000", homeDirs["root"].Name())}
+			subCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s", homeDirs["sub"].Name())}
+			execCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s", homeDirs["exec"].Name())}
+
+			// Setup the containers, a root container and sub container.
+			specConfig, ids := createSpecs(rootCmd, subCmd)
+			containers, cleanup, err := startContainers(conf, specConfig, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
+
+			// Exec into the root container synchronously.
+			args := &control.ExecArgs{Argv: execCmd}
+			if _, err := containers[0].executeSync(args); err != nil {
+				t.Errorf("error executing %+v: %v", args, err)
+			}
+
+			// Wait for the subcontainer to finish.
+			_, err = containers[1].Wait()
+			if err != nil {
+				t.Errorf("wait on child container: %v", err)
+			}
+
+			// Wait for the root container to run.
+			expectedPL := []*control.Process{
+				newProcessBuilder().Cmd("sh").Process(),
+				newProcessBuilder().Cmd("sleep").Process(),
+			}
+			if err := waitForProcessList(containers[0], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
+
+			// Check the written files.
+			for name, tmpFile := range homeDirs {
+				dirBytes, err := ioutil.ReadAll(tmpFile)
+				if err != nil {
+					t.Fatalf("reading %s temp file: %v", name, err)
+				}
+				got := string(dirBytes)
+
+				want := "/"
+				if got != want {
+					t.Errorf("%s $HOME incorrect: got: %q, want: %q", name, got, want)
+				}
+			}
+
+		})
+	}
+}
diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go
index dc4194134..bac177a88 100644
--- a/runsc/container/shared_volume_test.go
+++ b/runsc/container/shared_volume_test.go
@@ -24,16 +24,15 @@ import (
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/test/testutil"
"gvisor.dev/gvisor/runsc/boot"
- "gvisor.dev/gvisor/runsc/testutil"
)
// TestSharedVolume checks that modifications to a volume mount are propagated
// into and out of the sandbox.
func TestSharedVolume(t *testing.T) {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.FileAccess = boot.FileAccessShared
- t.Logf("Running test with conf: %+v", conf)
// Main process just sleeps. We will use "exec" to probe the state of
// the filesystem.
@@ -44,16 +43,15 @@ func TestSharedVolume(t *testing.T) {
t.Fatalf("TempDir failed: %v", err)
}
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create and start the container.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
}
@@ -190,9 +188,8 @@ func checkFile(c *Container, filename string, want []byte) error {
// TestSharedVolumeFile tests that changes to file content outside the sandbox
// is reflected inside.
func TestSharedVolumeFile(t *testing.T) {
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
conf.FileAccess = boot.FileAccessShared
- t.Logf("Running test with conf: %+v", conf)
// Main process just sleeps. We will use "exec" to probe the state of
// the filesystem.
@@ -203,16 +200,15 @@ func TestSharedVolumeFile(t *testing.T) {
t.Fatalf("TempDir failed: %v", err)
}
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
// Create and start the container.
args := Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
}
diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go
new file mode 100644
index 000000000..17a251530
--- /dev/null
+++ b/runsc/container/state_file.go
@@ -0,0 +1,185 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+
+ "github.com/gofrs/flock"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sync"
+)
+
+const stateFileExtension = ".state" // File name suffix of container state files; List globs on it and buildStatePath appends it.
+
+// StateFile loads and saves container state in a way that is safe across
+// multiple processes, serializing operations through a lock file.
+//
+// The lock file is located at: "${s.RootDir}/${s.ID}.lock".
+// The state file is located at: "${s.RootDir}/${s.ID}.state".
+type StateFile struct {
+ // RootDir is the directory containing the container state and lock files.
+ RootDir string `json:"rootDir"`
+
+ // ID is the container ID; it is the base name of the state and lock files.
+ ID string `json:"id"`
+
+ //
+ // Fields below this line are not saved in the state file and will not
+ // be preserved across commands. once guards the one-time creation of
+ // flock in lock(); flock is the file lock on lockPath(), nil until then.
+
+ once sync.Once
+ flock *flock.Flock
+}
+
+// List returns the IDs of all containers that have a state file in rootDir.
+//
+// Candidates are found by globbing for "*.state" directly under rootDir. The
+// extension is stripped from each match and the remainder is kept only if
+// validateID accepts it. A glob error is returned unchanged.
+func List(rootDir string) ([]string, error) {
+	log.Debugf("List containers %q", rootDir)
+	pattern := filepath.Join(rootDir, "*"+stateFileExtension)
+	matches, err := filepath.Glob(pattern)
+	if err != nil {
+		return nil, err
+	}
+	var ids []string
+	for _, match := range matches {
+		base := filepath.Base(match)
+		// The glob pattern guarantees the suffix, so a shorter name means the
+		// match itself is bogus.
+		if len(base) < len(stateFileExtension) {
+			panic(fmt.Sprintf("invalid file match %q", match))
+		}
+		// Drop the extension and skip files that do not belong to a container.
+		id := base[:len(base)-len(stateFileExtension)]
+		if err := validateID(id); err != nil {
+			continue
+		}
+		ids = append(ids, id)
+	}
+	return ids, nil
+}
+
+// lock acquires the container's file lock, which guards all locking
+// operations for the container. The flock handle on lockPath() is created on
+// the first call only. An error is returned if the lock cannot be acquired.
+func (s *StateFile) lock() error {
+	// Build the handle lazily, exactly once per StateFile.
+	s.once.Do(func() { s.flock = flock.NewFlock(s.lockPath()) })
+
+	err := s.flock.Lock()
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("acquiring lock on %q: %v", s.flock, err)
+}
+
+// lockForNew takes the lock and then verifies that no state file exists yet,
+// so that concurrent creations cannot race to create containers with the same
+// ID. On success the lock is left held; on any failure it is released before
+// the error is returned.
+func (s *StateFile) lockForNew() error {
+	if err := s.lock(); err != nil {
+		return err
+	}
+
+	// The presence of the state file is what marks a container as existing.
+	_, err := os.Stat(s.statePath())
+	switch {
+	case err == nil:
+		s.unlock()
+		return fmt.Errorf("container already exists")
+	case !os.IsNotExist(err):
+		s.unlock()
+		return fmt.Errorf("looking for existing container: %v", err)
+	}
+	return nil
+}
+
+// unlock releases the container's file lock taken by lock(). It panics when
+// the lock is not held. A failure to release is logged as a warning and also
+// returned to the caller.
+func (s *StateFile) unlock() error {
+	if held := s.flock.Locked(); !held {
+		panic("unlock called without lock held")
+	}
+
+	err := s.flock.Unlock()
+	if err == nil {
+		return nil
+	}
+	log.Warningf("Error to release lock on %q: %v", s.flock, err)
+	return fmt.Errorf("releasing lock on %q: %v", s.flock, err)
+}
+
+// saveLocked encodes 'v' as JSON and writes it to the state file with mode
+// 0640.
+//
+// Preconditions: lock() must have been called before; saveLocked panics if
+// the lock is not held.
+func (s *StateFile) saveLocked(v interface{}) error {
+	if held := s.flock.Locked(); !held {
+		panic("saveLocked called without lock held")
+	}
+
+	data, err := json.Marshal(v)
+	if err != nil {
+		return err
+	}
+	err = ioutil.WriteFile(s.statePath(), data, 0640)
+	if err != nil {
+		return fmt.Errorf("writing json file: %v", err)
+	}
+	return nil
+}
+
+// load reads the state file and decodes its JSON content into v.
+//
+// The container lock is taken for the duration of the read and released
+// before returning. v must be a non-nil pointer to the value to fill in. It is
+// handed to json.Unmarshal as is, so an invalid target is reported as an error
+// instead of being silently decoded into a throwaway value.
+func (s *StateFile) load(v interface{}) error {
+	if err := s.lock(); err != nil {
+		return err
+	}
+	defer s.unlock()
+
+	metaBytes, err := ioutil.ReadFile(s.statePath())
+	if err != nil {
+		return err
+	}
+	return json.Unmarshal(metaBytes, v)
+}
+
+// close closes the flock handle, if lock() ever created one. It is a no-op
+// when no handle exists and panics when the lock is still held.
+func (s *StateFile) close() error {
+	switch {
+	case s.flock == nil:
+		return nil
+	case s.flock.Locked():
+		panic("Closing locked file")
+	}
+	return s.flock.Close()
+}
+
+// buildStatePath returns the path of the state file for container id inside
+// rootDir: the ID plus the state file extension, joined onto rootDir.
+func buildStatePath(rootDir, id string) string {
+	fileName := id + stateFileExtension
+	return filepath.Join(rootDir, fileName)
+}
+
+// statePath returns the full path to the state file, as built by buildStatePath from s.RootDir and s.ID.
+func (s *StateFile) statePath() string {
+ return buildStatePath(s.RootDir, s.ID)
+}
+
+// lockPath returns the full path to the lock file: "${s.RootDir}/${s.ID}.lock".
+func (s *StateFile) lockPath() string {
+ return filepath.Join(s.RootDir, s.ID+".lock")
+}
+
+// destroy deletes all state created by the stateFile: first the state file,
+// then the lock file. A file that is already gone is not an error; any other
+// removal failure is returned immediately. It may be called with the lock
+// file held. In that case, the lock file must still be unlocked and properly
+// closed after destroy returns.
+func (s *StateFile) destroy() error {
+	for _, p := range []string{s.statePath(), s.lockPath()} {
+		if err := os.Remove(p); err != nil && !os.IsNotExist(err) {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/runsc/container/test_app/BUILD b/runsc/container/test_app/BUILD
deleted file mode 100644
index 9bf9e6e9d..000000000
--- a/runsc/container/test_app/BUILD
+++ /dev/null
@@ -1,19 +0,0 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_binary")
-
-package(licenses = ["notice"])
-
-go_binary(
- name = "test_app",
- testonly = 1,
- srcs = [
- "fds.go",
- "test_app.go",
- ],
- pure = "on",
- visibility = ["//runsc/container:__pkg__"],
- deps = [
- "//pkg/unet",
- "//runsc/testutil",
- "@com_github_google_subcommands//:go_default_library",
- ],
-)
diff --git a/runsc/container/test_app/fds.go b/runsc/container/test_app/fds.go
deleted file mode 100644
index a90cc1662..000000000
--- a/runsc/container/test_app/fds.go
+++ /dev/null
@@ -1,185 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
- "context"
- "io/ioutil"
- "log"
- "os"
- "time"
-
- "flag"
- "github.com/google/subcommands"
- "gvisor.dev/gvisor/pkg/unet"
- "gvisor.dev/gvisor/runsc/testutil"
-)
-
-const fileContents = "foobarbaz"
-
-// fdSender will open a file and send the FD over a unix domain socket.
-type fdSender struct {
- socketPath string
-}
-
-// Name implements subcommands.Command.Name.
-func (*fdSender) Name() string {
- return "fd_sender"
-}
-
-// Synopsis implements subcommands.Command.Synopsys.
-func (*fdSender) Synopsis() string {
- return "creates a file and sends the FD over the socket"
-}
-
-// Usage implements subcommands.Command.Usage.
-func (*fdSender) Usage() string {
- return "fd_sender <flags>"
-}
-
-// SetFlags implements subcommands.Command.SetFlags.
-func (fds *fdSender) SetFlags(f *flag.FlagSet) {
- f.StringVar(&fds.socketPath, "socket", "", "path to socket")
-}
-
-// Execute implements subcommands.Command.Execute.
-func (fds *fdSender) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- if fds.socketPath == "" {
- log.Fatalf("socket flag must be set")
- }
-
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- log.Fatalf("TempDir failed: %v", err)
- }
-
- fileToSend, err := ioutil.TempFile(dir, "")
- if err != nil {
- log.Fatalf("TempFile failed: %v", err)
- }
- defer fileToSend.Close()
-
- if _, err := fileToSend.WriteString(fileContents); err != nil {
- log.Fatalf("Write(%q) failed: %v", fileContents, err)
- }
-
- // Receiver may not be started yet, so try connecting in a poll loop.
- var s *unet.Socket
- if err := testutil.Poll(func() error {
- var err error
- s, err = unet.Connect(fds.socketPath, true /* SEQPACKET, so we can send empty message with FD */)
- return err
- }, 10*time.Second); err != nil {
- log.Fatalf("Error connecting to socket %q: %v", fds.socketPath, err)
- }
- defer s.Close()
-
- w := s.Writer(true)
- w.ControlMessage.PackFDs(int(fileToSend.Fd()))
- if _, err := w.WriteVec([][]byte{[]byte{'a'}}); err != nil {
- log.Fatalf("Error sending FD %q over socket %q: %v", fileToSend.Fd(), fds.socketPath, err)
- }
-
- log.Print("FD SENDER exiting successfully")
- return subcommands.ExitSuccess
-}
-
-// fdReceiver receives an FD from a unix domain socket and does things to it.
-type fdReceiver struct {
- socketPath string
-}
-
-// Name implements subcommands.Command.Name.
-func (*fdReceiver) Name() string {
- return "fd_receiver"
-}
-
-// Synopsis implements subcommands.Command.Synopsys.
-func (*fdReceiver) Synopsis() string {
- return "reads an FD from a unix socket, and then does things to it"
-}
-
-// Usage implements subcommands.Command.Usage.
-func (*fdReceiver) Usage() string {
- return "fd_receiver <flags>"
-}
-
-// SetFlags implements subcommands.Command.SetFlags.
-func (fdr *fdReceiver) SetFlags(f *flag.FlagSet) {
- f.StringVar(&fdr.socketPath, "socket", "", "path to socket")
-}
-
-// Execute implements subcommands.Command.Execute.
-func (fdr *fdReceiver) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- if fdr.socketPath == "" {
- log.Fatalf("Flags cannot be empty, given: socket: %q", fdr.socketPath)
- }
-
- ss, err := unet.BindAndListen(fdr.socketPath, true /* packet */)
- if err != nil {
- log.Fatalf("BindAndListen(%q) failed: %v", fdr.socketPath, err)
- }
- defer ss.Close()
-
- var s *unet.Socket
- c := make(chan error, 1)
- go func() {
- var err error
- s, err = ss.Accept()
- c <- err
- }()
-
- select {
- case err := <-c:
- if err != nil {
- log.Fatalf("Accept() failed: %v", err)
- }
- case <-time.After(10 * time.Second):
- log.Fatalf("Timeout waiting for accept")
- }
-
- r := s.Reader(true)
- r.EnableFDs(1)
- b := [][]byte{{'a'}}
- if n, err := r.ReadVec(b); n != 1 || err != nil {
- log.Fatalf("ReadVec got n=%d err %v (wanted 0, nil)", n, err)
- }
-
- fds, err := r.ExtractFDs()
- if err != nil {
- log.Fatalf("ExtractFD() got err %v", err)
- }
- if len(fds) != 1 {
- log.Fatalf("ExtractFD() got %d FDs, wanted 1", len(fds))
- }
- fd := fds[0]
-
- file := os.NewFile(uintptr(fd), "received file")
- defer file.Close()
- if _, err := file.Seek(0, os.SEEK_SET); err != nil {
- log.Fatalf("Seek(0, 0) failed: %v", err)
- }
-
- got, err := ioutil.ReadAll(file)
- if err != nil {
- log.Fatalf("ReadAll failed: %v", err)
- }
- if string(got) != fileContents {
- log.Fatalf("ReadAll got %q want %q", string(got), fileContents)
- }
-
- log.Print("FD RECEIVER exiting successfully")
- return subcommands.ExitSuccess
-}
diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go
deleted file mode 100644
index 913d781c6..000000000
--- a/runsc/container/test_app/test_app.go
+++ /dev/null
@@ -1,354 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Binary test_app is like a swiss knife for tests that need to run anything
-// inside the sandbox. New functionality can be added with new commands.
-package main
-
-import (
- "context"
- "fmt"
- "io/ioutil"
- "log"
- "net"
- "os"
- "os/exec"
- "regexp"
- "strconv"
- sys "syscall"
- "time"
-
- "flag"
- "github.com/google/subcommands"
- "gvisor.dev/gvisor/runsc/testutil"
-)
-
-func main() {
- subcommands.Register(subcommands.HelpCommand(), "")
- subcommands.Register(subcommands.FlagsCommand(), "")
- subcommands.Register(new(capability), "")
- subcommands.Register(new(fdReceiver), "")
- subcommands.Register(new(fdSender), "")
- subcommands.Register(new(forkBomb), "")
- subcommands.Register(new(reaper), "")
- subcommands.Register(new(syscall), "")
- subcommands.Register(new(taskTree), "")
- subcommands.Register(new(uds), "")
-
- flag.Parse()
-
- exitCode := subcommands.Execute(context.Background())
- os.Exit(int(exitCode))
-}
-
-type uds struct {
- fileName string
- socketPath string
-}
-
-// Name implements subcommands.Command.Name.
-func (*uds) Name() string {
- return "uds"
-}
-
-// Synopsis implements subcommands.Command.Synopsys.
-func (*uds) Synopsis() string {
- return "creates unix domain socket client and server. Client sends a contant flow of sequential numbers. Server prints them to --file"
-}
-
-// Usage implements subcommands.Command.Usage.
-func (*uds) Usage() string {
- return "uds <flags>"
-}
-
-// SetFlags implements subcommands.Command.SetFlags.
-func (c *uds) SetFlags(f *flag.FlagSet) {
- f.StringVar(&c.fileName, "file", "", "name of output file")
- f.StringVar(&c.socketPath, "socket", "", "path to socket")
-}
-
-// Execute implements subcommands.Command.Execute.
-func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- if c.fileName == "" || c.socketPath == "" {
- log.Fatalf("Flags cannot be empty, given: fileName: %q, socketPath: %q", c.fileName, c.socketPath)
- return subcommands.ExitFailure
- }
- outputFile, err := os.OpenFile(c.fileName, os.O_WRONLY|os.O_CREATE, 0666)
- if err != nil {
- log.Fatal("error opening output file:", err)
- }
-
- defer os.Remove(c.socketPath)
-
- listener, err := net.Listen("unix", c.socketPath)
- if err != nil {
- log.Fatal("error listening on socket %q:", c.socketPath, err)
- }
-
- go server(listener, outputFile)
- for i := 0; ; i++ {
- conn, err := net.Dial("unix", c.socketPath)
- if err != nil {
- log.Fatal("error dialing:", err)
- }
- if _, err := conn.Write([]byte(strconv.Itoa(i))); err != nil {
- log.Fatal("error writing:", err)
- }
- conn.Close()
- time.Sleep(100 * time.Millisecond)
- }
-}
-
-func server(listener net.Listener, out *os.File) {
- buf := make([]byte, 16)
-
- for {
- c, err := listener.Accept()
- if err != nil {
- log.Fatal("error accepting connection:", err)
- }
- nr, err := c.Read(buf)
- if err != nil {
- log.Fatal("error reading from buf:", err)
- }
- data := buf[0:nr]
- fmt.Fprint(out, string(data)+"\n")
- }
-}
-
-type taskTree struct {
- depth int
- width int
- pause bool
-}
-
-// Name implements subcommands.Command.
-func (*taskTree) Name() string {
- return "task-tree"
-}
-
-// Synopsis implements subcommands.Command.
-func (*taskTree) Synopsis() string {
- return "creates a tree of tasks"
-}
-
-// Usage implements subcommands.Command.
-func (*taskTree) Usage() string {
- return "task-tree <flags>"
-}
-
-// SetFlags implements subcommands.Command.
-func (c *taskTree) SetFlags(f *flag.FlagSet) {
- f.IntVar(&c.depth, "depth", 1, "number of levels to create")
- f.IntVar(&c.width, "width", 1, "number of tasks at each level")
- f.BoolVar(&c.pause, "pause", false, "whether the tasks should pause perpetually")
-}
-
-// Execute implements subcommands.Command.
-func (c *taskTree) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- stop := testutil.StartReaper()
- defer stop()
-
- if c.depth == 0 {
- log.Printf("Child sleeping, PID: %d\n", os.Getpid())
- select {}
- }
- log.Printf("Parent %d sleeping, PID: %d\n", c.depth, os.Getpid())
-
- var cmds []*exec.Cmd
- for i := 0; i < c.width; i++ {
- cmd := exec.Command(
- "/proc/self/exe", c.Name(),
- "--depth", strconv.Itoa(c.depth-1),
- "--width", strconv.Itoa(c.width),
- "--pause", strconv.FormatBool(c.pause))
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
-
- if err := cmd.Start(); err != nil {
- log.Fatal("failed to call self:", err)
- }
- cmds = append(cmds, cmd)
- }
-
- for _, c := range cmds {
- c.Wait()
- }
-
- if c.pause {
- select {}
- }
-
- return subcommands.ExitSuccess
-}
-
-type forkBomb struct {
- delay time.Duration
-}
-
-// Name implements subcommands.Command.
-func (*forkBomb) Name() string {
- return "fork-bomb"
-}
-
-// Synopsis implements subcommands.Command.
-func (*forkBomb) Synopsis() string {
- return "creates child process until the end of times"
-}
-
-// Usage implements subcommands.Command.
-func (*forkBomb) Usage() string {
- return "fork-bomb <flags>"
-}
-
-// SetFlags implements subcommands.Command.
-func (c *forkBomb) SetFlags(f *flag.FlagSet) {
- f.DurationVar(&c.delay, "delay", 100*time.Millisecond, "amount of time to delay creation of child")
-}
-
-// Execute implements subcommands.Command.
-func (c *forkBomb) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- time.Sleep(c.delay)
-
- cmd := exec.Command("/proc/self/exe", c.Name())
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- if err := cmd.Run(); err != nil {
- log.Fatal("failed to call self:", err)
- }
- return subcommands.ExitSuccess
-}
-
-type reaper struct{}
-
-// Name implements subcommands.Command.
-func (*reaper) Name() string {
- return "reaper"
-}
-
-// Synopsis implements subcommands.Command.
-func (*reaper) Synopsis() string {
- return "reaps all children in a loop"
-}
-
-// Usage implements subcommands.Command.
-func (*reaper) Usage() string {
- return "reaper <flags>"
-}
-
-// SetFlags implements subcommands.Command.
-func (*reaper) SetFlags(*flag.FlagSet) {}
-
-// Execute implements subcommands.Command.
-func (c *reaper) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- stop := testutil.StartReaper()
- defer stop()
- select {}
-}
-
-type syscall struct {
- sysno uint64
-}
-
-// Name implements subcommands.Command.
-func (*syscall) Name() string {
- return "syscall"
-}
-
-// Synopsis implements subcommands.Command.
-func (*syscall) Synopsis() string {
- return "syscall makes a syscall"
-}
-
-// Usage implements subcommands.Command.
-func (*syscall) Usage() string {
- return "syscall <flags>"
-}
-
-// SetFlags implements subcommands.Command.
-func (s *syscall) SetFlags(f *flag.FlagSet) {
- f.Uint64Var(&s.sysno, "syscall", 0, "syscall to call")
-}
-
-// Execute implements subcommands.Command.
-func (s *syscall) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- if _, _, errno := sys.Syscall(uintptr(s.sysno), 0, 0, 0); errno != 0 {
- fmt.Printf("syscall(%d, 0, 0...) failed: %v\n", s.sysno, errno)
- } else {
- fmt.Printf("syscall(%d, 0, 0...) success\n", s.sysno)
- }
- return subcommands.ExitSuccess
-}
-
-type capability struct {
- enabled uint64
- disabled uint64
-}
-
-// Name implements subcommands.Command.
-func (*capability) Name() string {
- return "capability"
-}
-
-// Synopsis implements subcommands.Command.
-func (*capability) Synopsis() string {
- return "checks if effective capabilities are set/unset"
-}
-
-// Usage implements subcommands.Command.
-func (*capability) Usage() string {
- return "capability [--enabled=number] [--disabled=number]"
-}
-
-// SetFlags implements subcommands.Command.
-func (c *capability) SetFlags(f *flag.FlagSet) {
- f.Uint64Var(&c.enabled, "enabled", 0, "")
- f.Uint64Var(&c.disabled, "disabled", 0, "")
-}
-
-// Execute implements subcommands.Command.
-func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
- if c.enabled == 0 && c.disabled == 0 {
- fmt.Println("One of the flags must be set")
- return subcommands.ExitUsageError
- }
-
- status, err := ioutil.ReadFile("/proc/self/status")
- if err != nil {
- fmt.Printf("Error reading %q: %v\n", "proc/self/status", err)
- return subcommands.ExitFailure
- }
- re := regexp.MustCompile("CapEff:\t([0-9a-f]+)\n")
- matches := re.FindStringSubmatch(string(status))
- if matches == nil || len(matches) != 2 {
- fmt.Printf("Effective capabilities not found in\n%s\n", status)
- return subcommands.ExitFailure
- }
- caps, err := strconv.ParseUint(matches[1], 16, 64)
- if err != nil {
- fmt.Printf("failed to convert capabilities %q: %v\n", matches[1], err)
- return subcommands.ExitFailure
- }
-
- if c.enabled != 0 && (caps&c.enabled) != c.enabled {
- fmt.Printf("Missing capabilities, want: %#x: got: %#x\n", c.enabled, caps)
- return subcommands.ExitFailure
- }
- if c.disabled != 0 && (caps&c.disabled) != 0 {
- fmt.Printf("Extra capabilities found, dont_want: %#x: got: %#x\n", c.disabled, caps)
- return subcommands.ExitFailure
- }
-
- return subcommands.ExitSuccess
-}
diff --git a/runsc/criutil/BUILD b/runsc/criutil/BUILD
deleted file mode 100644
index 558133a0e..000000000
--- a/runsc/criutil/BUILD
+++ /dev/null
@@ -1,12 +0,0 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "criutil",
- testonly = 1,
- srcs = ["criutil.go"],
- importpath = "gvisor.dev/gvisor/runsc/criutil",
- visibility = ["//:sandbox"],
- deps = ["//runsc/testutil"],
-)
diff --git a/runsc/criutil/criutil.go b/runsc/criutil/criutil.go
deleted file mode 100644
index 773f5a1c4..000000000
--- a/runsc/criutil/criutil.go
+++ /dev/null
@@ -1,277 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package criutil contains utility functions for interacting with the
-// Container Runtime Interface (CRI), principally via the crictl command line
-// tool. This requires critools to be installed on the local system.
-package criutil
-
-import (
- "encoding/json"
- "fmt"
- "os"
- "os/exec"
- "strings"
- "time"
-
- "gvisor.dev/gvisor/runsc/testutil"
-)
-
-const endpointPrefix = "unix://"
-
-// Crictl contains information required to run the crictl utility.
-type Crictl struct {
- executable string
- timeout time.Duration
- imageEndpoint string
- runtimeEndpoint string
-}
-
-// NewCrictl returns a Crictl configured with a timeout and an endpoint over
-// which it will talk to containerd.
-func NewCrictl(timeout time.Duration, endpoint string) *Crictl {
- // Bazel doesn't pass PATH through, assume the location of crictl
- // unless specified by environment variable.
- executable := os.Getenv("CRICTL_PATH")
- if executable == "" {
- executable = "/usr/local/bin/crictl"
- }
- return &Crictl{
- executable: executable,
- timeout: timeout,
- imageEndpoint: endpointPrefix + endpoint,
- runtimeEndpoint: endpointPrefix + endpoint,
- }
-}
-
-// Pull pulls an container image. It corresponds to `crictl pull`.
-func (cc *Crictl) Pull(imageName string) error {
- _, err := cc.run("pull", imageName)
- return err
-}
-
-// RunPod creates a sandbox. It corresponds to `crictl runp`.
-func (cc *Crictl) RunPod(sbSpecFile string) (string, error) {
- podID, err := cc.run("runp", sbSpecFile)
- if err != nil {
- return "", fmt.Errorf("runp failed: %v", err)
- }
- // Strip the trailing newline from crictl output.
- return strings.TrimSpace(podID), nil
-}
-
-// Create creates a container within a sandbox. It corresponds to `crictl
-// create`.
-func (cc *Crictl) Create(podID, contSpecFile, sbSpecFile string) (string, error) {
- podID, err := cc.run("create", podID, contSpecFile, sbSpecFile)
- if err != nil {
- return "", fmt.Errorf("create failed: %v", err)
- }
- // Strip the trailing newline from crictl output.
- return strings.TrimSpace(podID), nil
-}
-
-// Start starts a container. It corresponds to `crictl start`.
-func (cc *Crictl) Start(contID string) (string, error) {
- output, err := cc.run("start", contID)
- if err != nil {
- return "", fmt.Errorf("start failed: %v", err)
- }
- return output, nil
-}
-
-// Stop stops a container. It corresponds to `crictl stop`.
-func (cc *Crictl) Stop(contID string) error {
- _, err := cc.run("stop", contID)
- return err
-}
-
-// Exec execs a program inside a container. It corresponds to `crictl exec`.
-func (cc *Crictl) Exec(contID string, args ...string) (string, error) {
- a := []string{"exec", contID}
- a = append(a, args...)
- output, err := cc.run(a...)
- if err != nil {
- return "", fmt.Errorf("exec failed: %v", err)
- }
- return output, nil
-}
-
-// Rm removes a container. It corresponds to `crictl rm`.
-func (cc *Crictl) Rm(contID string) error {
- _, err := cc.run("rm", contID)
- return err
-}
-
-// StopPod stops a pod. It corresponds to `crictl stopp`.
-func (cc *Crictl) StopPod(podID string) error {
- _, err := cc.run("stopp", podID)
- return err
-}
-
-// containsConfig is a minimal copy of
-// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto
-// It only contains fields needed for testing.
-type containerConfig struct {
- Status containerStatus
-}
-
-type containerStatus struct {
- Network containerNetwork
-}
-
-type containerNetwork struct {
- IP string
-}
-
-// PodIP returns a pod's IP address.
-func (cc *Crictl) PodIP(podID string) (string, error) {
- output, err := cc.run("inspectp", podID)
- if err != nil {
- return "", err
- }
- conf := &containerConfig{}
- if err := json.Unmarshal([]byte(output), conf); err != nil {
- return "", fmt.Errorf("failed to unmarshal JSON: %v, %s", err, output)
- }
- if conf.Status.Network.IP == "" {
- return "", fmt.Errorf("no IP found in config: %s", output)
- }
- return conf.Status.Network.IP, nil
-}
-
-// RmPod removes a container. It corresponds to `crictl rmp`.
-func (cc *Crictl) RmPod(podID string) error {
- _, err := cc.run("rmp", podID)
- return err
-}
-
-// StartContainer pulls the given image ands starts the container in the
-// sandbox with the given podID.
-func (cc *Crictl) StartContainer(podID, image, sbSpec, contSpec string) (string, error) {
- // Write the specs to files that can be read by crictl.
- sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec)
- if err != nil {
- return "", fmt.Errorf("failed to write sandbox spec: %v", err)
- }
- contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec)
- if err != nil {
- return "", fmt.Errorf("failed to write container spec: %v", err)
- }
-
- return cc.startContainer(podID, image, sbSpecFile, contSpecFile)
-}
-
-func (cc *Crictl) startContainer(podID, image, sbSpecFile, contSpecFile string) (string, error) {
- if err := cc.Pull(image); err != nil {
- return "", fmt.Errorf("failed to pull %s: %v", image, err)
- }
-
- contID, err := cc.Create(podID, contSpecFile, sbSpecFile)
- if err != nil {
- return "", fmt.Errorf("failed to create container in pod %q: %v", podID, err)
- }
-
- if _, err := cc.Start(contID); err != nil {
- return "", fmt.Errorf("failed to start container %q in pod %q: %v", contID, podID, err)
- }
-
- return contID, nil
-}
-
-// StopContainer stops and deletes the container with the given container ID.
-func (cc *Crictl) StopContainer(contID string) error {
- if err := cc.Stop(contID); err != nil {
- return fmt.Errorf("failed to stop container %q: %v", contID, err)
- }
-
- if err := cc.Rm(contID); err != nil {
- return fmt.Errorf("failed to remove container %q: %v", contID, err)
- }
-
- return nil
-}
-
-// StartPodAndContainer pulls an image, then starts a sandbox and container in
-// that sandbox. It returns the pod ID and container ID.
-func (cc *Crictl) StartPodAndContainer(image, sbSpec, contSpec string) (string, string, error) {
- // Write the specs to files that can be read by crictl.
- sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec)
- if err != nil {
- return "", "", fmt.Errorf("failed to write sandbox spec: %v", err)
- }
- contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec)
- if err != nil {
- return "", "", fmt.Errorf("failed to write container spec: %v", err)
- }
-
- podID, err := cc.RunPod(sbSpecFile)
- if err != nil {
- return "", "", err
- }
-
- contID, err := cc.startContainer(podID, image, sbSpecFile, contSpecFile)
-
- return podID, contID, err
-}
-
-// StopPodAndContainer stops a container and pod.
-func (cc *Crictl) StopPodAndContainer(podID, contID string) error {
- if err := cc.StopContainer(contID); err != nil {
- return fmt.Errorf("failed to stop container %q in pod %q: %v", contID, podID, err)
- }
-
- if err := cc.StopPod(podID); err != nil {
- return fmt.Errorf("failed to stop pod %q: %v", podID, err)
- }
-
- if err := cc.RmPod(podID); err != nil {
- return fmt.Errorf("failed to remove pod %q: %v", podID, err)
- }
-
- return nil
-}
-
-// run runs crictl with the given args and returns an error if it takes longer
-// than cc.Timeout to run.
-func (cc *Crictl) run(args ...string) (string, error) {
- defaultArgs := []string{
- "--image-endpoint", cc.imageEndpoint,
- "--runtime-endpoint", cc.runtimeEndpoint,
- }
- cmd := exec.Command(cc.executable, append(defaultArgs, args...)...)
-
- // Run the command with a timeout.
- done := make(chan string)
- errCh := make(chan error)
- go func() {
- output, err := cmd.CombinedOutput()
- if err != nil {
- errCh <- fmt.Errorf("error: \"%v\", output: %s", err, string(output))
- return
- }
- done <- string(output)
- }()
- select {
- case output := <-done:
- return output, nil
- case err := <-errCh:
- return "", err
- case <-time.After(cc.timeout):
- if err := testutil.KillCommand(cmd); err != nil {
- return "", fmt.Errorf("timed out, then couldn't kill process %+v: %v", cmd, err)
- }
- return "", fmt.Errorf("timed out: %+v", cmd)
- }
-}
diff --git a/runsc/debian/description b/runsc/debian/description
index 6e3b1b2c0..9e8e08805 100644
--- a/runsc/debian/description
+++ b/runsc/debian/description
@@ -1,5 +1 @@
-gVisor is a user-space kernel, written in Go, that implements a substantial
-portion of the Linux system surface. It includes an Open Container Initiative
-(OCI) runtime called runsc that provides an isolation boundary between the
-application and the host kernel. The runsc runtime integrates with Docker and
-Kubernetes, making it simple to run sandboxed containers.
+gVisor container sandbox runtime
diff --git a/runsc/debian/postinst.sh b/runsc/debian/postinst.sh
index dc7aeee87..d1e28e17b 100755
--- a/runsc/debian/postinst.sh
+++ b/runsc/debian/postinst.sh
@@ -18,7 +18,14 @@ if [ "$1" != configure ]; then
exit 0
fi
+# Update docker configuration; restart docker only if it is currently active.
if [ -f /etc/docker/daemon.json ]; then
runsc install
- systemctl restart docker || echo "unable to restart docker; you must do so manually." >&2
+ if systemctl is-active --quiet docker 2>/dev/null; then
+ systemctl restart docker || echo "unable to restart docker; you must do so manually." >&2
+ fi
fi
+
+# For containerd-based installers, we don't automatically update the
+# configuration. If it uses a v2 shim, then it will find the package binaries
+# automatically when provided the appropriate annotation.
diff --git a/runsc/dockerutil/BUILD b/runsc/dockerutil/BUILD
deleted file mode 100644
index 0e0423504..000000000
--- a/runsc/dockerutil/BUILD
+++ /dev/null
@@ -1,15 +0,0 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "dockerutil",
- testonly = 1,
- srcs = ["dockerutil.go"],
- importpath = "gvisor.dev/gvisor/runsc/dockerutil",
- visibility = ["//:sandbox"],
- deps = [
- "//runsc/testutil",
- "@com_github_kr_pty//:go_default_library",
- ],
-)
diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go
deleted file mode 100644
index 57f6ae8de..000000000
--- a/runsc/dockerutil/dockerutil.go
+++ /dev/null
@@ -1,467 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package dockerutil is a collection of utility functions, primarily for
-// testing.
-package dockerutil
-
-import (
- "encoding/json"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "os"
- "os/exec"
- "path"
- "regexp"
- "strconv"
- "strings"
- "syscall"
- "time"
-
- "github.com/kr/pty"
- "gvisor.dev/gvisor/runsc/testutil"
-)
-
-var (
- runtime = flag.String("runtime", "runsc", "specify which runtime to use")
- config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths")
-)
-
-// EnsureSupportedDockerVersion checks if correct docker is installed.
-func EnsureSupportedDockerVersion() {
- cmd := exec.Command("docker", "version")
- out, err := cmd.CombinedOutput()
- if err != nil {
- log.Fatalf("Error running %q: %v", "docker version", err)
- }
- re := regexp.MustCompile(`Version:\s+(\d+)\.(\d+)\.\d.*`)
- matches := re.FindStringSubmatch(string(out))
- if len(matches) != 3 {
- log.Fatalf("Invalid docker output: %s", out)
- }
- major, _ := strconv.Atoi(matches[1])
- minor, _ := strconv.Atoi(matches[2])
- if major < 17 || (major == 17 && minor < 9) {
- log.Fatalf("Docker version 17.09.0 or greater is required, found: %02d.%02d", major, minor)
- }
-}
-
-// RuntimePath returns the binary path for the current runtime.
-func RuntimePath() (string, error) {
- // Read the configuration data; the file must exist.
- configBytes, err := ioutil.ReadFile(*config)
- if err != nil {
- return "", err
- }
-
- // Unmarshal the configuration.
- c := make(map[string]interface{})
- if err := json.Unmarshal(configBytes, &c); err != nil {
- return "", err
- }
-
- // Decode the expected configuration.
- r, ok := c["runtimes"]
- if !ok {
- return "", fmt.Errorf("no runtimes declared: %v", c)
- }
- rs, ok := r.(map[string]interface{})
- if !ok {
- // The runtimes are not a map.
- return "", fmt.Errorf("unexpected format: %v", c)
- }
- r, ok = rs[*runtime]
- if !ok {
- // The expected runtime is not declared.
- return "", fmt.Errorf("runtime %q not found: %v", *runtime, c)
- }
- rs, ok = r.(map[string]interface{})
- if !ok {
- // The runtime is not a map.
- return "", fmt.Errorf("unexpected format: %v", c)
- }
- p, ok := rs["path"].(string)
- if !ok {
- // The runtime does not declare a path.
- return "", fmt.Errorf("unexpected format: %v", c)
- }
- return p, nil
-}
-
-// MountMode describes if the mount should be ro or rw.
-type MountMode int
-
-const (
- // ReadOnly is what the name says.
- ReadOnly MountMode = iota
- // ReadWrite is what the name says.
- ReadWrite
-)
-
-// String returns the mount mode argument for this MountMode.
-func (m MountMode) String() string {
- switch m {
- case ReadOnly:
- return "ro"
- case ReadWrite:
- return "rw"
- }
- panic(fmt.Sprintf("invalid mode: %d", m))
-}
-
-// MountArg formats the volume argument to mount in the container.
-func MountArg(source, target string, mode MountMode) string {
- return fmt.Sprintf("-v=%s:%s:%v", source, target, mode)
-}
-
-// LinkArg formats the link argument.
-func LinkArg(source *Docker, target string) string {
- return fmt.Sprintf("--link=%s:%s", source.Name, target)
-}
-
-// PrepareFiles creates temp directory to copy files there. The sandbox doesn't
-// have access to files in the test dir.
-func PrepareFiles(names ...string) (string, error) {
- dir, err := ioutil.TempDir("", "image-test")
- if err != nil {
- return "", fmt.Errorf("ioutil.TempDir failed: %v", err)
- }
- if err := os.Chmod(dir, 0777); err != nil {
- return "", fmt.Errorf("os.Chmod(%q, 0777) failed: %v", dir, err)
- }
- for _, name := range names {
- src := getLocalPath(name)
- dst := path.Join(dir, name)
- if err := testutil.Copy(src, dst); err != nil {
- return "", fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err)
- }
- }
- return dir, nil
-}
-
-func getLocalPath(file string) string {
- return path.Join(".", file)
-}
-
-// do executes docker command.
-func do(args ...string) (string, error) {
- log.Printf("Running: docker %s\n", args)
- cmd := exec.Command("docker", args...)
- out, err := cmd.CombinedOutput()
- if err != nil {
- return "", fmt.Errorf("error executing docker %s: %v\nout: %s", args, err, out)
- }
- return string(out), nil
-}
-
-// doWithPty executes docker command with stdio attached to a pty.
-func doWithPty(args ...string) (*exec.Cmd, *os.File, error) {
- log.Printf("Running with pty: docker %s\n", args)
- cmd := exec.Command("docker", args...)
- ptmx, err := pty.Start(cmd)
- if err != nil {
- return nil, nil, fmt.Errorf("error executing docker %s with a pty: %v", args, err)
- }
- return cmd, ptmx, nil
-}
-
-// Pull pulls a docker image. This is used in tests to isolate the
-// time to pull the image off the network from the time to actually
-// start the container, to avoid timeouts over slow networks.
-func Pull(image string) error {
- _, err := do("pull", image)
- return err
-}
-
-// Docker contains the name and the runtime of a docker container.
-type Docker struct {
- Runtime string
- Name string
-}
-
-// MakeDocker sets up the struct for a Docker container.
-// Names of containers will be unique.
-func MakeDocker(namePrefix string) Docker {
- return Docker{
- Name: testutil.RandomName(namePrefix),
- Runtime: *runtime,
- }
-}
-
-// logDockerID logs a container id, which is needed to find container runsc logs.
-func (d *Docker) logDockerID() {
- id, err := d.ID()
- if err != nil {
- log.Printf("%v\n", err)
- }
- log.Printf("Name: %s ID: %v\n", d.Name, id)
-}
-
-// Create calls 'docker create' with the arguments provided.
-func (d *Docker) Create(args ...string) error {
- a := []string{"create", "--runtime", d.Runtime, "--name", d.Name}
- a = append(a, args...)
- _, err := do(a...)
- if err == nil {
- d.logDockerID()
- }
- return err
-}
-
-// Start calls 'docker start'.
-func (d *Docker) Start() error {
- if _, err := do("start", d.Name); err != nil {
- return fmt.Errorf("error starting container %q: %v", d.Name, err)
- }
- return nil
-}
-
-// Stop calls 'docker stop'.
-func (d *Docker) Stop() error {
- if _, err := do("stop", d.Name); err != nil {
- return fmt.Errorf("error stopping container %q: %v", d.Name, err)
- }
- return nil
-}
-
-// Run calls 'docker run' with the arguments provided. The container starts
-// running in the background and the call returns immediately.
-func (d *Docker) Run(args ...string) error {
- a := d.runArgs("-d")
- a = append(a, args...)
- _, err := do(a...)
- if err == nil {
- d.logDockerID()
- }
- return err
-}
-
-// RunWithPty is like Run but with an attached pty.
-func (d *Docker) RunWithPty(args ...string) (*exec.Cmd, *os.File, error) {
- a := d.runArgs("-it")
- a = append(a, args...)
- return doWithPty(a...)
-}
-
-// RunFg calls 'docker run' with the arguments provided in the foreground. It
-// blocks until the container exits and returns the output.
-func (d *Docker) RunFg(args ...string) (string, error) {
- a := d.runArgs(args...)
- out, err := do(a...)
- if err == nil {
- d.logDockerID()
- }
- return string(out), err
-}
-
-func (d *Docker) runArgs(args ...string) []string {
- // Environment variable RUNSC_TEST_NAME is picked up by the runtime and added
- // to the log name, so one can easily identify the corresponding logs for
- // this test.
- rv := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-e", "RUNSC_TEST_NAME=" + d.Name}
- return append(rv, args...)
-}
-
-// Logs calls 'docker logs'.
-func (d *Docker) Logs() (string, error) {
- return do("logs", d.Name)
-}
-
-// Exec calls 'docker exec' with the arguments provided.
-func (d *Docker) Exec(args ...string) (string, error) {
- return d.ExecWithFlags(nil, args...)
-}
-
-// ExecWithFlags calls 'docker exec <flags> name <args>'.
-func (d *Docker) ExecWithFlags(flags []string, args ...string) (string, error) {
- a := []string{"exec"}
- a = append(a, flags...)
- a = append(a, d.Name)
- a = append(a, args...)
- return do(a...)
-}
-
-// ExecAsUser calls 'docker exec' as the given user with the arguments
-// provided.
-func (d *Docker) ExecAsUser(user string, args ...string) (string, error) {
- a := []string{"exec", "--user", user, d.Name}
- a = append(a, args...)
- return do(a...)
-}
-
-// ExecWithTerminal calls 'docker exec -it' with the arguments provided and
-// attaches a pty to stdio.
-func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) {
- a := []string{"exec", "-it", d.Name}
- a = append(a, args...)
- return doWithPty(a...)
-}
-
-// Pause calls 'docker pause'.
-func (d *Docker) Pause() error {
- if _, err := do("pause", d.Name); err != nil {
- return fmt.Errorf("error pausing container %q: %v", d.Name, err)
- }
- return nil
-}
-
-// Unpause calls 'docker pause'.
-func (d *Docker) Unpause() error {
- if _, err := do("unpause", d.Name); err != nil {
- return fmt.Errorf("error unpausing container %q: %v", d.Name, err)
- }
- return nil
-}
-
-// Checkpoint calls 'docker checkpoint'.
-func (d *Docker) Checkpoint(name string) error {
- if _, err := do("checkpoint", "create", d.Name, name); err != nil {
- return fmt.Errorf("error pausing container %q: %v", d.Name, err)
- }
- return nil
-}
-
-// Restore calls 'docker start --checkname [name]'.
-func (d *Docker) Restore(name string) error {
- if _, err := do("start", "--checkpoint", name, d.Name); err != nil {
- return fmt.Errorf("error starting container %q: %v", d.Name, err)
- }
- return nil
-}
-
-// Remove calls 'docker rm'.
-func (d *Docker) Remove() error {
- if _, err := do("rm", d.Name); err != nil {
- return fmt.Errorf("error deleting container %q: %v", d.Name, err)
- }
- return nil
-}
-
-// CleanUp kills and deletes the container (best effort).
-func (d *Docker) CleanUp() {
- d.logDockerID()
- if _, err := do("kill", d.Name); err != nil {
- if strings.Contains(err.Error(), "is not running") {
- // Nothing to kill. Don't log the error in this case.
- } else {
- log.Printf("error killing container %q: %v", d.Name, err)
- }
- }
- if err := d.Remove(); err != nil {
- log.Print(err)
- }
-}
-
-// FindPort returns the host port that is mapped to 'sandboxPort'. This calls
-// docker to allocate a free port in the host and prevent conflicts.
-func (d *Docker) FindPort(sandboxPort int) (int, error) {
- format := fmt.Sprintf(`{{ (index (index .NetworkSettings.Ports "%d/tcp") 0).HostPort }}`, sandboxPort)
- out, err := do("inspect", "-f", format, d.Name)
- if err != nil {
- return -1, fmt.Errorf("error retrieving port: %v", err)
- }
- port, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n"))
- if err != nil {
- return -1, fmt.Errorf("error parsing port %q: %v", out, err)
- }
- return port, nil
-}
-
-// SandboxPid returns the PID to the sandbox process.
-func (d *Docker) SandboxPid() (int, error) {
- out, err := do("inspect", "-f={{.State.Pid}}", d.Name)
- if err != nil {
- return -1, fmt.Errorf("error retrieving pid: %v", err)
- }
- pid, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n"))
- if err != nil {
- return -1, fmt.Errorf("error parsing pid %q: %v", out, err)
- }
- return pid, nil
-}
-
-// ID returns the container ID.
-func (d *Docker) ID() (string, error) {
- out, err := do("inspect", "-f={{.Id}}", d.Name)
- if err != nil {
- return "", fmt.Errorf("error retrieving ID: %v", err)
- }
- return strings.TrimSpace(string(out)), nil
-}
-
-// Wait waits for container to exit, up to the given timeout. Returns error if
-// wait fails or timeout is hit. Returns the application return code otherwise.
-// Note that the application may have failed even if err == nil, always check
-// the exit code.
-func (d *Docker) Wait(timeout time.Duration) (syscall.WaitStatus, error) {
- timeoutChan := time.After(timeout)
- waitChan := make(chan (syscall.WaitStatus))
- errChan := make(chan (error))
-
- go func() {
- out, err := do("wait", d.Name)
- if err != nil {
- errChan <- fmt.Errorf("error waiting for container %q: %v", d.Name, err)
- }
- exit, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n"))
- if err != nil {
- errChan <- fmt.Errorf("error parsing exit code %q: %v", out, err)
- }
- waitChan <- syscall.WaitStatus(uint32(exit))
- }()
-
- select {
- case ws := <-waitChan:
- return ws, nil
- case err := <-errChan:
- return syscall.WaitStatus(1), err
- case <-timeoutChan:
- return syscall.WaitStatus(1), fmt.Errorf("timeout waiting for container %q", d.Name)
- }
-}
-
-// WaitForOutput calls 'docker logs' to retrieve containers output and searches
-// for the given pattern.
-func (d *Docker) WaitForOutput(pattern string, timeout time.Duration) (string, error) {
- matches, err := d.WaitForOutputSubmatch(pattern, timeout)
- if err != nil {
- return "", err
- }
- if len(matches) == 0 {
- return "", nil
- }
- return matches[0], nil
-}
-
-// WaitForOutputSubmatch calls 'docker logs' to retrieve containers output and
-// searches for the given pattern. It returns any regexp submatches as well.
-func (d *Docker) WaitForOutputSubmatch(pattern string, timeout time.Duration) ([]string, error) {
- re := regexp.MustCompile(pattern)
- var out string
- for exp := time.Now().Add(timeout); time.Now().Before(exp); {
- var err error
- out, err = d.Logs()
- if err != nil {
- return nil, err
- }
- if matches := re.FindStringSubmatch(out); matches != nil {
- // Success!
- return matches, nil
- }
- time.Sleep(100 * time.Millisecond)
- }
- return nil, fmt.Errorf("timeout waiting for output %q: %s", re.String(), out)
-}
diff --git a/runsc/flag/BUILD b/runsc/flag/BUILD
new file mode 100644
index 000000000..5cb7604a8
--- /dev/null
+++ b/runsc/flag/BUILD
@@ -0,0 +1,9 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "flag",
+ srcs = ["flag.go"],
+ visibility = ["//:sandbox"],
+)
diff --git a/runsc/flag/flag.go b/runsc/flag/flag.go
new file mode 100644
index 000000000..0ca4829d7
--- /dev/null
+++ b/runsc/flag/flag.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flag
+
+import (
+ "flag"
+)
+
+type FlagSet = flag.FlagSet
+
+var (
+ NewFlagSet = flag.NewFlagSet
+ String = flag.String
+ Bool = flag.Bool
+ Int = flag.Int
+ Uint = flag.Uint
+ CommandLine = flag.CommandLine
+ Parse = flag.Parse
+)
+
+const ContinueOnError = flag.ContinueOnError
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 80a4aa2fe..05e3637f7 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -6,19 +6,19 @@ go_library(
name = "fsgofer",
srcs = [
"fsgofer.go",
+ "fsgofer_amd64_unsafe.go",
+ "fsgofer_arm64_unsafe.go",
"fsgofer_unsafe.go",
],
- importpath = "gvisor.dev/gvisor/runsc/fsgofer",
- visibility = [
- "//runsc:__subpackages__",
- ],
+ visibility = ["//runsc:__subpackages__"],
deps = [
"//pkg/abi/linux",
+ "//pkg/cleanup",
"//pkg/fd",
"//pkg/log",
"//pkg/p9",
+ "//pkg/sync",
"//pkg/syserr",
- "//runsc/specutils",
"@org_golang_x_sys//unix:go_default_library",
],
)
@@ -27,9 +27,10 @@ go_test(
name = "fsgofer_test",
size = "small",
srcs = ["fsgofer_test.go"],
- embed = [":fsgofer"],
+ library = ":fsgofer",
deps = [
"//pkg/log",
"//pkg/p9",
+ "//pkg/test/testutil",
],
)
diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD
index 02168ad1b..82b48ef32 100644
--- a/runsc/fsgofer/filter/BUILD
+++ b/runsc/fsgofer/filter/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
@@ -6,12 +6,13 @@ go_library(
name = "filter",
srcs = [
"config.go",
+ "config_amd64.go",
+ "config_arm64.go",
"extra_filters.go",
"extra_filters_msan.go",
"extra_filters_race.go",
"filter.go",
],
- importpath = "gvisor.dev/gvisor/runsc/fsgofer/filter",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index 2ea95f8fb..88814b83c 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -25,11 +25,7 @@ import (
// allowedSyscalls is the set of syscalls executed by the gofer.
var allowedSyscalls = seccomp.SyscallRules{
- syscall.SYS_ACCEPT: {},
- syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
- {seccomp.AllowValue(linux.ARCH_GET_FS)},
- {seccomp.AllowValue(linux.ARCH_SET_FS)},
- },
+ syscall.SYS_ACCEPT: {},
syscall.SYS_CLOCK_GETTIME: {},
syscall.SYS_CLONE: []seccomp.Rule{
{
@@ -132,6 +128,19 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_MADVISE: {},
unix.SYS_MEMFD_CREATE: {}, /// Used by flipcall.PacketWindowAllocator.Init().
syscall.SYS_MKDIRAT: {},
+ syscall.SYS_MKNODAT: {},
+ // Used by the Go runtime as a temporary workaround for a Linux
+ // 5.2-5.4 bug.
+ //
+ // See src/runtime/os_linux_x86.go.
+ //
+ // TODO(b/148688965): Remove once this is gone from Go.
+ syscall.SYS_MLOCK: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4096),
+ },
+ },
syscall.SYS_MMAP: []seccomp.Rule{
{
seccomp.AllowAny{},
@@ -155,7 +164,6 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_MPROTECT: {},
syscall.SYS_MUNMAP: {},
syscall.SYS_NANOSLEEP: {},
- syscall.SYS_NEWFSTATAT: {},
syscall.SYS_OPENAT: {},
syscall.SYS_PPOLL: {},
syscall.SYS_PREAD64: {},
diff --git a/runsc/fsgofer/filter/config_amd64.go b/runsc/fsgofer/filter/config_amd64.go
new file mode 100644
index 000000000..a4b28cb8b
--- /dev/null
+++ b/runsc/fsgofer/filter/config_amd64.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package filter
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+func init() {
+ allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{
+ {seccomp.AllowValue(linux.ARCH_GET_FS)},
+ {seccomp.AllowValue(linux.ARCH_SET_FS)},
+ }
+
+ allowedSyscalls[syscall.SYS_NEWFSTATAT] = []seccomp.Rule{}
+}
diff --git a/runsc/fsgofer/filter/config_arm64.go b/runsc/fsgofer/filter/config_arm64.go
new file mode 100644
index 000000000..d2697deb7
--- /dev/null
+++ b/runsc/fsgofer/filter/config_arm64.go
@@ -0,0 +1,27 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package filter
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+func init() {
+ allowedSyscalls[syscall.SYS_FSTATAT] = []seccomp.Rule{}
+}
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 3fceecb3d..c6694c278 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -29,15 +29,15 @@ import (
"path/filepath"
"runtime"
"strconv"
- "sync"
"syscall"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
- "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/sync"
)
const (
@@ -48,36 +48,6 @@ const (
openFlags = syscall.O_NOFOLLOW | syscall.O_CLOEXEC
)
-type fileType int
-
-const (
- regular fileType = iota
- directory
- symlink
- socket
- unknown
-)
-
-// String implements fmt.Stringer.
-func (f fileType) String() string {
- switch f {
- case regular:
- return "regular"
- case directory:
- return "directory"
- case symlink:
- return "symlink"
- case socket:
- return "socket"
- }
- return "unknown"
-}
-
-// ControlSocketAddr generates an abstract unix socket name for the given id.
-func ControlSocketAddr(id string) string {
- return fmt.Sprintf("\x00runsc-gofer.%s", id)
-}
-
// Config sets configuration options for each attach point.
type Config struct {
// ROMount is set to true if this is a readonly mount.
@@ -132,19 +102,19 @@ func (a *attachPoint) Attach() (p9.File, error) {
return nil, fmt.Errorf("attach point already attached, prefix: %s", a.prefix)
}
- f, err := openAnyFile(a.prefix, func(mode int) (*fd.FD, error) {
+ f, readable, err := openAnyFile(a.prefix, func(mode int) (*fd.FD, error) {
return fd.Open(a.prefix, openFlags|mode, 0)
})
if err != nil {
return nil, fmt.Errorf("unable to open %q: %v", a.prefix, err)
}
- stat, err := stat(f.FD())
+ stat, err := fstat(f.FD())
if err != nil {
return nil, fmt.Errorf("unable to stat %q: %v", a.prefix, err)
}
- lf, err := newLocalFile(a, f, a.prefix, stat)
+ lf, err := newLocalFile(a, f, a.prefix, readable, stat)
if err != nil {
return nil, fmt.Errorf("unable to create localFile %q: %v", a.prefix, err)
}
@@ -175,8 +145,6 @@ func (a *attachPoint) makeQID(stat syscall.Stat_t) p9.QID {
log.Warningf("first 8 bytes of host inode id %x will be truncated to construct virtual inode id", stat.Ino)
}
ino := uint64(dev)<<56 | maskedIno
- log.Debugf("host inode %x on device %x mapped to virtual inode %x", stat.Ino, stat.Dev, ino)
-
return p9.QID{
Type: p9.FileMode(stat.Mode).QIDType(),
Path: ino,
@@ -199,9 +167,8 @@ func (a *attachPoint) makeQID(stat syscall.Stat_t) p9.QID {
// The reason that the file is not opened initially as read-write is for better
// performance with 'overlay2' storage driver. overlay2 eagerly copies the
// entire file up when it's opened in write mode, and would perform badly when
+// multiple files are only being opened for read (esp. startup).
type localFile struct {
- p9.DefaultWalkGetAttr
-
// attachPoint is the attachPoint that serves this localFile.
attachPoint *attachPoint
@@ -213,12 +180,19 @@ type localFile struct {
// opened with.
file *fd.FD
+ // controlReadable tells whether 'file' was opened with read permissions
+ // during a walk.
+ controlReadable bool
+
// mode is the mode in which the file was opened. Set to invalidMode
// if localFile isn't opened.
mode p9.OpenFlags
- // ft is the fileType for this file.
- ft fileType
+ // fileType for this file. It is equivalent to:
+ // syscall.Stat_t.Mode & syscall.S_IFMT
+ fileType uint32
+
+ qid p9.QID
// readDirMu protects against concurrent Readdir calls.
readDirMu sync.Mutex
@@ -252,83 +226,88 @@ func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) {
return fd.New(d), nil
}
-func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, error) {
+func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, bool, error) {
path := path.Join(parent.hostPath, name)
- f, err := openAnyFile(path, func(mode int) (*fd.FD, error) {
+ f, readable, err := openAnyFile(path, func(mode int) (*fd.FD, error) {
return fd.OpenAt(parent.file, name, openFlags|mode, 0)
})
- return f, path, err
+ return f, path, readable, err
}
// openAnyFile attempts to open the file in O_RDONLY and if it fails fallsback
// to O_PATH. 'path' is used for logging messages only. 'fn' is what does the
// actual file open and is customizable by the caller.
-func openAnyFile(path string, fn func(mode int) (*fd.FD, error)) (*fd.FD, error) {
+func openAnyFile(path string, fn func(mode int) (*fd.FD, error)) (*fd.FD, bool, error) {
// Attempt to open file in the following mode in order:
// 1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs.
// Use non-blocking to prevent getting stuck inside open(2) for
// FIFOs. This option has no effect on regular files.
// 2. PATH: for symlinks, sockets.
- modes := []int{syscall.O_RDONLY | syscall.O_NONBLOCK, unix.O_PATH}
+ options := []struct {
+ mode int
+ readable bool
+ }{
+ {
+ mode: syscall.O_RDONLY | syscall.O_NONBLOCK,
+ readable: true,
+ },
+ {
+ mode: unix.O_PATH,
+ readable: false,
+ },
+ }
var err error
- var file *fd.FD
- for i, mode := range modes {
- file, err = fn(mode)
+ for i, option := range options {
+ var file *fd.FD
+ file, err = fn(option.mode)
if err == nil {
- // openat succeeded, we're done.
- break
+ // Succeeded opening the file, we're done.
+ return file, option.readable, nil
}
switch e := extractErrno(err); e {
case syscall.ENOENT:
// File doesn't exist, no point in retrying.
- return nil, e
+ return nil, false, e
}
- // openat failed. Try again with next mode, preserving 'err' in case this
- // was the last attempt.
- log.Debugf("Attempt %d to open file failed, mode: %#x, path: %q, err: %v", i, openFlags|mode, path, err)
+ // File failed to open. Try again with next mode, preserving 'err' in case
+ // this was the last attempt.
+ log.Debugf("Attempt %d to open file failed, mode: %#x, path: %q, err: %v", i, openFlags|option.mode, path, err)
}
- if err != nil {
- // All attempts to open file have failed, return the last error.
- log.Debugf("Failed to open file, path: %q, err: %v", path, err)
- return nil, extractErrno(err)
- }
-
- return file, nil
+ // All attempts to open file have failed, return the last error.
+ log.Debugf("Failed to open file, path: %q, err: %v", path, err)
+ return nil, false, extractErrno(err)
}
-func getSupportedFileType(stat syscall.Stat_t, permitSocket bool) (fileType, error) {
- var ft fileType
+func checkSupportedFileType(stat syscall.Stat_t, permitSocket bool) error {
switch stat.Mode & syscall.S_IFMT {
- case syscall.S_IFREG:
- ft = regular
- case syscall.S_IFDIR:
- ft = directory
- case syscall.S_IFLNK:
- ft = symlink
+ case syscall.S_IFREG, syscall.S_IFDIR, syscall.S_IFLNK:
+ return nil
+
case syscall.S_IFSOCK:
if !permitSocket {
- return unknown, syscall.EPERM
+ return syscall.EPERM
}
- ft = socket
+ return nil
+
default:
- return unknown, syscall.EPERM
+ return syscall.EPERM
}
- return ft, nil
}
-func newLocalFile(a *attachPoint, file *fd.FD, path string, stat syscall.Stat_t) (*localFile, error) {
- ft, err := getSupportedFileType(stat, a.conf.HostUDS)
- if err != nil {
+func newLocalFile(a *attachPoint, file *fd.FD, path string, readable bool, stat syscall.Stat_t) (*localFile, error) {
+ if err := checkSupportedFileType(stat, a.conf.HostUDS); err != nil {
return nil, err
}
return &localFile{
- attachPoint: a,
- hostPath: path,
- file: file,
- mode: invalidMode,
- ft: ft,
+ attachPoint: a,
+ hostPath: path,
+ file: file,
+ mode: invalidMode,
+ fileType: stat.Mode & syscall.S_IFMT,
+ qid: a.makeQID(stat),
+ controlReadable: readable,
}, nil
}
@@ -347,13 +326,13 @@ func newFDMaybe(file *fd.FD) *fd.FD {
// fd is blocking; non-blocking is required.
if err := syscall.SetNonblock(dup.FD(), true); err != nil {
- dup.Close()
+ _ = dup.Close()
return nil
}
return dup
}
-func stat(fd int) (syscall.Stat_t, error) {
+func fstat(fd int) (syscall.Stat_t, error) {
var stat syscall.Stat_t
if err := syscall.Fstat(fd, &stat); err != nil {
return syscall.Stat_t{}, err
@@ -361,43 +340,44 @@ func stat(fd int) (syscall.Stat_t, error) {
return stat, nil
}
+func stat(path string) (syscall.Stat_t, error) {
+ var stat syscall.Stat_t
+ if err := syscall.Stat(path, &stat); err != nil {
+ return syscall.Stat_t{}, err
+ }
+ return stat, nil
+}
+
func fchown(fd int, uid p9.UID, gid p9.GID) error {
return syscall.Fchownat(fd, "", int(uid), int(gid), linux.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW)
}
// Open implements p9.File.
-func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
+func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
if l.isOpen() {
panic(fmt.Sprintf("attempting to open already opened file: %q", l.hostPath))
}
// Check if control file can be used or if a new open must be created.
var newFile *fd.FD
- if mode == p9.ReadOnly {
- log.Debugf("Open reusing control file, mode: %v, %q", mode, l.hostPath)
+ if flags == p9.ReadOnly && l.controlReadable {
+ log.Debugf("Open reusing control file, flags: %v, %q", flags, l.hostPath)
newFile = l.file
} else {
// Ideally reopen would call name_to_handle_at (with empty name) and
// open_by_handle_at to reopen the file without using 'hostPath'. However,
// name_to_handle_at and open_by_handle_at aren't supported by overlay2.
- log.Debugf("Open reopening file, mode: %v, %q", mode, l.hostPath)
+ log.Debugf("Open reopening file, flags: %v, %q", flags, l.hostPath)
var err error
- newFile, err = reopenProcFd(l.file, openFlags|mode.OSFlags())
+ // Constrain open flags to the open mode and O_TRUNC.
+ newFile, err = reopenProcFd(l.file, openFlags|(flags.OSFlags()&(syscall.O_ACCMODE|syscall.O_TRUNC)))
if err != nil {
return nil, p9.QID{}, 0, extractErrno(err)
}
}
- stat, err := stat(newFile.FD())
- if err != nil {
- if newFile != l.file {
- newFile.Close()
- }
- return nil, p9.QID{}, 0, extractErrno(err)
- }
-
var fd *fd.FD
- if stat.Mode&syscall.S_IFMT == syscall.S_IFREG {
+ if l.fileType == syscall.S_IFREG {
// Donate FD for regular files only.
fd = newFDMaybe(newFile)
}
@@ -409,8 +389,8 @@ func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
}
l.file = newFile
}
- l.mode = mode
- return fd, l.attachPoint.makeQID(stat), 0, nil
+ l.mode = flags & p9.OpenFlagsModeMask
+ return fd, l.qid, 0, nil
}
// Create implements p9.File.
@@ -437,8 +417,8 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid
if err != nil {
return nil, nil, p9.QID{}, 0, extractErrno(err)
}
- cu := specutils.MakeCleanup(func() {
- child.Close()
+ cu := cleanup.Make(func() {
+ _ = child.Close()
// Best effort attempt to remove the file in case of failure.
if err := syscall.Unlinkat(l.file.FD(), name); err != nil {
log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, name), err)
@@ -449,7 +429,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid
if err := fchown(child.FD(), uid, gid); err != nil {
return nil, nil, p9.QID{}, 0, extractErrno(err)
}
- stat, err := stat(child.FD())
+ stat, err := fstat(child.FD())
if err != nil {
return nil, nil, p9.QID{}, 0, extractErrno(err)
}
@@ -459,10 +439,12 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid
hostPath: path.Join(l.hostPath, name),
file: child,
mode: mode,
+ fileType: syscall.S_IFREG,
+ qid: l.attachPoint.makeQID(stat),
}
cu.Release()
- return newFDMaybe(c.file), c, l.attachPoint.makeQID(stat), 0, nil
+ return newFDMaybe(c.file), c, c.qid, 0, nil
}
// Mkdir implements p9.File.
@@ -478,7 +460,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID)
if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil {
return p9.QID{}, extractErrno(err)
}
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
// Best effort attempt to remove the dir in case of failure.
if err := unix.Unlinkat(l.file.FD(), name, unix.AT_REMOVEDIR); err != nil {
log.Warningf("error unlinking dir %q after failure: %v", path.Join(l.hostPath, name), err)
@@ -497,7 +479,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID)
if err := fchown(f.FD(), uid, gid); err != nil {
return p9.QID{}, extractErrno(err)
}
- stat, err := stat(f.FD())
+ stat, err := fstat(f.FD())
if err != nil {
return p9.QID{}, extractErrno(err)
}
@@ -508,55 +490,74 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID)
// Walk implements p9.File.
func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) {
+ qids, file, _, err := l.walk(names)
+ return qids, file, err
+}
+
+// WalkGetAttr implements p9.File.
+func (l *localFile) WalkGetAttr(names []string) ([]p9.QID, p9.File, p9.AttrMask, p9.Attr, error) {
+ qids, file, stat, err := l.walk(names)
+ if err != nil {
+ return nil, nil, p9.AttrMask{}, p9.Attr{}, err
+ }
+ mask, attr := l.fillAttr(stat)
+ return qids, file, mask, attr, nil
+}
+
+func (l *localFile) walk(names []string) ([]p9.QID, p9.File, syscall.Stat_t, error) {
// Duplicate current file if 'names' is empty.
if len(names) == 0 {
- newFile, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) {
+ newFile, readable, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) {
return reopenProcFd(l.file, openFlags|mode)
})
if err != nil {
- return nil, nil, extractErrno(err)
+ return nil, nil, syscall.Stat_t{}, extractErrno(err)
}
- stat, err := stat(newFile.FD())
+ stat, err := fstat(newFile.FD())
if err != nil {
- newFile.Close()
- return nil, nil, extractErrno(err)
+ _ = newFile.Close()
+ return nil, nil, syscall.Stat_t{}, extractErrno(err)
}
c := &localFile{
- attachPoint: l.attachPoint,
- hostPath: l.hostPath,
- file: newFile,
- mode: invalidMode,
+ attachPoint: l.attachPoint,
+ hostPath: l.hostPath,
+ file: newFile,
+ mode: invalidMode,
+ fileType: l.fileType,
+ qid: l.attachPoint.makeQID(stat),
+ controlReadable: readable,
}
- return []p9.QID{l.attachPoint.makeQID(stat)}, c, nil
+ return []p9.QID{c.qid}, c, stat, nil
}
var qids []p9.QID
+ var lastStat syscall.Stat_t
last := l
for _, name := range names {
- f, path, err := openAnyFileFromParent(last, name)
+ f, path, readable, err := openAnyFileFromParent(last, name)
if last != l {
- last.Close()
+ _ = last.Close()
}
if err != nil {
- return nil, nil, extractErrno(err)
+ return nil, nil, syscall.Stat_t{}, extractErrno(err)
}
- stat, err := stat(f.FD())
+ lastStat, err = fstat(f.FD())
if err != nil {
- f.Close()
- return nil, nil, extractErrno(err)
+ _ = f.Close()
+ return nil, nil, syscall.Stat_t{}, extractErrno(err)
}
- c, err := newLocalFile(last.attachPoint, f, path, stat)
+ c, err := newLocalFile(last.attachPoint, f, path, readable, lastStat)
if err != nil {
- f.Close()
- return nil, nil, extractErrno(err)
+ _ = f.Close()
+ return nil, nil, syscall.Stat_t{}, extractErrno(err)
}
- qids = append(qids, l.attachPoint.makeQID(stat))
+ qids = append(qids, c.qid)
last = c
}
- return qids, last, nil
+ return qids, last, lastStat, nil
}
// StatFS implements p9.File.
@@ -592,16 +593,20 @@ func (l *localFile) FSync() error {
// GetAttr implements p9.File.
func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) {
- stat, err := stat(l.file.FD())
+ stat, err := fstat(l.file.FD())
if err != nil {
return p9.QID{}, p9.AttrMask{}, p9.Attr{}, extractErrno(err)
}
+ mask, attr := l.fillAttr(stat)
+ return l.qid, mask, attr, nil
+}
+func (l *localFile) fillAttr(stat syscall.Stat_t) (p9.AttrMask, p9.Attr) {
attr := p9.Attr{
Mode: p9.FileMode(stat.Mode),
UID: p9.UID(stat.Uid),
GID: p9.GID(stat.Gid),
- NLink: stat.Nlink,
+ NLink: uint64(stat.Nlink),
RDev: stat.Rdev,
Size: uint64(stat.Size),
BlockSize: uint64(stat.Blksize),
@@ -625,8 +630,7 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error)
MTime: true,
CTime: true,
}
-
- return l.attachPoint.makeQID(stat), valid, attr, nil
+ return valid, attr
}
// SetAttr implements p9.File. Due to mismatch in file API, options
@@ -667,7 +671,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
// Check if it's possible to use cached file, or if another one needs to be
// opened for write.
f := l.file
- if l.ft == regular && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
+ if l.fileType == syscall.S_IFREG && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
var err error
f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY)
if err != nil {
@@ -723,7 +727,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
}
}
- if l.ft == symlink {
+ if l.fileType == syscall.S_IFLNK {
// utimensat operates different that other syscalls. To operate on a
// symlink it *requires* AT_SYMLINK_NOFOLLOW with dirFD and a non-empty
// name.
@@ -765,6 +769,22 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
return err
}
+func (*localFile) GetXattr(string, uint64) (string, error) {
+ return "", syscall.EOPNOTSUPP
+}
+
+func (*localFile) SetXattr(string, string, uint32) error {
+ return syscall.EOPNOTSUPP
+}
+
+func (*localFile) ListXattr(uint64) (map[string]struct{}, error) {
+ return nil, syscall.EOPNOTSUPP
+}
+
+func (*localFile) RemoveXattr(string) error {
+ return syscall.EOPNOTSUPP
+}
+
// Allocate implements p9.File.
func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error {
if !l.isOpen() {
@@ -778,7 +798,7 @@ func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error
}
// Rename implements p9.File; this should never be called.
-func (l *localFile) Rename(p9.File, string) error {
+func (*localFile) Rename(p9.File, string) error {
panic("rename called directly")
}
@@ -846,7 +866,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9.
if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil {
return p9.QID{}, extractErrno(err)
}
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
// Best effort attempt to remove the symlink in case of failure.
if err := syscall.Unlinkat(l.file.FD(), newName); err != nil {
log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err)
@@ -864,7 +884,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9.
if err := fchown(f.FD(), uid, gid); err != nil {
return p9.QID{}, extractErrno(err)
}
- stat, err := stat(f.FD())
+ stat, err := fstat(f.FD())
if err != nil {
return p9.QID{}, extractErrno(err)
}
@@ -891,13 +911,39 @@ func (l *localFile) Link(target p9.File, newName string) error {
}
// Mknod implements p9.File.
-//
-// Not implemented.
-func (*localFile) Mknod(_ string, _ p9.FileMode, _ uint32, _ uint32, _ p9.UID, _ p9.GID) (p9.QID, error) {
+func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, _ p9.UID, _ p9.GID) (p9.QID, error) {
+ conf := l.attachPoint.conf
+ if conf.ROMount {
+ if conf.PanicOnWrite {
+ panic("attempt to write to RO mount")
+ }
+ return p9.QID{}, syscall.EROFS
+ }
+
+ hostPath := path.Join(l.hostPath, name)
+
+ // Return EEXIST if the file already exists.
+ if _, err := stat(hostPath); err == nil {
+ return p9.QID{}, syscall.EEXIST
+ }
+
// From mknod(2) man page:
// "EPERM: [...] if the filesystem containing pathname does not support
// the type of node requested."
- return p9.QID{}, syscall.EPERM
+ if mode.FileType() != p9.ModeRegular {
+ return p9.QID{}, syscall.EPERM
+ }
+
+ // Allow Mknod to create regular files.
+ if err := syscall.Mknod(hostPath, uint32(mode), 0); err != nil {
+ return p9.QID{}, err
+ }
+
+ stat, err := stat(hostPath)
+ if err != nil {
+ return p9.QID{}, extractErrno(err)
+ }
+ return l.attachPoint.makeQID(stat), nil
}
// UnlinkAt implements p9.File.
@@ -933,9 +979,12 @@ func (l *localFile) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) {
skip := uint64(0)
- // Check if the file is at the correct position already. If not, seek to the
- // beginning and read the entire directory again.
- if l.lastDirentOffset != offset {
+ // Check if the file is at the correct position already. If not, seek to
+ // the beginning and read the entire directory again. We always seek if
+ // offset is 0, since this is side-effectual (equivalent to rewinddir(3),
+ // which causes the directory stream to resynchronize with the directory's
+ // current contents).
+ if l.lastDirentOffset != offset || offset == 0 {
if _, err := syscall.Seek(l.file.FD(), 0, 0); err != nil {
return nil, extractErrno(err)
}
@@ -955,14 +1004,14 @@ func (l *localFile) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) {
}
func (l *localFile) readDirent(f int, offset uint64, count uint32, skip uint64) ([]p9.Dirent, error) {
+ var dirents []p9.Dirent
+
// Limit 'count' to cap the slice size that is returned.
const maxCount = 100000
if count > maxCount {
count = maxCount
}
- dirents := make([]p9.Dirent, 0, count)
-
// Pre-allocate buffers that will be reused to get partial results.
direntsBuf := make([]byte, 8192)
names := make([]string, 0, 100)
@@ -1063,13 +1112,13 @@ func (l *localFile) Connect(flags p9.ConnectFlags) (*fd.FD, error) {
}
if err := syscall.SetNonblock(f, true); err != nil {
- syscall.Close(f)
+ _ = syscall.Close(f)
return nil, err
}
sa := syscall.SockaddrUnix{Name: l.hostPath}
if err := syscall.Connect(f, &sa); err != nil {
- syscall.Close(f)
+ _ = syscall.Close(f)
return nil, err
}
diff --git a/runsc/fsgofer/fsgofer_amd64_unsafe.go b/runsc/fsgofer/fsgofer_amd64_unsafe.go
new file mode 100644
index 000000000..5d4aab597
--- /dev/null
+++ b/runsc/fsgofer/fsgofer_amd64_unsafe.go
@@ -0,0 +1,49 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package fsgofer
+
+import (
+ "syscall"
+ "unsafe"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserr"
+)
+
+func statAt(dirFd int, name string) (syscall.Stat_t, error) {
+ nameBytes, err := syscall.BytePtrFromString(name)
+ if err != nil {
+ return syscall.Stat_t{}, err
+ }
+ namePtr := unsafe.Pointer(nameBytes)
+
+ var stat syscall.Stat_t
+ statPtr := unsafe.Pointer(&stat)
+
+ if _, _, errno := syscall.Syscall6(
+ syscall.SYS_NEWFSTATAT,
+ uintptr(dirFd),
+ uintptr(namePtr),
+ uintptr(statPtr),
+ linux.AT_SYMLINK_NOFOLLOW,
+ 0,
+ 0); errno != 0 {
+
+ return syscall.Stat_t{}, syserr.FromHost(errno).ToError()
+ }
+ return stat, nil
+}
diff --git a/runsc/fsgofer/fsgofer_arm64_unsafe.go b/runsc/fsgofer/fsgofer_arm64_unsafe.go
new file mode 100644
index 000000000..8041fd352
--- /dev/null
+++ b/runsc/fsgofer/fsgofer_arm64_unsafe.go
@@ -0,0 +1,49 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package fsgofer
+
+import (
+ "syscall"
+ "unsafe"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserr"
+)
+
+func statAt(dirFd int, name string) (syscall.Stat_t, error) {
+ nameBytes, err := syscall.BytePtrFromString(name)
+ if err != nil {
+ return syscall.Stat_t{}, err
+ }
+ namePtr := unsafe.Pointer(nameBytes)
+
+ var stat syscall.Stat_t
+ statPtr := unsafe.Pointer(&stat)
+
+ if _, _, errno := syscall.Syscall6(
+ syscall.SYS_FSTATAT,
+ uintptr(dirFd),
+ uintptr(namePtr),
+ uintptr(statPtr),
+ linux.AT_SYMLINK_NOFOLLOW,
+ 0,
+ 0); errno != 0 {
+
+ return syscall.Stat_t{}, syserr.FromHost(errno).ToError()
+ }
+ return stat, nil
+}
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index 05af7e397..94f167417 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -26,6 +26,19 @@ import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/test/testutil"
+)
+
+var allOpenFlags = []p9.OpenFlags{p9.ReadOnly, p9.WriteOnly, p9.ReadWrite}
+
+var (
+ allTypes = []uint32{syscall.S_IFREG, syscall.S_IFDIR, syscall.S_IFLNK}
+
+ // allConfs is set in init().
+ allConfs []Config
+
+ rwConfs = []Config{{ROMount: false}}
+ roConfs = []Config{{ROMount: true}}
)
func init() {
@@ -39,6 +52,13 @@ func init() {
}
}
+func configTestName(config *Config) string {
+ if config.ROMount {
+ return "ROMount"
+ }
+ return "RWMount"
+}
+
func assertPanic(t *testing.T, f func()) {
defer func() {
if r := recover(); r == nil {
@@ -88,71 +108,76 @@ func testReadWrite(f p9.File, flags p9.OpenFlags, content []byte) error {
return nil
}
-var allOpenFlags = []p9.OpenFlags{p9.ReadOnly, p9.WriteOnly, p9.ReadWrite}
-
-var (
- allTypes = []fileType{regular, directory, symlink}
-
- // allConfs is set in init() above.
- allConfs []Config
-
- rwConfs = []Config{{ROMount: false}}
- roConfs = []Config{{ROMount: true}}
-)
-
type state struct {
- root *localFile
- file *localFile
- conf Config
- ft fileType
+ root *localFile
+ file *localFile
+ conf Config
+ fileType uint32
}
func (s state) String() string {
- return fmt.Sprintf("type(%v)", s.ft)
+ return fmt.Sprintf("type(%v)", s.fileType)
+}
+
+func typeName(fileType uint32) string {
+ switch fileType {
+ case syscall.S_IFREG:
+ return "file"
+ case syscall.S_IFDIR:
+ return "directory"
+ case syscall.S_IFLNK:
+ return "symlink"
+ default:
+ panic(fmt.Sprintf("invalid file type for test: %d", fileType))
+ }
}
func runAll(t *testing.T, test func(*testing.T, state)) {
runCustom(t, allTypes, allConfs, test)
}
-func runCustom(t *testing.T, types []fileType, confs []Config, test func(*testing.T, state)) {
+func runCustom(t *testing.T, types []uint32, confs []Config, test func(*testing.T, state)) {
for _, c := range confs {
- t.Logf("Config: %+v", c)
-
for _, ft := range types {
- t.Logf("File type: %v", ft)
+ name := fmt.Sprintf("%s/%s", configTestName(&c), typeName(ft))
+ t.Run(name, func(t *testing.T) {
+ path, name, err := setup(ft)
+ if err != nil {
+ t.Fatalf("%v", err)
+ }
+ defer os.RemoveAll(path)
- path, name, err := setup(ft)
- if err != nil {
- t.Fatalf("%v", err)
- }
- defer os.RemoveAll(path)
+ a, err := NewAttachPoint(path, c)
+ if err != nil {
+ t.Fatalf("NewAttachPoint failed: %v", err)
+ }
+ root, err := a.Attach()
+ if err != nil {
+ t.Fatalf("Attach failed, err: %v", err)
+ }
- a, err := NewAttachPoint(path, c)
- if err != nil {
- t.Fatalf("NewAttachPoint failed: %v", err)
- }
- root, err := a.Attach()
- if err != nil {
- t.Fatalf("Attach failed, err: %v", err)
- }
+ _, file, err := root.Walk([]string{name})
+ if err != nil {
+ root.Close()
+ t.Fatalf("root.Walk({%q}) failed, err: %v", "symlink", err)
+ }
- _, file, err := root.Walk([]string{name})
- if err != nil {
+ st := state{
+ root: root.(*localFile),
+ file: file.(*localFile),
+ conf: c,
+ fileType: ft,
+ }
+ test(t, st)
+ file.Close()
root.Close()
- t.Fatalf("root.Walk({%q}) failed, err: %v", "symlink", err)
- }
-
- st := state{root: root.(*localFile), file: file.(*localFile), conf: c, ft: ft}
- test(t, st)
- file.Close()
- root.Close()
+ })
}
}
}
-func setup(ft fileType) (string, string, error) {
- path, err := ioutil.TempDir("", "root-")
+func setup(fileType uint32) (string, string, error) {
+ path, err := ioutil.TempDir(testutil.TmpDir(), "root-")
if err != nil {
return "", "", fmt.Errorf("ioutil.TempDir() failed, err: %v", err)
}
@@ -169,26 +194,26 @@ func setup(ft fileType) (string, string, error) {
defer root.Close()
var name string
- switch ft {
- case regular:
+ switch fileType {
+ case syscall.S_IFREG:
name = "file"
_, f, _, _, err := root.Create(name, p9.ReadWrite, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid()))
if err != nil {
return "", "", fmt.Errorf("createFile(root, %q) failed, err: %v", "test", err)
}
defer f.Close()
- case directory:
+ case syscall.S_IFDIR:
name = "dir"
if _, err := root.Mkdir(name, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil {
return "", "", fmt.Errorf("root.MkDir(%q) failed, err: %v", name, err)
}
- case symlink:
+ case syscall.S_IFLNK:
name = "symlink"
if _, err := root.Symlink("/some/target", name, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil {
return "", "", fmt.Errorf("root.Symlink(%q) failed, err: %v", name, err)
}
default:
- panic(fmt.Sprintf("unknown file type %v", ft))
+ panic(fmt.Sprintf("unknown file type %v", fileType))
}
return path, name, nil
}
@@ -202,7 +227,7 @@ func createFile(dir *localFile, name string) (*localFile, error) {
}
func TestReadWrite(t *testing.T) {
- runCustom(t, []fileType{directory}, rwConfs, func(t *testing.T, s state) {
+ runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
child, err := createFile(s.file, "test")
if err != nil {
t.Fatalf("%v: createFile() failed, err: %v", s, err)
@@ -232,7 +257,7 @@ func TestReadWrite(t *testing.T) {
}
func TestCreate(t *testing.T) {
- runCustom(t, []fileType{directory}, rwConfs, func(t *testing.T, s state) {
+ runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
for i, flags := range allOpenFlags {
_, l, _, _, err := s.file.Create(fmt.Sprintf("test-%d", i), flags, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid()))
if err != nil {
@@ -249,7 +274,7 @@ func TestCreate(t *testing.T) {
// TestReadWriteDup tests that a file opened in any mode can be dup'ed and
// reopened in any other mode.
func TestReadWriteDup(t *testing.T) {
- runCustom(t, []fileType{directory}, rwConfs, func(t *testing.T, s state) {
+ runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
child, err := createFile(s.file, "test")
if err != nil {
t.Fatalf("%v: createFile() failed, err: %v", s, err)
@@ -291,7 +316,7 @@ func TestReadWriteDup(t *testing.T) {
}
func TestUnopened(t *testing.T) {
- runCustom(t, []fileType{regular}, allConfs, func(t *testing.T, s state) {
+ runCustom(t, []uint32{syscall.S_IFREG}, allConfs, func(t *testing.T, s state) {
b := []byte("foobar")
if _, err := s.file.WriteAt(b, 0); err != syscall.EBADF {
t.Errorf("%v: WriteAt() should have failed, got: %v, expected: syscall.EBADF", s, err)
@@ -308,6 +333,32 @@ func TestUnopened(t *testing.T) {
})
}
+// TestOpenOPath is a regression test to ensure that a file that cannot be
+// opened for read is not allowed to be opened. This was happening because the
+// control file was opened with O_PATH, but Open() was not checking for it and
+// was allowing the control file to be reused.
+func TestOpenOPath(t *testing.T) {
+ runCustom(t, []uint32{syscall.S_IFREG}, rwConfs, func(t *testing.T, s state) {
+ // First remove all permissions on the file.
+ if err := s.file.SetAttr(p9.SetAttrMask{Permissions: true}, p9.SetAttr{Permissions: p9.FileMode(0)}); err != nil {
+ t.Fatalf("SetAttr(): %v", err)
+ }
+ // Then walk to the file again to open a new control file.
+ filename := filepath.Base(s.file.hostPath)
+ _, newFile, err := s.root.Walk([]string{filename})
+ if err != nil {
+ t.Fatalf("root.Walk(%q): %v", filename, err)
+ }
+
+ if newFile.(*localFile).controlReadable {
+ t.Fatalf("control file didn't open with O_PATH: %+v", newFile)
+ }
+ if _, _, _, err := newFile.Open(p9.ReadOnly); err != syscall.EACCES {
+ t.Fatalf("Open() should have failed, got: %v, wanted: EACCES", err)
+ }
+ })
+}
+
func SetGetAttr(l *localFile, valid p9.SetAttrMask, attr p9.SetAttr) (p9.Attr, error) {
if err := l.SetAttr(valid, attr); err != nil {
return p9.Attr{}, err
@@ -324,7 +375,7 @@ func TestSetAttrPerm(t *testing.T) {
valid := p9.SetAttrMask{Permissions: true}
attr := p9.SetAttr{Permissions: 0777}
got, err := SetGetAttr(s.file, valid, attr)
- if s.ft == symlink {
+ if s.fileType == syscall.S_IFLNK {
if err == nil {
t.Fatalf("%v: SetGetAttr(valid, %v) should have failed", s, attr.Permissions)
}
@@ -345,7 +396,7 @@ func TestSetAttrSize(t *testing.T) {
valid := p9.SetAttrMask{Size: true}
attr := p9.SetAttr{Size: size}
got, err := SetGetAttr(s.file, valid, attr)
- if s.ft == symlink || s.ft == directory {
+ if s.fileType == syscall.S_IFLNK || s.fileType == syscall.S_IFDIR {
if err == nil {
t.Fatalf("%v: SetGetAttr(valid, %v) should have failed", s, attr.Permissions)
}
@@ -427,7 +478,7 @@ func TestLink(t *testing.T) {
}
err = dir.Link(s.file, linkFile)
- if s.ft == directory {
+ if s.fileType == syscall.S_IFDIR {
if err != syscall.EPERM {
t.Errorf("%v: Link(target, %s) should have failed, got: %v, expected: syscall.EPERM", s, linkFile, err)
}
@@ -485,7 +536,7 @@ func TestROMountPanics(t *testing.T) {
}
func TestWalkNotFound(t *testing.T) {
- runCustom(t, []fileType{directory}, allConfs, func(t *testing.T, s state) {
+ runCustom(t, []uint32{syscall.S_IFDIR}, allConfs, func(t *testing.T, s state) {
if _, _, err := s.file.Walk([]string{"nobody-here"}); err != syscall.ENOENT {
t.Errorf("%v: Walk(%q) should have failed, got: %v, expected: syscall.ENOENT", s, "nobody-here", err)
}
@@ -506,7 +557,7 @@ func TestWalkDup(t *testing.T) {
}
func TestReaddir(t *testing.T) {
- runCustom(t, []fileType{directory}, rwConfs, func(t *testing.T, s state) {
+ runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
name := "dir"
if _, err := s.file.Mkdir(name, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil {
t.Fatalf("%v: MkDir(%s) failed, err: %v", s, name, err)
diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go
index ff2556aee..542b54365 100644
--- a/runsc/fsgofer/fsgofer_unsafe.go
+++ b/runsc/fsgofer/fsgofer_unsafe.go
@@ -18,34 +18,9 @@ import (
"syscall"
"unsafe"
- "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/syserr"
)
-func statAt(dirFd int, name string) (syscall.Stat_t, error) {
- nameBytes, err := syscall.BytePtrFromString(name)
- if err != nil {
- return syscall.Stat_t{}, err
- }
- namePtr := unsafe.Pointer(nameBytes)
-
- var stat syscall.Stat_t
- statPtr := unsafe.Pointer(&stat)
-
- if _, _, errno := syscall.Syscall6(
- syscall.SYS_NEWFSTATAT,
- uintptr(dirFd),
- uintptr(namePtr),
- uintptr(statPtr),
- linux.AT_SYMLINK_NOFOLLOW,
- 0,
- 0); errno != 0 {
-
- return syscall.Stat_t{}, syserr.FromHost(errno).ToError()
- }
- return stat, nil
-}
-
func utimensat(dirFd int, name string, times [2]syscall.Timespec, flags int) error {
// utimensat(2) doesn't accept empty name, instead name must be nil to make it
// operate directly on 'dirFd' unlike other *at syscalls.
diff --git a/runsc/main.go b/runsc/main.go
index ae906c661..69cb505fa 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -26,8 +26,7 @@ import (
"path/filepath"
"strings"
"syscall"
-
- "flag"
+ "time"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/log"
@@ -35,6 +34,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/cmd"
+ "gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/specutils"
)
@@ -46,15 +46,19 @@ var (
logFormat = flag.String("log-format", "text", "log format: text (default), json, or json-k8s.")
debug = flag.Bool("debug", false, "enable debug logging.")
showVersion = flag.Bool("version", false, "show version and exit.")
+ // TODO(gvisor.dev/issue/193): support systemd cgroups
+ systemdCgroup = flag.Bool("systemd-cgroup", false, "Use systemd for cgroups. NOT SUPPORTED.")
// These flags are unique to runsc, and are used to configure parts of the
// system that are not covered by the runtime spec.
// Debugging flags.
debugLog = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
+ panicLog = flag.String("panic-log", "", "file path were panic reports and other Go's runtime messages are written.")
logPackets = flag.Bool("log-packets", false, "enable network packet logging.")
logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.")
debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.")
+ panicLogFD = flag.Int("panic-log-fd", -1, "file descriptor to write Go's runtime messages.")
debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.")
alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr.")
@@ -67,11 +71,14 @@ var (
platformName = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm.")
network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
hardwareGSO = flag.Bool("gso", true, "enable hardware segmentation offload if it is supported by a network device.")
- softwareGSO = flag.Bool("software-gso", true, "enable software segmentation offload when hardware ofload can't be enabled.")
+ softwareGSO = flag.Bool("software-gso", true, "enable software segmentation offload when hardware offload can't be enabled.")
+ txChecksumOffload = flag.Bool("tx-checksum-offload", false, "enable TX checksum offload.")
+ rxChecksumOffload = flag.Bool("rx-checksum-offload", true, "enable RX checksum offload.")
+ qDisc = flag.String("qdisc", "fifo", "specifies which queueing discipline to apply by default to the non loopback nics used by the sandbox.")
fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
- overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.")
+ overlayfsStaleRead = flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem")
watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
@@ -79,6 +86,9 @@ var (
numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
referenceLeakMode = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.")
+ cpuNumFromQuota = flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
+ vfs2Enabled = flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. This uses the new experimental VFS layer.")
+ fuseEnabled = flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.")
// Test flags, not to be used outside tests, ever.
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
@@ -113,8 +123,8 @@ func main() {
subcommands.Register(new(cmd.Resume), "")
subcommands.Register(new(cmd.Run), "")
subcommands.Register(new(cmd.Spec), "")
- subcommands.Register(new(cmd.Start), "")
subcommands.Register(new(cmd.State), "")
+ subcommands.Register(new(cmd.Start), "")
subcommands.Register(new(cmd.Wait), "")
// Register internal commands with the internal group name. This causes
@@ -124,6 +134,7 @@ func main() {
subcommands.Register(new(cmd.Boot), internalGroup)
subcommands.Register(new(cmd.Debug), internalGroup)
subcommands.Register(new(cmd.Gofer), internalGroup)
+ subcommands.Register(new(cmd.Statefile), internalGroup)
// All subcommands must be registered before flag parsing.
flag.Parse()
@@ -136,6 +147,12 @@ func main() {
os.Exit(0)
}
+ // TODO(gvisor.dev/issue/193): support systemd cgroups
+ if *systemdCgroup {
+ fmt.Fprintln(os.Stderr, "systemd cgroup flag passed, but systemd cgroups not supported. See gvisor.dev/issue/193")
+ os.Exit(1)
+ }
+
var errorLogger io.Writer
if *logFD > -1 {
errorLogger = os.NewFile(uintptr(*logFD), "error log file")
@@ -185,6 +202,11 @@ func main() {
cmd.Fatalf("%v", err)
}
+ queueingDiscipline, err := boot.MakeQueueingDiscipline(*qDisc)
+ if err != nil {
+ cmd.Fatalf("%s", err)
+ }
+
// Sets the reference leak check mode. Also set it in config below to
// propagate it to child processes.
refs.SetLeakMode(refsLeakMode)
@@ -196,6 +218,7 @@ func main() {
LogFilename: *logFilename,
LogFormat: *logFormat,
DebugLog: *debugLog,
+ PanicLog: *panicLog,
DebugLogFormat: *debugLogFormat,
FileAccess: fsAccess,
FSGoferHostUDS: *fsGoferHostUDS,
@@ -203,6 +226,8 @@ func main() {
Network: netType,
HardwareGSO: *hardwareGSO,
SoftwareGSO: *softwareGSO,
+ TXChecksumOffload: *txChecksumOffload,
+ RXChecksumOffload: *rxChecksumOffload,
LogPackets: *logPackets,
Platform: platformType,
Strace: *strace,
@@ -216,7 +241,10 @@ func main() {
AlsoLogToStderr: *alsoLogToStderr,
ReferenceLeakMode: refsLeakMode,
OverlayfsStaleRead: *overlayfsStaleRead,
-
+ CPUNumFromQuota: *cpuNumFromQuota,
+ VFS2: *vfs2Enabled,
+ FUSE: *fuseEnabled,
+ QDisc: queueingDiscipline,
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
TestOnlyTestNameEnv: *testOnlyTestNameEnv,
}
@@ -229,26 +257,24 @@ func main() {
log.SetLevel(log.Debug)
}
+ // Logging will include the local date and time via the time package.
+ //
+ // On first use, time.Local initializes the local time zone, which
+ // involves opening tzdata files on the host. Since this requires
+ // opening host files, it must be done before syscall filter
+ // installation.
+ //
+ // Generally there will be a log message before filter installation
+ // that will force initialization, but force initialization here in
+ // case that does not occur.
+ _ = time.Local.String()
+
subcommand := flag.CommandLine.Arg(0)
var e log.Emitter
if *debugLogFD > -1 {
f := os.NewFile(uintptr(*debugLogFD), "debug log file")
- // Quick sanity check to make sure no other commands get passed
- // a log fd (they should use log dir instead).
- if subcommand != "boot" && subcommand != "gofer" {
- cmd.Fatalf("flag --debug-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand)
- }
-
- // If we are the boot process, then we own our stdio FDs and can do what we
- // want with them. Since Docker and Containerd both eat boot's stderr, we
- // dup our stderr to the provided log FD so that panics will appear in the
- // logs, rather than just disappear.
- if err := syscall.Dup3(int(f.Fd()), int(os.Stderr.Fd()), 0); err != nil {
- cmd.Fatalf("error dup'ing fd %d to stderr: %v", f.Fd(), err)
- }
-
e = newEmitter(*debugLogFormat, f)
} else if *debugLog != "" {
@@ -264,8 +290,26 @@ func main() {
e = newEmitter("text", ioutil.Discard)
}
- if *alsoLogToStderr {
- e = log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)}
+ if *panicLogFD > -1 || *debugLogFD > -1 {
+ fd := *panicLogFD
+ if fd < 0 {
+ fd = *debugLogFD
+ }
+ // Quick sanity check to make sure no other commands get passed
+ // a log fd (they should use log dir instead).
+ if subcommand != "boot" && subcommand != "gofer" {
+ cmd.Fatalf("flags --debug-log-fd and --panic-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand)
+ }
+
+ // If we are the boot process, then we own our stdio FDs and can do what we
+ // want with them. Since Docker and Containerd both eat boot's stderr, we
+ // dup our stderr to the provided log FD so that panics will appear in the
+ // logs, rather than just disappear.
+ if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil {
+ cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err)
+ }
+ } else if *alsoLogToStderr {
+ e = &log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)}
}
log.SetTarget(e)
@@ -281,6 +325,7 @@ func main() {
log.Infof("\t\tFileAccess: %v, overlay: %t", conf.FileAccess, conf.Overlay)
log.Infof("\t\tNetwork: %v, logging: %t", conf.Network, conf.LogPackets)
log.Infof("\t\tStrace: %t, max size: %d, syscalls: %s", conf.Strace, conf.StraceLogSize, conf.StraceSyscalls)
+ log.Infof("\t\tVFS2 enabled: %v", conf.VFS2)
log.Infof("***************************")
if *testOnlyAllowRunAsCurrentUserWithoutChroot {
@@ -297,7 +342,7 @@ func main() {
log.Infof("Exiting with status: %v", ws)
if ws.Signaled() {
// No good way to return it, emulate what the shell does. Maybe raise
- // signall to self?
+ // signal to self?
os.Exit(128 + int(ws.Signal()))
}
os.Exit(ws.ExitStatus())
@@ -310,11 +355,11 @@ func main() {
func newEmitter(format string, logFile io.Writer) log.Emitter {
switch format {
case "text":
- return &log.GoogleEmitter{&log.Writer{Next: logFile}}
+ return log.GoogleEmitter{&log.Writer{Next: logFile}}
case "json":
- return &log.JSONEmitter{log.Writer{Next: logFile}}
+ return log.JSONEmitter{&log.Writer{Next: logFile}}
case "json-k8s":
- return &log.K8sJSONEmitter{log.Writer{Next: logFile}}
+ return log.K8sJSONEmitter{&log.Writer{Next: logFile}}
}
cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format)
panic("unreachable")
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index 27459e6d1..2b9d4549d 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
@@ -9,16 +9,18 @@ go_library(
"network_unsafe.go",
"sandbox.go",
],
- importpath = "gvisor.dev/gvisor/runsc/sandbox",
visibility = [
"//runsc:__subpackages__",
],
deps = [
+ "//pkg/cleanup",
"//pkg/control/client",
"//pkg/control/server",
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/platform",
+ "//pkg/sync",
+ "//pkg/tcpip/header",
"//pkg/tcpip/stack",
"//pkg/urpc",
"//runsc/boot",
@@ -27,7 +29,7 @@ go_library(
"//runsc/console",
"//runsc/specutils",
"@com_github_cenkalti_backoff//:go_default_library",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
"@com_github_syndtr_gocapability//capability:go_default_library",
"@com_github_vishvananda_netlink//:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index d42de0176..817a923ad 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -21,13 +21,13 @@ import (
"path/filepath"
"runtime"
"strconv"
- "strings"
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/urpc"
"gvisor.dev/gvisor/runsc/boot"
@@ -62,7 +62,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
// Build the path to the net namespace of the sandbox process.
// This is what we will copy.
nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
- if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.NumNetworkChannels); err != nil {
+ if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.TXChecksumOffload, conf.RXChecksumOffload, conf.NumNetworkChannels, conf.QDisc); err != nil {
return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err)
}
case boot.NetworkHost:
@@ -74,30 +74,8 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
}
func createDefaultLoopbackInterface(conn *urpc.Client) error {
- link := boot.LoopbackLink{
- Name: "lo",
- Addresses: []net.IP{
- net.IP("\x7f\x00\x00\x01"),
- net.IPv6loopback,
- },
- Routes: []boot.Route{
- {
- Destination: net.IPNet{
-
- IP: net.IPv4(0x7f, 0, 0, 0),
- Mask: net.IPv4Mask(0xff, 0, 0, 0),
- },
- },
- {
- Destination: net.IPNet{
- IP: net.IPv6loopback,
- Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)),
- },
- },
- },
- }
if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{
- LoopbackLinks: []boot.LoopbackLink{link},
+ LoopbackLinks: []boot.LoopbackLink{boot.DefaultLoopbackLink},
}, nil); err != nil {
return fmt.Errorf("creating loopback link and routes: %v", err)
}
@@ -137,7 +115,7 @@ func isRootNS() (bool, error) {
// createInterfacesAndRoutesFromNS scrapes the interface and routes from the
// net namespace with the given path, creates them in the sandbox, and removes
// them from the host.
-func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, numNetworkChannels int) error {
+func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, txChecksumOffload bool, rxChecksumOffload bool, numNetworkChannels int, qDisc boot.QueueingDiscipline) error {
// Join the network namespace that we will be copying.
restore, err := joinNetNS(nsPath)
if err != nil {
@@ -156,7 +134,6 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
return err
}
if isRoot {
-
return fmt.Errorf("cannot run with network enabled in root network namespace")
}
@@ -173,53 +150,59 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
return fmt.Errorf("fetching interface addresses for %q: %v", iface.Name, err)
}
- // We build our own loopback devices.
+ // We build our own loopback device.
if iface.Flags&net.FlagLoopback != 0 {
- links, err := loopbackLinks(iface, allAddrs)
+ link, err := loopbackLink(iface, allAddrs)
if err != nil {
- return fmt.Errorf("getting loopback routes and links for iface %q: %v", iface.Name, err)
+ return fmt.Errorf("getting loopback link for iface %q: %v", iface.Name, err)
}
- args.LoopbackLinks = append(args.LoopbackLinks, links...)
+ args.LoopbackLinks = append(args.LoopbackLinks, link)
continue
}
- // Keep only IPv4 addresses.
- var ip4addrs []*net.IPNet
+ var ipAddrs []*net.IPNet
for _, ifaddr := range allAddrs {
ipNet, ok := ifaddr.(*net.IPNet)
if !ok {
return fmt.Errorf("address is not IPNet: %+v", ifaddr)
}
- if ipNet.IP.To4() == nil {
- log.Warningf("IPv6 is not supported, skipping: %v", ipNet)
- continue
- }
- ip4addrs = append(ip4addrs, ipNet)
+ ipAddrs = append(ipAddrs, ipNet)
}
- if len(ip4addrs) == 0 {
- log.Warningf("No IPv4 address found for interface %q, skipping", iface.Name)
+ if len(ipAddrs) == 0 {
+ log.Warningf("No usable IP addresses found for interface %q, skipping", iface.Name)
continue
}
// Scrape the routes before removing the address, since that
// will remove the routes as well.
- routes, def, err := routesForIface(iface)
+ routes, defv4, defv6, err := routesForIface(iface)
if err != nil {
return fmt.Errorf("getting routes for interface %q: %v", iface.Name, err)
}
- if def != nil {
- if !args.DefaultGateway.Route.Empty() {
- return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, def, args.DefaultGateway)
+ if defv4 != nil {
+ if !args.Defaultv4Gateway.Route.Empty() {
+ return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, defv4, args.Defaultv4Gateway)
+ }
+ args.Defaultv4Gateway.Route = *defv4
+ args.Defaultv4Gateway.Name = iface.Name
+ }
+
+ if defv6 != nil {
+ if !args.Defaultv6Gateway.Route.Empty() {
+ return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, defv6, args.Defaultv6Gateway)
}
- args.DefaultGateway.Route = *def
- args.DefaultGateway.Name = iface.Name
+ args.Defaultv6Gateway.Route = *defv6
+ args.Defaultv6Gateway.Name = iface.Name
}
link := boot.FDBasedLink{
- Name: iface.Name,
- MTU: iface.MTU,
- Routes: routes,
- NumChannels: numNetworkChannels,
+ Name: iface.Name,
+ MTU: iface.MTU,
+ Routes: routes,
+ TXChecksumOffload: txChecksumOffload,
+ RXChecksumOffload: rxChecksumOffload,
+ NumChannels: numNetworkChannels,
+ QDisc: qDisc,
}
// Get the link for the interface.
@@ -247,6 +230,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
}
args.FilePayload.Files = append(args.FilePayload.Files, socketEntry.deviceFile)
}
+
if link.GSOMaxSize == 0 && softwareGSO {
// Hardware GSO is disabled. Let's enable software GSO.
link.GSOMaxSize = stack.SoftwareGSOMaxSize
@@ -255,7 +239,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
// Collect the addresses for the interface, enable forwarding,
// and remove them from the host.
- for _, addr := range ip4addrs {
+ for _, addr := range ipAddrs {
link.Addresses = append(link.Addresses, addr.IP)
// Steal IP address from NIC.
@@ -316,81 +300,96 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) (
}
}
- // Use SO_RCVBUFFORCE because on linux the receive buffer for an
- // AF_PACKET socket is capped by "net.core.rmem_max". rmem_max
- // defaults to a unusually low value of 208KB. This is too low
- // for gVisor to be able to receive packets at high throughputs
- // without incurring packet drops.
- const rcvBufSize = 4 << 20 // 4MB.
+ // Use SO_RCVBUFFORCE/SO_SNDBUFFORCE because on linux the receive/send buffer
+ // for an AF_PACKET socket is capped by "net.core.rmem_max/wmem_max".
+ // wmem_max/rmem_max default to an unusually low value of 208KB. This is too low
+ // for gVisor to be able to receive packets at high throughputs without
+ // incurring packet drops.
+ const bufSize = 4 << 20 // 4MB.
+
+ if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, bufSize); err != nil {
+ return nil, fmt.Errorf("failed to increase socket rcv buffer to %d: %v", bufSize, err)
+ }
- if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, rcvBufSize); err != nil {
- return nil, fmt.Errorf("failed to increase socket rcv buffer to %d: %v", rcvBufSize, err)
+ if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUFFORCE, bufSize); err != nil {
+ return nil, fmt.Errorf("failed to increase socket snd buffer to %d: %v", bufSize, err)
}
+
return &socketEntry{deviceFile, gsoMaxSize}, nil
}
-// loopbackLinks collects the links for a loopback interface.
-func loopbackLinks(iface net.Interface, addrs []net.Addr) ([]boot.LoopbackLink, error) {
- var links []boot.LoopbackLink
+// loopbackLink returns the link with addresses and routes for a loopback
+// interface.
+func loopbackLink(iface net.Interface, addrs []net.Addr) (boot.LoopbackLink, error) {
+ link := boot.LoopbackLink{
+ Name: iface.Name,
+ }
for _, addr := range addrs {
ipNet, ok := addr.(*net.IPNet)
if !ok {
- return nil, fmt.Errorf("address is not IPNet: %+v", addr)
+ return boot.LoopbackLink{}, fmt.Errorf("address is not IPNet: %+v", addr)
}
dst := *ipNet
dst.IP = dst.IP.Mask(dst.Mask)
- links = append(links, boot.LoopbackLink{
- Name: iface.Name,
- Addresses: []net.IP{ipNet.IP},
- Routes: []boot.Route{{
- Destination: dst,
- }},
+ link.Addresses = append(link.Addresses, ipNet.IP)
+ link.Routes = append(link.Routes, boot.Route{
+ Destination: dst,
})
}
- return links, nil
+ return link, nil
}
// routesForIface iterates over all routes for the given interface and converts
-// them to boot.Routes.
-func routesForIface(iface net.Interface) ([]boot.Route, *boot.Route, error) {
+// them to boot.Routes. It also returns the default v4 and v6 routes if found.
+func routesForIface(iface net.Interface) ([]boot.Route, *boot.Route, *boot.Route, error) {
link, err := netlink.LinkByIndex(iface.Index)
if err != nil {
- return nil, nil, err
+ return nil, nil, nil, err
}
rs, err := netlink.RouteList(link, netlink.FAMILY_ALL)
if err != nil {
- return nil, nil, fmt.Errorf("getting routes from %q: %v", iface.Name, err)
+ return nil, nil, nil, fmt.Errorf("getting routes from %q: %v", iface.Name, err)
}
- var def *boot.Route
+ var defv4, defv6 *boot.Route
var routes []boot.Route
for _, r := range rs {
// Is it a default route?
if r.Dst == nil {
if r.Gw == nil {
- return nil, nil, fmt.Errorf("default route with no gateway %q: %+v", iface.Name, r)
- }
- if r.Gw.To4() == nil {
- log.Warningf("IPv6 is not supported, skipping default route: %v", r)
- continue
- }
- if def != nil {
- return nil, nil, fmt.Errorf("more than one default route found %q, def: %+v, route: %+v", iface.Name, def, r)
+ return nil, nil, nil, fmt.Errorf("default route with no gateway %q: %+v", iface.Name, r)
}
// Create a catch all route to the gateway.
- def = &boot.Route{
- Destination: net.IPNet{
- IP: net.IPv4zero,
- Mask: net.IPMask(net.IPv4zero),
- },
- Gateway: r.Gw,
+ switch len(r.Gw) {
+ case header.IPv4AddressSize:
+ if defv4 != nil {
+ return nil, nil, nil, fmt.Errorf("more than one default route found %q, def: %+v, route: %+v", iface.Name, defv4, r)
+ }
+ defv4 = &boot.Route{
+ Destination: net.IPNet{
+ IP: net.IPv4zero,
+ Mask: net.IPMask(net.IPv4zero),
+ },
+ Gateway: r.Gw,
+ }
+ case header.IPv6AddressSize:
+ if defv6 != nil {
+ return nil, nil, nil, fmt.Errorf("more than one default route found %q, def: %+v, route: %+v", iface.Name, defv6, r)
+ }
+
+ defv6 = &boot.Route{
+ Destination: net.IPNet{
+ IP: net.IPv6zero,
+ Mask: net.IPMask(net.IPv6zero),
+ },
+ Gateway: r.Gw,
+ }
+ default:
+ return nil, nil, nil, fmt.Errorf("unexpected address size for gateway: %+v for route: %+v", r.Gw, r)
}
continue
}
- if r.Dst.IP.To4() == nil {
- log.Warningf("IPv6 is not supported, skipping route: %v", r)
- continue
- }
+
dst := *r.Dst
dst.IP = dst.IP.Mask(dst.Mask)
routes = append(routes, boot.Route{
@@ -398,7 +397,7 @@ func routesForIface(iface net.Interface) ([]boot.Route, *boot.Route, error) {
Gateway: r.Gw,
})
}
- return routes, def, nil
+ return routes, defv4, defv6, nil
}
// removeAddress removes IP address from network device. It's equivalent to:
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index ee9327fc8..36bb0c9c9 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -18,21 +18,25 @@ package sandbox
import (
"context"
"fmt"
+ "io"
+ "math"
"os"
"os/exec"
"strconv"
- "sync"
+ "strings"
"syscall"
"time"
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/control/client"
"gvisor.dev/gvisor/pkg/control/server"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/urpc"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/boot/platforms"
@@ -116,7 +120,7 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) {
s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
- c := specutils.MakeCleanup(func() {
+ c := cleanup.Make(func() {
err := s.destroy()
log.Warningf("error destroying sandbox: %v", err)
})
@@ -141,7 +145,19 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) {
// Wait until the sandbox has booted.
b := make([]byte, 1)
if l, err := clientSyncFile.Read(b); err != nil || l != 1 {
- return nil, fmt.Errorf("waiting for sandbox to start: %v", err)
+ err := fmt.Errorf("waiting for sandbox to start: %v", err)
+ // If the sandbox failed to start, it may be because the binary
+ // permissions were incorrect. Check the bits and return a more helpful
+ // error message.
+ //
+ // NOTE: The error message is checked because error types are lost over
+ // rpc calls.
+ if strings.Contains(err.Error(), io.EOF.Error()) {
+ if permsErr := checkBinaryPermissions(conf); permsErr != nil {
+ return nil, fmt.Errorf("%v: %v", err, permsErr)
+ }
+ }
+ return nil, err
}
c.Release()
@@ -368,8 +384,24 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
cmd.Args = append(cmd.Args, "--debug-log-fd="+strconv.Itoa(nextFD))
nextFD++
}
+ if conf.PanicLog != "" {
+ test := ""
+ if len(conf.TestOnlyTestNameEnv) != 0 {
+ // Fetch test name if one is provided and the test only flag was set.
+ if t, ok := specutils.EnvVar(args.Spec.Process.Env, conf.TestOnlyTestNameEnv); ok {
+ test = t
+ }
+ }
- cmd.Args = append(cmd.Args, "--panic-signal="+strconv.Itoa(int(syscall.SIGTERM)))
+ panicLogFile, err := specutils.DebugLogFile(conf.PanicLog, "panic", test)
+ if err != nil {
+ return fmt.Errorf("opening debug log file in %q: %v", conf.PanicLog, err)
+ }
+ defer panicLogFile.Close()
+ cmd.ExtraFiles = append(cmd.ExtraFiles, panicLogFile)
+ cmd.Args = append(cmd.Args, "--panic-log-fd="+strconv.Itoa(nextFD))
+ nextFD++
+ }
// Add the "boot" command to the args.
//
@@ -415,9 +447,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
nextFD++
}
- // If the platform needs a device FD we must pass it in.
- if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil {
+ gPlatform, err := platform.Lookup(conf.Platform)
+ if err != nil {
return err
+ }
+
+ if deviceFile, err := gPlatform.OpenDevice(); err != nil {
+ return fmt.Errorf("opening device file for platform %q: %v", gPlatform, err)
} else if deviceFile != nil {
defer deviceFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, deviceFile)
@@ -425,6 +461,12 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
nextFD++
}
+ // TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff
+ // isn't set.
+ if conf.Platform == "kvm" {
+ cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1")
+ }
+
// The current process' stdio must be passed to the application via the
// --stdio-fds flag. The stdio of the sandbox process itself must not
// be connected to the same FDs, otherwise we risk leaking sandbox
@@ -436,9 +478,7 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
// If the console control socket file is provided, then create a new
// pty master/slave pair and set the TTY on the sandbox process.
- if args.ConsoleSocket != "" {
- cmd.Args = append(cmd.Args, "--console=true")
-
+ if args.Spec.Process.Terminal && args.ConsoleSocket != "" {
// console.NewWithSocket will send the master on the given
// socket, and return the slave.
tty, err := console.NewWithSocket(args.ConsoleSocket)
@@ -502,7 +542,7 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
{Type: specs.UTSNamespace},
}
- if conf.Platform == platforms.Ptrace {
+ if gPlatform.Requirements().RequiresCurrentPIDNS {
// TODO(b/75837838): Also set a new PID namespace so that we limit
// access to other host processes.
log.Infof("Sandbox will be started in the current PID namespace")
@@ -563,45 +603,32 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
cmd.Args = append(cmd.Args, "--setup-root")
+ const nobody = 65534
if conf.Rootless {
- log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
+ log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
- ContainerID: 0,
+ ContainerID: nobody,
HostID: os.Getuid(),
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
- ContainerID: 0,
+ ContainerID: nobody,
HostID: os.Getgid(),
Size: 1,
},
}
- cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
} else {
// Map nobody in the new namespace to nobody in the parent namespace.
//
// A sandbox process will construct an empty
- // root for itself, so it has to have the CAP_SYS_ADMIN
- // capability.
- //
- // FIXME(b/122554829): The current implementations of
- // os/exec doesn't allow to set ambient capabilities if
- // a process is started in a new user namespace. As a
- // workaround, we start the sandbox process with the 0
- // UID and then it constructs a chroot and sets UID to
- // nobody. https://github.com/golang/go/issues/2315
- const nobody = 65534
+ // root for itself, so it has to have
+ // CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
- ContainerID: 0,
- HostID: nobody - 1,
- Size: 1,
- },
- {
ContainerID: nobody,
HostID: nobody,
Size: 1,
@@ -614,11 +641,11 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
Size: 1,
},
}
-
- // Set credentials to run as user and group nobody.
- cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
}
+ // Set credentials to run as user and group nobody.
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
+ cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
@@ -631,6 +658,26 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
if err != nil {
return fmt.Errorf("getting cpu count from cgroups: %v", err)
}
+ if conf.CPUNumFromQuota {
+ // Dropping below 2 CPUs can trigger applications to disable
+ // locks, which can lead to hard-to-debug errors, so keep a
+ // minimum of two cores as a reasonable default.
+ const minCPUs = 2
+
+ quota, err := s.Cgroup.CPUQuota()
+ if err != nil {
+ return fmt.Errorf("getting cpu qouta from cgroups: %v", err)
+ }
+ if n := int(math.Ceil(quota)); n > 0 {
+ if n < minCPUs {
+ n = minCPUs
+ }
+ if n < cpuNum {
+ // Only lower the cpu number.
+ cpuNum = n
+ }
+ }
+ }
cmd.Args = append(cmd.Args, "--cpu-num", strconv.Itoa(cpuNum))
mem, err := s.Cgroup.MemoryLimit()
@@ -656,6 +703,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
nextFD++
}
+ if args.Attached {
+ // Kill sandbox if parent process exits in attached mode.
+ cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
+ // Tells boot that any process it creates must have pdeathsig set.
+ cmd.Args = append(cmd.Args, "--attached")
+ }
+
// Add container as the last argument.
cmd.Args = append(cmd.Args, s.ID)
@@ -664,15 +718,22 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
log.Debugf("Donating FD %d: %q", i+3, f.Name())
}
- if args.Attached {
- // Kill sandbox if parent process exits in attached mode.
- cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
- }
-
log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr)
if err := specutils.StartInNS(cmd, nss); err != nil {
- return fmt.Errorf("Sandbox: %v", err)
+ err := fmt.Errorf("starting sandbox: %v", err)
+ // If the sandbox failed to start, it may be because the binary
+ // permissions were incorrect. Check the bits and return a more helpful
+ // error message.
+ //
+ // NOTE: The error message is checked because error types are lost over
+ // rpc calls.
+ if strings.Contains(err.Error(), syscall.EACCES.Error()) {
+ if permsErr := checkBinaryPermissions(conf); permsErr != nil {
+ return fmt.Errorf("%v: %v", err, permsErr)
+ }
+ }
+ return err
}
s.child = true
s.Pid = cmd.Process.Pid
@@ -951,6 +1012,46 @@ func (s *Sandbox) StopCPUProfile() error {
return nil
}
+// BlockProfile connects to the sandbox and calls boot.BlockProfile, passing f as the file payload for the block profile.
+func (s *Sandbox) BlockProfile(f *os.File) error {
+ log.Debugf("Block profile %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ opts := control.ProfileOpts{
+ FilePayload: urpc.FilePayload{
+ Files: []*os.File{f},
+ },
+ }
+ if err := conn.Call(boot.BlockProfile, &opts, nil); err != nil {
+ return fmt.Errorf("getting sandbox %q block profile: %v", s.ID, err)
+ }
+ return nil
+}
+
+// MutexProfile connects to the sandbox and calls boot.MutexProfile, passing f as the file payload for the mutex profile.
+func (s *Sandbox) MutexProfile(f *os.File) error {
+ log.Debugf("Mutex profile %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ opts := control.ProfileOpts{
+ FilePayload: urpc.FilePayload{
+ Files: []*os.File{f},
+ },
+ }
+ if err := conn.Call(boot.MutexProfile, &opts, nil); err != nil {
+ return fmt.Errorf("getting sandbox %q mutex profile: %v", s.ID, err)
+ }
+ return nil
+}
+
// StartTrace start trace writing to the given file.
func (s *Sandbox) StartTrace(f *os.File) error {
log.Debugf("Trace start %q", s.ID)
@@ -1004,16 +1105,22 @@ func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error {
// DestroyContainer destroys the given container. If it is the root container,
// then the entire sandbox is destroyed.
func (s *Sandbox) DestroyContainer(cid string) error {
+ if err := s.destroyContainer(cid); err != nil {
+ // If the sandbox isn't running, the container has already been destroyed;
+ // ignore the error in that case and return it only while the sandbox still runs.
+ if s.IsRunning() {
+ return err
+ }
+ }
+ return nil
+}
+
+func (s *Sandbox) destroyContainer(cid string) error {
if s.IsRootContainer(cid) {
log.Debugf("Destroying root container %q by destroying sandbox", cid)
return s.destroy()
}
- if !s.IsRunning() {
- // Sandbox isn't running anymore, container is already destroyed.
- return nil
- }
-
log.Debugf("Destroying container %q in sandbox %q", cid, s.ID)
conn, err := s.sandboxConnect()
if err != nil {
@@ -1069,3 +1176,31 @@ func deviceFileForPlatform(name string) (*os.File, error) {
}
return f, nil
}
+
+// checkBinaryPermissions verifies that the required permission bits are set on
+// the runsc executable. It returns nil when they are, and an error otherwise.
+func checkBinaryPermissions(conf *boot.Config) error {
+ // All platforms need the other exe bit
+ neededBits := os.FileMode(0001)
+ if conf.Platform == platforms.Ptrace {
+ // Ptrace needs the other read bit
+ neededBits |= os.FileMode(0004)
+ }
+
+ exePath, err := os.Executable()
+ if err != nil {
+ return fmt.Errorf("getting exe path: %v", err)
+ }
+
+ // Check the permissions of the runsc binary and return an error if they
+ // don't match expectations.
+ info, err := os.Stat(exePath)
+ if err != nil {
+ return fmt.Errorf("stat file: %v", err)
+ }
+
+ if info.Mode().Perm()&neededBits != neededBits {
+ // The message embeds exePath, so it must not be used as the format
+ // string itself: a '%' in the path would be parsed as a verb.
+ return fmt.Errorf("%s", specutils.FaqErrorMsg("runsc-perms", fmt.Sprintf("%s does not have the correct permissions", exePath)))
+ }
+ return nil
+}
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD
index 205638803..43851a22f 100644
--- a/runsc/specutils/BUILD
+++ b/runsc/specutils/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -10,7 +10,6 @@ go_library(
"namespace.go",
"specutils.go",
],
- importpath = "gvisor.dev/gvisor/runsc/specutils",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
@@ -18,7 +17,8 @@ go_library(
"//pkg/log",
"//pkg/sentry/kernel/auth",
"@com_github_cenkalti_backoff//:go_default_library",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_mohae_deepcopy//:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
"@com_github_syndtr_gocapability//capability:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
],
@@ -28,6 +28,6 @@ go_test(
name = "specutils_test",
size = "small",
srcs = ["specutils_test.go"],
- embed = [":specutils"],
- deps = ["@com_github_opencontainers_runtime-spec//specs-go:go_default_library"],
+ library = ":specutils",
+ deps = ["@com_github_opencontainers_runtime_spec//specs-go:go_default_library"],
)
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index c7dd3051c..23001d67c 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -18,6 +18,7 @@ import (
"fmt"
"os"
"os/exec"
+ "os/signal"
"path/filepath"
"runtime"
"syscall"
@@ -252,13 +253,27 @@ func MaybeRunAsRoot() error {
},
Credential: &syscall.Credential{Uid: 0, Gid: 0},
GidMappingsEnableSetgroups: false,
+
+ // Make sure child is killed when the parent terminates.
+ Pdeathsig: syscall.SIGKILL,
}
cmd.Env = os.Environ()
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
- if err := cmd.Run(); err != nil {
+ if err := cmd.Start(); err != nil {
+ return fmt.Errorf("re-executing self: %w", err)
+ }
+ ch := make(chan os.Signal, 1)
+ signal.Notify(ch)
+ go func() {
+ for {
+ // Forward all signals to child process.
+ cmd.Process.Signal(<-ch)
+ }
+ }()
+ if err := cmd.Wait(); err != nil {
if exit, ok := err.(*exec.ExitError); ok {
if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
os.Exit(ws.ExitStatus())
@@ -266,7 +281,7 @@ func MaybeRunAsRoot() error {
log.Warningf("No wait status provided, exiting with -1: %v", err)
os.Exit(-1)
}
- return fmt.Errorf("re-executing self: %v", err)
+ return err
}
// Child completed with success.
os.Exit(0)
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index d3c2e4e78..5015c3a84 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -29,6 +29,7 @@ import (
"time"
"github.com/cenkalti/backoff"
+ "github.com/mohae/deepcopy"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
@@ -44,20 +45,31 @@ var ExePath = "/proc/self/exe"
var Version = specs.Version
// LogSpec logs the spec in a human-friendly way.
-func LogSpec(spec *specs.Spec) {
- log.Debugf("Spec: %+v", spec)
- log.Debugf("Spec.Hooks: %+v", spec.Hooks)
- log.Debugf("Spec.Linux: %+v", spec.Linux)
- if spec.Linux != nil && spec.Linux.Resources != nil {
- res := spec.Linux.Resources
- log.Debugf("Spec.Linux.Resources.Memory: %+v", res.Memory)
- log.Debugf("Spec.Linux.Resources.CPU: %+v", res.CPU)
- log.Debugf("Spec.Linux.Resources.BlockIO: %+v", res.BlockIO)
- log.Debugf("Spec.Linux.Resources.Network: %+v", res.Network)
- }
- log.Debugf("Spec.Process: %+v", spec.Process)
- log.Debugf("Spec.Root: %+v", spec.Root)
- log.Debugf("Spec.Mounts: %+v", spec.Mounts)
+func LogSpec(orig *specs.Spec) {
+ if !log.IsLogging(log.Debug) {
+ return
+ }
+
+ // Work on a deep copy so orig is not modified, and strip the parts of the spec that are not interesting.
+ spec := deepcopy.Copy(orig).(*specs.Spec)
+ if spec.Process != nil {
+ spec.Process.Capabilities = nil
+ }
+ if spec.Linux != nil {
+ spec.Linux.Seccomp = nil
+ spec.Linux.MaskedPaths = nil
+ spec.Linux.ReadonlyPaths = nil
+ if spec.Linux.Resources != nil {
+ spec.Linux.Resources.Devices = nil
+ }
+ }
+
+ out, err := json.MarshalIndent(spec, "", " ")
+ if err != nil {
+ log.Debugf("Failed to marshal spec: %v", err)
+ return
+ }
+ log.Debugf("Spec:\n%s", out)
}
// ValidateSpec validates that the spec is compatible with runsc.
@@ -92,6 +104,12 @@ func ValidateSpec(spec *specs.Spec) error {
log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile)
}
+ // PR_SET_NO_NEW_PRIVS is assumed to always be set.
+ // See kernel.Task.updateCredsForExecLocked.
+ if !spec.Process.NoNewPrivileges {
+ log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.")
+ }
+
// TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox.
if spec.Linux != nil && spec.Linux.Seccomp != nil {
log.Warningf("Seccomp spec is being ignored")
@@ -438,36 +456,6 @@ func ContainsStr(strs []string, str string) bool {
return false
}
-// Cleanup allows defers to be aborted when cleanup needs to happen
-// conditionally. Usage:
-// c := MakeCleanup(func() { f.Close() })
-// defer c.Clean() // any failure before release is called will close the file.
-// ...
-// c.Release() // on success, aborts closing the file and return it.
-// return f
-type Cleanup struct {
- clean func()
-}
-
-// MakeCleanup creates a new Cleanup object.
-func MakeCleanup(f func()) Cleanup {
- return Cleanup{clean: f}
-}
-
-// Clean calls the cleanup function.
-func (c *Cleanup) Clean() {
- if c.clean != nil {
- c.clean()
- c.clean = nil
- }
-}
-
-// Release releases the cleanup from its duties, i.e. cleanup function is not
-// called after this point.
-func (c *Cleanup) Release() {
- c.clean = nil
-}
-
// RetryEintr retries the function until an error different than EINTR is
// returned.
func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
@@ -528,3 +516,8 @@ func EnvVar(env []string, name string) (string, bool) {
}
return "", false
}
+
+// FaqErrorMsg returns msg followed by a pointer to the gvisor.dev FAQ entry identified by anchor.
+func FaqErrorMsg(anchor, msg string) string {
+ return fmt.Sprintf("%s; see https://gvisor.dev/faq#%s for more details", msg, anchor)
+}
diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD
deleted file mode 100644
index c96ca2eb6..000000000
--- a/runsc/testutil/BUILD
+++ /dev/null
@@ -1,18 +0,0 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "testutil",
- testonly = 1,
- srcs = ["testutil.go"],
- importpath = "gvisor.dev/gvisor/runsc/testutil",
- visibility = ["//:sandbox"],
- deps = [
- "//pkg/log",
- "//runsc/boot",
- "//runsc/specutils",
- "@com_github_cenkalti_backoff//:go_default_library",
- "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
- ],
-)
diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go
deleted file mode 100644
index 9632776d2..000000000
--- a/runsc/testutil/testutil.go
+++ /dev/null
@@ -1,476 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package testutil contains utility functions for runsc tests.
-package testutil
-
-import (
- "bufio"
- "context"
- "debug/elf"
- "encoding/base32"
- "encoding/json"
- "flag"
- "fmt"
- "io"
- "io/ioutil"
- "math"
- "math/rand"
- "net/http"
- "os"
- "os/exec"
- "os/signal"
- "path/filepath"
- "strconv"
- "strings"
- "sync"
- "sync/atomic"
- "syscall"
- "time"
-
- "github.com/cenkalti/backoff"
- specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/runsc/boot"
- "gvisor.dev/gvisor/runsc/specutils"
-)
-
-var (
- checkpoint = flag.Bool("checkpoint", true, "control checkpoint/restore support")
-)
-
-func init() {
- rand.Seed(time.Now().UnixNano())
-}
-
-// IsCheckpointSupported returns the relevant command line flag.
-func IsCheckpointSupported() bool {
- return *checkpoint
-}
-
-// TmpDir returns the absolute path to a writable directory that can be used as
-// scratch by the test.
-func TmpDir() string {
- dir := os.Getenv("TEST_TMPDIR")
- if dir == "" {
- dir = "/tmp"
- }
- return dir
-}
-
-// ConfigureExePath configures the executable for runsc in the test environment.
-func ConfigureExePath() error {
- path, err := FindFile("runsc/runsc")
- if err != nil {
- return err
- }
- specutils.ExePath = path
- return nil
-}
-
-// FindFile searchs for a file inside the test run environment. It returns the
-// full path to the file. It fails if none or more than one file is found.
-func FindFile(path string) (string, error) {
- wd, err := os.Getwd()
- if err != nil {
- return "", err
- }
-
- // The test root is demarcated by a path element called "__main__". Search for
- // it backwards from the working directory.
- root := wd
- for {
- dir, name := filepath.Split(root)
- if name == "__main__" {
- break
- }
- if len(dir) == 0 {
- return "", fmt.Errorf("directory __main__ not found in %q", wd)
- }
- // Remove ending slash to loop around.
- root = dir[:len(dir)-1]
- }
-
- // Annoyingly, bazel adds the build type to the directory path for go
- // binaries, but not for c++ binaries. We use two different patterns to
- // to find our file.
- patterns := []string{
- // Try the obvious path first.
- filepath.Join(root, path),
- // If it was a go binary, use a wildcard to match the build
- // type. The pattern is: /test-path/__main__/directories/*/file.
- filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)),
- }
-
- for _, p := range patterns {
- matches, err := filepath.Glob(p)
- if err != nil {
- // "The only possible returned error is ErrBadPattern,
- // when pattern is malformed." -godoc
- return "", fmt.Errorf("error globbing %q: %v", p, err)
- }
- switch len(matches) {
- case 0:
- // Try the next pattern.
- case 1:
- // We found it.
- return matches[0], nil
- default:
- return "", fmt.Errorf("more than one match found for %q: %s", path, matches)
- }
- }
- return "", fmt.Errorf("file %q not found", path)
-}
-
-// TestConfig returns the default configuration to use in tests. Note that
-// 'RootDir' must be set by caller if required.
-func TestConfig() *boot.Config {
- return &boot.Config{
- Debug: true,
- LogFormat: "text",
- DebugLogFormat: "text",
- AlsoLogToStderr: true,
- LogPackets: true,
- Network: boot.NetworkNone,
- Strace: true,
- Platform: "ptrace",
- FileAccess: boot.FileAccessExclusive,
- TestOnlyAllowRunAsCurrentUserWithoutChroot: true,
- NumNetworkChannels: 1,
- }
-}
-
-// NewSpecWithArgs creates a simple spec with the given args suitable for use
-// in tests.
-func NewSpecWithArgs(args ...string) *specs.Spec {
- return &specs.Spec{
- // The host filesystem root is the container root.
- Root: &specs.Root{
- Path: "/",
- Readonly: true,
- },
- Process: &specs.Process{
- Args: args,
- Env: []string{
- "PATH=" + os.Getenv("PATH"),
- },
- Capabilities: specutils.AllCapabilities(),
- },
- Mounts: []specs.Mount{
- // Root is readonly, but many tests want to write to tmpdir.
- // This creates a writable mount inside the root. Also, when tmpdir points
- // to "/tmp", it makes the the actual /tmp to be mounted and not a tmpfs
- // inside the sentry.
- {
- Type: "bind",
- Destination: TmpDir(),
- Source: TmpDir(),
- },
- },
- Hostname: "runsc-test-hostname",
- }
-}
-
-// SetupRootDir creates a root directory for containers.
-func SetupRootDir() (string, error) {
- rootDir, err := ioutil.TempDir(TmpDir(), "containers")
- if err != nil {
- return "", fmt.Errorf("error creating root dir: %v", err)
- }
- return rootDir, nil
-}
-
-// SetupContainer creates a bundle and root dir for the container, generates a
-// test config, and writes the spec to config.json in the bundle dir.
-func SetupContainer(spec *specs.Spec, conf *boot.Config) (rootDir, bundleDir string, err error) {
- rootDir, err = SetupRootDir()
- if err != nil {
- return "", "", err
- }
- conf.RootDir = rootDir
- bundleDir, err = SetupBundleDir(spec)
- return rootDir, bundleDir, err
-}
-
-// SetupBundleDir creates a bundle dir and writes the spec to config.json.
-func SetupBundleDir(spec *specs.Spec) (bundleDir string, err error) {
- bundleDir, err = ioutil.TempDir(TmpDir(), "bundle")
- if err != nil {
- return "", fmt.Errorf("error creating bundle dir: %v", err)
- }
-
- if err = writeSpec(bundleDir, spec); err != nil {
- return "", fmt.Errorf("error writing spec: %v", err)
- }
- return bundleDir, nil
-}
-
-// writeSpec writes the spec to disk in the given directory.
-func writeSpec(dir string, spec *specs.Spec) error {
- b, err := json.Marshal(spec)
- if err != nil {
- return err
- }
- return ioutil.WriteFile(filepath.Join(dir, "config.json"), b, 0755)
-}
-
-// UniqueContainerID generates a unique container id for each test.
-//
-// The container id is used to create an abstract unix domain socket, which must
-// be unique. While the container forbids creating two containers with the same
-// name, sometimes between test runs the socket does not get cleaned up quickly
-// enough, causing container creation to fail.
-func UniqueContainerID() string {
- // Read 20 random bytes.
- b := make([]byte, 20)
- // "[Read] always returns len(p) and a nil error." --godoc
- if _, err := rand.Read(b); err != nil {
- panic("rand.Read failed: " + err.Error())
- }
- // base32 encode the random bytes, so that the name is a valid
- // container id and can be used as a socket name in the filesystem.
- return fmt.Sprintf("test-container-%s", base32.StdEncoding.EncodeToString(b))
-}
-
-// Copy copies file from src to dst.
-func Copy(src, dst string) error {
- in, err := os.Open(src)
- if err != nil {
- return err
- }
- defer in.Close()
-
- out, err := os.Create(dst)
- if err != nil {
- return err
- }
- defer out.Close()
-
- _, err = io.Copy(out, in)
- return err
-}
-
-// Poll is a shorthand function to poll for something with given timeout.
-func Poll(cb func() error, timeout time.Duration) error {
- ctx, cancel := context.WithTimeout(context.Background(), timeout)
- defer cancel()
- b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
- return backoff.Retry(cb, b)
-}
-
-// WaitForHTTP tries GET requests on a port until the call succeeds or timeout.
-func WaitForHTTP(port int, timeout time.Duration) error {
- cb := func() error {
- c := &http.Client{
- // Calculate timeout to be able to do minimum 5 attempts.
- Timeout: timeout / 5,
- }
- url := fmt.Sprintf("http://localhost:%d/", port)
- resp, err := c.Get(url)
- if err != nil {
- log.Infof("Waiting %s: %v", url, err)
- return err
- }
- resp.Body.Close()
- return nil
- }
- return Poll(cb, timeout)
-}
-
-// Reaper reaps child processes.
-type Reaper struct {
- // mu protects ch, which will be nil if the reaper is not running.
- mu sync.Mutex
- ch chan os.Signal
-}
-
-// Start starts reaping child processes.
-func (r *Reaper) Start() {
- r.mu.Lock()
- defer r.mu.Unlock()
-
- if r.ch != nil {
- panic("reaper.Start called on a running reaper")
- }
-
- r.ch = make(chan os.Signal, 1)
- signal.Notify(r.ch, syscall.SIGCHLD)
-
- go func() {
- for {
- r.mu.Lock()
- ch := r.ch
- r.mu.Unlock()
- if ch == nil {
- return
- }
-
- _, ok := <-ch
- if !ok {
- // Channel closed.
- return
- }
- for {
- cpid, _ := syscall.Wait4(-1, nil, syscall.WNOHANG, nil)
- if cpid < 1 {
- break
- }
- }
- }
- }()
-}
-
-// Stop stops reaping child processes.
-func (r *Reaper) Stop() {
- r.mu.Lock()
- defer r.mu.Unlock()
-
- if r.ch == nil {
- panic("reaper.Stop called on a stopped reaper")
- }
-
- signal.Stop(r.ch)
- close(r.ch)
- r.ch = nil
-}
-
-// StartReaper is a helper that starts a new Reaper and returns a function to
-// stop it.
-func StartReaper() func() {
- r := &Reaper{}
- r.Start()
- return r.Stop
-}
-
-// WaitUntilRead reads from the given reader until the wanted string is found
-// or until timeout.
-func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error {
- sc := bufio.NewScanner(r)
- if split != nil {
- sc.Split(split)
- }
- // done must be accessed atomically. A value greater than 0 indicates
- // that the read loop can exit.
- var done uint32
- doneCh := make(chan struct{})
- go func() {
- for sc.Scan() {
- t := sc.Text()
- if strings.Contains(t, want) {
- atomic.StoreUint32(&done, 1)
- close(doneCh)
- break
- }
- if atomic.LoadUint32(&done) > 0 {
- break
- }
- }
- }()
- select {
- case <-time.After(timeout):
- atomic.StoreUint32(&done, 1)
- return fmt.Errorf("timeout waiting to read %q", want)
- case <-doneCh:
- return nil
- }
-}
-
-// KillCommand kills the process running cmd unless it hasn't been started. It
-// returns an error if it cannot kill the process unless the reason is that the
-// process has already exited.
-func KillCommand(cmd *exec.Cmd) error {
- if cmd.Process == nil {
- return nil
- }
- if err := cmd.Process.Kill(); err != nil {
- if !strings.Contains(err.Error(), "process already finished") {
- return fmt.Errorf("failed to kill process %v: %v", cmd, err)
- }
- }
- return nil
-}
-
-// WriteTmpFile writes text to a temporary file, closes the file, and returns
-// the name of the file.
-func WriteTmpFile(pattern, text string) (string, error) {
- file, err := ioutil.TempFile(TmpDir(), pattern)
- if err != nil {
- return "", err
- }
- defer file.Close()
- if _, err := file.Write([]byte(text)); err != nil {
- return "", err
- }
- return file.Name(), nil
-}
-
-// RandomName create a name with a 6 digit random number appended to it.
-func RandomName(prefix string) string {
- return fmt.Sprintf("%s-%06d", prefix, rand.Int31n(1000000))
-}
-
-// IsStatic returns true iff the given file is a static binary.
-func IsStatic(filename string) (bool, error) {
- f, err := elf.Open(filename)
- if err != nil {
- return false, err
- }
- for _, prog := range f.Progs {
- if prog.Type == elf.PT_INTERP {
- return false, nil // Has interpreter.
- }
- }
- return true, nil
-}
-
-// TestBoundsForShard calculates the beginning and end indices for the test
-// based on the TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. The
-// returned ints are the beginning (inclusive) and end (exclusive) of the
-// subslice corresponding to the shard. If either of the env vars are not
-// present, then the function will return bounds that include all tests. If
-// there are more shards than there are tests, then the returned list may be
-// empty.
-func TestBoundsForShard(numTests int) (int, int, error) {
- var (
- begin = 0
- end = numTests
- )
- indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS")
- if indexStr == "" || totalStr == "" {
- return begin, end, nil
- }
-
- // Parse index and total to ints.
- shardIndex, err := strconv.Atoi(indexStr)
- if err != nil {
- return 0, 0, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err)
- }
- shardTotal, err := strconv.Atoi(totalStr)
- if err != nil {
- return 0, 0, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err)
- }
-
- // Calculate!
- shardSize := int(math.Ceil(float64(numTests) / float64(shardTotal)))
- begin = shardIndex * shardSize
- end = ((shardIndex + 1) * shardSize)
- if begin > numTests {
- // Nothing to run.
- return 0, 0, nil
- }
- if end > numTests {
- end = numTests
- }
- return begin, end, nil
-}
diff --git a/runsc/version_test.sh b/runsc/version_test.sh
index cc0ca3f05..747350654 100755
--- a/runsc/version_test.sh
+++ b/runsc/version_test.sh
@@ -16,7 +16,7 @@
set -euf -x -o pipefail
-readonly runsc="${TEST_SRCDIR}/__main__/runsc/linux_amd64_pure_stripped/runsc"
+readonly runsc="$1"
readonly version=$($runsc --version)
# Version should should not match VERSION, which is the default and which will