Diffstat (limited to 'runsc')
96 files changed, 4854 insertions, 4937 deletions
diff --git a/runsc/BUILD b/runsc/BUILD
index e4e8e64a3..757f6d44c 100644
--- a/runsc/BUILD
+++ b/runsc/BUILD
@@ -1,7 +1,6 @@
-package(licenses = ["notice"])  # Apache 2.0
+load("//tools:defs.bzl", "go_binary", "pkg_deb", "pkg_tar")

-load("@io_bazel_rules_go//go:def.bzl", "go_binary")
-load("@rules_pkg//:pkg.bzl", "pkg_deb", "pkg_tar")
+package(licenses = ["notice"])

 go_binary(
     name = "runsc",
@@ -9,7 +8,7 @@ go_binary(
         "main.go",
         "version.go",
     ],
-    pure = "on",
+    pure = True,
     visibility = [
         "//visibility:public",
     ],
@@ -20,16 +19,19 @@ go_binary(
         "//pkg/sentry/platform",
         "//runsc/boot",
         "//runsc/cmd",
+        "//runsc/flag",
         "//runsc/specutils",
         "@com_github_google_subcommands//:go_default_library",
     ],
 )

 # The runsc-race target is a race-compatible BUILD target. This must be built
-# via "bazel build --features=race //runsc:runsc-race", since the race feature
-# must apply to all dependencies due to a bug in gazelle file selection. The
-# pure attribute must be off because the race detector requires linking with
-# non-Go components, although we still require a static binary.
+# via: bazel build --features=race :runsc-race
+#
+# This is necessary because the race feature must apply to all dependencies
+# due to a bug in gazelle file selection. The pure attribute must be off
+# because the race detector requires linking with non-Go components, although
+# we still require a static binary.
 #
 # Note that in the future this might be convertible to a compatible target by
 # using the pure and static attributes within a select function, but select is
@@ -42,7 +44,7 @@ go_binary(
         "main.go",
         "version.go",
     ],
-    static = "on",
+    static = True,
     visibility = [
         "//visibility:public",
     ],
@@ -53,6 +55,7 @@ go_binary(
         "//pkg/sentry/platform",
         "//runsc/boot",
         "//runsc/cmd",
+        "//runsc/flag",
         "//runsc/specutils",
         "@com_github_google_subcommands//:go_default_library",
     ],
@@ -76,16 +79,29 @@ pkg_tar(

 genrule(
     name = "deb-version",
+    # Note that runsc must appear in the srcs parameter and not the tools
+    # parameter, otherwise it will not be stamped. This is reasonable, as tools
+    # may be encoded differently in the build graph (cached more aggressively
+    # because they are assumed to be hermetic).
+    srcs = [":runsc"],
     outs = ["version.txt"],
-    cmd = "$(location :runsc) -version | grep 'runsc version' | sed 's/^[^0-9]*//' > $@",
+    # Note that the little dance here is necessary because files in the $(SRCS)
+    # attribute are not executable by default, and we can't touch in place.
+    cmd = "cp $(location :runsc) $(@D)/runsc && \
+        chmod a+x $(@D)/runsc && \
+        $(@D)/runsc -version | grep version | sed 's/^[^0-9]*//' > $@ && \
+        rm -f $(@D)/runsc",
     stamp = 1,
-    tools = [":runsc"],
 )

 pkg_deb(
     name = "runsc-debian",
     architecture = "amd64",
     data = ":debian-data",
+    # Note that the description_file will be flattened (all newlines removed),
+    # and therefore it is kept to a simple one-line description. The expected
+    # format for debian packages is "short summary\nLonger explanation of
+    # tool." and this is impossible with the flattening.
description_file = "debian/description", homepage = "https://gvisor.dev/", maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>", @@ -101,5 +117,7 @@ sh_test( name = "version_test", size = "small", srcs = ["version_test.sh"], + args = ["$(location :runsc)"], data = [":runsc"], + tags = ["noguitar"], ) diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 6fe2b57de..ed3c8f546 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -7,6 +7,7 @@ go_library( srcs = [ "compat.go", "compat_amd64.go", + "compat_arm64.go", "config.go", "controller.go", "debug.go", @@ -15,30 +16,33 @@ go_library( "fs.go", "limits.go", "loader.go", + "loader_amd64.go", + "loader_arm64.go", "network.go", - "pprof.go", "strace.go", - "user.go", + "vfs.go", ], - importpath = "gvisor.dev/gvisor/runsc/boot", visibility = [ + "//pkg/test:__subpackages__", "//runsc:__subpackages__", "//test:__subpackages__", ], deps = [ "//pkg/abi", "//pkg/abi/linux", + "//pkg/context", "//pkg/control/server", "//pkg/cpuid", "//pkg/eventchannel", + "//pkg/fspath", "//pkg/log", "//pkg/memutil", "//pkg/rand", "//pkg/refs", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", - "//pkg/sentry/context", "//pkg/sentry/control", + "//pkg/sentry/devices/memdev", "//pkg/sentry/fs", "//pkg/sentry/fs/dev", "//pkg/sentry/fs/gofer", @@ -48,6 +52,13 @@ go_library( "//pkg/sentry/fs/sys", "//pkg/sentry/fs/tmpfs", "//pkg/sentry/fs/tty", + "//pkg/sentry/fs/user", + "//pkg/sentry/fsimpl/devtmpfs", + "//pkg/sentry/fsimpl/gofer", + "//pkg/sentry/fsimpl/host", + "//pkg/sentry/fsimpl/proc", + "//pkg/sentry/fsimpl/sys", + "//pkg/sentry/fsimpl/tmpfs", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel:uncaught_signal_go_proto", @@ -60,16 +71,19 @@ go_library( "//pkg/sentry/socket/hostinet", "//pkg/sentry/socket/netlink", "//pkg/sentry/socket/netlink/route", + "//pkg/sentry/socket/netlink/uevent", "//pkg/sentry/socket/netstack", "//pkg/sentry/socket/unix", "//pkg/sentry/state", "//pkg/sentry/strace", "//pkg/sentry/syscalls/linux", + "//pkg/sentry/syscalls/linux/vfs2", "//pkg/sentry/time", "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", "//pkg/sentry/usage", - "//pkg/sentry/usermem", + "//pkg/sentry/vfs", "//pkg/sentry/watchdog", + "//pkg/sync", "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/link/fdbased", @@ -86,6 +100,7 @@ go_library( "//pkg/urpc", "//runsc/boot/filter", "//runsc/boot/platforms", + "//runsc/boot/pprof", "//runsc/specutils", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", @@ -100,19 +115,21 @@ go_test( "compat_test.go", "fs_test.go", "loader_test.go", - "user_test.go", ], - embed = [":boot"], + library = ":boot", deps = [ "//pkg/control/server", + "//pkg/fspath", "//pkg/log", "//pkg/p9", - "//pkg/sentry/arch:registers_go_proto", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel", + "//pkg/sentry/vfs", + "//pkg/sync", "//pkg/unet", "//runsc/fsgofer", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index 07e35ab10..b7cfb35bf 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -17,18 +17,16 @@ package boot import ( "fmt" "os" - "sync" 
"syscall" "github.com/golang/protobuf/proto" - "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/arch" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" "gvisor.dev/gvisor/pkg/sentry/strace" spb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" + "gvisor.dev/gvisor/pkg/sync" ) func initCompatLogs(fd int) error { @@ -53,9 +51,9 @@ type compatEmitter struct { } func newCompatEmitter(logFD int) (*compatEmitter, error) { - nameMap, ok := strace.Lookup(abi.Linux, arch.AMD64) + nameMap, ok := getSyscallNameMap() if !ok { - return nil, fmt.Errorf("amd64 Linux syscall table not found") + return nil, fmt.Errorf("Linux syscall table not found") } c := &compatEmitter{ @@ -67,7 +65,7 @@ func newCompatEmitter(logFD int) (*compatEmitter, error) { if logFD > 0 { f := os.NewFile(uintptr(logFD), "user log file") - target := log.MultiEmitter{c.sink, log.K8sJSONEmitter{log.Writer{Next: f}}} + target := &log.MultiEmitter{c.sink, log.K8sJSONEmitter{&log.Writer{Next: f}}} c.sink = &log.BasicLogger{Level: log.Info, Emitter: target} } return c, nil @@ -86,16 +84,16 @@ func (c *compatEmitter) Emit(msg proto.Message) (bool, error) { } func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) { - regs := us.Registers.GetArch().(*rpb.Registers_Amd64).Amd64 + regs := us.Registers c.mu.Lock() defer c.mu.Unlock() - sysnr := regs.OrigRax + sysnr := syscallNum(regs) tr := c.trackers[sysnr] if tr == nil { switch sysnr { - case syscall.SYS_PRCTL, syscall.SYS_ARCH_PRCTL: + case syscall.SYS_PRCTL: // args: cmd, ... tr = newArgsTracker(0) @@ -112,10 +110,14 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) { tr = newArgsTracker(2) default: - tr = &onceTracker{} + tr = newArchArgsTracker(sysnr) + if tr == nil { + tr = &onceTracker{} + } } c.trackers[sysnr] = tr } + if tr.shouldReport(regs) { c.sink.Infof("Unsupported syscall: %s, regs: %+v", c.nameMap.Name(uintptr(sysnr)), regs) tr.onReported(regs) @@ -139,10 +141,10 @@ func (c *compatEmitter) Close() error { // the syscall and arguments. type syscallTracker interface { // shouldReport returns true is the syscall should be reported. - shouldReport(regs *rpb.AMD64Registers) bool + shouldReport(regs *rpb.Registers) bool // onReported marks the syscall as reported. - onReported(regs *rpb.AMD64Registers) + onReported(regs *rpb.Registers) } // onceTracker reports only a single time, used for most syscalls. @@ -150,10 +152,45 @@ type onceTracker struct { reported bool } -func (o *onceTracker) shouldReport(_ *rpb.AMD64Registers) bool { +func (o *onceTracker) shouldReport(_ *rpb.Registers) bool { return !o.reported } -func (o *onceTracker) onReported(_ *rpb.AMD64Registers) { +func (o *onceTracker) onReported(_ *rpb.Registers) { o.reported = true } + +// argsTracker reports only once for each different combination of arguments. +// It's used for generic syscalls like ioctl to report once per 'cmd'. +type argsTracker struct { + // argsIdx is the syscall arguments to use as unique ID. + argsIdx []int + reported map[string]struct{} + count int +} + +func newArgsTracker(argIdx ...int) *argsTracker { + return &argsTracker{argsIdx: argIdx, reported: make(map[string]struct{})} +} + +// key returns the command based on the syscall argument index. 
+func (a *argsTracker) key(regs *rpb.Registers) string {
+    var rv string
+    for _, idx := range a.argsIdx {
+        rv += fmt.Sprintf("%d|", argVal(idx, regs))
+    }
+    return rv
+}
+
+func (a *argsTracker) shouldReport(regs *rpb.Registers) bool {
+    if a.count >= reportLimit {
+        return false
+    }
+    _, ok := a.reported[a.key(regs)]
+    return !ok
+}
+
+func (a *argsTracker) onReported(regs *rpb.Registers) {
+    a.count++
+    a.reported[a.key(regs)] = struct{}{}
+}
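For reference while reading the refactor above: the trackers now operate on the generic *rpb.Registers proto and lean on per-arch helpers (newRegs, argVal, setArgVal, syscallNum) defined in compat_amd64.go and compat_arm64.go below. A minimal sketch of how the pieces compose, assuming only the helpers shown in this diff; the cmd value 0x5401 is purely illustrative:

    package boot

    // exampleTrackerUsage is an illustrative sketch, not part of this change.
    func exampleTrackerUsage() {
        tr := newArgsTracker(1) // deduplicate reports by argument 1, e.g. ioctl's cmd

        regs := newRegs()          // arch-specific helper added below
        setArgVal(1, 0x5401, regs) // hypothetical cmd value

        if tr.shouldReport(regs) { // true: first sighting of this cmd
            tr.onReported(regs)
        }
        _ = tr.shouldReport(regs) // false: this cmd was already reported
    }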
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 43cd0db94..42b0ca8b0 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -16,62 +16,81 @@ package boot
 import (
     "fmt"
+    "syscall"

+    "gvisor.dev/gvisor/pkg/abi"
+    "gvisor.dev/gvisor/pkg/sentry/arch"
     rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+    "gvisor.dev/gvisor/pkg/sentry/strace"
 )

 // reportLimit is the max number of events that should be reported per tracker.
 const reportLimit = 100

-// argsTracker reports only once for each different combination of arguments.
-// It's used for generic syscalls like ioctl to report once per 'cmd'.
-type argsTracker struct {
-    // argsIdx is the syscall arguments to use as unique ID.
-    argsIdx  []int
-    reported map[string]struct{}
-    count    int
+// newRegs creates an empty Registers instance.
+func newRegs() *rpb.Registers {
+    return &rpb.Registers{
+        Arch: &rpb.Registers_Amd64{
+            Amd64: &rpb.AMD64Registers{},
+        },
+    }
 }

-func newArgsTracker(argIdx ...int) *argsTracker {
-    return &argsTracker{argsIdx: argIdx, reported: make(map[string]struct{})}
-}
+func argVal(argIdx int, regs *rpb.Registers) uint32 {
+    amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64

-// cmd returns the command based on the syscall argument index.
-func (a *argsTracker) key(regs *rpb.AMD64Registers) string {
-    var rv string
-    for _, idx := range a.argsIdx {
-        rv += fmt.Sprintf("%d|", argVal(idx, regs))
+    switch argIdx {
+    case 0:
+        return uint32(amd64Regs.Rdi)
+    case 1:
+        return uint32(amd64Regs.Rsi)
+    case 2:
+        return uint32(amd64Regs.Rdx)
+    case 3:
+        return uint32(amd64Regs.R10)
+    case 4:
+        return uint32(amd64Regs.R8)
+    case 5:
+        return uint32(amd64Regs.R9)
     }
-    return rv
+    panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
 }

-func argVal(argIdx int, regs *rpb.AMD64Registers) uint32 {
+func setArgVal(argIdx int, argVal uint64, regs *rpb.Registers) {
+    amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
+
     switch argIdx {
     case 0:
-        return uint32(regs.Rdi)
+        amd64Regs.Rdi = argVal
     case 1:
-        return uint32(regs.Rsi)
+        amd64Regs.Rsi = argVal
     case 2:
-        return uint32(regs.Rdx)
+        amd64Regs.Rdx = argVal
     case 3:
-        return uint32(regs.R10)
+        amd64Regs.R10 = argVal
     case 4:
-        return uint32(regs.R8)
+        amd64Regs.R8 = argVal
     case 5:
-        return uint32(regs.R9)
+        amd64Regs.R9 = argVal
+    default:
+        panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
     }
-    panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
 }

-func (a *argsTracker) shouldReport(regs *rpb.AMD64Registers) bool {
-    if a.count >= reportLimit {
-        return false
-    }
-    _, ok := a.reported[a.key(regs)]
-    return !ok
+func getSyscallNameMap() (strace.SyscallMap, bool) {
+    return strace.Lookup(abi.Linux, arch.AMD64)
+}
+
+func syscallNum(regs *rpb.Registers) uint64 {
+    amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
+    return amd64Regs.OrigRax
 }

-func (a *argsTracker) onReported(regs *rpb.AMD64Registers) {
-    a.count++
-    a.reported[a.key(regs)] = struct{}{}
+func newArchArgsTracker(sysnr uint64) syscallTracker {
+    switch sysnr {
+    case syscall.SYS_ARCH_PRCTL:
+        // args: cmd, ...
+        return newArgsTracker(0)
+    }
+    return nil
 }
diff --git a/runsc/boot/compat_arm64.go b/runsc/boot/compat_arm64.go
new file mode 100644
index 000000000..f784cd237
--- /dev/null
+++ b/runsc/boot/compat_arm64.go
@@ -0,0 +1,91 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+    "fmt"
+
+    "gvisor.dev/gvisor/pkg/abi"
+    "gvisor.dev/gvisor/pkg/sentry/arch"
+    rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+    "gvisor.dev/gvisor/pkg/sentry/strace"
+)
+
+// reportLimit is the max number of events that should be reported per tracker.
+const reportLimit = 100
+
+// newRegs creates an empty Registers instance.
+func newRegs() *rpb.Registers {
+    return &rpb.Registers{
+        Arch: &rpb.Registers_Arm64{
+            Arm64: &rpb.ARM64Registers{},
+        },
+    }
+}
+
+func argVal(argIdx int, regs *rpb.Registers) uint32 {
+    arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
+
+    switch argIdx {
+    case 0:
+        return uint32(arm64Regs.R0)
+    case 1:
+        return uint32(arm64Regs.R1)
+    case 2:
+        return uint32(arm64Regs.R2)
+    case 3:
+        return uint32(arm64Regs.R3)
+    case 4:
+        return uint32(arm64Regs.R4)
+    case 5:
+        return uint32(arm64Regs.R5)
+    }
+    panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
+}
+
+func setArgVal(argIdx int, argVal uint64, regs *rpb.Registers) {
+    arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
+
+    switch argIdx {
+    case 0:
+        arm64Regs.R0 = argVal
+    case 1:
+        arm64Regs.R1 = argVal
+    case 2:
+        arm64Regs.R2 = argVal
+    case 3:
+        arm64Regs.R3 = argVal
+    case 4:
+        arm64Regs.R4 = argVal
+    case 5:
+        arm64Regs.R5 = argVal
+    default:
+        panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
+    }
+}
+
+func getSyscallNameMap() (strace.SyscallMap, bool) {
+    return strace.Lookup(abi.Linux, arch.ARM64)
+}
+
+func syscallNum(regs *rpb.Registers) uint64 {
+    arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
+    return arm64Regs.R8
+}
+
+func newArchArgsTracker(sysnr uint64) syscallTracker {
+    // Currently, no arch-specific syscalls need to be handled here.
+    return nil
+}
diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go
index 388298d8d..839c5303b 100644
--- a/runsc/boot/compat_test.go
+++ b/runsc/boot/compat_test.go
@@ -16,8 +16,6 @@ package boot
 import (
     "testing"
-
-    rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
 )

 func TestOnceTracker(t *testing.T) {
@@ -35,31 +33,34 @@ func TestOnceTracker(t *testing.T) {

 func TestArgsTracker(t *testing.T) {
     for _, tc := range []struct {
-        name string
-        idx  []int
-        rdi1 uint64
-        rdi2 uint64
-        rsi1 uint64
-        rsi2 uint64
-        want bool
+        name   string
+        idx    []int
+        arg1_1 uint64
+        arg1_2 uint64
+        arg2_1 uint64
+        arg2_2 uint64
+        want   bool
     }{
-        {name: "same rdi", idx: []int{0}, rdi1: 123, rdi2: 123, want: false},
-        {name: "same rsi", idx: []int{1}, rsi1: 123, rsi2: 123, want: false},
-        {name: "diff rdi", idx: []int{0}, rdi1: 123, rdi2: 321, want: true},
-        {name: "diff rsi", idx: []int{1}, rsi1: 123, rsi2: 321, want: true},
-        {name: "cmd is uint32", idx: []int{0}, rsi1: 0xdead00000123, rsi2: 0xbeef00000123, want: false},
-        {name: "same 2 args", idx: []int{0, 1}, rsi1: 123, rdi1: 321, rsi2: 123, rdi2: 321, want: false},
-        {name: "diff 2 args", idx: []int{0, 1}, rsi1: 123, rdi1: 321, rsi2: 789, rdi2: 987, want: true},
+        {name: "same arg1", idx: []int{0}, arg1_1: 123, arg1_2: 123, want: false},
+        {name: "same arg2", idx: []int{1}, arg2_1: 123, arg2_2: 123, want: false},
+        {name: "diff arg1", idx: []int{0}, arg1_1: 123, arg1_2: 321, want: true},
+        {name: "diff arg2", idx: []int{1}, arg2_1: 123, arg2_2: 321, want: true},
+        {name: "cmd is uint32", idx: []int{0}, arg2_1: 0xdead00000123, arg2_2: 0xbeef00000123, want: false},
+        {name: "same 2 args", idx: []int{0, 1}, arg2_1: 123, arg1_1: 321, arg2_2: 123, arg1_2: 321, want: false},
+        {name: "diff 2 args", idx: []int{0, 1}, arg2_1: 123, arg1_1: 321, arg2_2: 789, arg1_2: 987, want: true},
     } {
         t.Run(tc.name, func(t *testing.T) {
             c := newArgsTracker(tc.idx...)
-            regs := &rpb.AMD64Registers{Rdi: tc.rdi1, Rsi: tc.rsi1}
+            regs := newRegs()
+            setArgVal(0, tc.arg1_1, regs)
+            setArgVal(1, tc.arg2_1, regs)
             if !c.shouldReport(regs) {
                 t.Error("first call to shouldReport, got: false, want: true")
             }
             c.onReported(regs)

-            regs.Rdi, regs.Rsi = tc.rdi2, tc.rsi2
+            setArgVal(0, tc.arg1_2, regs)
+            setArgVal(1, tc.arg2_2, regs)
             if got := c.shouldReport(regs); tc.want != got {
                 t.Errorf("second call to shouldReport, got: %t, want: %t", got, tc.want)
             }
@@ -70,7 +71,9 @@ func TestArgsTracker(t *testing.T) {
 func TestArgsTrackerLimit(t *testing.T) {
     c := newArgsTracker(0, 1)
     for i := 0; i < reportLimit; i++ {
-        regs := &rpb.AMD64Registers{Rdi: 123, Rsi: uint64(i)}
+        regs := newRegs()
+        setArgVal(0, 123, regs)
+        setArgVal(1, uint64(i), regs)
         if !c.shouldReport(regs) {
             t.Error("shouldReport before limit was reached, got: false, want: true")
         }
@@ -78,7 +81,9 @@ func TestArgsTrackerLimit(t *testing.T) {
     }

     // Should hit the count limit now.
-    regs := &rpb.AMD64Registers{Rdi: 123, Rsi: 123456}
+    regs := newRegs()
+    setArgVal(0, 123, regs)
+    setArgVal(1, 123456, regs)
     if c.shouldReport(regs) {
         t.Error("shouldReport after limit was reached, got: true, want: false")
     }
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 72a33534f..715a19112 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -158,6 +158,9 @@ type Config struct {
     // DebugLog is the path to log debug information to, if not empty.
     DebugLog string

+    // PanicLog is the path to log Go's runtime messages, if not empty.
+    PanicLog string
+
     // DebugLogFormat is the log format for debug.
     DebugLogFormat string

@@ -250,6 +253,15 @@ type Config struct {
     // multiple tests are run in parallel, since there is no way to pass
     // parameters to the runtime from docker.
     TestOnlyTestNameEnv string
+
+    // CPUNumFromQuota sets CPU number count to available CPU quota, using
+    // least integer value greater than or equal to quota.
+    //
+    // E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
+    CPUNumFromQuota bool
+
+    // Enables VFS2 (not plumbed through yet).
+    VFS2 bool
 }

 // ToFlags returns a slice of flags that correspond to the given Config.
@@ -260,6 +272,7 @@ func (c *Config) ToFlags() []string {
         "--log=" + c.LogFilename,
         "--log-format=" + c.LogFormat,
         "--debug-log=" + c.DebugLog,
+        "--panic-log=" + c.PanicLog,
         "--debug-log-format=" + c.DebugLogFormat,
         "--file-access=" + c.FileAccess.String(),
         "--overlay=" + strconv.FormatBool(c.Overlay),
@@ -282,6 +295,9 @@ func (c *Config) ToFlags() []string {
         "--software-gso=" + strconv.FormatBool(c.SoftwareGSO),
         "--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead),
     }
+    if c.CPUNumFromQuota {
+        f = append(f, "--cpu-num-from-quota")
+    }
     // Only include these if set since it is never to be used by users.
     if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
         f = append(f, "--TESTONLY-unsafe-nonroot=true")
@@ -289,5 +305,10 @@ func (c *Config) ToFlags() []string {
     if len(c.TestOnlyTestNameEnv) != 0 {
         f = append(f, "--TESTONLY-test-name-env="+c.TestOnlyTestNameEnv)
     }
+
+    if c.VFS2 {
+        f = append(f, "--vfs2=true")
+    }
+
     return f
 }
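The CPUNumFromQuota option documented above rounds a fractional CPU quota up to a whole CPU count. A stand-alone sketch of that ceiling rule (a hypothetical helper, not the actual runsc implementation):

    package main

    import (
        "fmt"
        "math"
    )

    // cpuNumFromQuota applies the documented rule: the least integer greater
    // than or equal to the quota, so 0.2 -> 1 and 1.9 -> 2.
    func cpuNumFromQuota(quota float64) int {
        return int(math.Ceil(quota))
    }

    func main() {
        fmt.Println(cpuNumFromQuota(0.2)) // 1
        fmt.Println(cpuNumFromQuota(1.9)) // 2
    }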
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 928285683..8125d5061 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -32,6 +32,7 @@ import (
     "gvisor.dev/gvisor/pkg/sentry/watchdog"
     "gvisor.dev/gvisor/pkg/tcpip/stack"
     "gvisor.dev/gvisor/pkg/urpc"
+    "gvisor.dev/gvisor/runsc/boot/pprof"
     "gvisor.dev/gvisor/runsc/specutils"
 )

@@ -100,11 +101,14 @@ const (

 // Profiling related commands (see pprof.go for more details).
 const (
-    StartCPUProfile = "Profile.StartCPUProfile"
-    StopCPUProfile  = "Profile.StopCPUProfile"
-    HeapProfile     = "Profile.HeapProfile"
-    StartTrace      = "Profile.StartTrace"
-    StopTrace       = "Profile.StopTrace"
+    StartCPUProfile  = "Profile.StartCPUProfile"
+    StopCPUProfile   = "Profile.StopCPUProfile"
+    HeapProfile      = "Profile.HeapProfile"
+    GoroutineProfile = "Profile.GoroutineProfile"
+    BlockProfile     = "Profile.BlockProfile"
+    MutexProfile     = "Profile.MutexProfile"
+    StartTrace       = "Profile.StartTrace"
+    StopTrace        = "Profile.StopTrace"
 )

 // Logging related commands (see logging.go for more details).
@@ -142,7 +146,7 @@ func newController(fd int, l *Loader) (*controller, error) {
     }
     srv.Register(manager)

-    if eps, ok := l.k.NetworkStack().(*netstack.Stack); ok {
+    if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok {
         net := &Network{
             Stack: eps.Stack,
         }
@@ -152,7 +156,9 @@ func newController(fd int, l *Loader) (*controller, error) {
     srv.Register(&debug{})
     srv.Register(&control.Logging{})
     if l.conf.ProfileEnable {
-        srv.Register(&control.Profile{})
+        srv.Register(&control.Profile{
+            Kernel: l.k,
+        })
     }

     return &controller{
@@ -339,7 +345,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
         return fmt.Errorf("creating memory file: %v", err)
     }
     k.SetMemoryFile(mf)
-    networkStack := cm.l.k.NetworkStack()
+    networkStack := cm.l.k.RootNetworkNamespace().Stack()
     cm.l.k = k

     // Set up the restore environment.
@@ -363,9 +369,9 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
     }

     if cm.l.conf.ProfileEnable {
-        // initializePProf opens /proc/self/maps, so has to be
-        // called before installing seccomp filters.
-        initializePProf()
+        // pprof.Initialize opens /proc/self/maps, so has to be called before
+        // installing seccomp filters.
+        pprof.Initialize()
     }

     // Seccomp filters have to be applied before parsing the state file.
@@ -380,7 +386,9 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
     }

     // Since we have a new kernel we also must make a new watchdog.
-    dog := watchdog.New(k, watchdog.DefaultTimeout, cm.l.conf.WatchdogAction)
+    dogOpts := watchdog.DefaultOpts
+    dogOpts.TaskTimeoutAction = cm.l.conf.WatchdogAction
+    dog := watchdog.New(k, dogOpts)

     // Change the loader fields to reflect the changes made when restoring.
     cm.l.k = k
diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go
index e5de1f3d7..7e7a31fbd 100644
--- a/runsc/boot/fds.go
+++ b/runsc/boot/fds.go
@@ -17,9 +17,10 @@ package boot
 import (
     "fmt"

-    "gvisor.dev/gvisor/pkg/sentry/context"
+    "gvisor.dev/gvisor/pkg/context"
     "gvisor.dev/gvisor/pkg/sentry/fs"
     "gvisor.dev/gvisor/pkg/sentry/fs/host"
+    vfshost "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
     "gvisor.dev/gvisor/pkg/sentry/kernel"
 )

@@ -31,10 +32,13 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F
         return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
     }

+    if kernel.VFS2Enabled {
+        return createFDTableVFS2(ctx, console, stdioFDs)
+    }
+
     k := kernel.KernelFromContext(ctx)
     fdTable := k.NewFDTable()
     defer fdTable.DecRef()
-    mounter := fs.FileOwnerFromContext(ctx)

     var ttyFile *fs.File
     for appFD, hostFD := range stdioFDs {
@@ -44,7 +48,7 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F
             // Import the file as a host TTY file.
             if ttyFile == nil {
                 var err error
-                appFile, err = host.ImportFile(ctx, hostFD, mounter, true /* isTTY */)
+                appFile, err = host.ImportFile(ctx, hostFD, true /* isTTY */)
                 if err != nil {
                     return nil, err
                 }
@@ -63,7 +67,7 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F
         } else {
             // Import the file as a regular host file.
             var err error
-            appFile, err = host.ImportFile(ctx, hostFD, mounter, false /* isTTY */)
+            appFile, err = host.ImportFile(ctx, hostFD, false /* isTTY */)
             if err != nil {
                 return nil, err
             }
@@ -79,3 +83,26 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F
     fdTable.IncRef()
     return fdTable, nil
 }
+
+func createFDTableVFS2(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) {
+    k := kernel.KernelFromContext(ctx)
+    fdTable := k.NewFDTable()
+    defer fdTable.DecRef()
+
+    for appFD, hostFD := range stdioFDs {
+        // TODO(gvisor.dev/issue/1482): Add TTY support.
+        appFile, err := vfshost.ImportFD(ctx, k.HostMount(), hostFD, false)
+        if err != nil {
+            return nil, err
+        }
+
+        if err := fdTable.NewFDAtVFS2(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
+            appFile.DecRef()
+            return nil, err
+        }
+        appFile.DecRef()
+    }
+
+    fdTable.IncRef()
+    return fdTable, nil
+}
diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD
index f5509b6b7..ed18f0047 100644
--- a/runsc/boot/filter/BUILD
+++ b/runsc/boot/filter/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")

 package(licenses = ["notice"])

@@ -6,12 +6,14 @@ go_library(
     name = "filter",
     srcs = [
         "config.go",
+        "config_amd64.go",
+        "config_arm64.go",
+        "config_profile.go",
         "extra_filters.go",
         "extra_filters_msan.go",
         "extra_filters_race.go",
         "filter.go",
     ],
-    importpath = "gvisor.dev/gvisor/runsc/boot/filter",
     visibility = [
         "//runsc/boot:__subpackages__",
     ],
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 5ad108261..1828d116a 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -26,10 +26,6 @@ import (

 // allowedSyscalls is the set of syscalls executed by the Sentry to the host OS.
 var allowedSyscalls = seccomp.SyscallRules{
-    syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
-        {seccomp.AllowValue(linux.ARCH_GET_FS)},
-        {seccomp.AllowValue(linux.ARCH_SET_FS)},
-    },
     syscall.SYS_CLOCK_GETTIME: {},
     syscall.SYS_CLONE: []seccomp.Rule{
         {
@@ -42,9 +38,15 @@ var allowedSyscalls = seccomp.SyscallRules{
                 syscall.CLONE_THREAD),
         },
     },
-    syscall.SYS_CLOSE: {},
-    syscall.SYS_DUP:   {},
-    syscall.SYS_DUP2:  {},
+    syscall.SYS_CLOSE: {},
+    syscall.SYS_DUP:   {},
+    syscall.SYS_DUP3: []seccomp.Rule{
+        {
+            seccomp.AllowAny{},
+            seccomp.AllowAny{},
+            seccomp.AllowValue(syscall.O_CLOEXEC),
+        },
+    },
     syscall.SYS_EPOLL_CREATE1: {},
     syscall.SYS_EPOLL_CTL:     {},
     syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
@@ -132,11 +134,6 @@ var allowedSyscalls = seccomp.SyscallRules{
             seccomp.AllowValue(syscall.SOL_SOCKET),
             seccomp.AllowValue(syscall.SO_SNDBUF),
         },
-        {
-            seccomp.AllowAny{},
-            seccomp.AllowValue(syscall.SOL_SOCKET),
-            seccomp.AllowValue(syscall.SO_REUSEADDR),
-        },
     },
     syscall.SYS_GETTID:       {},
     syscall.SYS_GETTIMEOFDAY: {},
@@ -177,6 +174,18 @@ var allowedSyscalls = seccomp.SyscallRules{
     syscall.SYS_LSEEK:   {},
     syscall.SYS_MADVISE: {},
     syscall.SYS_MINCORE: {},
+    // Used by the Go runtime as a temporary workaround for a Linux
+    // 5.2-5.4 bug.
+    //
+    // See src/runtime/os_linux_x86.go.
+    //
+    // TODO(b/148688965): Remove once this is gone from Go.
+    syscall.SYS_MLOCK: []seccomp.Rule{
+        {
+            seccomp.AllowAny{},
+            seccomp.AllowValue(4096),
+        },
+    },
     syscall.SYS_MMAP: []seccomp.Rule{
         {
             seccomp.AllowAny{},
@@ -220,7 +229,9 @@ var allowedSyscalls = seccomp.SyscallRules{
     syscall.SYS_NANOSLEEP: {},
     syscall.SYS_PPOLL:     {},
     syscall.SYS_PREAD64:   {},
+    syscall.SYS_PREADV:    {},
     syscall.SYS_PWRITE64:  {},
+    syscall.SYS_PWRITEV:   {},
     syscall.SYS_READ:      {},
     syscall.SYS_RECVMSG: []seccomp.Rule{
         {
@@ -273,12 +284,21 @@ var allowedSyscalls = seccomp.SyscallRules{
         {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
     },
     syscall.SYS_SIGALTSTACK:     {},
+    unix.SYS_STATX:              {},
     syscall.SYS_SYNC_FILE_RANGE: {},
     syscall.SYS_TGKILL: []seccomp.Rule{
         {
             seccomp.AllowValue(uint64(os.Getpid())),
         },
     },
+    syscall.SYS_UTIMENSAT: []seccomp.Rule{
+        {
+            seccomp.AllowAny{},
+            seccomp.AllowValue(0), /* null pathname */
+            seccomp.AllowAny{},
+            seccomp.AllowValue(0), /* flags */
+        },
+    },
     syscall.SYS_WRITE: {},
     // The only user in rawfile.NonBlockingWrite3 always passes iovcnt with
     // values 2 or 3. Three iovec-s are passed, when the PACKET_VNET_HDR
@@ -315,6 +335,26 @@ func hostInetFilters() seccomp.SyscallRules {
         syscall.SYS_GETSOCKOPT: []seccomp.Rule{
             {
                 seccomp.AllowAny{},
+                seccomp.AllowValue(syscall.SOL_IP),
+                seccomp.AllowValue(syscall.IP_TOS),
+            },
+            {
+                seccomp.AllowAny{},
+                seccomp.AllowValue(syscall.SOL_IP),
+                seccomp.AllowValue(syscall.IP_RECVTOS),
+            },
+            {
+                seccomp.AllowAny{},
+                seccomp.AllowValue(syscall.SOL_IPV6),
+                seccomp.AllowValue(syscall.IPV6_TCLASS),
+            },
+            {
+                seccomp.AllowAny{},
+                seccomp.AllowValue(syscall.SOL_IPV6),
+                seccomp.AllowValue(syscall.IPV6_RECVTCLASS),
+            },
+            {
+                seccomp.AllowAny{},
                 seccomp.AllowValue(syscall.SOL_IPV6),
                 seccomp.AllowValue(syscall.IPV6_V6ONLY),
             },
@@ -416,6 +456,34 @@ func hostInetFilters() seccomp.SyscallRules {
                 seccomp.AllowAny{},
                 seccomp.AllowValue(4),
             },
+            {
+                seccomp.AllowAny{},
+                seccomp.AllowValue(syscall.SOL_IP),
+                seccomp.AllowValue(syscall.IP_TOS),
+                seccomp.AllowAny{},
+                seccomp.AllowValue(4),
+            },
+            {
+                seccomp.AllowAny{},
+                seccomp.AllowValue(syscall.SOL_IP),
+                seccomp.AllowValue(syscall.IP_RECVTOS),
+                seccomp.AllowAny{},
+                seccomp.AllowValue(4),
+            },
+            {
+                seccomp.AllowAny{},
+                seccomp.AllowValue(syscall.SOL_IPV6),
+                seccomp.AllowValue(syscall.IPV6_TCLASS),
+                seccomp.AllowAny{},
+                seccomp.AllowValue(4),
+            },
+            {
+                seccomp.AllowAny{},
+                seccomp.AllowValue(syscall.SOL_IPV6),
+                seccomp.AllowValue(syscall.IPV6_RECVTCLASS),
+                seccomp.AllowAny{},
+                seccomp.AllowValue(4),
+            },
         },
         syscall.SYS_SHUTDOWN: []seccomp.Rule{
             {
@@ -479,16 +547,3 @@ func controlServerFilters(fd int) seccomp.SyscallRules {
         },
     }
 }
-
-// profileFilters returns extra syscalls made by runtime/pprof package.
-func profileFilters() seccomp.SyscallRules {
-    return seccomp.SyscallRules{
-        syscall.SYS_OPENAT: []seccomp.Rule{
-            {
-                seccomp.AllowAny{},
-                seccomp.AllowAny{},
-                seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC),
-            },
-        },
-    }
-}
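For readers new to the rule syntax used throughout config.go: a seccomp.Rule lists one matcher per syscall argument, where seccomp.AllowAny{} matches any value and seccomp.AllowValue(v) matches one exact value. A hypothetical rule, not part of this change, that would admit write(2) only on stderr:

    package filter

    import (
        "syscall"

        "gvisor.dev/gvisor/pkg/seccomp"
    )

    // stderrOnlyWrite is illustrative only: write(2) passes the filter solely
    // when arg0 (the fd) equals 2; buffer pointer and length are unconstrained.
    var stderrOnlyWrite = seccomp.SyscallRules{
        syscall.SYS_WRITE: []seccomp.Rule{
            {
                seccomp.AllowValue(2), // fd == stderr
                seccomp.AllowAny{},    // buf: any pointer
                seccomp.AllowAny{},    // count: any length
            },
        },
    }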
diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go
new file mode 100644
index 000000000..5335ff82c
--- /dev/null
+++ b/runsc/boot/filter/config_amd64.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package filter
+
+import (
+    "syscall"
+
+    "gvisor.dev/gvisor/pkg/abi/linux"
+    "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+func init() {
+    allowedSyscalls[syscall.SYS_ARCH_PRCTL] = append(allowedSyscalls[syscall.SYS_ARCH_PRCTL],
+        seccomp.Rule{seccomp.AllowValue(linux.ARCH_GET_FS)},
+        seccomp.Rule{seccomp.AllowValue(linux.ARCH_SET_FS)},
+    )
+}
diff --git a/runsc/boot/filter/config_arm64.go b/runsc/boot/filter/config_arm64.go
new file mode 100644
index 000000000..7fa9bbda3
--- /dev/null
+++ b/runsc/boot/filter/config_arm64.go
@@ -0,0 +1,21 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package filter
+
+// Reserve for future customization.
+func init() {
+}
diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go
new file mode 100644
index 000000000..194952a7b
--- /dev/null
+++ b/runsc/boot/filter/config_profile.go
@@ -0,0 +1,34 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter
+
+import (
+    "syscall"
+
+    "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+// profileFilters returns extra syscalls made by runtime/pprof package.
+func profileFilters() seccomp.SyscallRules {
+    return seccomp.SyscallRules{
+        syscall.SYS_OPENAT: []seccomp.Rule{
+            {
+                seccomp.AllowAny{},
+                seccomp.AllowAny{},
+                seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC),
+            },
+        },
+    }
+}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 76036c147..98cce60af 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -16,7 +16,6 @@ package boot
 import (
     "fmt"
-    "path"
     "path/filepath"
     "sort"
     "strconv"
@@ -33,8 +32,8 @@ import (
     specs "github.com/opencontainers/runtime-spec/specs-go"
     "gvisor.dev/gvisor/pkg/abi/linux"
+    "gvisor.dev/gvisor/pkg/context"
     "gvisor.dev/gvisor/pkg/log"
-    "gvisor.dev/gvisor/pkg/sentry/context"
     "gvisor.dev/gvisor/pkg/sentry/fs"
     "gvisor.dev/gvisor/pkg/sentry/fs/gofer"
     "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
@@ -52,7 +51,7 @@ const (
     rootDevice = "9pfs-/"

     // MountPrefix is the annotation prefix for mount hints.
-    MountPrefix = "gvisor.dev/spec/mount"
+    MountPrefix = "dev.gvisor.spec.mount."

     // Filesystems that runsc supports.
     bind = "bind"
@@ -279,6 +278,9 @@ func subtargets(root string, mnts []specs.Mount) []string {
 }

 func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+    if conf.VFS2 {
+        return setupContainerVFS2(ctx, conf, mntr, procArgs)
+    }
     mns, err := mntr.setupFS(conf, procArgs)
     if err != nil {
         return err
@@ -465,6 +467,13 @@ func (m *mountHint) checkCompatible(mount specs.Mount) error {
     return nil
 }

+func (m *mountHint) fileAccessType() FileAccessType {
+    if m.share == container {
+        return FileAccessExclusive
+    }
+    return FileAccessShared
+}
+
 func filterUnsupportedOptions(mount specs.Mount) []string {
     rv := make([]string, 0, len(mount.Options))
     for _, o := range mount.Options {
@@ -483,14 +492,15 @@ type podMountHints struct {
 func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
     mnts := make(map[string]*mountHint)
     for k, v := range spec.Annotations {
-        // Look for 'gvisor.dev/spec/mount' annotations and parse them.
+        // Look for 'dev.gvisor.spec.mount' annotations and parse them.
         if strings.HasPrefix(k, MountPrefix) {
-            parts := strings.Split(k, "/")
-            if len(parts) != 5 {
+            // Remove the prefix and split the rest.
+            parts := strings.Split(k[len(MountPrefix):], ".")
+            if len(parts) != 2 {
                 return nil, fmt.Errorf("invalid mount annotation: %s=%s", k, v)
             }
-            name := parts[3]
-            if len(name) == 0 || path.Clean(name) != name {
+            name := parts[0]
+            if len(name) == 0 {
                 return nil, fmt.Errorf("invalid mount name: %s", name)
             }
             mnt := mnts[name]
@@ -498,7 +508,7 @@ func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
                 mnt = &mountHint{name: name}
                 mnts[name] = mnt
             }
-            if err := mnt.setField(parts[4], v); err != nil {
+            if err := mnt.setField(parts[1], v); err != nil {
                 return nil, err
             }
         }
@@ -566,8 +576,16 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin
 // should be mounted (e.g. a volume shared between containers). It must be
 // called for the root container only.
 func (c *containerMounter) processHints(conf *Config) error {
+    if conf.VFS2 {
+        return nil
+    }
     ctx := c.k.SupervisorContext()
     for _, hint := range c.hints.mounts {
+        // TODO(b/142076984): Only support tmpfs for now. Bind mounts require a
+        // common gofer to mount all shared volumes.
+        if hint.mount.Type != tmpfs {
+            continue
+        }
         log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
         inode, err := c.mountSharedMaster(ctx, conf, hint)
         if err != nil {
@@ -764,20 +782,24 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
     case bind:
         fd := c.fds.remove()
         fsName = "9p"
-        // Non-root bind mounts are always shared.
-        opts = p9MountOptions(fd, FileAccessShared)
+        opts = p9MountOptions(fd, c.getMountAccessType(m))
         // If configured, add overlay to all writable mounts.
         useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
     default:
-        // TODO(nlacasse): Support all the mount types and make this a fatal error.
-        // Most applications will "just work" without them, so this is a warning
-        // for now.
         log.Warningf("ignoring unknown filesystem type %q", m.Type)
     }
     return fsName, opts, useOverlay, nil
 }

+func (c *containerMounter) getMountAccessType(mount specs.Mount) FileAccessType {
+    if hint := c.hints.findMount(mount); hint != nil {
+        return hint.fileAccessType()
+    }
+    // Non-root bind mounts are always shared if no hints were provided.
+    return FileAccessShared
+}
+
 // mountSubmount mounts volumes inside the container's root. Because mounts may
 // be readonly, a lower ramfs overlay is added to create the mount point dir.
 // Another overlay is added with tmpfs on top if Config.Overlay is true.
@@ -805,7 +827,20 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
     inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil)
     if err != nil {
-        return fmt.Errorf("creating mount with source %q: %v", m.Source, err)
+        err := fmt.Errorf("creating mount with source %q: %v", m.Source, err)
+        // Check to see if this is a common error due to a Linux bug.
+        // This error is generated here in order to cause it to be
+        // printed to the user using Docker via 'runsc create' etc. rather
+        // than simply printed to the logs for the 'runsc boot' command.
+        //
+        // We check the error message string rather than type because the
+        // actual error types (syscall.EIO, syscall.EPIPE) are lost by the
+        // file system implementation (e.g. p9).
+        // TODO(gvisor.dev/issue/1765): Remove message when bug is resolved.
+        if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) {
+            return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug"))
+        }
+        return err
     }

     // If there are submounts, we need to overlay the mount on top of a ramfs
@@ -837,7 +872,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
         return fmt.Errorf("mount %q error: %v", m.Destination, err)
     }

-    log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type)
+    log.Infof("Mounted %q to %q type: %s, internal-options: %q", m.Source, m.Destination, m.Type, opts)
     return nil
 }
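The mount-hint annotations above changed from path-style keys (gvisor.dev/spec/mount/<name>/<field>) to dotted keys (dev.gvisor.spec.mount.<name>.<field>), parsed by stripping the prefix and splitting on '.'. A self-contained sketch of the new parsing; the mount name "shared-vol" is illustrative:

    package main

    import (
        "fmt"
        "strings"
    )

    // MountPrefix mirrors the new constant in runsc/boot/fs.go.
    const MountPrefix = "dev.gvisor.spec.mount."

    func main() {
        key := MountPrefix + "shared-vol.source"
        // Remove the prefix and split the rest, as newPodMountHints now does.
        parts := strings.Split(key[len(MountPrefix):], ".")
        fmt.Println(parts[0], parts[1]) // mount name "shared-vol", field "source"
    }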
diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go
index 49ab34b33..912037075 100644
--- a/runsc/boot/fs_test.go
+++ b/runsc/boot/fs_test.go
@@ -15,7 +15,6 @@
 package boot

 import (
-    "path"
     "reflect"
     "strings"
     "testing"
@@ -26,19 +25,19 @@ import (
 func TestPodMountHintsHappy(t *testing.T) {
     spec := &specs.Spec{
         Annotations: map[string]string{
-            path.Join(MountPrefix, "mount1", "source"): "foo",
-            path.Join(MountPrefix, "mount1", "type"):   "tmpfs",
-            path.Join(MountPrefix, "mount1", "share"):  "pod",
+            MountPrefix + "mount1.source": "foo",
+            MountPrefix + "mount1.type":   "tmpfs",
+            MountPrefix + "mount1.share":  "pod",

-            path.Join(MountPrefix, "mount2", "source"):  "bar",
-            path.Join(MountPrefix, "mount2", "type"):    "bind",
-            path.Join(MountPrefix, "mount2", "share"):   "container",
-            path.Join(MountPrefix, "mount2", "options"): "rw,private",
+            MountPrefix + "mount2.source":  "bar",
+            MountPrefix + "mount2.type":    "bind",
+            MountPrefix + "mount2.share":   "container",
+            MountPrefix + "mount2.options": "rw,private",
         },
     }
     podHints, err := newPodMountHints(spec)
     if err != nil {
-        t.Errorf("newPodMountHints failed: %v", err)
+        t.Fatalf("newPodMountHints failed: %v", err)
     }

     // Check that fields were set correctly.
@@ -86,95 +85,95 @@ func TestPodMountHintsErrors(t *testing.T) {
         {
             name: "too short",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1"): "foo",
+                MountPrefix + "mount1": "foo",
             },
             error: "invalid mount annotation",
         },
         {
             name: "no name",
             annotations: map[string]string{
-                MountPrefix + "//source": "foo",
+                MountPrefix + ".source": "foo",
             },
             error: "invalid mount name",
         },
         {
             name: "missing source",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1", "type"):  "tmpfs",
-                path.Join(MountPrefix, "mount1", "share"): "pod",
+                MountPrefix + "mount1.type":  "tmpfs",
+                MountPrefix + "mount1.share": "pod",
             },
             error: "source field",
         },
         {
             name: "missing type",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1", "source"): "foo",
-                path.Join(MountPrefix, "mount1", "share"):  "pod",
+                MountPrefix + "mount1.source": "foo",
+                MountPrefix + "mount1.share":  "pod",
             },
             error: "type field",
         },
         {
             name: "missing share",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1", "source"): "foo",
-                path.Join(MountPrefix, "mount1", "type"):   "tmpfs",
+                MountPrefix + "mount1.source": "foo",
+                MountPrefix + "mount1.type":   "tmpfs",
             },
             error: "share field",
         },
         {
             name: "invalid field name",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1", "invalid"): "foo",
+                MountPrefix + "mount1.invalid": "foo",
             },
             error: "invalid mount annotation",
         },
         {
             name: "invalid source",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1", "source"): "",
-                path.Join(MountPrefix, "mount1", "type"):   "tmpfs",
-                path.Join(MountPrefix, "mount1", "share"):  "pod",
+                MountPrefix + "mount1.source": "",
+                MountPrefix + "mount1.type":   "tmpfs",
+                MountPrefix + "mount1.share":  "pod",
             },
             error: "source cannot be empty",
         },
         {
             name: "invalid type",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1", "source"): "foo",
-                path.Join(MountPrefix, "mount1", "type"):   "invalid-type",
-                path.Join(MountPrefix, "mount1", "share"):  "pod",
+                MountPrefix + "mount1.source": "foo",
+                MountPrefix + "mount1.type":   "invalid-type",
+                MountPrefix + "mount1.share":  "pod",
             },
             error: "invalid type",
         },
         {
             name: "invalid share",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1", "source"): "foo",
-                path.Join(MountPrefix, "mount1", "type"):   "tmpfs",
-                path.Join(MountPrefix, "mount1", "share"):  "invalid-share",
+                MountPrefix + "mount1.source": "foo",
+                MountPrefix + "mount1.type":   "tmpfs",
+                MountPrefix + "mount1.share":  "invalid-share",
             },
             error: "invalid share",
         },
         {
             name: "invalid options",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1", "source"):  "foo",
-                path.Join(MountPrefix, "mount1", "type"):    "tmpfs",
-                path.Join(MountPrefix, "mount1", "share"):   "pod",
-                path.Join(MountPrefix, "mount1", "options"): "invalid-option",
+                MountPrefix + "mount1.source":  "foo",
+                MountPrefix + "mount1.type":    "tmpfs",
+                MountPrefix + "mount1.share":   "pod",
+                MountPrefix + "mount1.options": "invalid-option",
             },
             error: "unknown mount option",
         },
         {
             name: "duplicate source",
             annotations: map[string]string{
-                path.Join(MountPrefix, "mount1", "source"): "foo",
-                path.Join(MountPrefix, "mount1", "type"):   "tmpfs",
-                path.Join(MountPrefix, "mount1", "share"):  "pod",
+                MountPrefix + "mount1.source": "foo",
+                MountPrefix + "mount1.type":   "tmpfs",
+                MountPrefix + "mount1.share":  "pod",

-                path.Join(MountPrefix, "mount2", "source"): "foo",
-                path.Join(MountPrefix, "mount2", "type"):   "bind",
-                path.Join(MountPrefix, "mount2", "share"):  "container",
+                MountPrefix + "mount2.source": "foo",
+ MountPrefix + "mount2.type": "bind", + MountPrefix + "mount2.share": "container", }, error: "have the same mount source", }, @@ -191,3 +190,61 @@ func TestPodMountHintsErrors(t *testing.T) { }) } } + +func TestGetMountAccessType(t *testing.T) { + const source = "foo" + for _, tst := range []struct { + name string + annotations map[string]string + want FileAccessType + }{ + { + name: "container=exclusive", + annotations: map[string]string{ + MountPrefix + "mount1.source": source, + MountPrefix + "mount1.type": "bind", + MountPrefix + "mount1.share": "container", + }, + want: FileAccessExclusive, + }, + { + name: "pod=shared", + annotations: map[string]string{ + MountPrefix + "mount1.source": source, + MountPrefix + "mount1.type": "bind", + MountPrefix + "mount1.share": "pod", + }, + want: FileAccessShared, + }, + { + name: "shared=shared", + annotations: map[string]string{ + MountPrefix + "mount1.source": source, + MountPrefix + "mount1.type": "bind", + MountPrefix + "mount1.share": "shared", + }, + want: FileAccessShared, + }, + { + name: "default=shared", + annotations: map[string]string{ + MountPrefix + "mount1.source": source + "mismatch", + MountPrefix + "mount1.type": "bind", + MountPrefix + "mount1.share": "container", + }, + want: FileAccessShared, + }, + } { + t.Run(tst.name, func(t *testing.T) { + spec := &specs.Spec{Annotations: tst.annotations} + podHints, err := newPodMountHints(spec) + if err != nil { + t.Fatalf("newPodMountHints failed: %v", err) + } + mounter := containerMounter{hints: podHints} + if got := mounter.getMountAccessType(specs.Mount{Source: source}); got != tst.want { + t.Errorf("getMountAccessType(), want: %v, got: %v", tst.want, got) + } + }) + } +} diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go index d1c0bb9b5..ce62236e5 100644 --- a/runsc/boot/limits.go +++ b/runsc/boot/limits.go @@ -16,12 +16,12 @@ package boot import ( "fmt" - "sync" "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sync" ) // Mapping from linux resource names to limits.LimitType. 
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 0c0eba99e..f6ea4c102 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -20,7 +20,6 @@ import (
     mrand "math/rand"
     "os"
     "runtime"
-    "sync"
     "sync/atomic"
     "syscall"
     gtime "time"
@@ -36,6 +35,8 @@ import (
     "gvisor.dev/gvisor/pkg/sentry/control"
     "gvisor.dev/gvisor/pkg/sentry/fs"
     "gvisor.dev/gvisor/pkg/sentry/fs/host"
+    "gvisor.dev/gvisor/pkg/sentry/fs/user"
+    vfs2host "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
    "gvisor.dev/gvisor/pkg/sentry/inet"
    "gvisor.dev/gvisor/pkg/sentry/kernel"
    "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -43,11 +44,14 @@ import (
    "gvisor.dev/gvisor/pkg/sentry/pgalloc"
    "gvisor.dev/gvisor/pkg/sentry/platform"
    "gvisor.dev/gvisor/pkg/sentry/sighandling"
-    slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+    "gvisor.dev/gvisor/pkg/sentry/syscalls/linux/vfs2"
    "gvisor.dev/gvisor/pkg/sentry/time"
    "gvisor.dev/gvisor/pkg/sentry/usage"
+    "gvisor.dev/gvisor/pkg/sentry/vfs"
    "gvisor.dev/gvisor/pkg/sentry/watchdog"
+    "gvisor.dev/gvisor/pkg/sync"
    "gvisor.dev/gvisor/pkg/tcpip"
+    "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
    "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
    "gvisor.dev/gvisor/pkg/tcpip/network/arp"
    "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -59,16 +63,20 @@ import (
    "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
    "gvisor.dev/gvisor/runsc/boot/filter"
    _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms.
+    "gvisor.dev/gvisor/runsc/boot/pprof"
    "gvisor.dev/gvisor/runsc/specutils"

    // Include supported socket providers.
    "gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
    _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
    _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route"
+    _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/uevent"
    "gvisor.dev/gvisor/pkg/sentry/socket/netstack"
    _ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
 )

+var syscallTable *kernel.SyscallTable
+
 // Loader keeps state needed to start the kernel and run the container.
 type Loader struct {
     // k is the kernel.
@@ -93,10 +101,6 @@ type Loader struct {
     // spec is the base configuration for the root container.
     spec *specs.Spec

-    // startSignalForwarding enables forwarding of signals to the sandboxed
-    // container. It should be called after the init process is loaded.
-    startSignalForwarding func() func()
-
     // stopSignalForwarding disables forwarding of signals to the sandboxed
     // container. It should be called when a sandbox is destroyed.
     stopSignalForwarding func()
@@ -146,9 +150,6 @@ type execProcess struct {
 func init() {
     // Initialize the random number generator.
     mrand.Seed(gtime.Now().UnixNano())
-
-    // Register the global syscall table.
-    kernel.RegisterSyscallTable(slinux.AMD64)
 }

 // Args are the arguments for New().
@@ -159,13 +160,17 @@ type Args struct {
     Spec *specs.Spec
     // Conf is the system configuration.
     Conf *Config
-    // ControllerFD is the FD to the URPC controller.
+    // ControllerFD is the FD to the URPC controller. The Loader takes ownership
+    // of this FD and may close it at any time.
     ControllerFD int
-    // Device is an optional argument that is passed to the platform.
+    // Device is an optional argument that is passed to the platform. The Loader
+    // takes ownership of this file and may close it at any time.
     Device *os.File
-    // GoferFDs is an array of FDs used to connect with the Gofer.
+    // GoferFDs is an array of FDs used to connect with the Gofer. The Loader
+    // takes ownership of these FDs and may close them at any time.
     GoferFDs []int
-    // StdioFDs is the stdio for the application.
+    // StdioFDs is the stdio for the application. The Loader takes ownership of
+    // these FDs and may close them at any time.
     StdioFDs []int
     // Console is set to true if using TTY.
     Console bool
@@ -178,6 +183,9 @@ type Args struct {
     UserLogFD int
 }

+// Make sure stdioFDs are always the same on initial start and on restore.
+const startingStdioFD = 64
+
 // New initializes a new kernel loader configured by spec.
 // New also handles setting up a kernel for restoring a container.
 func New(args Args) (*Loader, error) {
@@ -191,6 +199,14 @@ func New(args Args) (*Loader, error) {
         return nil, fmt.Errorf("setting up memory usage: %v", err)
     }

+    // Patch the syscall table.
+    kernel.VFS2Enabled = args.Conf.VFS2
+    if kernel.VFS2Enabled {
+        vfs2.Override(syscallTable.Table)
+    }
+
+    kernel.RegisterSyscallTable(syscallTable)
+
     // Create kernel and platform.
     p, err := createPlatform(args.Conf, args.Device)
     if err != nil {
@@ -228,11 +244,8 @@ func New(args Args) (*Loader, error) {
         return nil, fmt.Errorf("enabling strace: %v", err)
     }

-    // Create an empty network stack because the network namespace may be empty at
-    // this point. Netns is configured before Run() is called. Netstack is
-    // configured using a control uRPC message. Host network is configured inside
-    // Run().
-    networkStack, err := newEmptyNetworkStack(args.Conf, k)
+    // Create root network namespace/stack.
+    netns, err := newRootNetworkNamespace(args.Conf, k, k)
     if err != nil {
         return nil, fmt.Errorf("creating network: %v", err)
     }
@@ -275,7 +288,7 @@ func New(args Args) (*Loader, error) {
         FeatureSet:           cpuid.HostFeatureSet(),
         Timekeeper:           tk,
         RootUserNamespace:    creds.UserNamespace,
-        NetworkStack:         networkStack,
+        RootNetworkNamespace: netns,
         ApplicationCores:     uint(args.NumCPU),
         Vdso:                 vdso,
         RootUTSNamespace:     kernel.NewUTSNamespace(args.Spec.Hostname, args.Spec.Hostname, creds.UserNamespace),
@@ -300,7 +313,9 @@ func New(args Args) (*Loader, error) {
     }

     // Create a watchdog.
-    dog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction)
+    dogOpts := watchdog.DefaultOpts
+    dogOpts.TaskTimeoutAction = args.Conf.WatchdogAction
+    dog := watchdog.New(k, dogOpts)

     procArgs, err := newProcess(args.ID, args.Spec, creds, k, k.RootPIDNamespace())
     if err != nil {
@@ -316,6 +331,35 @@ func New(args Args) (*Loader, error) {
         return nil, fmt.Errorf("creating pod mount hints: %v", err)
     }

+    if kernel.VFS2Enabled {
+        // Set up host mount that will be used for imported fds.
+        hostFilesystem := vfs2host.NewFilesystem(k.VFS())
+        defer hostFilesystem.DecRef()
+        hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{})
+        if err != nil {
+            return nil, fmt.Errorf("failed to create hostfs mount: %v", err)
+        }
+        k.SetHostMount(hostMount)
+    }
+
+    // Make host FDs stable between invocations. Host FDs must map to the exact
+    // same number when the sandbox is restored. Otherwise the wrong FD will be
+    // used.
+    var stdioFDs []int
+    newfd := startingStdioFD
+    for _, fd := range args.StdioFDs {
+        err := syscall.Dup3(fd, newfd, syscall.O_CLOEXEC)
+        if err != nil {
+            return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err)
+        }
+        stdioFDs = append(stdioFDs, newfd)
+        err = syscall.Close(fd)
+        if err != nil {
+            return nil, fmt.Errorf("close original stdioFDs failed: %v", err)
+        }
+        newfd++
+    }
+
     eid := execID{cid: args.ID}
     l := &Loader{
         k:            k,
@@ -324,7 +368,7 @@ func New(args Args) (*Loader, error) {
         watchdog:     dog,
         spec:         args.Spec,
         goferFDs:     args.GoferFDs,
-        stdioFDs:     args.StdioFDs,
+        stdioFDs:     stdioFDs,
         rootProcArgs: procArgs,
         sandboxID:    args.ID,
         processes:    map[execID]*execProcess{eid: {}},
@@ -337,29 +381,6 @@ func New(args Args) (*Loader, error) {
         return nil, fmt.Errorf("ignore child stop signals failed: %v", err)
     }

-    // Handle signals by forwarding them to the root container process
-    // (except for panic signal, which should cause a panic).
-    l.startSignalForwarding = sighandling.PrepareHandler(func(sig linux.Signal) {
-        // Panic signal should cause a panic.
-        if args.Conf.PanicSignal != -1 && sig == linux.Signal(args.Conf.PanicSignal) {
-            panic("Signal-induced panic")
-        }
-
-        // Otherwise forward to root container.
-        deliveryMode := DeliverToProcess
-        if args.Console {
-            // Since we are running with a console, we should
-            // forward the signal to the foreground process group
-            // so that job control signals like ^C can be handled
-            // properly.
-            deliveryMode = DeliverToForegroundProcessGroup
-        }
-        log.Infof("Received external signal %d, mode: %v", sig, deliveryMode)
-        if err := l.signal(args.ID, 0, int32(sig), deliveryMode); err != nil {
-            log.Warningf("error sending signal %v to container %q: %v", sig, args.ID, err)
-        }
-    })
-
     // Create the control server using the provided FD.
     //
     // This must be done *after* we have initialized the kernel since the
@@ -387,11 +408,16 @@ func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.
         return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %v", err)
     }

+    wd := spec.Process.Cwd
+    if wd == "" {
+        wd = "/"
+    }
+
     // Create the process arguments.
     procArgs := kernel.CreateProcessArgs{
         Argv:             spec.Process.Args,
         Envv:             spec.Process.Env,
-        WorkingDirectory: spec.Process.Cwd, // Defaults to '/' if empty.
+        WorkingDirectory: wd,
         Credentials:      creds,
         Umask:            0022,
         Limits:           ls,
@@ -485,7 +511,7 @@ func (l *Loader) run() error {
         // Delay host network configuration to this point because network namespace
         // is configured after the loader is created and before Run() is called.
         log.Debugf("Configuring host network")
-        stack := l.k.NetworkStack().(*hostinet.Stack)
+        stack := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack)
         if err := stack.Configure(); err != nil {
             return err
         }
@@ -504,7 +530,7 @@ func (l *Loader) run() error {
     // l.restore is set by the container manager when a restore call is made.
     if !l.restore {
         if l.conf.ProfileEnable {
-            initializePProf()
+            pprof.Initialize()
         }

         // Finally done with all configuration. Setup filters before user code
@@ -536,7 +562,15 @@ func (l *Loader) run() error {
         }

         // Add the HOME environment variable if it is not already set.
- envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv) + var envv []string + if kernel.VFS2Enabled { + envv, err = user.MaybeAddExecUserHomeVFS2(ctx, l.rootProcArgs.MountNamespaceVFS2, + l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv) + + } else { + envv, err = user.MaybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, + l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv) + } if err != nil { return err } @@ -567,8 +601,40 @@ func (l *Loader) run() error { ep.tty.InitForegroundProcessGroup(ep.tg.ProcessGroup()) } - // Start signal forwarding only after an init process is created. - l.stopSignalForwarding = l.startSignalForwarding() + // Handle signals by forwarding them to the root container process + // (except for panic signal, which should cause a panic). + l.stopSignalForwarding = sighandling.StartSignalForwarding(func(sig linux.Signal) { + // Panic signal should cause a panic. + if l.conf.PanicSignal != -1 && sig == linux.Signal(l.conf.PanicSignal) { + panic("Signal-induced panic") + } + + // Otherwise forward to root container. + deliveryMode := DeliverToProcess + if l.console { + // Since we are running with a console, we should forward the signal to + // the foreground process group so that job control signals like ^C can + // be handled properly. + deliveryMode = DeliverToForegroundProcessGroup + } + log.Infof("Received external signal %d, mode: %v", sig, deliveryMode) + if err := l.signal(l.sandboxID, 0, int32(sig), deliveryMode); err != nil { + log.Warningf("error sending signal %v to container %q: %v", sig, l.sandboxID, err) + } + }) + + // l.stdioFDs are derived from dup() in boot.New() and they are now dup()ed again + // either in createFDTable() during initial start or in descriptor.initAfterLoad() + // during restore, we can release l.stdioFDs now. VFS2 takes ownership of the + // passed FDs, so only close for VFS1. + if !kernel.VFS2Enabled { + for _, fd := range l.stdioFDs { + err := syscall.Close(fd) + if err != nil { + return fmt.Errorf("close dup()ed stdioFDs: %v", err) + } + } + } log.Infof("Process should have started...") l.watchdog.Start() @@ -795,20 +861,23 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { return 0, fmt.Errorf("container %q not started", args.ContainerID) } + // TODO(gvisor.dev/issue/1623): Add VFS2 support + // Get the container MountNamespace from the Task. tg.Leader().WithMuLocked(func(t *kernel.Task) { - // task.MountNamespace() does not take a ref, so we must do so - // ourselves. + // task.MountNamespace() does not take a ref, so we must do so ourselves. args.MountNamespace = t.MountNamespace() args.MountNamespace.IncRef() }) - defer args.MountNamespace.DecRef() + if args.MountNamespace != nil { + defer args.MountNamespace.DecRef() + } - // Add the HOME enviroment varible if it is not already set. + // Add the HOME environment variable if it is not already set. 
root := args.MountNamespace.Root() defer root.DecRef() ctx := fs.WithRoot(l.k.SupervisorContext(), root) - envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) + envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) if err != nil { return 0, err } @@ -905,47 +974,92 @@ func (l *Loader) WaitExit() kernel.ExitStatus { return l.k.GlobalInit().ExitStatus() } -func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) { +func newRootNetworkNamespace(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (*inet.Namespace, error) { + // Create an empty network stack because the network namespace may be empty at + // this point. Netns is configured before Run() is called. Netstack is + // configured using a control uRPC message. Host network is configured inside + // Run(). switch conf.Network { case NetworkHost: - return hostinet.NewStack(), nil + // No network namespacing support for hostinet yet, hence creator is nil. + return inet.NewRootNamespace(hostinet.NewStack(), nil), nil case NetworkNone, NetworkSandbox: - // NetworkNone sets up loopback using netstack. - netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()} - transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()} - s := netstack.Stack{stack.New(stack.Options{ - NetworkProtocols: netProtos, - TransportProtocols: transProtos, - Clock: clock, - Stats: netstack.Metrics, - HandleLocal: true, - // Enable raw sockets for users with sufficient - // privileges. - RawFactory: raw.EndpointFactory{}, - })} - - // Enable SACK Recovery. - if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { - return nil, fmt.Errorf("failed to enable SACK: %v", err) + s, err := newEmptySandboxNetworkStack(clock, uniqueID) + if err != nil { + return nil, err + } + creator := &sandboxNetstackCreator{ + clock: clock, + uniqueID: uniqueID, } + return inet.NewRootNamespace(s, creator), nil - // Set default TTLs as required by socket/netstack. - s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) - s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + default: + panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + } - // Enable Receive Buffer Auto-Tuning. - if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) - } +} - s.FillDefaultIPTables() +func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { + netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()} + transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()} + s := netstack.Stack{stack.New(stack.Options{ + NetworkProtocols: netProtos, + TransportProtocols: transProtos, + Clock: clock, + Stats: netstack.Metrics, + HandleLocal: true, + // Enable raw sockets for users with sufficient + // privileges. + RawFactory: raw.EndpointFactory{}, + UniqueID: uniqueID, + })} - return &s, nil + // Enable SACK Recovery. 
+ if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { + return nil, fmt.Errorf("failed to enable SACK: %v", err) + } - default: - panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + // Set default TTLs as required by socket/netstack. + s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + + // Enable Receive Buffer Auto-Tuning. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) } + + s.FillDefaultIPTables() + + return &s, nil +} + +// sandboxNetstackCreator implements kernel.NetworkStackCreator. +// +// +stateify savable +type sandboxNetstackCreator struct { + clock tcpip.Clock + uniqueID stack.UniqueID +} + +// CreateStack implements kernel.NetworkStackCreator.CreateStack. +func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) { + s, err := newEmptySandboxNetworkStack(f.clock, f.uniqueID) + if err != nil { + return nil, err + } + + // Setup loopback. + n := &Network{Stack: s.(*netstack.Stack).Stack} + nicID := tcpip.NICID(f.uniqueID.UniqueID()) + link := DefaultLoopbackLink + linkEP := loopback.New() + if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { + return nil, err + } + + return s, nil } // signal sends a signal to one or more processes in a container. If PID is 0, @@ -993,7 +1107,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er execTG, _, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) if err == nil { // Send signal directly to the identified process. - return execTG.SendSignal(&arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo}) } // The caller may be signaling a process not started directly via exec. @@ -1010,7 +1124,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er if tg.Leader().ContainerID() != cid { return fmt.Errorf("process %d is part of a different container: %q", tgid, tg.Leader().ContainerID()) } - return tg.SendSignal(&arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) } func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, signo int32) error { @@ -1028,7 +1142,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s // No foreground process group has been set. Signal the // original thread group. log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid) - return tg.SendSignal(&arch.SignalInfo{Signo: signo}) + return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) } // Send the signal to all processes in the process group. 
var lastErr error @@ -1036,7 +1150,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s if tg.ProcessGroup() != pg { continue } - if err := tg.SendSignal(&arch.SignalInfo{Signo: signo}); err != nil { + if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil { lastErr = err } } diff --git a/runsc/boot/loader_amd64.go b/runsc/boot/loader_amd64.go new file mode 100644 index 000000000..78df86611 --- /dev/null +++ b/runsc/boot/loader_amd64.go @@ -0,0 +1,26 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package boot + +import ( + "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" +) + +func init() { + // Set the global syscall table. + syscallTable = linux.AMD64 +} diff --git a/runsc/boot/loader_arm64.go b/runsc/boot/loader_arm64.go new file mode 100644 index 000000000..250785010 --- /dev/null +++ b/runsc/boot/loader_arm64.go @@ -0,0 +1,26 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package boot + +import ( + "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" +) + +func init() { + // Set the global syscall table. + syscallTable = linux.ARM64 +} diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 147ff7703..55d27a632 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -19,17 +19,21 @@ import ( "math/rand" "os" "reflect" - "sync" "syscall" "testing" "time" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/runsc/fsgofer" ) @@ -65,6 +69,11 @@ func testSpec() *specs.Spec { } } +func resetSyscallTable() { + kernel.VFS2Enabled = false + kernel.FlushSyscallTablesTestOnly() +} + // startGofer starts a new gofer routine serving 'root' path. It returns the // sandbox side of the connection, and a function that when called will stop the // gofer. 
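The new loader_amd64.go and loader_arm64.go files above pick the syscall table at compile time: each file is guarded by a build tag and assigns a package-level variable from init(), which New() then optionally patches for VFS2 and registers. A sketch of this build-tag pattern with hypothetical names (three files shown in one block):

```go
// table.go (shared, compiled for every architecture)
package loader

// SyscallTable stands in for the real per-arch table type.
type SyscallTable struct{ Arch string }

// syscallTable is assigned by exactly one arch-specific init below.
var syscallTable *SyscallTable

// table_amd64.go
// +build amd64

package loader

func init() { syscallTable = &SyscallTable{Arch: "amd64"} }

// table_arm64.go
// +build arm64

package loader

func init() { syscallTable = &SyscallTable{Arch: "arm64"} }
```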
@@ -100,20 +109,29 @@ func startGofer(root string) (int, func(), error) { return sandboxEnd, cleanup, nil } -func createLoader() (*Loader, func(), error) { +func createLoader(vfsEnabled bool, spec *specs.Spec) (*Loader, func(), error) { fd, err := server.CreateSocket(ControlSocketAddr(fmt.Sprintf("%010d", rand.Int())[:10])) if err != nil { return nil, nil, err } conf := testConfig() - spec := testSpec() + conf.VFS2 = vfsEnabled sandEnd, cleanup, err := startGofer(spec.Root.Path) if err != nil { return nil, nil, err } - stdio := []int{int(os.Stdin.Fd()), int(os.Stdout.Fd()), int(os.Stderr.Fd())} + // Loader takes ownership of stdio. + var stdio []int + for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { + newFd, err := unix.Dup(int(f.Fd())) + if err != nil { + return nil, nil, err + } + stdio = append(stdio, newFd) + } + args := Args{ ID: "foo", Spec: spec, @@ -132,10 +150,22 @@ func createLoader() (*Loader, func(), error) { // TestRun runs a simple application in a sandbox and checks that it succeeds. func TestRun(t *testing.T) { - l, cleanup, err := createLoader() + defer resetSyscallTable() + doRun(t, false) +} + +// TestRunVFS2 runs TestRun in VFSv2. +func TestRunVFS2(t *testing.T) { + defer resetSyscallTable() + doRun(t, true) +} + +func doRun(t *testing.T, vfsEnabled bool) { + l, cleanup, err := createLoader(vfsEnabled, testSpec()) if err != nil { t.Fatalf("error creating loader: %v", err) } + defer l.Destroy() defer cleanup() @@ -169,7 +199,18 @@ func TestRun(t *testing.T) { // TestStartSignal tests that the controller Start message will cause // WaitForStartSignal to return. func TestStartSignal(t *testing.T) { - l, cleanup, err := createLoader() + defer resetSyscallTable() + doStartSignal(t, false) +} + +// TestStartSignalVFS2 does TestStartSignal with VFS2. +func TestStartSignalVFS2(t *testing.T) { + defer resetSyscallTable() + doStartSignal(t, true) +} + +func doStartSignal(t *testing.T, vfsEnabled bool) { + l, cleanup, err := createLoader(vfsEnabled, testSpec()) if err != nil { t.Fatalf("error creating loader: %v", err) } @@ -217,18 +258,19 @@ func TestStartSignal(t *testing.T) { } -// Test that MountNamespace can be created with various specs. -func TestCreateMountNamespace(t *testing.T) { - testCases := []struct { - name string - // Spec that will be used to create the mount manager. Note - // that we can't mount procfs without a kernel, so each spec - // MUST contain something other than procfs mounted at /proc. - spec specs.Spec - // Paths that are expected to exist in the resulting fs. - expectedPaths []string - }{ - { +type CreateMountTestcase struct { + name string + // Spec that will be used to create the mount manager. Note + // that we can't mount procfs without a kernel, so each spec + // MUST contain something other than procfs mounted at /proc. + spec specs.Spec + // Paths that are expected to exist in the resulting fs. + expectedPaths []string +} + +func createMountTestcases(vfs2 bool) []*CreateMountTestcase { + testCases := []*CreateMountTestcase{ + &CreateMountTestcase{ // Only proc. name: "only proc mount", spec: specs.Spec{ @@ -270,7 +312,7 @@ func TestCreateMountNamespace(t *testing.T) { // /dev, and /sys. expectedPaths: []string{"/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - { + &CreateMountTestcase{ // Mounts are nested inside each other. 
 name: "nested mounts",
 spec: specs.Spec{
@@ -314,7 +356,7 @@ func TestCreateMountNamespace(t *testing.T) {
 expectedPaths: []string{"/foo", "/foo/bar", "/foo/bar/baz", "/foo/qux",
 "/foo/qux-quz", "/foo/some/very/very/deep/path", "/proc", "/dev", "/sys"},
 },
- {
+ &CreateMountTestcase{
 name: "mount inside /dev",
 spec: specs.Spec{
 Root: &specs.Root{
@@ -357,40 +399,47 @@ func TestCreateMountNamespace(t *testing.T) {
 },
 expectedPaths: []string{"/proc", "/dev", "/dev/fd-foo", "/dev/foo", "/dev/bar", "/sys"},
 },
- {
- name: "mounts inside mandatory mounts",
- spec: specs.Spec{
- Root: &specs.Root{
- Path: os.TempDir(),
- Readonly: true,
+ }
+
+ vfsCase := &CreateMountTestcase{
+ name: "mounts inside mandatory mounts",
+ spec: specs.Spec{
+ Root: &specs.Root{
+ Path: os.TempDir(),
+ Readonly: true,
+ },
+ Mounts: []specs.Mount{
+ {
+ Destination: "/proc",
+ Type: "tmpfs",
 },
- Mounts: []specs.Mount{
- {
- Destination: "/proc",
- Type: "tmpfs",
- },
- // We don't include /sys, and /tmp in
- // the spec, since they will be added
- // automatically.
- //
- // Instead, add submounts inside these
- // directories and make sure they are
- // visible under the mandatory mounts.
- {
- Destination: "/sys/bar",
- Type: "tmpfs",
- },
- {
- Destination: "/tmp/baz",
- Type: "tmpfs",
- },
+ // TODO(gvisor.dev/issue/1487): Re-add this case when sysfs supports
+ // MkDirAt in VFS2 (and remove the redundant append).
+ // {
+ // Destination: "/sys/bar",
+ // Type: "tmpfs",
+ // },
+ //
+ {
+ Destination: "/tmp/baz",
+ Type: "tmpfs",
+ },
 },
 },
- expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"},
 },
+ expectedPaths: []string{"/proc", "/sys" /* "/sys/bar", */, "/tmp", "/tmp/baz"},
 }
- for _, tc := range testCases {
+ if !vfs2 {
+ vfsCase.spec.Mounts = append(vfsCase.spec.Mounts, specs.Mount{Destination: "/sys/bar", Type: "tmpfs"})
+ vfsCase.expectedPaths = append(vfsCase.expectedPaths, "/sys/bar")
+ }
+ return append(testCases, vfsCase)
+}
+
+// Test that MountNamespace can be created with various specs.
+func TestCreateMountNamespace(t *testing.T) {
+
+ for _, tc := range createMountTestcases(false /* vfs2 */) {
 t.Run(tc.name, func(t *testing.T) {
 conf := testConfig()
 ctx := contexttest.Context(t)
@@ -425,6 +474,56 @@ func TestCreateMountNamespace(t *testing.T) {
 }
 }
+// Test that MountNamespace can be created with various specs, using VFS2.
+func TestCreateMountNamespaceVFS2(t *testing.T) { + + for _, tc := range createMountTestcases(true /* vfs2 */) { + t.Run(tc.name, func(t *testing.T) { + defer resetSyscallTable() + + spec := testSpec() + spec.Mounts = tc.spec.Mounts + spec.Root = tc.spec.Root + + l, loaderCleanup, err := createLoader(true /* VFS2 Enabled */, spec) + if err != nil { + t.Fatalf("failed to create loader: %v", err) + } + defer l.Destroy() + defer loaderCleanup() + + mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints) + if err := mntr.processHints(l.conf); err != nil { + t.Fatalf("failed process hints: %v", err) + } + + ctx := l.rootProcArgs.NewContext(l.k) + mns, err := mntr.setupVFS2(ctx, l.conf, &l.rootProcArgs) + if err != nil { + t.Fatalf("failed to setupVFS2: %v", err) + } + + root := mns.Root() + defer root.DecRef() + for _, p := range tc.expectedPaths { + + target := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(p), + } + + if d, err := l.k.VFS().GetDentryAt(ctx, l.rootProcArgs.Credentials, target, &vfs.GetDentryOptions{}); err != nil { + t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err) + } else { + d.DecRef() + } + + } + }) + } +} + // TestRestoreEnvironment tests that the correct mounts are collected from the spec and config // in order to build the environment for restoring. func TestRestoreEnvironment(t *testing.T) { diff --git a/runsc/boot/network.go b/runsc/boot/network.go index f98c5fd36..bee6ee336 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -17,6 +17,7 @@ package boot import ( "fmt" "net" + "strings" "syscall" "gvisor.dev/gvisor/pkg/log" @@ -31,6 +32,32 @@ import ( "gvisor.dev/gvisor/pkg/urpc" ) +var ( + // DefaultLoopbackLink contains IP addresses and routes of "127.0.0.1/8" and + // "::1/8" on "lo" interface. + DefaultLoopbackLink = LoopbackLink{ + Name: "lo", + Addresses: []net.IP{ + net.IP("\x7f\x00\x00\x01"), + net.IPv6loopback, + }, + Routes: []Route{ + { + Destination: net.IPNet{ + IP: net.IPv4(0x7f, 0, 0, 0), + Mask: net.IPv4Mask(0xff, 0, 0, 0), + }, + }, + { + Destination: net.IPNet{ + IP: net.IPv6loopback, + Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)), + }, + }, + }, + } +) + // Network exposes methods that can be used to configure a network stack. type Network struct { Stack *stack.Stack @@ -80,7 +107,8 @@ type CreateLinksAndRoutesArgs struct { LoopbackLinks []LoopbackLink FDBasedLinks []FDBasedLink - DefaultGateway DefaultRoute + Defaultv4Gateway DefaultRoute + Defaultv6Gateway DefaultRoute } // Empty returns true if route hasn't been set. 
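A note on the DefaultLoopbackLink literals above: the 4-byte string is simply 127.0.0.1 in network byte order, and the all-ones 16-byte mask makes the IPv6 route host-specific, i.e. ::1/128. A stdlib-only check:

```go
package main

import (
	"fmt"
	"net"
	"strings"
)

func main() {
	// The raw 4-byte literal decodes to the IPv4 loopback address.
	fmt.Println(net.IP("\x7f\x00\x00\x01")) // 127.0.0.1

	// Route covering the whole IPv4 loopback block.
	v4 := net.IPNet{
		IP:   net.IPv4(0x7f, 0, 0, 0),
		Mask: net.IPv4Mask(0xff, 0, 0, 0),
	}
	fmt.Println(v4.String()) // 127.0.0.0/8

	// An all-ones 16-byte mask yields a host route for ::1.
	v6 := net.IPNet{
		IP:   net.IPv6loopback,
		Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)),
	}
	fmt.Println(v6.String()) // ::1/128
}
```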
@@ -122,10 +150,10 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct nicID++ nicids[link.Name] = nicID - ep := loopback.New() + linkEP := loopback.New() log.Infof("Enabling loopback interface %q with id %d on addresses %+v", link.Name, nicID, link.Addresses) - if err := n.createNICWithAddrs(nicID, link.Name, ep, link.Addresses, true /* loopback */); err != nil { + if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { return err } @@ -157,7 +185,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct } mac := tcpip.LinkAddress(link.LinkAddress) - ep, err := fdbased.New(&fdbased.Options{ + linkEP, err := fdbased.New(&fdbased.Options{ FDs: FDs, MTU: uint32(link.MTU), EthernetHeader: true, @@ -172,7 +200,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct } log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels) - if err := n.createNICWithAddrs(nicID, link.Name, ep, link.Addresses, false /* loopback */); err != nil { + if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { return err } @@ -186,12 +214,24 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct } } - if !args.DefaultGateway.Route.Empty() { - nicID, ok := nicids[args.DefaultGateway.Name] + if !args.Defaultv4Gateway.Route.Empty() { + nicID, ok := nicids[args.Defaultv4Gateway.Name] if !ok { - return fmt.Errorf("invalid interface name %q for default route", args.DefaultGateway.Name) + return fmt.Errorf("invalid interface name %q for default route", args.Defaultv4Gateway.Name) } - route, err := args.DefaultGateway.Route.toTcpipRoute(nicID) + route, err := args.Defaultv4Gateway.Route.toTcpipRoute(nicID) + if err != nil { + return err + } + routes = append(routes, route) + } + + if !args.Defaultv6Gateway.Route.Empty() { + nicID, ok := nicids[args.Defaultv6Gateway.Name] + if !ok { + return fmt.Errorf("invalid interface name %q for default route", args.Defaultv6Gateway.Name) + } + route, err := args.Defaultv6Gateway.Route.toTcpipRoute(nicID) if err != nil { return err } @@ -205,15 +245,10 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct // createNICWithAddrs creates a NIC in the network stack and adds the given // addresses. -func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkEndpoint, addrs []net.IP, loopback bool) error { - if loopback { - if err := n.Stack.CreateNamedLoopbackNIC(id, name, sniffer.New(ep)); err != nil { - return fmt.Errorf("CreateNamedLoopbackNIC(%v, %v) failed: %v", id, name, err) - } - } else { - if err := n.Stack.CreateNamedNIC(id, name, sniffer.New(ep)); err != nil { - return fmt.Errorf("CreateNamedNIC(%v, %v) failed: %v", id, name, err) - } +func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkEndpoint, addrs []net.IP) error { + opts := stack.NICOptions{Name: name} + if err := n.Stack.CreateNICWithOptions(id, sniffer.New(ep), opts); err != nil { + return fmt.Errorf("CreateNICWithOptions(%d, _, %+v) failed: %v", id, opts, err) } // Always start with an arp address for the NIC. 
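The hunks above fold the loopback and fdbased paths into a single NIC-creation call: CreateNamedLoopbackNIC and CreateNamedNIC are replaced by CreateNICWithOptions, with the interface name carried in stack.NICOptions. A minimal sketch of that call, using the in-tree netstack API as it stood at the time of this change (these signatures have since evolved):

```go
package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
	"gvisor.dev/gvisor/pkg/tcpip/stack"
)

func main() {
	s := stack.New(stack.Options{
		NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
	})

	// The name travels in NICOptions; the endpoint decides loopback vs. not.
	opts := stack.NICOptions{Name: "lo"}
	if err := s.CreateNICWithOptions(tcpip.NICID(1), loopback.New(), opts); err != nil {
		fmt.Println("CreateNICWithOptions failed:", err)
	}
}
```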
diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD index 03391cdca..77774f43c 100644 --- a/runsc/boot/platforms/BUILD +++ b/runsc/boot/platforms/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "platforms", srcs = ["platforms.go"], - importpath = "gvisor.dev/gvisor/runsc/boot/platforms", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/boot/pprof/BUILD b/runsc/boot/pprof/BUILD new file mode 100644 index 000000000..29cb42b2f --- /dev/null +++ b/runsc/boot/pprof/BUILD @@ -0,0 +1,11 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "pprof", + srcs = ["pprof.go"], + visibility = [ + "//runsc:__subpackages__", + ], +) diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof/pprof.go index 463362f02..1ded20dee 100644 --- a/runsc/boot/pprof.go +++ b/runsc/boot/pprof/pprof.go @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -package boot +// Package pprof provides a stub to initialize custom profilers. +package pprof -func initializePProf() { +// Initialize will be called at boot for initializing custom profilers. +func Initialize() { } diff --git a/runsc/boot/user.go b/runsc/boot/user.go deleted file mode 100644 index 56cc12ee0..000000000 --- a/runsc/boot/user.go +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -import ( - "bufio" - "fmt" - "io" - "strconv" - "strings" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -type fileReader struct { - // Ctx is the context for the file reader. - Ctx context.Context - - // File is the file to read from. - File *fs.File -} - -// Read implements io.Reader.Read. -func (r *fileReader) Read(buf []byte) (int, error) { - n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf)) - return int(n), err -} - -// getExecUserHome returns the home directory of the executing user read from -// /etc/passwd as read from the container filesystem. -func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) { - // The default user home directory to return if no user matching the user - // if found in the /etc/passwd found in the image. - const defaultHome = "/" - - // Open the /etc/passwd file from the dirent via the root mount namespace. - mnsRoot := rootMns.Root() - maxTraversals := uint(linux.MaxSymlinkTraversals) - dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals) - if err != nil { - // NOTE: Ignore errors opening the passwd file. If the passwd file - // doesn't exist we will return the default home directory. 
- return defaultHome, nil - } - defer dirent.DecRef() - - // Check read permissions on the file. - if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil { - // NOTE: Ignore permissions errors here and return default root dir. - return defaultHome, nil - } - - // Only open regular files. We don't open other files like named pipes as - // they may block and might present some attack surface to the container. - // Note that runc does not seem to do this kind of checking. - if !fs.IsRegular(dirent.Inode.StableAttr) { - return defaultHome, nil - } - - f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false}) - if err != nil { - return "", err - } - defer f.DecRef() - - r := &fileReader{ - Ctx: ctx, - File: f, - } - - homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome) - if err != nil { - return "", err - } - - return homeDir, nil -} - -// maybeAddExecUserHome returns a new slice with the HOME enviroment variable -// set if the slice does not already contain it, otherwise it returns the -// original slice unmodified. -func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) { - // Check if the envv already contains HOME. - for _, env := range envv { - if strings.HasPrefix(env, "HOME=") { - // We have it. Return the original slice unmodified. - return envv, nil - } - } - - // Read /etc/passwd for the user's HOME directory and set the HOME - // environment variable as required by POSIX if it is not overridden by - // the user. - homeDir, err := getExecUserHome(ctx, mns, uid) - if err != nil { - return nil, fmt.Errorf("error reading exec user: %v", err) - } - return append(envv, "HOME="+homeDir), nil -} - -// findHomeInPasswd parses a passwd file and returns the given user's home -// directory. This function does it's best to replicate the runc's behavior. -func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) { - s := bufio.NewScanner(passwd) - - for s.Scan() { - if err := s.Err(); err != nil { - return "", err - } - - line := strings.TrimSpace(s.Text()) - if line == "" { - continue - } - - // Pull out part of passwd entry. Loosely parse the passwd entry as some - // passwd files could be poorly written and for compatibility with runc. - // - // Per 'man 5 passwd' - // /etc/passwd contains one line for each user account, with seven - // fields delimited by colons (“:”). These fields are: - // - // - login name - // - optional encrypted password - // - numerical user ID - // - numerical group ID - // - user name or comment field - // - user home directory - // - optional user command interpreter - parts := strings.Split(line, ":") - - found := false - homeDir := "" - for i, p := range parts { - switch i { - case 2: - parsedUID, err := strconv.ParseUint(p, 10, 32) - if err == nil && parsedUID == uint64(uid) { - found = true - } - case 5: - homeDir = p - } - } - if found { - // NOTE: If the uid is present but the home directory is not - // present in the /etc/passwd entry we return an empty string. This - // is, for better or worse, what runc does. - return homeDir, nil - } - } - - return defaultHome, nil -} diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go deleted file mode 100644 index 9aee2ad07..000000000 --- a/runsc/boot/user_test.go +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright 2019 The gVisor Authors. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -import ( - "io/ioutil" - "os" - "path/filepath" - "strings" - "syscall" - "testing" - - specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" -) - -func setupTempDir() (string, error) { - tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test") - if err != nil { - return "", err - } - return tmpDir, nil -} - -func setupPasswd(contents string, perms os.FileMode) func() (string, error) { - return func() (string, error) { - tmpDir, err := setupTempDir() - if err != nil { - return "", err - } - - if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { - return "", err - } - - f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd")) - if err != nil { - return "", err - } - defer f.Close() - - _, err = f.WriteString(contents) - if err != nil { - return "", err - } - - err = f.Chmod(perms) - if err != nil { - return "", err - } - return tmpDir, nil - } -} - -// TestGetExecUserHome tests the getExecUserHome function. -func TestGetExecUserHome(t *testing.T) { - tests := map[string]struct { - uid auth.KUID - createRoot func() (string, error) - expected string - }{ - "success": { - uid: 1000, - createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666), - expected: "/home/adin", - }, - "no_passwd": { - uid: 1000, - createRoot: setupTempDir, - expected: "/", - }, - "no_perms": { - uid: 1000, - createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000), - expected: "/", - }, - "directory": { - uid: 1000, - createRoot: func() (string, error) { - tmpDir, err := setupTempDir() - if err != nil { - return "", err - } - - if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { - return "", err - } - - if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { - return "", err - } - - return tmpDir, nil - }, - expected: "/", - }, - // Currently we don't allow named pipes. 
- "named_pipe": { - uid: 1000, - createRoot: func() (string, error) { - tmpDir, err := setupTempDir() - if err != nil { - return "", err - } - - if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { - return "", err - } - - if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { - return "", err - } - - return tmpDir, nil - }, - expected: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - tmpDir, err := tc.createRoot() - if err != nil { - t.Fatalf("failed to create root dir: %v", err) - } - - sandEnd, cleanup, err := startGofer(tmpDir) - if err != nil { - t.Fatalf("failed to create gofer: %v", err) - } - defer cleanup() - - ctx := contexttest.Context(t) - conf := &Config{ - RootDir: "unused_root_dir", - Network: NetworkNone, - DisableSeccomp: true, - } - - spec := &specs.Spec{ - Root: &specs.Root{ - Path: tmpDir, - Readonly: true, - }, - // Add /proc mount as tmpfs to avoid needing a kernel. - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "tmpfs", - }, - }, - } - - mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{}) - mns, err := mntr.createMountNamespace(ctx, conf) - if err != nil { - t.Fatalf("failed to create mount namespace: %v", err) - } - ctx = fs.WithRoot(ctx, mns.Root()) - if err := mntr.mountSubmounts(ctx, conf, mns); err != nil { - t.Fatalf("failed to create mount namespace: %v", err) - } - - got, err := getExecUserHome(ctx, mns, tc.uid) - if err != nil { - t.Fatalf("failed to get user home: %v", err) - } - - if got != tc.expected { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} - -// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing. -func TestFindHomeInPasswd(t *testing.T) { - tests := map[string]struct { - uid uint32 - passwd string - expected string - def string - }{ - "empty": { - uid: 1000, - passwd: "", - expected: "/", - def: "/", - }, - "whitespace": { - uid: 1000, - passwd: " ", - expected: "/", - def: "/", - }, - "full": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh", - expected: "/home/adin", - def: "/", - }, - // For better or worse, this is how runc works. - "partial": { - uid: 1000, - passwd: "adin::1000:1111:", - expected: "", - def: "/", - }, - "multiple": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - "duplicate": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh", - expected: "/home/adin", - def: "/", - }, - "empty_lines": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def) - if err != nil { - t.Fatalf("error parsing passwd: %v", err) - } - if tc.expected != got { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go new file mode 100644 index 000000000..0b9b0b436 --- /dev/null +++ b/runsc/boot/vfs.go @@ -0,0 +1,366 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "fmt" + "path" + "sort" + "strconv" + "strings" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/devices/memdev" + "gvisor.dev/gvisor/pkg/sentry/fs" + devtmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + goferimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" + procimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" + sysimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" + tmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" + "gvisor.dev/gvisor/pkg/syserror" + + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error { + + vfsObj.MustRegisterFilesystemType(rootFsName, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserList: true, + }) + + vfsObj.MustRegisterFilesystemType(bind, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserList: true, + }) + + vfsObj.MustRegisterFilesystemType(devpts, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + + vfsObj.MustRegisterFilesystemType(devtmpfs, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + vfsObj.MustRegisterFilesystemType(proc, &procimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + vfsObj.MustRegisterFilesystemType(sysfs, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + vfsObj.MustRegisterFilesystemType(tmpfs, &tmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + vfsObj.MustRegisterFilesystemType(nonefs, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + + // Setup files in devtmpfs. 
+ if err := memdev.Register(vfsObj); err != nil {
+ return fmt.Errorf("registering memdev: %w", err)
+ }
+ a, err := devtmpfsimpl.NewAccessor(ctx, vfsObj, creds, devtmpfsimpl.Name)
+ if err != nil {
+ return fmt.Errorf("creating devtmpfs accessor: %w", err)
+ }
+ defer a.Release()
+
+ if err := a.UserspaceInit(ctx); err != nil {
+ return fmt.Errorf("initializing userspace: %w", err)
+ }
+ if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil {
+ return fmt.Errorf("creating devtmpfs files: %w", err)
+ }
+ return nil
+}
+
+func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+ if err := mntr.k.VFS().Init(); err != nil {
+ return fmt.Errorf("failed to initialize VFS: %w", err)
+ }
+ mns, err := mntr.setupVFS2(ctx, conf, procArgs)
+ if err != nil {
+ return fmt.Errorf("failed to set up VFS2: %w", err)
+ }
+ procArgs.MountNamespaceVFS2 = mns
+ return setExecutablePathVFS2(ctx, procArgs)
+}
+
+func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
+ exe := procArgs.Argv[0]
+
+ // Absolute paths can be used directly.
+ if path.IsAbs(exe) {
+ procArgs.Filename = exe
+ return nil
+ }
+
+ // Paths with a '/' in them are resolved relative to the working directory,
+ // which therefore must be absolute.
+ if strings.IndexByte(exe, '/') > 0 {
+ if !path.IsAbs(procArgs.WorkingDirectory) {
+ return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory)
+ }
+ procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
+ return nil
+ }
+
+ // Otherwise, we must look up the name in the paths, starting from the
+ // root directory.
+ root := procArgs.MountNamespaceVFS2.Root()
+ defer root.DecRef()
+
+ paths := fs.GetPath(procArgs.Envv)
+ creds := procArgs.Credentials
+
+ for _, p := range paths {
+ binPath := path.Join(p, exe)
+ pop := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(binPath),
+ FollowFinalSymlink: true,
+ }
+ opts := &vfs.OpenOptions{
+ FileExec: true,
+ Flags: linux.O_RDONLY,
+ }
+ dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts)
+ if err == syserror.ENOENT || err == syserror.EACCES {
+ // Didn't find it here.
+ continue
+ }
+ if err != nil {
+ return err
+ }
+ dentry.DecRef()
+
+ procArgs.Filename = binPath
+ return nil
+ }
+
+ return fmt.Errorf("executable %q not found in $PATH=%q", exe, strings.Join(paths, ":"))
+}
+
+func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
+ log.Infof("Configuring container's file system with VFS2")
+
+ // Create context with root credentials to mount the filesystem (the current
+ // user may not be privileged enough).
+ rootProcArgs := *procArgs
+ rootProcArgs.WorkingDirectory = "/"
+ rootProcArgs.Credentials = auth.NewRootCredentials(procArgs.Credentials.UserNamespace)
+ rootProcArgs.Umask = 0022
+ rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals
+ rootCtx := rootProcArgs.NewContext(c.k)
+
+ creds := procArgs.Credentials
+ if err := registerFilesystems(rootCtx, c.k.VFS(), creds); err != nil {
+ return nil, fmt.Errorf("register filesystems: %w", err)
+ }
+
+ mns, err := c.createMountNamespaceVFS2(ctx, conf, creds)
+ if err != nil {
+ return nil, fmt.Errorf("creating mount namespace: %w", err)
+ }
+
+ rootProcArgs.MountNamespaceVFS2 = mns
+
+ // Mount submounts.
+ if err := c.mountSubmountsVFS2(rootCtx, conf, mns, creds); err != nil {
+ return nil, fmt.Errorf("mounting submounts vfs2: %w", err)
+ }
+
+ return mns, nil
+}
+
+func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
+ fd := c.fds.remove()
+ opts := strings.Join(p9MountOptionsVFS2(fd, conf.FileAccess), ",")
+
+ log.Infof("Mounting root over 9P, ioFD: %d", fd)
+ mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", rootFsName, &vfs.GetFilesystemOptions{Data: opts})
+ if err != nil {
+ return nil, fmt.Errorf("setting up mount namespace: %w", err)
+ }
+ return mns, nil
+}
+
+func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error {
+ c.prepareMountsVFS2()
+
+ for _, submount := range c.mounts {
+ log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options)
+ if err := c.mountSubmountVFS2(ctx, conf, mns, creds, &submount); err != nil {
+ return err
+ }
+ }
+
+ // TODO(gvisor.dev/issue/1487): implement mountTmp from fs.go.
+
+ return c.checkDispenser()
+}
+
+func (c *containerMounter) prepareMountsVFS2() {
+ // Sort the mounts so that we don't place children before parents.
+ sort.Slice(c.mounts, func(i, j int) bool { return len(c.mounts[i].Destination) < len(c.mounts[j].Destination) })
+}
+
+// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 version.
+func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *specs.Mount) error {
+ root := mns.Root()
+ defer root.DecRef()
+ target := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(submount.Destination),
+ }
+
+ fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, *submount)
+ if err != nil {
+ return fmt.Errorf("mountOptions failed: %w", err)
+ }
+
+ if fsName == "" {
+ // Filesystem is not supported (e.g. cgroup), just skip it.
+ return nil
+ }
+
+ if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil {
+ return err
+ }
+ log.Debugf("directory exists or made directory for submount: %s", submount.Destination)
+
+ opts := &vfs.MountOptions{
+ GetFilesystemOptions: vfs.GetFilesystemOptions{
+ Data: strings.Join(options, ","),
+ },
+ InternalMount: true,
+ }
+
+ // All writes go to upper, be paranoid and make lower readonly.
+ opts.ReadOnly = useOverlay
+
+ if err := c.k.VFS().MountAt(ctx, creds, "", target, submount.Type, opts); err != nil {
+ return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
+ }
+ log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts)
+ return nil
+}
+
+// getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
+// used for mounts.
+func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Mount) (string, []string, bool, error) {
+ var (
+ fsName string
+ opts []string
+ useOverlay bool
+ )
+
+ switch m.Type {
+ case devpts, devtmpfs, proc, sysfs:
+ fsName = m.Type
+ case nonefs:
+ fsName = sysfs
+ case tmpfs:
+ fsName = m.Type
+
+ var err error
+ opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+ if err != nil {
+ return "", nil, false, err
+ }
+
+ case bind:
+ fd := c.fds.remove()
+ fsName = "9p"
+ opts = p9MountOptionsVFS2(fd, c.getMountAccessType(m))
+ // If configured, add overlay to all writable mounts.
+ useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
+
+ default:
+ log.Warningf("ignoring unknown filesystem type %q", m.Type)
+ }
+ return fsName, opts, useOverlay, nil
+}
+
+// p9MountOptionsVFS2 creates a slice of options for a p9 mount.
+// TODO(gvisor.dev/issue/1200): Remove this version in favor of the one in
+// fs.go when privateunixsocket lands.
+func p9MountOptionsVFS2(fd int, fa FileAccessType) []string {
+ opts := []string{
+ "trans=fd",
+ "rfdno=" + strconv.Itoa(fd),
+ "wfdno=" + strconv.Itoa(fd),
+ }
+ if fa == FileAccessShared {
+ opts = append(opts, "cache=remote_revalidating")
+ }
+ return opts
+}
+
+func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error {
+ target := &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(currentPath),
+ }
+
+ _, err := c.k.VFS().StatAt(ctx, creds, target, &vfs.StatOptions{})
+ switch {
+ case err == syserror.ENOENT:
+ if err := c.makeSyntheticMount(ctx, path.Dir(currentPath), root, creds); err != nil {
+ return err
+ }
+
+ mkdirOpts := &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}
+ if err := c.k.VFS().MkdirAt(ctx, creds, target, mkdirOpts); err != nil {
+ return fmt.Errorf("failed to makedir for mount %+v: %w", target, err)
+ }
+ return nil
+
+ case err != nil:
+ return fmt.Errorf("stat failed for mount %+v: %w", target, err)
+
+ default:
+ return nil
+ }
+}
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD index d6165f9e5..d4c7bdfbb 100644
--- a/runsc/cgroup/BUILD
+++ b/runsc/cgroup/BUILD
@@ -1,11 +1,10 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
 package(licenses = ["notice"])
 go_library(
 name = "cgroup",
 srcs = ["cgroup.go"],
- importpath = "gvisor.dev/gvisor/runsc/cgroup",
 visibility = ["//:sandbox"],
 deps = [
 "//pkg/log",
@@ -19,6 +18,6 @@
 name = "cgroup_test",
 size = "small",
 srcs = ["cgroup_test.go"],
- embed = [":cgroup"],
+ library = ":cgroup",
 tags = ["local"],
 )
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index ab3a25b9b..fa40ee509 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -45,13 +45,13 @@ var controllers = map[string]controller{
 "memory": &memory{},
 "net_cls": &networkClass{},
 "net_prio": &networkPrio{},
+ "pids": &pids{},
 // These controllers either don't have anything in the OCI spec or are
- // irrevalant for a sandbox, e.g. pids.
+ // irrelevant for a sandbox.
 "devices": &noop{},
 "freezer": &noop{},
 "perf_event": &noop{},
- "pids": &noop{},
 "systemd": &noop{},
 }
@@ -101,6 +109,14 @@ func getValue(path, name string) (string, error) {
 return string(out), nil
 }
+func getInt(path, name string) (int, error) {
+ s, err := getValue(path, name)
+ if err != nil {
+ return 0, err
+ }
+ return strconv.Atoi(strings.TrimSpace(s))
+}
+
 // fillFromAncestor sets the value of a cgroup file from the first ancestor
 // that has content. It does nothing if the file in 'path' has already been set.
 func fillFromAncestor(path string) (string, error) {
@@ -323,6 +331,22 @@ func (c *Cgroup) Join() (func(), error) {
 return undo, nil
 }
+// CPUQuota returns the CFS quota as a fraction of the period, or -1 if it is
+// not set.
+func (c *Cgroup) CPUQuota() (float64, error) {
+ path := c.makePath("cpu")
+ quota, err := getInt(path, "cpu.cfs_quota_us")
+ if err != nil {
+ return -1, err
+ }
+ period, err := getInt(path, "cpu.cfs_period_us")
+ if err != nil {
+ return -1, err
+ }
+ if quota <= 0 || period <= 0 {
+ return -1, nil
+ }
+ return float64(quota) / float64(period), nil
+}
+
 // NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'.
 func (c *Cgroup) NumCPU() (int, error) {
 path := c.makePath("cpuset")
@@ -501,3 +525,13 @@ func (*networkPrio) set(spec *specs.LinuxResources, path string) error {
 }
 return nil
 }
+
+type pids struct{}
+
+func (*pids) set(spec *specs.LinuxResources, path string) error {
+ if spec.Pids == nil {
+ return nil
+ }
+ val := strconv.FormatInt(spec.Pids.Limit, 10)
+ return setValue(path, "pids.max", val)
+}
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 250845ad7..af3538ef0 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
 package(licenses = ["notice"])
@@ -31,10 +31,10 @@ go_library(
 "spec.go",
 "start.go",
 "state.go",
+ "statefile.go",
 "syscalls.go",
 "wait.go",
 ],
- importpath = "gvisor.dev/gvisor/runsc/cmd",
 visibility = [
 "//runsc:__subpackages__",
 ],
@@ -44,12 +44,16 @@ go_library(
 "//pkg/sentry/control",
 "//pkg/sentry/kernel",
 "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/platform",
+ "//pkg/state",
+ "//pkg/state/statefile",
+ "//pkg/sync",
 "//pkg/unet",
 "//pkg/urpc",
 "//runsc/boot",
- "//runsc/boot/platforms",
 "//runsc/console",
 "//runsc/container",
+ "//runsc/flag",
 "//runsc/fsgofer",
 "//runsc/fsgofer/filter",
 "//runsc/specutils",
@@ -72,17 +76,17 @@ go_test(
 data = [
 "//runsc",
 ],
- embed = [":cmd"],
+ library = ":cmd",
 deps = [
 "//pkg/abi/linux",
 "//pkg/log",
 "//pkg/sentry/control",
 "//pkg/sentry/kernel/auth",
+ "//pkg/test/testutil",
 "//pkg/urpc",
 "//runsc/boot",
 "//runsc/container",
 "//runsc/specutils",
- "//runsc/testutil",
 "@com_github_google_go-cmp//cmp:go_default_library",
 "@com_github_google_go-cmp//cmp/cmpopts:go_default_library",
 "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index b40fded5b..4c2ac6ff0 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -21,12 +21,13 @@ import (
 "strings"
 "syscall"
- "flag"
 "github.com/google/subcommands"
 specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
 "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
 "gvisor.dev/gvisor/runsc/boot"
- "gvisor.dev/gvisor/runsc/boot/platforms"
+ "gvisor.dev/gvisor/runsc/flag"
 "gvisor.dev/gvisor/runsc/specutils"
 )
@@ -82,8 +83,13 @@ type Boot struct {
 // sandbox (e.g.
gofer) and sent through this FD.
 mountsFD int
- // pidns is set if the sanadbox is in its own pid namespace.
+ // pidns is set if the sandbox is in its own pid namespace.
 pidns bool
+
+ // attached is set to true to kill the sandbox process when the parent process
+ // terminates. This flag is set when the command execve's itself because
+ // parent death signal doesn't propagate through execve when uid/gid changes.
+ attached bool
 }
 // Name implements subcommands.Command.Name.
@@ -118,6 +124,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
 f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
 f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
 f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
+ f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates")
 }
 // Execute implements subcommands.Command.Execute. It starts a sandbox in a
@@ -133,29 +140,32 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 conf := args[0].(*boot.Config)
+ if b.attached {
+ // Ensure this process is killed after parent process terminates when
+ // attached mode is enabled. In the unfortunate event that the parent
+ // terminates before this point, this process leaks.
+ if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil {
+ Fatalf("error setting parent death signal: %v", err)
+ }
+ }
+
 if b.setUpRoot {
 if err := setUpChroot(b.pidns); err != nil {
 Fatalf("error setting up chroot: %v", err)
 }
- if !b.applyCaps {
- // Remove --setup-root arg to call myself.
- var args []string
- for _, arg := range os.Args {
- if !strings.Contains(arg, "setup-root") {
- args = append(args, arg)
- }
- }
- if !conf.Rootless {
- // Note that we've already read the spec from the spec FD, and
- // we will read it again after the exec call. This works
- // because the ReadSpecFromFile function seeks to the beginning
- // of the file before reading.
- if err := callSelfAsNobody(args); err != nil {
- Fatalf("%v", err)
- }
- panic("callSelfAsNobody must never return success")
+ if !b.applyCaps && !conf.Rootless {
+ // Remove --setup-root arg to call myself. It has already been done.
+ args := prepareArgs(b.attached, "setup-root")
+
+ // Note that we've already read the spec from the spec FD, and
+ // we will read it again after the exec call. This works
+ // because the ReadSpecFromFile function seeks to the beginning
+ // of the file before reading.
+ if err := callSelfAsNobody(args); err != nil {
+ Fatalf("%v", err)
 }
+ panic("callSelfAsNobody must never return success")
 }
 }
@@ -173,7 +183,12 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 if caps == nil {
 caps = &specs.LinuxCapabilities{}
 }
- if conf.Platform == platforms.Ptrace {
+
+ gPlatform, err := platform.Lookup(conf.Platform)
+ if err != nil {
+ Fatalf("loading platform: %v", err)
+ }
+ if gPlatform.Requirements().RequiresCapSysPtrace {
 // Ptrace platform requires extra capabilities.
 const c = "CAP_SYS_PTRACE"
 caps.Bounding = append(caps.Bounding, c)
@@ -181,13 +196,9 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 caps.Permitted = append(caps.Permitted, c)
 }
- // Remove --apply-caps arg to call myself.
- var args []string
- for _, arg := range os.Args {
- if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") {
- args = append(args, arg)
- }
- }
+ // Remove --apply-caps and --setup-root args to call myself. Both have
+ // already been done.
+ args := prepareArgs(b.attached, "setup-root", "apply-caps")
 // Note that we've already read the spec from the spec FD, and
 // we will read it again after the exec call. This works
@@ -258,3 +269,22 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 l.Destroy()
 return subcommands.ExitSuccess
 }
+
+func prepareArgs(attached bool, exclude ...string) []string {
+ var args []string
+ for _, arg := range os.Args {
+ for _, excl := range exclude {
+ if strings.Contains(arg, excl) {
+ goto skip
+ }
+ }
+ args = append(args, arg)
+ if attached && arg == "boot" {
+ // Strategically place "--attached" after the command. This is needed
+ // to ensure the new process is killed when the parent process terminates.
+ args = append(args, "--attached")
+ }
+ skip:
+ }
+ return args
+}
diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go index 0c27f7313..a84067112 100644
--- a/runsc/cmd/capability_test.go
+++ b/runsc/cmd/capability_test.go
@@ -23,10 +23,10 @@ import (
 specs "github.com/opencontainers/runtime-spec/specs-go"
 "github.com/syndtr/gocapability/capability"
 "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/test/testutil"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
 "gvisor.dev/gvisor/runsc/specutils"
- "gvisor.dev/gvisor/runsc/testutil"
 )
 func init() {
@@ -85,21 +85,20 @@ func TestCapabilities(t *testing.T) {
 Inheritable: caps,
 }
- conf := testutil.TestConfig()
+ conf := testutil.TestConfig(t)
 // Use --network=host to make sandbox use spec's capabilities.
 conf.Network = boot.NetworkHost
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 if err != nil {
 t.Fatalf("error setting up container: %v", err)
 }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ defer cleanup()
 // Create and start the container.
 args := container.Args{
- ID: testutil.UniqueContainerID(),
+ ID: testutil.RandomContainerID(),
 Spec: spec,
 BundleDir: bundleDir,
 }
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go index d8b3a8573..8a29e521e 100644
--- a/runsc/cmd/checkpoint.go
+++ b/runsc/cmd/checkpoint.go
@@ -20,11 +20,11 @@ import (
 "path/filepath"
 "syscall"
- "flag"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/pkg/log"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 "gvisor.dev/gvisor/runsc/specutils"
 )
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go index b5a0ce17d..189244765 100644
--- a/runsc/cmd/chroot.go
+++ b/runsc/cmd/chroot.go
@@ -50,7 +50,7 @@ func pivotRoot(root string) error {
 // new_root, so after umounting the old_root, we will see only
 // the new_root in "/".
if err := syscall.PivotRoot(".", "."); err != nil { - return fmt.Errorf("error changing root filesystem: %v", err) + return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err) } if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil { diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go index a4e3071b3..910e97577 100644 --- a/runsc/cmd/create.go +++ b/runsc/cmd/create.go @@ -16,10 +16,11 @@ package cmd import ( "context" - "flag" + "github.com/google/subcommands" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/flag" "gvisor.dev/gvisor/runsc/specutils" ) diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index 7313e473f..b5de2588b 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -22,26 +22,30 @@ import ( "syscall" "time" - "flag" "github.com/google/subcommands" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/flag" ) // Debug implements subcommands.Command for the "debug" command. type Debug struct { - pid int - stacks bool - signal int - profileHeap string - profileCPU string - profileDelay int - trace string - strace string - logLevel string - logPackets string + pid int + stacks bool + signal int + profileHeap string + profileCPU string + profileGoroutine string + profileBlock string + profileMutex string + trace string + strace string + logLevel string + logPackets string + duration time.Duration + ps bool } // Name implements subcommands.Command. @@ -65,12 +69,16 @@ func (d *Debug) SetFlags(f *flag.FlagSet) { f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log") f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.") f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.") - f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile") + f.StringVar(&d.profileGoroutine, "profile-goroutine", "", "writes goroutine profile to the given file.") + f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.") + f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.") + f.DurationVar(&d.duration, "duration", time.Second, "amount of time to wait for CPU and trace profiles") f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.") f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox") f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all`) f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).") f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.") + f.BoolVar(&d.ps, "ps", false, "lists processes") } // Execute implements subcommands.Command.Execute. 
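Note: the profile flags added in the SetFlags hunk above (-profile-goroutine, -profile-block, -profile-mutex) surface Go's standard runtime profiles, and -duration replaces the old integer -profile-delay with a typed time.Duration. For reference, a minimal self-contained sketch of how those same profile types are produced with the standard library's runtime/pprof package; the output file names are illustrative, and runsc itself collects them through the Sandbox control RPCs used in the next hunk rather than in-process:

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

func main() {
	// Block and mutex profiling are disabled by default; enable them
	// before any samples can be collected.
	runtime.SetBlockProfileRate(1)     // sample every blocking event
	runtime.SetMutexProfileFraction(1) // sample every contention event

	for _, name := range []string{"goroutine", "block", "mutex"} {
		f, err := os.Create(name + ".prof") // illustrative file names
		if err != nil {
			panic(err)
		}
		// Lookup returns the named runtime profile; WriteTo with
		// debug=0 serializes it in the binary pprof format.
		if err := pprof.Lookup(name).WriteTo(f, 0); err != nil {
			panic(err)
		}
		f.Close()
	}
}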
@@ -145,6 +153,42 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 }
 log.Infof("Heap profile written to %q", d.profileHeap)
 }
+ if d.profileGoroutine != "" {
+ f, err := os.Create(d.profileGoroutine)
+ if err != nil {
+ return Errorf(err.Error())
+ }
+ defer f.Close()
+
+ if err := c.Sandbox.GoroutineProfile(f); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Goroutine profile written to %q", d.profileGoroutine)
+ }
+ if d.profileBlock != "" {
+ f, err := os.Create(d.profileBlock)
+ if err != nil {
+ return Errorf(err.Error())
+ }
+ defer f.Close()
+
+ if err := c.Sandbox.BlockProfile(f); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Block profile written to %q", d.profileBlock)
+ }
+ if d.profileMutex != "" {
+ f, err := os.Create(d.profileMutex)
+ if err != nil {
+ return Errorf(err.Error())
+ }
+ defer f.Close()
+
+ if err := c.Sandbox.MutexProfile(f); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Mutex profile written to %q", d.profileMutex)
+ }
 delay := false
 if d.profileCPU != "" {
@@ -163,7 +207,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 if err := c.Sandbox.StartCPUProfile(f); err != nil {
 return Errorf(err.Error())
 }
- log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU)
+ log.Infof("CPU profile started for %v, writing to %q", d.duration, d.profileCPU)
 }
 if d.trace != "" {
 delay = true
@@ -181,8 +225,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 if err := c.Sandbox.StartTrace(f); err != nil {
 return Errorf(err.Error())
 }
- log.Infof("Tracing started for %d sec, writing to %q", d.profileDelay, d.trace)
-
+ log.Infof("Tracing started for %v, writing to %q", d.duration, d.trace)
 }
 if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
@@ -241,9 +284,20 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 }
 log.Infof("Logging options changed")
 }
+ if d.ps {
+ pList, err := c.Processes()
+ if err != nil {
+ Fatalf("getting processes for container: %v", err)
+ }
+ o, err := control.ProcessListToJSON(pList)
+ if err != nil {
+ Fatalf("generating JSON: %v", err)
+ }
+ log.Infof("%s", o)
+ }
 if delay {
- time.Sleep(time.Duration(d.profileDelay) * time.Second)
+ time.Sleep(d.duration)
 }
 return subcommands.ExitSuccess
diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go index 30d8164b1..0e4863f50 100644 --- a/runsc/cmd/delete.go +++ b/runsc/cmd/delete.go
@@ -19,11 +19,11 @@ import (
 "fmt"
 "os"
- "flag"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/pkg/log"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 // Delete implements subcommands.Command for the "delete" command.
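Note: the Execute hunk above follows a start/wait/stop shape for the CPU profile and the execution trace: start the collector, sleep for -duration, then stop everything on the way out. A compact sketch of the same pattern, assuming in-process collection with the standard library rather than the Sandbox.StartCPUProfile/StartTrace RPCs the command actually calls:

package main

import (
	"os"
	"runtime/pprof"
	"runtime/trace"
	"time"
)

// profileFor runs a CPU profile and an execution trace for a fixed
// window, mirroring the debug command's -duration behavior.
func profileFor(d time.Duration, cpuPath, tracePath string) error {
	cpuFile, err := os.Create(cpuPath)
	if err != nil {
		return err
	}
	defer cpuFile.Close()
	traceFile, err := os.Create(tracePath)
	if err != nil {
		return err
	}
	defer traceFile.Close()

	if err := pprof.StartCPUProfile(cpuFile); err != nil {
		return err
	}
	defer pprof.StopCPUProfile()
	if err := trace.Start(traceFile); err != nil {
		return err
	}
	defer trace.Stop()

	time.Sleep(d) // the collection window, i.e. -duration
	return nil
}

func main() {
	if err := profileFor(5*time.Second, "cpu.prof", "trace.out"); err != nil {
		panic(err)
	}
}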
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index 9a8a49054..b184bd402 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -27,12 +27,12 @@ import ( "strings" "syscall" - "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/flag" "gvisor.dev/gvisor/runsc/specutils" ) diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go index 3972e9224..51f6a98ed 100644 --- a/runsc/cmd/events.go +++ b/runsc/cmd/events.go @@ -20,11 +20,11 @@ import ( "os" "time" - "flag" "github.com/google/subcommands" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/flag" ) // Events implements subcommands.Command for the "events" command. diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index d1e99243b..d9a94903e 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -27,7 +27,6 @@ import ( "syscall" "time" - "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/log" @@ -37,6 +36,7 @@ import ( "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/console" "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/flag" "gvisor.dev/gvisor/runsc/specutils" ) diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 4831210c0..28f0d54b9 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -21,17 +21,17 @@ import ( "os" "path/filepath" "strings" - "sync" "syscall" - "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/flag" "gvisor.dev/gvisor/runsc/fsgofer" "gvisor.dev/gvisor/runsc/fsgofer/filter" "gvisor.dev/gvisor/runsc/specutils" @@ -272,9 +272,8 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error { root := spec.Root.Path if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - // FIXME: runsc can't be re-executed without - // /proc, so we create a tmpfs mount, mount ./proc and ./root - // there, then move this mount to the root and after + // runsc can't be re-executed without /proc, so we create a tmpfs mount, + // mount ./proc and ./root there, then move this mount to the root and after // setCapsAndCallSelf, runsc will chroot into /root. // // We need a directory to construct a new root and we know that @@ -335,7 +334,7 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error { if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { if err := pivotRoot("/proc"); err != nil { - Fatalf("faild to change the root file system: %v", err) + Fatalf("failed to change the root file system: %v", err) } if err := os.Chdir("/"); err != nil { Fatalf("failed to change working directory") diff --git a/runsc/cmd/help.go b/runsc/cmd/help.go index ff4f901cb..c7d210140 100644 --- a/runsc/cmd/help.go +++ b/runsc/cmd/help.go @@ -1,4 +1,4 @@ -// Copyright 2018 Google LLC +// Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ import ( "context" "fmt" - "flag" "github.com/google/subcommands" + "gvisor.dev/gvisor/runsc/flag" ) // NewHelp returns a help command for the given commander. 
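Note: a pattern repeated across nearly every file in this change is replacing the bare "flag" import with gvisor.dev/gvisor/runsc/flag, funneling all commands through a single flag package. A minimal sketch of what such a wrapper can look like, assuming it simply re-exports the standard library's flag package; because FlagSet is a type alias rather than a new type, the f *flag.FlagSet parameters in the SetFlags methods above compile unchanged:

package flag

import "flag"

// FlagSet aliases the standard library type, so call sites that take a
// *flag.FlagSet keep working against this package.
type FlagSet = flag.FlagSet

// Re-export the package-level helpers the commands use.
var (
	CommandLine = flag.CommandLine
	NewFlagSet  = flag.NewFlagSet
	Parse       = flag.Parse
)

// Re-export the error-handling modes used when constructing flag sets.
const (
	ContinueOnError = flag.ContinueOnError
	ExitOnError     = flag.ExitOnError
	PanicOnError    = flag.PanicOnError
)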
diff --git a/runsc/cmd/install.go b/runsc/cmd/install.go index 441c1db0d..2e223e3be 100644 --- a/runsc/cmd/install.go +++ b/runsc/cmd/install.go
@@ -23,8 +23,8 @@ import (
 "os"
 "path"
- "flag"
 "github.com/google/subcommands"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 // Install implements subcommands.Command.
diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go index 6c1f197a6..8282ea0e0 100644 --- a/runsc/cmd/kill.go +++ b/runsc/cmd/kill.go
@@ -21,11 +21,11 @@ import (
 "strings"
 "syscall"
- "flag"
 "github.com/google/subcommands"
 "golang.org/x/sys/unix"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 // Kill implements subcommands.Command for the "kill" command.
diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go index dd2d99a6b..d8d906fe3 100644 --- a/runsc/cmd/list.go +++ b/runsc/cmd/list.go
@@ -22,11 +22,11 @@ import (
 "text/tabwriter"
 "time"
- "flag"
 "github.com/google/subcommands"
 specs "github.com/opencontainers/runtime-spec/specs-go"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 // List implements subcommands.Command for the "list" command.
diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go index 9c0e92001..6f95a9837 100644 --- a/runsc/cmd/pause.go +++ b/runsc/cmd/pause.go
@@ -17,10 +17,10 @@ package cmd
 import (
 "context"
- "flag"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 // Pause implements subcommands.Command for the "pause" command.
diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go index 45c644f3f..7fb8041af 100644 --- a/runsc/cmd/ps.go +++ b/runsc/cmd/ps.go
@@ -18,11 +18,11 @@ import (
 "context"
 "fmt"
- "flag"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/pkg/sentry/control"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 // PS implements subcommands.Command for the "ps" command.
diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go index 7be60cd7d..72584b326 100644 --- a/runsc/cmd/restore.go +++ b/runsc/cmd/restore.go
@@ -19,10 +19,10 @@ import (
 "path/filepath"
 "syscall"
- "flag"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 "gvisor.dev/gvisor/runsc/specutils"
 )
diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go index b2df5c640..61a55a554 100644 --- a/runsc/cmd/resume.go +++ b/runsc/cmd/resume.go
@@ -17,10 +17,10 @@ package cmd
 import (
 "context"
- "flag"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 // Resume implements subcommands.Command for the "resume" command.
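Note: every command touched here follows the google/subcommands contract: a type implementing Name, Synopsis, Usage, SetFlags, and Execute that the main binary registers and dispatches. A hedged sketch of that wiring; the commented cmd.Resume registration is hypothetical shorthand for how a main package would register each command, and flag.Parse assumes the wrapper sketched earlier re-exports it:

package main

import (
	"context"
	"os"

	"github.com/google/subcommands"
	"gvisor.dev/gvisor/runsc/flag"
)

func main() {
	// Built-in helpers provided by the subcommands package.
	subcommands.Register(subcommands.HelpCommand(), "")
	subcommands.Register(subcommands.FlagsCommand(), "")
	// Each command type is registered the same way, e.g.:
	//   subcommands.Register(new(cmd.Resume), "")

	flag.Parse()
	// Execute dispatches to the registered command named in os.Args.
	os.Exit(int(subcommands.Execute(context.Background())))
}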
diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go index 33f4bc12b..cf41581ad 100644 --- a/runsc/cmd/run.go +++ b/runsc/cmd/run.go
@@ -18,10 +18,10 @@ import (
 "context"
 "syscall"
- "flag"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 "gvisor.dev/gvisor/runsc/specutils"
 )
diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go index 344da13ba..8e2b36e85 100644 --- a/runsc/cmd/spec.go +++ b/runsc/cmd/spec.go
@@ -20,8 +20,8 @@ import (
 "os"
 "path/filepath"
- "flag"
 "github.com/google/subcommands"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 var specTemplate = []byte(`{
diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go index de2115dff..0205fd9f7 100644 --- a/runsc/cmd/start.go +++ b/runsc/cmd/start.go
@@ -16,10 +16,11 @@ package cmd
 import (
 "context"
- "flag"
+
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 // Start implements subcommands.Command for the "start" command.
diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go index e9f41cbd8..cf2413deb 100644 --- a/runsc/cmd/state.go +++ b/runsc/cmd/state.go
@@ -19,11 +19,11 @@ import (
 "encoding/json"
 "os"
- "flag"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/pkg/log"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 // State implements subcommands.Command for the "state" command.
diff --git a/runsc/cmd/statefile.go b/runsc/cmd/statefile.go new file mode 100644 index 000000000..e6f1907da --- /dev/null +++ b/runsc/cmd/statefile.go
@@ -0,0 +1,143 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+	"context"
+	"fmt"
+	"os"
+
+	"github.com/google/subcommands"
+	"gvisor.dev/gvisor/pkg/state"
+	"gvisor.dev/gvisor/pkg/state/statefile"
+	"gvisor.dev/gvisor/runsc/flag"
+)
+
+// Statefile implements subcommands.Command for the "statefile" command.
+type Statefile struct {
+	list   bool
+	get    string
+	key    string
+	output string
+	html   bool
+}
+
+// Name implements subcommands.Command.
+func (*Statefile) Name() string {
+	return "statefile"
+}
+
+// Synopsis implements subcommands.Command.
+func (*Statefile) Synopsis() string {
+	return "shows information about a statefile"
+}
+
+// Usage implements subcommands.Command.
+func (*Statefile) Usage() string {
+	return `statefile [flags] <statefile>`
+}
+
+// SetFlags implements subcommands.Command.
+func (s *Statefile) SetFlags(f *flag.FlagSet) {
+	f.BoolVar(&s.list, "list", false, "lists the metadata in the statefile.")
+	f.StringVar(&s.get, "get", "", "extracts the given metadata key.")
+	f.StringVar(&s.key, "key", "", "the integrity key for the file.")
+	f.StringVar(&s.output, "output", "", "target to write the result.")
+	f.BoolVar(&s.html, "html", false, "outputs in HTML format.")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (s *Statefile) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + // Check arguments. + if s.list && s.get != "" { + Fatalf("error: can't specify -list and -get simultaneously.") + } + + // Setup output. + var output = os.Stdout // Default. + if s.output != "" { + f, err := os.OpenFile(s.output, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644) + if err != nil { + Fatalf("error opening output: %v", err) + } + defer func() { + if err := f.Close(); err != nil { + Fatalf("error flushing output: %v", err) + } + }() + output = f + } + + // Open the file. + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + input, err := os.Open(f.Arg(0)) + if err != nil { + Fatalf("error opening input: %v\n", err) + } + + if s.html { + fmt.Fprintf(output, "<html><body>\n") + defer fmt.Fprintf(output, "</body></html>\n") + } + + // Dump the full file? + if !s.list && s.get == "" { + var key []byte + if s.key != "" { + key = []byte(s.key) + } + rc, _, err := statefile.NewReader(input, key) + if err != nil { + Fatalf("error parsing statefile: %v", err) + } + if err := state.PrettyPrint(output, rc, s.html); err != nil { + Fatalf("error printing state: %v", err) + } + return subcommands.ExitSuccess + } + + // Load just the metadata. + metadata, err := statefile.MetadataUnsafe(input) + if err != nil { + Fatalf("error reading metadata: %v", err) + } + + // Is it a single key? + if s.get != "" { + val, ok := metadata[s.get] + if !ok { + Fatalf("metadata key %s: not found", s.get) + } + fmt.Fprintf(output, "%s\n", val) + return subcommands.ExitSuccess + } + + // List all keys. + if s.html { + fmt.Fprintf(output, " <ul>\n") + defer fmt.Fprintf(output, " </ul>\n") + } + for key := range metadata { + if s.html { + fmt.Fprintf(output, " <li>%s</li>\n", key) + } else { + fmt.Fprintf(output, "%s\n", key) + } + } + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go index fb6c1ab29..7072547be 100644 --- a/runsc/cmd/syscalls.go +++ b/runsc/cmd/syscalls.go @@ -25,9 +25,9 @@ import ( "strconv" "text/tabwriter" - "flag" "github.com/google/subcommands" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/runsc/flag" ) // Syscalls implements subcommands.Command for the "syscalls" command. 
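Note: the statefile command above exercises two entry points of pkg/state/statefile: MetadataUnsafe, which reads the metadata map without an integrity key, and NewReader plus state.PrettyPrint, which dump the full contents. A condensed sketch of that flow using only the signatures visible in the hunk above; the input path is hypothetical, and rewinding the file between the two calls is an assumption (the command itself performs only one of these operations per invocation):

package main

import (
	"fmt"
	"io"
	"os"

	"gvisor.dev/gvisor/pkg/state"
	"gvisor.dev/gvisor/pkg/state/statefile"
)

func main() {
	input, err := os.Open("checkpoint.img") // hypothetical statefile path
	if err != nil {
		panic(err)
	}
	defer input.Close()

	// List the metadata keys; no integrity key is needed for this.
	metadata, err := statefile.MetadataUnsafe(input)
	if err != nil {
		panic(err)
	}
	for key := range metadata {
		fmt.Println(key)
	}

	// Rewind and dump the full state. If the file was written with an
	// integrity key, the same key must be passed instead of nil.
	if _, err := input.Seek(0, io.SeekStart); err != nil {
		panic(err)
	}
	rc, _, err := statefile.NewReader(input, nil)
	if err != nil {
		panic(err)
	}
	if err := state.PrettyPrint(os.Stdout, rc, false /* html */); err != nil {
		panic(err)
	}
}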
diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go index 046489687..29c0a15f0 100644 --- a/runsc/cmd/wait.go +++ b/runsc/cmd/wait.go
@@ -20,10 +20,10 @@ import (
 "os"
 "syscall"
- "flag"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/flag"
 )
 const (
diff --git a/runsc/console/BUILD b/runsc/console/BUILD index e623c1a0f..06924bccd 100644 --- a/runsc/console/BUILD +++ b/runsc/console/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
 package(licenses = ["notice"])
@@ -7,7 +7,6 @@ go_library(
 srcs = [
 "console.go",
 ],
- importpath = "gvisor.dev/gvisor/runsc/console",
 visibility = [
 "//runsc:__subpackages__",
 ],
diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 26d1cd5ab..331b8e866 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
 package(licenses = ["notice"])
@@ -7,9 +7,9 @@ go_library(
 srcs = [
 "container.go",
 "hook.go",
+ "state_file.go",
 "status.go",
 ],
- importpath = "gvisor.dev/gvisor/runsc/container",
 visibility = [
 "//runsc:__subpackages__",
 "//test:__subpackages__",
@@ -17,6 +17,7 @@ go_library(
 deps = [
 "//pkg/log",
 "//pkg/sentry/control",
+ "//pkg/sync",
 "//runsc/boot",
 "//runsc/cgroup",
 "//runsc/sandbox",
@@ -29,18 +30,20 @@ go_library(
 go_test(
 name = "container_test",
- size = "medium",
+ size = "large",
 srcs = [
 "console_test.go",
+ "container_norace_test.go",
+ "container_race_test.go",
 "container_test.go",
 "multi_container_test.go",
 "shared_volume_test.go",
 ],
 data = [
 "//runsc",
- "//runsc/container/test_app",
+ "//test/cmd/test_app",
 ],
- embed = [":container"],
+ library = ":container",
 shard_count = 5,
 tags = [
 "requires-kvm",
@@ -52,12 +55,13 @@ go_test(
 "//pkg/sentry/control",
 "//pkg/sentry/kernel",
 "//pkg/sentry/kernel/auth",
+ "//pkg/sync",
+ "//pkg/test/testutil",
 "//pkg/unet",
 "//pkg/urpc",
 "//runsc/boot",
 "//runsc/boot/platforms",
 "//runsc/specutils",
- "//runsc/testutil",
 "@com_github_cenkalti_backoff//:go_default_library",
 "@com_github_kr_pty//:go_default_library",
 "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 7d67c3a75..294dca5e7 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go
@@ -20,7 +20,6 @@ import (
 "io"
 "os"
 "path/filepath"
- "sync"
 "syscall"
 "testing"
 "time"
@@ -28,9 +27,11 @@ import (
 "github.com/kr/pty"
 "golang.org/x/sys/unix"
 "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/test/testutil"
 "gvisor.dev/gvisor/pkg/unet"
 "gvisor.dev/gvisor/pkg/urpc"
- "gvisor.dev/gvisor/runsc/testutil"
 )
 // socketPath creates a path inside bundleDir and ensures that the returned
@@ -57,25 +58,26 @@ func socketPath(bundleDir string) (string, error) {
 }
 // createConsoleSocket creates a socket at the given path that will receive a
-// console fd from the sandbox. If no error occurs, it returns the server
-// socket and a cleanup function.
-func createConsoleSocket(path string) (*unet.ServerSocket, func() error, error) {
+// console fd from the sandbox. If an error occurs, t.Fatalf will be called.
+// The returned cleanup function should be deferred.
+func createConsoleSocket(t *testing.T, path string) (*unet.ServerSocket, func()) {
+	t.Helper()
 srv, err := unet.BindAndListen(path, false)
 if err != nil {
- return nil, nil, fmt.Errorf("error binding and listening to socket %q: %v", path, err)
+ t.Fatalf("error binding and listening to socket %q: %v", path, err)
 }
- cleanup := func() error {
+ cleanup := func() {
+ // Log errors; nothing can be done.
 if err := srv.Close(); err != nil {
- return fmt.Errorf("error closing socket %q: %v", path, err)
+ t.Logf("error closing socket %q: %v", path, err)
 }
 if err := os.Remove(path); err != nil {
- return fmt.Errorf("error removing socket %q: %v", path, err)
+ t.Logf("error removing socket %q: %v", path, err)
 }
- return nil
 }
- return srv, cleanup, nil
+ return srv, cleanup
 }
 // receiveConsolePTY accepts a connection on the server socket and reads fds.
@@ -117,63 +119,59 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) {
 // Test that a pty FD is sent over the console socket if one is provided.
 func TestConsoleSocket(t *testing.T) {
- for _, conf := range configs(all...) {
- t.Logf("Running test with conf: %+v", conf)
- spec := testutil.NewSpecWithArgs("true")
- rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
- if err != nil {
- t.Fatalf("error setting up container: %v", err)
- }
- defer os.RemoveAll(rootDir)
- defer os.RemoveAll(bundleDir)
+ for name, conf := range configs(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ spec := testutil.NewSpecWithArgs("true")
+ _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer cleanup()
- sock, err := socketPath(bundleDir)
- if err != nil {
- t.Fatalf("error getting socket path: %v", err)
- }
- srv, cleanup, err := createConsoleSocket(sock)
- if err != nil {
- t.Fatalf("error creating socket at %q: %v", sock, err)
- }
- defer cleanup()
-
- // Create the container and pass the socket name.
- args := Args{
- ID: testutil.UniqueContainerID(),
- Spec: spec,
- BundleDir: bundleDir,
- ConsoleSocket: sock,
- }
- c, err := New(conf, args)
- if err != nil {
- t.Fatalf("error creating container: %v", err)
- }
- defer c.Destroy()
+ sock, err := socketPath(bundleDir)
+ if err != nil {
+ t.Fatalf("error getting socket path: %v", err)
+ }
+ srv, cleanup := createConsoleSocket(t, sock)
+ defer cleanup()
+
+ // Create the container and pass the socket name.
+ args := Args{
+ ID: testutil.RandomContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: sock,
+ }
+ c, err := New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
- // Make sure we get a console PTY.
- ptyMaster, err := receiveConsolePTY(srv)
- if err != nil {
- t.Fatalf("error receiving console FD: %v", err)
- }
- ptyMaster.Close()
+ // Make sure we get a console PTY.
+ ptyMaster, err := receiveConsolePTY(srv)
+ if err != nil {
+ t.Fatalf("error receiving console FD: %v", err)
+ }
+ ptyMaster.Close()
+ })
 }
 }
 // Test that job control signals work on a console created with "exec -ti".
func TestJobControlSignalExec(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -195,7 +193,10 @@ func TestJobControlSignalExec(t *testing.T) { defer ptyMaster.Close() defer ptySlave.Close() - // Exec bash and attach a terminal. + // Exec bash and attach a terminal. Note that occasionally /bin/sh + // may be a different shell or have a different configuration (such + // as disabling interactive mode and job control). Since we want to + // explicitly test interactive mode, use /bin/bash. See b/116981926. execArgs := &control.ExecArgs{ Filename: "/bin/bash", // Don't let bash execute from profile or rc files, otherwise @@ -219,9 +220,9 @@ func TestJobControlSignalExec(t *testing.T) { // Make sure all the processes are running. expectedPL := []*control.Process{ // Root container process. - {PID: 1, Cmd: "sleep"}, + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, // Bash from exec process. - {PID: 2, Cmd: "bash"}, + {PID: 2, Cmd: "bash", Threads: []kernel.ThreadID{2}}, } if err := waitForProcessList(c, expectedPL); err != nil { t.Error(err) @@ -231,7 +232,7 @@ func TestJobControlSignalExec(t *testing.T) { ptyMaster.Write([]byte("sleep 100\n")) // Wait for it to start. Sleep's PPID is bash's PID. - expectedPL = append(expectedPL, &control.Process{PID: 3, PPID: 2, Cmd: "sleep"}) + expectedPL = append(expectedPL, &control.Process{PID: 3, PPID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{3}}) if err := waitForProcessList(c, expectedPL); err != nil { t.Error(err) } @@ -282,32 +283,28 @@ func TestJobControlSignalExec(t *testing.T) { // Test that job control signals work on a console created with "run -ti". func TestJobControlSignalRootContainer(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) // Don't let bash execute from profile or rc files, otherwise our PID // counts get messed up. spec := testutil.NewSpecWithArgs("/bin/bash", "--noprofile", "--norc") spec.Process.Terminal = true - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() sock, err := socketPath(bundleDir) if err != nil { t.Fatalf("error getting socket path: %v", err) } - srv, cleanup, err := createConsoleSocket(sock) - if err != nil { - t.Fatalf("error creating socket at %q: %v", sock, err) - } + srv, cleanup := createConsoleSocket(t, sock) defer cleanup() // Create the container and pass the socket name. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, ConsoleSocket: sock, @@ -329,13 +326,13 @@ func TestJobControlSignalRootContainer(t *testing.T) { // file. Writes after a certain point will block unless we drain the // PTY, so we must continually copy from it. 
 //
- // We log the output to stdout for debugabilitly, and also to a buffer,
+ // We log the output to stderr for debuggability, and also to a buffer,
 // since we wait on particular output from bash below. We use a custom
 // blockingBuffer which is thread-safe and also blocks on Read calls,
 // which makes this a suitable Reader for WaitUntilRead.
 ptyBuf := newBlockingBuffer()
 tee := io.TeeReader(ptyMaster, ptyBuf)
- go io.Copy(os.Stdout, tee)
+ go io.Copy(os.Stderr, tee)
 // Start the container.
 if err := c.Start(conf); err != nil {
@@ -361,19 +358,19 @@ func TestJobControlSignalRootContainer(t *testing.T) {
 // Wait for bash to start.
 expectedPL := []*control.Process{
- {PID: 1, Cmd: "bash"},
+ {PID: 1, Cmd: "bash", Threads: []kernel.ThreadID{1}},
 }
 if err := waitForProcessList(c, expectedPL); err != nil {
- t.Fatal(err)
+ t.Fatalf("error waiting for processes: %v", err)
 }
 // Execute sleep via the terminal.
 ptyMaster.Write([]byte("sleep 100\n"))
 // Wait for sleep to start.
- expectedPL = append(expectedPL, &control.Process{PID: 2, PPID: 1, Cmd: "sleep"})
+ expectedPL = append(expectedPL, &control.Process{PID: 2, PPID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{2}})
 if err := waitForProcessList(c, expectedPL); err != nil {
- t.Fatal(err)
+ t.Fatalf("error waiting for processes: %v", err)
 }
 // Reset the pty buffer, so there is less output for us to scan later.
diff --git a/runsc/container/container.go b/runsc/container/container.go index 32510d427..117ea7d7b 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go
@@ -17,13 +17,12 @@ package container
 import (
 "context"
- "encoding/json"
+ "errors"
 "fmt"
 "io/ioutil"
 "os"
 "os/exec"
 "os/signal"
- "path/filepath"
 "regexp"
 "strconv"
 "strings"
@@ -31,7 +30,6 @@ import (
 "time"
 "github.com/cenkalti/backoff"
- "github.com/gofrs/flock"
 specs "github.com/opencontainers/runtime-spec/specs-go"
 "gvisor.dev/gvisor/pkg/log"
 "gvisor.dev/gvisor/pkg/sentry/control"
@@ -41,17 +39,6 @@ import (
 "gvisor.dev/gvisor/runsc/specutils"
 )
-const (
- // metadataFilename is the name of the metadata file relative to the
- // container root directory that holds sandbox metadata.
- metadataFilename = "meta.json"
-
- // metadataLockFilename is the name of a lock file in the container
- // root directory that is used to prevent concurrent modifications to
- // the container state and metadata.
- metadataLockFilename = "meta.lock"
-)
-
 // validateID validates the container id.
 func validateID(id string) error {
 // See libcontainer/factory_linux.go.
@@ -99,11 +86,6 @@ type Container struct {
 // BundleDir is the directory containing the container bundle.
 BundleDir string `json:"bundleDir"`
- // Root is the directory containing the container metadata file. If this
- // container is the root container, Root and RootContainerDir will be the
- // same.
- Root string `json:"root"`
-
 // CreatedAt is the time the container was created.
 CreatedAt time.Time `json:"createdAt"`
@@ -121,21 +103,24 @@ type Container struct {
 // be 0 if the gofer has been killed.
 GoferPid int `json:"goferPid"`
+ // Sandbox is the sandbox this container is running in. It's set when the
+ // container is created and reset when the sandbox is destroyed.
+ Sandbox *sandbox.Sandbox `json:"sandbox"`
+
+ // Saver handles load from/save to the state file safely from multiple
+ // processes.
+ Saver StateFile `json:"saver"`
+
+ //
+ // Fields below this line are not saved in the state file and will not
+ // be preserved across commands.
+ //
+
+ // goferIsChild is set if a gofer process is a child of the current process.
 //
 // This field isn't saved to json, because only a creator of a gofer
 // process will have it as a child process.
 goferIsChild bool
-
- // Sandbox is the sandbox this container is running in. It's set when the
- // container is created and reset when the sandbox is destroyed.
- Sandbox *sandbox.Sandbox `json:"sandbox"`
-
- // RootContainerDir is the root directory containing the metadata file of the
- // sandbox root container. It's used to lock in order to serialize creating
- // and deleting this Container's metadata directory. If this container is the
- // root container, this is the same as Root.
- RootContainerDir string
 }
 // loadSandbox loads all containers that belong to the sandbox with the given
@@ -166,43 +151,35 @@ func loadSandbox(rootDir, id string) ([]*Container, error) {
 return containers, nil
 }
-// Load loads a container with the given id from a metadata file. id may be an
-// abbreviation of the full container id, in which case Load loads the
-// container to which id unambiguously refers to.
-// Returns ErrNotExist if container doesn't exist.
-func Load(rootDir, id string) (*Container, error) {
- log.Debugf("Load container %q %q", rootDir, id)
- if err := validateID(id); err != nil {
+// Load loads a container with the given id from a metadata file. partialID may
+// be an abbreviation of the full container id, in which case Load loads the
+// container to which id unambiguously refers. Returns ErrNotExist if
+// container doesn't exist.
+func Load(rootDir, partialID string) (*Container, error) {
+ log.Debugf("Load container %q %q", rootDir, partialID)
+ if err := validateID(partialID); err != nil {
 return nil, fmt.Errorf("validating id: %v", err)
 }
- cRoot, err := findContainerRoot(rootDir, id)
+ id, err := findContainerID(rootDir, partialID)
 if err != nil {
 // Preserve error so that callers can distinguish 'not found' errors.
 return nil, err
 }
- // Lock the container metadata to prevent other runsc instances from
- // writing to it while we are reading it.
- unlock, err := lockContainerMetadata(cRoot)
- if err != nil {
- return nil, err
+ state := StateFile{
+ RootDir: rootDir,
+ ID: id,
 }
- defer unlock()
+ defer state.close()
- // Read the container metadata file and create a new Container from it.
- metaFile := filepath.Join(cRoot, metadataFilename)
- metaBytes, err := ioutil.ReadFile(metaFile)
- if err != nil {
+ c := &Container{}
+ if err := state.load(c); err != nil {
 if os.IsNotExist(err) {
 // Preserve error so that callers can distinguish 'not found' errors.
 return nil, err
 }
- return nil, fmt.Errorf("reading container metadata file %q: %v", metaFile, err)
- }
- var c Container
- if err := json.Unmarshal(metaBytes, &c); err != nil {
- return nil, fmt.Errorf("unmarshaling container metadata from %q: %v", metaFile, err)
+ return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err)
 }
 // If the status is "Running" or "Created", check that the sandbox
@@ -223,57 +200,37 @@ func Load(rootDir, partialID string) (*Container, error) {
 }
 }
- return &c, nil
+ return c, nil
 }
-func findContainerRoot(rootDir, partialID string) (string, error) {
+func findContainerID(rootDir, partialID string) (string, error) {
 // Check whether the id fully specifies an existing container.
- cRoot := filepath.Join(rootDir, partialID) - if _, err := os.Stat(cRoot); err == nil { - return cRoot, nil + stateFile := buildStatePath(rootDir, partialID) + if _, err := os.Stat(stateFile); err == nil { + return partialID, nil } // Now see whether id could be an abbreviation of exactly 1 of the // container ids. If id is ambiguous (it could match more than 1 // container), it is an error. - cRoot = "" ids, err := List(rootDir) if err != nil { return "", err } + rv := "" for _, id := range ids { if strings.HasPrefix(id, partialID) { - if cRoot != "" { - return "", fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, cRoot, id) + if rv != "" { + return "", fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id) } - cRoot = id + rv = id } } - if cRoot == "" { + if rv == "" { return "", os.ErrNotExist } - log.Debugf("abbreviated id %q resolves to full id %q", partialID, cRoot) - return filepath.Join(rootDir, cRoot), nil -} - -// List returns all container ids in the given root directory. -func List(rootDir string) ([]string, error) { - log.Debugf("List containers %q", rootDir) - fs, err := ioutil.ReadDir(rootDir) - if err != nil { - return nil, fmt.Errorf("reading dir %q: %v", rootDir, err) - } - var out []string - for _, f := range fs { - // Filter out directories that do no belong to a container. - cid := f.Name() - if validateID(cid) == nil { - if _, err := os.Stat(filepath.Join(rootDir, cid, metadataFilename)); err == nil { - out = append(out, f.Name()) - } - } - } - return out, nil + log.Debugf("abbreviated id %q resolves to full id %q", partialID, rv) + return rv, nil } // Args is used to configure a new container. @@ -316,44 +273,34 @@ func New(conf *boot.Config, args Args) (*Container, error) { return nil, err } - unlockRoot, err := maybeLockRootContainer(args.Spec, conf.RootDir) - if err != nil { - return nil, err + if err := os.MkdirAll(conf.RootDir, 0711); err != nil { + return nil, fmt.Errorf("creating container root directory %q: %v", conf.RootDir, err) } - defer unlockRoot() + + c := &Container{ + ID: args.ID, + Spec: args.Spec, + ConsoleSocket: args.ConsoleSocket, + BundleDir: args.BundleDir, + Status: Creating, + CreatedAt: time.Now(), + Owner: os.Getenv("USER"), + Saver: StateFile{ + RootDir: conf.RootDir, + ID: args.ID, + }, + } + // The Cleanup object cleans up partially created containers when an error + // occurs. Any errors occurring during cleanup itself are ignored. + cu := specutils.MakeCleanup(func() { _ = c.Destroy() }) + defer cu.Clean() // Lock the container metadata file to prevent concurrent creations of // containers with the same id. - containerRoot := filepath.Join(conf.RootDir, args.ID) - unlock, err := lockContainerMetadata(containerRoot) - if err != nil { + if err := c.Saver.lockForNew(); err != nil { return nil, err } - defer unlock() - - // Check if the container already exists by looking for the metadata - // file. 
- if _, err := os.Stat(filepath.Join(containerRoot, metadataFilename)); err == nil { - return nil, fmt.Errorf("container with id %q already exists", args.ID) - } else if !os.IsNotExist(err) { - return nil, fmt.Errorf("looking for existing container in %q: %v", containerRoot, err) - } - - c := &Container{ - ID: args.ID, - Spec: args.Spec, - ConsoleSocket: args.ConsoleSocket, - BundleDir: args.BundleDir, - Root: containerRoot, - Status: Creating, - CreatedAt: time.Now(), - Owner: os.Getenv("USER"), - RootContainerDir: conf.RootDir, - } - // The Cleanup object cleans up partially created containers when an error occurs. - // Any errors occuring during cleanup itself are ignored. - cu := specutils.MakeCleanup(func() { _ = c.Destroy() }) - defer cu.Clean() + defer c.Saver.unlock() // If the metadata annotations indicate that this container should be // started in an existing sandbox, we must do so. The metadata will @@ -431,7 +378,7 @@ func New(conf *boot.Config, args Args) (*Container, error) { c.changeStatus(Created) // Save the metadata file. - if err := c.save(); err != nil { + if err := c.saveLocked(); err != nil { return nil, err } @@ -451,17 +398,12 @@ func New(conf *boot.Config, args Args) (*Container, error) { func (c *Container) Start(conf *boot.Config) error { log.Debugf("Start container %q", c.ID) - unlockRoot, err := maybeLockRootContainer(c.Spec, c.RootContainerDir) - if err != nil { + if err := c.Saver.lock(); err != nil { return err } - defer unlockRoot() + unlock := specutils.MakeCleanup(func() { c.Saver.unlock() }) + defer unlock.Clean() - unlock, err := c.lock() - if err != nil { - return err - } - defer unlock() if err := c.requireStatus("start", Created); err != nil { return err } @@ -509,14 +451,15 @@ func (c *Container) Start(conf *boot.Config) error { } c.changeStatus(Running) - if err := c.save(); err != nil { + if err := c.saveLocked(); err != nil { return err } - // Adjust the oom_score_adj for sandbox. This must be done after - // save(). - err = adjustSandboxOOMScoreAdj(c.Sandbox, c.RootContainerDir, false) - if err != nil { + // Release lock before adjusting OOM score because the lock is acquired there. + unlock.Clean() + + // Adjust the oom_score_adj for sandbox. This must be done after saveLocked(). + if err := adjustSandboxOOMScoreAdj(c.Sandbox, c.Saver.RootDir, false); err != nil { return err } @@ -529,11 +472,10 @@ func (c *Container) Start(conf *boot.Config) error { // to restore a container from its state file. func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile string) error { log.Debugf("Restore container %q", c.ID) - unlock, err := c.lock() - if err != nil { + if err := c.Saver.lock(); err != nil { return err } - defer unlock() + defer c.Saver.unlock() if err := c.requireStatus("restore", Created); err != nil { return err @@ -551,7 +493,7 @@ func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile str return err } c.changeStatus(Running) - return c.save() + return c.saveLocked() } // Run is a helper that calls Create + Start + Wait. @@ -711,11 +653,10 @@ func (c *Container) Checkpoint(f *os.File) error { // The call only succeeds if the container's status is created or running. 
func (c *Container) Pause() error {
 log.Debugf("Pausing container %q", c.ID)
- unlock, err := c.lock()
- if err != nil {
+ if err := c.Saver.lock(); err != nil {
 return err
 }
- defer unlock()
+ defer c.Saver.unlock()
 if c.Status != Created && c.Status != Running {
 return fmt.Errorf("cannot pause container %q in state %v", c.ID, c.Status)
@@ -725,18 +666,17 @@ func (c *Container) Pause() error {
 return fmt.Errorf("pausing container: %v", err)
 }
 c.changeStatus(Paused)
- return c.save()
+ return c.saveLocked()
 }
 // Resume unpauses the container and its kernel.
 // The call only succeeds if the container's status is paused.
 func (c *Container) Resume() error {
 log.Debugf("Resuming container %q", c.ID)
- unlock, err := c.lock()
- if err != nil {
+ if err := c.Saver.lock(); err != nil {
 return err
 }
- defer unlock()
+ defer c.Saver.unlock()
 if c.Status != Paused {
 return fmt.Errorf("cannot resume container %q in state %v", c.ID, c.Status)
@@ -745,7 +685,7 @@ func (c *Container) Resume() error {
 return fmt.Errorf("resuming container: %v", err)
 }
 c.changeStatus(Running)
- return c.save()
+ return c.saveLocked()
 }
 // State returns the metadata of the container.
@@ -773,6 +713,17 @@ func (c *Container) Processes() ([]*control.Process, error) {
 func (c *Container) Destroy() error {
 log.Debugf("Destroy container %q", c.ID)
+ if err := c.Saver.lock(); err != nil {
+ return err
+ }
+ defer func() {
+ c.Saver.unlock()
+ c.Saver.close()
+ }()
+
+ // Stored for later use as stop() sets c.Sandbox to nil.
+ sb := c.Sandbox
+
 // We must perform the following cleanup steps:
 // * stop the container and gofer processes,
 // * remove the container filesystem on the host, and
@@ -782,48 +733,43 @@ func (c *Container) Destroy() error {
 // do our best to perform all of the cleanups. Hence, we keep a slice
 // of errors and return their concatenation.
 var errs []string
-
- unlock, err := maybeLockRootContainer(c.Spec, c.RootContainerDir)
- if err != nil {
- return err
- }
- defer unlock()
-
- // Stored for later use as stop() sets c.Sandbox to nil.
- sb := c.Sandbox
-
 if err := c.stop(); err != nil {
 err = fmt.Errorf("stopping container: %v", err)
 log.Warningf("%v", err)
 errs = append(errs, err.Error())
 }
- if err := os.RemoveAll(c.Root); err != nil && !os.IsNotExist(err) {
- err = fmt.Errorf("deleting container root directory %q: %v", c.Root, err)
+ if err := c.Saver.destroy(); err != nil {
+ err = fmt.Errorf("deleting container state files: %v", err)
 log.Warningf("%v", err)
 errs = append(errs, err.Error())
 }
 c.changeStatus(Stopped)
- // Adjust oom_score_adj for the sandbox. This must be done after the
- // container is stopped and the directory at c.Root is removed.
- // We must test if the sandbox is nil because Destroy should be
- // idempotent.
- if sb != nil {
- if err := adjustSandboxOOMScoreAdj(sb, c.RootContainerDir, true); err != nil {
+ // Adjust oom_score_adj for the sandbox. This must be done after the container
+ // is stopped and the directory at c.Root is removed. Adjustment can be
+ // skipped if the root container is exiting, because it brings down the entire
+ // sandbox.
+ //
+ // Use 'sb' to tell whether it has been executed before because Destroy must
+ // be idempotent.
+ if sb != nil && !isRoot(c.Spec) {
+ if err := adjustSandboxOOMScoreAdj(sb, c.Saver.RootDir, true); err != nil {
 errs = append(errs, err.Error())
 }
 }
 // "If any poststop hook fails, the runtime MUST log a warning, but the
- // remaining hooks and lifecycle continue as if the hook had succeeded" -OCI spec.
- // Based on the OCI, "The post-stop hooks MUST be called after the container is
- // deleted but before the delete operation returns"
+ // remaining hooks and lifecycle continue as if the hook had
+ // succeeded" - OCI spec.
+ //
+ // Based on the OCI, "The post-stop hooks MUST be called after the container
+ // is deleted but before the delete operation returns"
 // Run it here to:
 // 1) Conform to the OCI.
- // 2) Make sure it only runs once, because the root has been deleted, the container
- // can't be loaded again.
+ // 2) Make sure it only runs once, because the root has been deleted, the
+ // container can't be loaded again.
 if c.Spec.Hooks != nil {
 executeHooksBestEffort(c.Spec.Hooks.Poststop, c.State())
 }
@@ -834,18 +780,13 @@ func (c *Container) Destroy() error {
 return fmt.Errorf(strings.Join(errs, "\n"))
 }
-// save saves the container metadata to a file.
+// saveLocked saves the container metadata to a file.
 //
 // Precondition: container must be locked with c.Saver.lock().
-func (c *Container) save() error {
+func (c *Container) saveLocked() error {
 log.Debugf("Save container %q", c.ID)
- metaFile := filepath.Join(c.Root, metadataFilename)
- meta, err := json.Marshal(c)
- if err != nil {
- return fmt.Errorf("invalid container metadata: %v", err)
- }
- if err := ioutil.WriteFile(metaFile, meta, 0640); err != nil {
- return fmt.Errorf("writing container metadata: %v", err)
+ if err := c.Saver.saveLocked(c); err != nil {
+ return fmt.Errorf("saving container metadata: %v", err)
 }
 return nil
 }
@@ -1106,48 +1047,6 @@ func (c *Container) requireStatus(action string, statuses ...Status) error {
 return fmt.Errorf("cannot %s container %q in state %s", action, c.ID, c.Status)
 }
-// lock takes a file lock on the container metadata lock file.
-func (c *Container) lock() (func() error, error) {
- return lockContainerMetadata(filepath.Join(c.Root, c.ID))
-}
-
-// lockContainerMetadata takes a file lock on the metadata lock file in the
-// given container root directory.
-func lockContainerMetadata(containerRootDir string) (func() error, error) {
- if err := os.MkdirAll(containerRootDir, 0711); err != nil {
- return nil, fmt.Errorf("creating container root directory %q: %v", containerRootDir, err)
- }
- f := filepath.Join(containerRootDir, metadataLockFilename)
- l := flock.NewFlock(f)
- if err := l.Lock(); err != nil {
- return nil, fmt.Errorf("acquiring lock on container lock file %q: %v", f, err)
- }
- return l.Unlock, nil
-}
-
-// maybeLockRootContainer locks the sandbox root container. It is used to
-// prevent races to create and delete child container sandboxes.
-func maybeLockRootContainer(spec *specs.Spec, rootDir string) (func() error, error) {
- if isRoot(spec) {
- return func() error { return nil }, nil
- }
-
- sbid, ok := specutils.SandboxID(spec)
- if !ok {
- return nil, fmt.Errorf("no sandbox ID found when locking root container")
- }
- sb, err := Load(rootDir, sbid)
- if err != nil {
- return nil, err
- }
-
- unlock, err := sb.lock()
- if err != nil {
- return nil, err
- }
- return unlock, nil
-}
-
 func isRoot(spec *specs.Spec) bool {
 return specutils.SpecContainerType(spec) != specutils.ContainerTypeContainer
 }
@@ -1168,22 +1067,19 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error {
 // adjustGoferOOMScoreAdj sets the oom_score_adj for the container's gofer.
func (c *Container) adjustGoferOOMScoreAdj() error {
- if c.GoferPid != 0 && c.Spec.Process.OOMScoreAdj != nil {
- if err := setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj); err != nil {
- return fmt.Errorf("setting gofer oom_score_adj for container %q: %v", c.ID, err)
- }
+ if c.GoferPid == 0 || c.Spec.Process.OOMScoreAdj == nil {
+ return nil
 }
-
- return nil
+ return setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj)
 }
 // adjustSandboxOOMScoreAdj sets the oom_score_adj for the sandbox.
 // oom_score_adj is set to the lowest oom_score_adj among the containers
 // running in the sandbox.
 //
-// TODO(gvisor.dev/issue/512): This call could race with other containers being
+// TODO(gvisor.dev/issue/238): This call could race with other containers being
 // created at the same time and end up setting the wrong oom_score_adj to the
-// sandbox.
+// sandbox. Use rpc client to synchronize.
 func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) error {
 containers, err := loadSandbox(rootDir, s.ID)
 if err != nil {
@@ -1251,24 +1147,29 @@ func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool)
 }
 // Set the lowest of all containers oom_score_adj to the sandbox.
- if err := setOOMScoreAdj(s.Pid, lowScore); err != nil {
- return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", s.ID, err)
- }
-
- return nil
+ return setOOMScoreAdj(s.Pid, lowScore)
 }
 // setOOMScoreAdj sets oom_score_adj to the given value for the given PID.
 // /proc must be available and mounted read-write. scoreAdj should be between
-// -1000 and 1000.
+// -1000 and 1000. It's a noop if the process has already exited.
 func setOOMScoreAdj(pid int, scoreAdj int) error {
 f, err := os.OpenFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid), os.O_WRONLY, 0644)
 if err != nil {
+ // Ignore NotExist errors because it can race with process exit.
+ if os.IsNotExist(err) {
+ log.Warningf("Process (%d) not found setting oom_score_adj", pid)
+ return nil
+ }
 return err
 }
 defer f.Close()
 if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil {
- return err
+ if errors.Is(err, syscall.ESRCH) {
+ log.Warningf("Process (%d) exited while setting oom_score_adj", pid)
+ return nil
+ }
+ return fmt.Errorf("setting oom_score_adj to %d: %v", scoreAdj, err)
 }
 return nil
 }
diff --git a/runsc/container/container_norace_test.go b/runsc/container/container_norace_test.go new file mode 100644 index 000000000..838c1e20a --- /dev/null +++ b/runsc/container/container_norace_test.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !race
+
+package container
+
+// Allow both kvm and ptrace for non-race builds.
+var platformOptions = []configOption{ptrace, kvm}
diff --git a/runsc/container/container_race_test.go b/runsc/container/container_race_test.go new file mode 100644 index 000000000..9fb4c4fc0 --- /dev/null +++ b/runsc/container/container_race_test.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build race
+
+package container
+
+// Only enable ptrace with race builds.
+var platformOptions = []configOption{ptrace}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index c4c56b2e0..a1d4d3b7e 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go
@@ -26,7 +26,6 @@ import (
 "reflect"
 "strconv"
 "strings"
- "sync"
 "syscall"
 "testing"
 "time"
@@ -37,11 +36,13 @@ import (
 "gvisor.dev/gvisor/pkg/bits"
 "gvisor.dev/gvisor/pkg/log"
 "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
 "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/test/testutil"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/boot/platforms"
 "gvisor.dev/gvisor/runsc/specutils"
- "gvisor.dev/gvisor/runsc/testutil"
 )
 // waitForProcessList waits for the given process list to show up in the container.
@@ -52,8 +53,9 @@ func waitForProcessList(cont *Container, want []*control.Process) error {
 err = fmt.Errorf("error getting process data from container: %v", err)
 return &backoff.PermanentError{Err: err}
 }
- if !procListsEqual(got, want) {
- return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want))
+ if r, err := procListsEqual(got, want); !r {
+ return fmt.Errorf("container got process list: %s, want: %s: error: %v",
+ procListToString(got), procListToString(want), err)
 }
 return nil
 }
@@ -69,6 +71,7 @@ func waitForProcessCount(cont *Container, want int) error {
 return &backoff.PermanentError{Err: err}
 }
 if got := len(pss); got != want {
+ log.Infof("Waiting for process count to reach %d. Current: %d", want, got)
 return fmt.Errorf("wrong process count, got: %d, want: %d", got, want)
 }
 return nil
@@ -91,35 +94,34 @@ func blockUntilWaitable(pid int) error {
 // procListsEqual is used to check whether 2 Process lists are equal for all
 // implemented fields.
-func procListsEqual(got, want []*control.Process) bool {
+func procListsEqual(got, want []*control.Process) (bool, error) {
 if len(got) != len(want) {
- return false
+ return false, nil
 }
 for i := range got {
 pd1 := got[i]
 pd2 := want[i]
- // Zero out unimplemented and timing dependant fields.
+ // Zero out timing dependent fields.
 pd1.Time = ""
 pd1.STime = ""
 pd1.C = 0
- if *pd1 != *pd2 {
- return false
+ // Ignore TTY field too, since it's not relevant in the cases
+ // where we use this method. Tests that care about the TTY
+ // field should check for it themselves.
+ pd1.TTY = ""
+ pd1Json, err := control.ProcessListToJSON([]*control.Process{pd1})
+ if err != nil {
+ return false, err
+ }
+ pd2Json, err := control.ProcessListToJSON([]*control.Process{pd2})
+ if err != nil {
+ return false, err
+ }
+ if pd1Json != pd2Json {
+ return false, nil
+ }
 }
- return true
-}
-
-// getAndCheckProcLists is similar to waitForProcessList, but does not wait and retry the
-// test for equality.
This is because we already confirmed that exec occurred. -func getAndCheckProcLists(cont *Container, want []*control.Process) error { - got, err := cont.Processes() - if err != nil { - return fmt.Errorf("error getting process data from container: %v", err) - } - if procListsEqual(got, want) { - return nil - } - return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want)) + return true, nil } func procListToString(pl []*control.Process) string { @@ -145,7 +147,7 @@ func createWriteableOutputFile(path string) (*os.File, error) { return outputFile, nil } -func waitForFile(f *os.File) error { +func waitForFileNotEmpty(f *os.File) error { op := func() error { fi, err := f.Stat() if err != nil { @@ -160,6 +162,17 @@ func waitForFile(f *os.File) error { return testutil.Poll(op, 30*time.Second) } +func waitForFileExist(path string) error { + op := func() error { + if _, err := os.Stat(path); os.IsNotExist(err) { + return err + } + return nil + } + + return testutil.Poll(op, 30*time.Second) +} + // readOutputNum reads a file at given filepath and returns the int at the // requested position. func readOutputNum(file string, position int) (int, error) { @@ -169,7 +182,7 @@ func readOutputNum(file string, position int) (int, error) { } // Ensure that there is content in output file. - if err := waitForFile(f); err != nil { + if err := waitForFileNotEmpty(f); err != nil { return 0, fmt.Errorf("error waiting for output file: %v", err) } @@ -202,16 +215,15 @@ func readOutputNum(file string, position int) (int, error) { // run starts the sandbox and waits for it to exit, checking that the // application succeeded. func run(spec *specs.Spec, conf *boot.Config) error { - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { return fmt.Errorf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create, start and wait for the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, Attached: true, @@ -230,39 +242,57 @@ type configOption int const ( overlay configOption = iota + ptrace kvm nonExclusiveFS ) -var noOverlay = []configOption{kvm, nonExclusiveFS} -var all = append(noOverlay, overlay) +var ( + noOverlay = append(platformOptions, nonExclusiveFS) + all = append(noOverlay, overlay) +) // configs generates different configurations to run tests. -func configs(opts ...configOption) []*boot.Config { +func configs(t *testing.T, opts ...configOption) map[string]*boot.Config { // Always load the default config. - cs := []*boot.Config{testutil.TestConfig()} - + cs := make(map[string]*boot.Config) for _, o := range opts { - c := testutil.TestConfig() switch o { case overlay: + c := testutil.TestConfig(t) c.Overlay = true + cs["overlay"] = c + case ptrace: + c := testutil.TestConfig(t) + c.Platform = platforms.Ptrace + cs["ptrace"] = c case kvm: - // TODO(b/112165693): KVM tests are flaky. Disable until fixed. - continue - + c := testutil.TestConfig(t) c.Platform = platforms.KVM + cs["kvm"] = c case nonExclusiveFS: + c := testutil.TestConfig(t) c.FileAccess = boot.FileAccessShared + cs["non-exclusive"] = c default: panic(fmt.Sprintf("unknown config option %v", o)) - } - cs = append(cs, c) } return cs } +func configsWithVFS2(t *testing.T, opts []configOption) map[string]*boot.Config { + vfs1 := configs(t, opts...) 
+ vfs2 := configs(t, opts...) + + for key, value := range vfs2 { + value.VFS2 = true + vfs1[key+"VFS2"] = value + } + + return vfs1 +} + // TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle. // It verifies after each step that the container can be loaded from disk, and // has the correct status. @@ -272,132 +302,133 @@ func TestLifecycle(t *testing.T) { childReaper.Start() defer childReaper.Stop() - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - // The container will just sleep for a long time. We will kill it before - // it finishes sleeping. - spec := testutil.NewSpecWithArgs("sleep", "100") + for name, conf := range configsWithVFS2(t, all) { + t.Run(name, func(t *testing.T) { + // The container will just sleep for a long time. We will kill it before + // it finishes sleeping. + spec := testutil.NewSpecWithArgs("sleep", "100") - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) - - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - }, - } - // Create the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - - // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) - if err != nil { - t.Fatalf("error loading container: %v", err) - } - if got, want := c.Status, Created; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // List should return the container id. - ids, err := List(rootDir) - if err != nil { - t.Fatalf("error listing containers: %v", err) - } - if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) { - t.Errorf("container list got %v, want %v", got, want) - } + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{1}, + }, + } + // Create the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() - // Start the container. - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Load the container from disk and check the status. + c, err = Load(rootDir, args.ID) + if err != nil { + t.Fatalf("error loading container: %v", err) + } + if got, want := c.Status, Created; got != want { + t.Errorf("container status got %v, want %v", got, want) + } - // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) - if err != nil { - t.Fatalf("error loading container: %v", err) - } - if got, want := c.Status, Running; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + // List should return the container id. 
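
The configsWithVFS2 helper completed above drives double coverage: every named configuration is cloned with VFS2 enabled and registered under a "VFS2"-suffixed key, and callers iterate the map with t.Run so a failure names the exact variant. A self-contained sketch of the pattern, with Config standing in for boot.Config and configVariants for the real configs(t, ...) helper:

    package sketch

    import "testing"

    // Config stands in for boot.Config; only the fields the sketch needs.
    type Config struct {
    	Overlay bool
    	VFS2    bool
    }

    // configVariants returns one named base configuration per test dimension.
    func configVariants() map[string]*Config {
    	return map[string]*Config{
    		"default": {},
    		"overlay": {Overlay: true},
    	}
    }

    // withVFS2 doubles the map: every base config plus a VFS2 copy under a
    // "<name>VFS2" key, mirroring configsWithVFS2 above.
    func withVFS2(base map[string]*Config) map[string]*Config {
    	out := make(map[string]*Config, 2*len(base))
    	for name, c := range base {
    		out[name] = c
    		clone := *c // copy so the VFS2 variant doesn't alias the base
    		clone.VFS2 = true
    		out[name+"VFS2"] = &clone
    	}
    	return out
    }

    // TestAllVariants shows the consumption side: one t.Run subtest per
    // entry, so failures report which configuration broke.
    func TestAllVariants(t *testing.T) {
    	for name, conf := range withVFS2(configVariants()) {
    		conf := conf // capture loop variable for the closure
    		t.Run(name, func(t *testing.T) {
    			_ = conf // a real test would set up a container with conf
    		})
    	}
    }

Keying subtests by name is also what makes targeted runs such as "go test -run TestLifecycle/overlayVFS2" possible.
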
+ ids, err := List(rootDir) + if err != nil { + t.Fatalf("error listing containers: %v", err) + } + if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) { + t.Errorf("container list got %v, want %v", got, want) + } - // Verify that "sleep 100" is running. - if err := waitForProcessList(c, expectedPL); err != nil { - t.Error(err) - } + // Start the container. + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Wait on the container. - var wg sync.WaitGroup - wg.Add(1) - ch := make(chan struct{}) - go func() { - ch <- struct{}{} - ws, err := c.Wait() + // Load the container from disk and check the status. + c, err = Load(rootDir, args.ID) if err != nil { - t.Fatalf("error waiting on container: %v", err) + t.Fatalf("error loading container: %v", err) } - if got, want := ws.Signal(), syscall.SIGTERM; got != want { - t.Fatalf("got signal %v, want %v", got, want) + if got, want := c.Status, Running; got != want { + t.Errorf("container status got %v, want %v", got, want) } - wg.Done() - }() - // Wait a bit to ensure that we've started waiting on the - // container before we signal. - <-ch - time.Sleep(100 * time.Millisecond) - // Send the container a SIGTERM which will cause it to stop. - if err := c.SignalContainer(syscall.SIGTERM, false); err != nil { - t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err) - } - // Wait for it to die. - wg.Wait() + // Verify that "sleep 100" is running. + if err := waitForProcessList(c, expectedPL); err != nil { + t.Error(err) + } - // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) - if err != nil { - t.Fatalf("error loading container: %v", err) - } - if got, want := c.Status, Stopped; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + // Wait on the container. + ch := make(chan error) + go func() { + ws, err := c.Wait() + if err != nil { + ch <- err + } + if got, want := ws.Signal(), syscall.SIGTERM; got != want { + ch <- fmt.Errorf("got signal %v, want %v", got, want) + } + ch <- nil + }() - // Destroy the container. - if err := c.Destroy(); err != nil { - t.Fatalf("error destroying container: %v", err) - } + // Wait a bit to ensure that we've started waiting on + // the container before we signal. + time.Sleep(time.Second) - // List should not return the container id. - ids, err = List(rootDir) - if err != nil { - t.Fatalf("error listing containers: %v", err) - } - if len(ids) != 0 { - t.Errorf("expected container list to be empty, but got %v", ids) - } + // Send the container a SIGTERM which will cause it to stop. + if err := c.SignalContainer(syscall.SIGTERM, false); err != nil { + t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err) + } - // Loading the container by id should fail. - if _, err = Load(rootDir, args.ID); err == nil { - t.Errorf("expected loading destroyed container to fail, but it did not") - } + // Wait for it to die. + if err := <-ch; err != nil { + t.Fatalf("error waiting for container: %v", err) + } + + // Load the container from disk and check the status. + c, err = Load(rootDir, args.ID) + if err != nil { + t.Fatalf("error loading container: %v", err) + } + if got, want := c.Status, Stopped; got != want { + t.Errorf("container status got %v, want %v", got, want) + } + + // Destroy the container. + if err := c.Destroy(); err != nil { + t.Fatalf("error destroying container: %v", err) + } + + // List should not return the container id. 
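
The Wait block above replaces a sync.WaitGroup plus t.Fatalf calls made inside the goroutine, which the testing package does not support off the test goroutine, with a result channel the test receives from. One caveat worth noting: as written above there are no early returns after the sends, so a failed Wait would attempt a second send on an unbuffered channel. A sketch of the shape with a one-slot buffer and early returns, where waiter is a hypothetical stand-in for *Container:

    package sketch

    import (
    	"fmt"
    	"syscall"
    	"time"
    )

    // waiter is a hypothetical stand-in for *container.Container.
    type waiter interface {
    	Wait() (syscall.WaitStatus, error)
    }

    // waitForSignal runs the blocking Wait off the test goroutine and
    // reports the result over a channel. The buffer of one and the early
    // returns mean the goroutine can never block on send, even if a check
    // fails after the receiver has already taken a value.
    func waitForSignal(c waiter, want syscall.Signal, timeout time.Duration) error {
    	ch := make(chan error, 1)
    	go func() {
    		ws, err := c.Wait()
    		if err != nil {
    			ch <- err
    			return
    		}
    		if got := ws.Signal(); got != want {
    			ch <- fmt.Errorf("got signal %v, want %v", got, want)
    			return
    		}
    		ch <- nil
    	}()
    	select {
    	case err := <-ch:
    		return err
    	case <-time.After(timeout):
    		return fmt.Errorf("timed out waiting for container to exit")
    	}
    }
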
+ ids, err = List(rootDir) + if err != nil { + t.Fatalf("error listing containers: %v", err) + } + if len(ids) != 0 { + t.Errorf("expected container list to be empty, but got %v", ids) + } + + // Loading the container by id should fail. + if _, err = Load(rootDir, args.ID); err == nil { + t.Errorf("expected loading destroyed container to fail, but it did not") + } + }) } } @@ -406,12 +437,14 @@ func TestExePath(t *testing.T) { // Create two directories that will be prepended to PATH. firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first") if err != nil { - t.Fatal(err) + t.Fatalf("error creating temporary directory: %v", err) } + defer os.RemoveAll(firstPath) secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second") if err != nil { - t.Fatal(err) + t.Fatalf("error creating temporary directory: %v", err) } + defer os.RemoveAll(secondPath) // Create two minimal executables in the second path, two of which // will be masked by files in first path. @@ -419,11 +452,11 @@ func TestExePath(t *testing.T) { path := filepath.Join(secondPath, p) f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777) if err != nil { - t.Fatal(err) + t.Fatalf("error opening path: %v", err) } defer f.Close() if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil { - t.Fatal(err) + t.Fatalf("error writing contents: %v", err) } } @@ -432,7 +465,7 @@ func TestExePath(t *testing.T) { nonExecutable := filepath.Join(firstPath, "masked1") f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666) if err != nil { - t.Fatal(err) + t.Fatalf("error opening file: %v", err) } f2.Close() @@ -440,85 +473,95 @@ func TestExePath(t *testing.T) { // executable in the second. nonRegular := filepath.Join(firstPath, "masked2") if err := os.Mkdir(nonRegular, 0777); err != nil { - t.Fatal(err) - } - - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - for _, test := range []struct { - path string - success bool - }{ - {path: "true", success: true}, - {path: "bin/true", success: true}, - {path: "/bin/true", success: true}, - {path: "thisfiledoesntexit", success: false}, - {path: "bin/thisfiledoesntexit", success: false}, - {path: "/bin/thisfiledoesntexit", success: false}, - - {path: "unmasked", success: true}, - {path: filepath.Join(firstPath, "unmasked"), success: false}, - {path: filepath.Join(secondPath, "unmasked"), success: true}, - - {path: "masked1", success: true}, - {path: filepath.Join(firstPath, "masked1"), success: false}, - {path: filepath.Join(secondPath, "masked1"), success: true}, - - {path: "masked2", success: true}, - {path: filepath.Join(firstPath, "masked2"), success: false}, - {path: filepath.Join(secondPath, "masked2"), success: true}, - } { - spec := testutil.NewSpecWithArgs(test.path) - spec.Process.Env = []string{ - fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")), - } - - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("exec: %s, error setting up container: %v", test.path, err) - } - - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - Attached: true, - } - ws, err := Run(conf, args) - - os.RemoveAll(rootDir) - os.RemoveAll(bundleDir) - - if test.success { - if err != nil { - t.Errorf("exec: %s, error running container: %v", test.path, err) - } - if ws.ExitStatus() != 0 { - t.Errorf("exec: %s, got exit status %v want %v", test.path, ws.ExitStatus(), 0) - } - } else { - if err == nil { - t.Errorf("exec: %s, got: no error, want: error", 
test.path) - } + t.Fatalf("error making directory: %v", err) + } + + for name, conf := range configsWithVFS2(t, []configOption{overlay}) { + t.Run(name, func(t *testing.T) { + for _, test := range []struct { + path string + success bool + }{ + {path: "true", success: true}, + {path: "bin/true", success: true}, + {path: "/bin/true", success: true}, + {path: "thisfiledoesntexit", success: false}, + {path: "bin/thisfiledoesntexit", success: false}, + {path: "/bin/thisfiledoesntexit", success: false}, + + {path: "unmasked", success: true}, + {path: filepath.Join(firstPath, "unmasked"), success: false}, + {path: filepath.Join(secondPath, "unmasked"), success: true}, + + {path: "masked1", success: true}, + {path: filepath.Join(firstPath, "masked1"), success: false}, + {path: filepath.Join(secondPath, "masked1"), success: true}, + + {path: "masked2", success: true}, + {path: filepath.Join(firstPath, "masked2"), success: false}, + {path: filepath.Join(secondPath, "masked2"), success: true}, + } { + t.Run(fmt.Sprintf("path=%s,success=%t", test.path, test.success), func(t *testing.T) { + spec := testutil.NewSpecWithArgs(test.path) + spec.Process.Env = []string{ + fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")), + } + + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("exec: error setting up container: %v", err) + } + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + Attached: true, + } + ws, err := Run(conf, args) + + if test.success { + if err != nil { + t.Errorf("exec: error running container: %v", err) + } + if ws.ExitStatus() != 0 { + t.Errorf("exec: got exit status %v want %v", ws.ExitStatus(), 0) + } + } else { + if err == nil { + t.Errorf("exec: got: no error, want: error") + } + } + }) } - } + }) } } // Test the we can retrieve the application exit status from the container. func TestAppExitStatus(t *testing.T) { + doAppExitStatus(t, false) +} + +// This is TestAppExitStatus for VFSv2. +func TestAppExitStatusVFS2(t *testing.T) { + doAppExitStatus(t, true) +} + +func doAppExitStatus(t *testing.T, vfs2 bool) { // First container will succeed. succSpec := testutil.NewSpecWithArgs("true") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(succSpec, conf) + conf := testutil.TestConfig(t) + conf.VFS2 = vfs2 + _, bundleDir, cleanup, err := testutil.SetupContainer(succSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: succSpec, BundleDir: bundleDir, Attached: true, @@ -535,15 +578,14 @@ func TestAppExitStatus(t *testing.T) { wantStatus := 123 errSpec := testutil.NewSpecWithArgs("bash", "-c", fmt.Sprintf("exit %d", wantStatus)) - rootDir2, bundleDir2, err := testutil.SetupContainer(errSpec, conf) + _, bundleDir2, cleanup2, err := testutil.SetupContainer(errSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir2) - defer os.RemoveAll(bundleDir2) + defer cleanup2() args2 := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: errSpec, BundleDir: bundleDir2, Attached: true, @@ -559,164 +601,163 @@ func TestAppExitStatus(t *testing.T) { // TestExec verifies that a container can exec a new program. 
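
doAppExitStatus above shows the second idiom this change uses for VFS2 coverage: where the config-map loop doesn't fit, the test body is parameterized on a boolean and given two thin entry points. A minimal sketch with a stand-in config type (the real code sets conf.VFS2 on testutil.TestConfig(t)):

    package sketch

    import "testing"

    type config struct{ VFS2 bool }

    // Two one-line entry points keep both variants visible to `go test -run`
    // while sharing a single body.
    func TestFeature(t *testing.T)     { doFeatureTest(t, false) }
    func TestFeatureVFS2(t *testing.T) { doFeatureTest(t, true) }

    func doFeatureTest(t *testing.T, vfs2 bool) {
    	conf := &config{VFS2: vfs2} // stand-in for testutil.TestConfig(t)
    	_ = conf                    // a real test would run the container with conf
    }
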
func TestExec(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + const uid = 343 + spec := testutil.NewSpecWithArgs("sleep", "100") - const uid = 343 - spec := testutil.NewSpecWithArgs("sleep", "100") + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{1}, + }, + { + UID: uid, + PID: 2, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{2}, + }, + } - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - }, - { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "sleep", - }, - } + // Verify that "sleep 100" is running. + if err := waitForProcessList(cont, expectedPL[:1]); err != nil { + t.Error(err) + } - // Verify that "sleep 100" is running. - if err := waitForProcessList(cont, expectedPL[:1]); err != nil { - t.Error(err) - } + execArgs := &control.ExecArgs{ + Filename: "/bin/sleep", + Argv: []string{"/bin/sleep", "5"}, + WorkingDirectory: "/", + KUID: uid, + } - execArgs := &control.ExecArgs{ - Filename: "/bin/sleep", - Argv: []string{"/bin/sleep", "5"}, - WorkingDirectory: "/", - KUID: uid, - } + // Verify that "sleep 100" and "sleep 5" are running + // after exec. First, start running exec (whick + // blocks). + ch := make(chan error) + go func() { + exitStatus, err := cont.executeSync(execArgs) + if err != nil { + ch <- err + } else if exitStatus != 0 { + ch <- fmt.Errorf("failed with exit status: %v", exitStatus) + } else { + ch <- nil + } + }() - // Verify that "sleep 100" and "sleep 5" are running after exec. - // First, start running exec (whick blocks). - status := make(chan error, 1) - go func() { - exitStatus, err := cont.executeSync(execArgs) - if err != nil { - log.Debugf("error executing: %v", err) - status <- err - } else if exitStatus != 0 { - log.Debugf("bad status: %d", exitStatus) - status <- fmt.Errorf("failed with exit status: %v", exitStatus) - } else { - status <- nil + if err := waitForProcessList(cont, expectedPL); err != nil { + t.Fatalf("error waiting for processes: %v", err) } - }() - if err := waitForProcessList(cont, expectedPL); err != nil { - t.Fatal(err) - } - - // Ensure that exec finished without error. 
- select { - case <-time.After(10 * time.Second): - t.Fatalf("container timed out waiting for exec to finish.") - case st := <-status: - if st != nil { - t.Errorf("container failed to exec %v: %v", args, err) + // Ensure that exec finished without error. + select { + case <-time.After(10 * time.Second): + t.Fatalf("container timed out waiting for exec to finish.") + case err := <-ch: + if err != nil { + t.Errorf("container failed to exec %v: %v", args, err) + } } - } + }) } } // TestKillPid verifies that we can signal individual exec'd processes. func TestKillPid(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - app, err := testutil.FindFile("runsc/container/test_app/test_app") - if err != nil { - t.Fatal("error finding test_app:", err) - } + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + app, err := testutil.FindFile("test/cmd/test_app/test_app") + if err != nil { + t.Fatal("error finding test_app:", err) + } - const nProcs = 4 - spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true") - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + const nProcs = 4 + spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true") + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Verify that all processes are running. - if err := waitForProcessCount(cont, nProcs); err != nil { - t.Fatalf("timed out waiting for processes to start: %v", err) - } + // Verify that all processes are running. + if err := waitForProcessCount(cont, nProcs); err != nil { + t.Fatalf("timed out waiting for processes to start: %v", err) + } - // Kill the child process with the largest PID. - procs, err := cont.Processes() - if err != nil { - t.Fatalf("failed to get process list: %v", err) - } - var pid int32 - for _, p := range procs { - if pid < int32(p.PID) { - pid = int32(p.PID) + // Kill the child process with the largest PID. + procs, err := cont.Processes() + if err != nil { + t.Fatalf("failed to get process list: %v", err) + } + var pid int32 + for _, p := range procs { + if pid < int32(p.PID) { + pid = int32(p.PID) + } + } + if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil { + t.Fatalf("failed to signal process %d: %v", pid, err) } - } - if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil { - t.Fatalf("failed to signal process %d: %v", pid, err) - } - // Verify that one process is gone. 
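
TestKillPid above picks its victim by taking the largest PID from the process listing, on the assumption that the most recently forked child in the task tree has the highest PID. The selection, reduced to a helper:

    package sketch

    // newestPID returns the largest PID in a listing; TestKillPid uses this
    // heuristic to choose the most recently spawned process to SIGKILL.
    func newestPID(pids []int32) int32 {
    	var max int32
    	for _, p := range pids {
    		if p > max {
    			max = p
    		}
    	}
    	return max // e.g. newestPID([]int32{1, 4, 3}) == 4
    }
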
- if err := waitForProcessCount(cont, nProcs-1); err != nil { - t.Fatal(err) - } + // Verify that one process is gone. + if err := waitForProcessCount(cont, nProcs-1); err != nil { + t.Fatalf("error waiting for processes: %v", err) + } - procs, err = cont.Processes() - if err != nil { - t.Fatalf("failed to get process list: %v", err) - } - for _, p := range procs { - if pid == int32(p.PID) { - t.Fatalf("pid %d is still alive, which should be killed", pid) + procs, err = cont.Processes() + if err != nil { + t.Fatalf("failed to get process list: %v", err) } - } + for _, p := range procs { + if pid == int32(p.PID) { + t.Fatalf("pid %d is still alive, which should be killed", pid) + } + } + }) } } @@ -727,160 +768,160 @@ func TestKillPid(t *testing.T) { // be the next consecutive number after the last number from the checkpointed container. func TestCheckpointRestore(t *testing.T) { // Skip overlay because test requires writing to host file. - for _, conf := range configs(noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test") - if err != nil { - t.Fatalf("ioutil.TempDir failed: %v", err) - } - if err := os.Chmod(dir, 0777); err != nil { - t.Fatalf("error chmoding file: %q, %v", dir, err) - } + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test") + if err != nil { + t.Fatalf("ioutil.TempDir failed: %v", err) + } + defer os.RemoveAll(dir) + if err := os.Chmod(dir, 0777); err != nil { + t.Fatalf("error chmoding file: %q, %v", dir, err) + } - outputPath := filepath.Join(dir, "output") - outputFile, err := createWriteableOutputFile(outputPath) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile.Close() + outputPath := filepath.Join(dir, "output") + outputFile, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile.Close() - script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath) - spec := testutil.NewSpecWithArgs("bash", "-c", script) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath) + spec := testutil.NewSpecWithArgs("bash", "-c", script) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Set the image path, which is where the checkpoint image will be saved. 
- imagePath := filepath.Join(dir, "test-image-file") + // Set the image path, which is where the checkpoint image will be saved. + imagePath := filepath.Join(dir, "test-image-file") - // Create the image file and open for writing. - file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) - if err != nil { - t.Fatalf("error opening new file at imagePath: %v", err) - } - defer file.Close() + // Create the image file and open for writing. + file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) + if err != nil { + t.Fatalf("error opening new file at imagePath: %v", err) + } + defer file.Close() - // Wait until application has ran. - if err := waitForFile(outputFile); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(file); err != nil { - t.Fatalf("error checkpointing container to empty file: %v", err) - } - defer os.RemoveAll(imagePath) + // Checkpoint running container; save state into new file. + if err := cont.Checkpoint(file); err != nil { + t.Fatalf("error checkpointing container to empty file: %v", err) + } + defer os.RemoveAll(imagePath) - lastNum, err := readOutputNum(outputPath, -1) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + lastNum, err := readOutputNum(outputPath, -1) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Delete and recreate file before restoring. - if err := os.Remove(outputPath); err != nil { - t.Fatalf("error removing file") - } - outputFile2, err := createWriteableOutputFile(outputPath) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile2.Close() + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile2, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile2.Close() - // Restore into a new container. - args2 := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont2, err := New(conf, args2) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont2.Destroy() + // Restore into a new container. + args2 := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont2, err := New(conf, args2) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont2.Destroy() - if err := cont2.Restore(spec, conf, imagePath); err != nil { - t.Fatalf("error restoring container: %v", err) - } + if err := cont2.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } - // Wait until application has ran. - if err := waitForFile(outputFile2); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. 
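
The checkpoint sequence above is easy to lose among the error checks. Reduced to its steps under a hypothetical sandbox interface (the real methods are Checkpoint(*os.File) and Restore(spec, conf, imagePath) on *Container; the Restore signature is trimmed here):

    package sketch

    import (
    	"fmt"
    	"os"
    )

    // sandbox is a hypothetical stand-in for *container.Container.
    type sandbox interface {
    	Checkpoint(img *os.File) error
    	Restore(imagePath string) error
    }

    // checkpointThenRestore: create an image file, snapshot the running
    // sandbox into it, then boot a fresh sandbox from the same image.
    func checkpointThenRestore(running, fresh sandbox, imagePath string) error {
    	img, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
    	if err != nil {
    		return fmt.Errorf("creating image file: %v", err)
    	}
    	defer img.Close()

    	if err := running.Checkpoint(img); err != nil {
    		return fmt.Errorf("checkpoint: %v", err)
    	}
    	return fresh.Restore(imagePath)
    }

The O_EXCL flag mirrors the test: the checkpoint must start from a fresh image file, never clobber an existing one.
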
+ if err := waitForFileNotEmpty(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - firstNum, err := readOutputNum(outputPath, 0) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + firstNum, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Check that lastNum is one less than firstNum and that the container picks - // up from where it left off. - if lastNum+1 != firstNum { - t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum) - } - cont2.Destroy() + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. + if lastNum+1 != firstNum { + t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum) + } + cont2.Destroy() - // Restore into another container! - // Delete and recreate file before restoring. - if err := os.Remove(outputPath); err != nil { - t.Fatalf("error removing file") - } - outputFile3, err := createWriteableOutputFile(outputPath) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile3.Close() + // Restore into another container! + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile3, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile3.Close() - // Restore into a new container. - args3 := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont3, err := New(conf, args3) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont3.Destroy() + // Restore into a new container. + args3 := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont3, err := New(conf, args3) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont3.Destroy() - if err := cont3.Restore(spec, conf, imagePath); err != nil { - t.Fatalf("error restoring container: %v", err) - } + if err := cont3.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } - // Wait until application has ran. - if err := waitForFile(outputFile3); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile3); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - firstNum2, err := readOutputNum(outputPath, 0) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + firstNum2, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Check that lastNum is one less than firstNum and that the container picks - // up from where it left off. - if lastNum+1 != firstNum2 { - t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2) - } - cont3.Destroy() + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. + if lastNum+1 != firstNum2 { + t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2) + } + cont3.Destroy() + }) } } @@ -888,256 +929,213 @@ func TestCheckpointRestore(t *testing.T) { // with filesystem Unix Domain Socket use. func TestUnixDomainSockets(t *testing.T) { // Skip overlay because test requires writing to host file. - for _, conf := range configs(noOverlay...) 
{ - t.Logf("Running test with conf: %+v", conf) - - // UDS path is limited to 108 chars for compatibility with older systems. - // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is - // not exceeded. Assumes '/tmp' exists in the system. - dir, err := ioutil.TempDir("/tmp", "uds-test") - if err != nil { - t.Fatalf("ioutil.TempDir failed: %v", err) - } - defer os.RemoveAll(dir) + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + // UDS path is limited to 108 chars for compatibility with older systems. + // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is + // not exceeded. Assumes '/tmp' exists in the system. + dir, err := ioutil.TempDir("/tmp", "uds-test") + if err != nil { + t.Fatalf("ioutil.TempDir failed: %v", err) + } + defer os.RemoveAll(dir) - outputPath := filepath.Join(dir, "uds_output") - outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile.Close() + outputPath := filepath.Join(dir, "uds_output") + outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile.Close() - app, err := testutil.FindFile("runsc/container/test_app/test_app") - if err != nil { - t.Fatal("error finding test_app:", err) - } + app, err := testutil.FindFile("test/cmd/test_app/test_app") + if err != nil { + t.Fatal("error finding test_app:", err) + } - socketPath := filepath.Join(dir, "uds_socket") - defer os.Remove(socketPath) + socketPath := filepath.Join(dir, "uds_socket") + defer os.Remove(socketPath) - spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath) - spec.Process.User = specs.User{ - UID: uint32(os.Getuid()), - GID: uint32(os.Getgid()), - } - spec.Mounts = []specs.Mount{{ - Type: "bind", - Destination: dir, - Source: dir, - }} + spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath) + spec.Process.User = specs.User{ + UID: uint32(os.Getuid()), + GID: uint32(os.Getgid()), + } + spec.Mounts = []specs.Mount{{ + Type: "bind", + Destination: dir, + Source: dir, + }} - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Set the image path, the location where the checkpoint image will be saved. - imagePath := filepath.Join(dir, "test-image-file") + // Set the image path, the location where the checkpoint image will be saved. 
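
The 108-character note above is the sizeof(sun_path) limit in Linux's sockaddr_un: a socket path that does not fit cannot be bound at all, which is why the test builds its socket under /tmp rather than a potentially deep TEST_TMPDIR. A quick standalone demonstration:

    package main

    import (
    	"fmt"
    	"net"
    	"strings"
    )

    func main() {
    	// Roughly 108 bytes is the ceiling for a UNIX socket path on
    	// Linux; this one is far over it, so the bind fails immediately.
    	long := "/tmp/" + strings.Repeat("x", 200) + ".sock"
    	if _, err := net.Listen("unix", long); err != nil {
    		fmt.Println("bind failed as expected:", err)
    	}
    }
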
+ imagePath := filepath.Join(dir, "test-image-file") - // Create the image file and open for writing. - file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) - if err != nil { - t.Fatalf("error opening new file at imagePath: %v", err) - } - defer file.Close() - defer os.RemoveAll(imagePath) + // Create the image file and open for writing. + file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) + if err != nil { + t.Fatalf("error opening new file at imagePath: %v", err) + } + defer file.Close() + defer os.RemoveAll(imagePath) - // Wait until application has ran. - if err := waitForFile(outputFile); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(file); err != nil { - t.Fatalf("error checkpointing container to empty file: %v", err) - } + // Checkpoint running container; save state into new file. + if err := cont.Checkpoint(file); err != nil { + t.Fatalf("error checkpointing container to empty file: %v", err) + } - // Read last number outputted before checkpoint. - lastNum, err := readOutputNum(outputPath, -1) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + // Read last number outputted before checkpoint. + lastNum, err := readOutputNum(outputPath, -1) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Delete and recreate file before restoring. - if err := os.Remove(outputPath); err != nil { - t.Fatalf("error removing file") - } - outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile2.Close() + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile2.Close() - // Restore into a new container. - argsRestore := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - contRestore, err := New(conf, argsRestore) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer contRestore.Destroy() + // Restore into a new container. + argsRestore := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + contRestore, err := New(conf, argsRestore) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer contRestore.Destroy() - if err := contRestore.Restore(spec, conf, imagePath); err != nil { - t.Fatalf("error restoring container: %v", err) - } + if err := contRestore.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } - // Wait until application has ran. - if err := waitForFile(outputFile2); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - // Read first number outputted after restore. - firstNum, err := readOutputNum(outputPath, 0) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + // Read first number outputted after restore. 
+ firstNum, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Check that lastNum is one less than firstNum. - if lastNum+1 != firstNum { - t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum) - } - contRestore.Destroy() + // Check that lastNum is one less than firstNum. + if lastNum+1 != firstNum { + t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum) + } + contRestore.Destroy() + }) } } // TestPauseResume tests that we can successfully pause and resume a container. -// It checks starts running sleep and executes another sleep. It pauses and checks -// that both processes are still running: sleep will be paused and still exist. -// It will then unpause and confirm that both processes are running. Then it will -// wait until one sleep completes and check to make sure the other is running. +// The container will keep touching a file to indicate it's running. The test +// pauses the container, removes the file, and checks that it doesn't get +// recreated. Then it resumes the container, verify that the file gets created +// again. func TestPauseResume(t *testing.T) { - for _, conf := range configs(noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - const uid = 343 - spec := testutil.NewSpecWithArgs("sleep", "20") - - lock, err := ioutil.TempFile(testutil.TmpDir(), "lock") - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer lock.Close() - - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) - - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } - - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - }, - { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "bash", - }, - } - - script := fmt.Sprintf("while [[ -f %q ]]; do sleep 0.1; done", lock.Name()) - execArgs := &control.ExecArgs{ - Filename: "/bin/bash", - Argv: []string{"bash", "-c", script}, - WorkingDirectory: "/", - KUID: uid, - } + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "lock") + if err != nil { + t.Fatalf("error creating temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) - // First, start running exec. - _, err = cont.Execute(execArgs) - if err != nil { - t.Fatalf("error executing: %v", err) - } + running := path.Join(tmpDir, "running") + script := fmt.Sprintf("while [[ true ]]; do touch %q; sleep 0.1; done", running) + spec := testutil.NewSpecWithArgs("/bin/bash", "-c", script) - // Verify that "sleep 5" is running. - if err := waitForProcessList(cont, expectedPL); err != nil { - t.Fatal(err) - } + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Pause the running container. 
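
The rewritten TestPauseResume above no longer inspects process lists; it uses a filesystem heartbeat. The workload touches a file in a loop, so pause is observable as the file staying gone after removal, and resume as the file reappearing. The polling this relies on (waitForFileExist, via testutil.Poll) can be sketched without the test utilities; assume the real helper applies backoff rather than a fixed interval:

    package sketch

    import (
    	"fmt"
    	"os"
    	"time"
    )

    // waitForPath polls until path exists or the timeout elapses, the same
    // shape as waitForFileExist above but with a fixed retry interval.
    func waitForPath(path string, timeout time.Duration) error {
    	deadline := time.Now().Add(timeout)
    	for {
    		_, err := os.Stat(path)
    		if err == nil {
    			return nil
    		}
    		if !os.IsNotExist(err) {
    			return err // unexpected stat failure: give up immediately
    		}
    		if time.Now().After(deadline) {
    			return fmt.Errorf("timed out waiting for %q", path)
    		}
    		time.Sleep(100 * time.Millisecond)
    	}
    }
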
- if err := cont.Pause(); err != nil { - t.Errorf("error pausing container: %v", err) - } - if got, want := cont.Status, Paused; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - if err := os.Remove(lock.Name()); err != nil { - t.Fatalf("os.Remove(lock) failed: %v", err) - } - // Script loops and sleeps for 100ms. Give a bit a time for it to exit in - // case pause didn't work. - time.Sleep(200 * time.Millisecond) + // Wait until container starts running, observed by the existence of running + // file. + if err := waitForFileExist(running); err != nil { + t.Errorf("error waiting for container to start: %v", err) + } - // Verify that the two processes still exist. - if err := getAndCheckProcLists(cont, expectedPL); err != nil { - t.Fatal(err) - } + // Pause the running container. + if err := cont.Pause(); err != nil { + t.Errorf("error pausing container: %v", err) + } + if got, want := cont.Status, Paused; got != want { + t.Errorf("container status got %v, want %v", got, want) + } - // Resume the running container. - if err := cont.Resume(); err != nil { - t.Errorf("error pausing container: %v", err) - } - if got, want := cont.Status, Running; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + if err := os.Remove(running); err != nil { + t.Fatalf("os.Remove(%q) failed: %v", running, err) + } + // Script touches the file every 100ms. Give a bit a time for it to run to + // catch the case that pause didn't work. + time.Sleep(200 * time.Millisecond) + if _, err := os.Stat(running); !os.IsNotExist(err) { + t.Fatalf("container did not pause: file exist check: %v", err) + } - expectedPL2 := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - }, - } + // Resume the running container. + if err := cont.Resume(); err != nil { + t.Errorf("error pausing container: %v", err) + } + if got, want := cont.Status, Running; got != want { + t.Errorf("container status got %v, want %v", got, want) + } - // Verify that deleting the file triggered the process to exit. - if err := waitForProcessList(cont, expectedPL2); err != nil { - t.Fatal(err) - } + // Verify that the file is once again created by container. + if err := waitForFileExist(running); err != nil { + t.Fatalf("error resuming container: file exist check: %v", err) + } + }) } } @@ -1146,17 +1144,16 @@ func TestPauseResume(t *testing.T) { // occurs given the correct state. func TestPauseResumeStatus(t *testing.T) { spec := testutil.NewSpecWithArgs("sleep", "20") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1212,356 +1209,365 @@ func TestCapabilities(t *testing.T) { uid := auth.KUID(os.Getuid() + 1) gid := auth.KGID(os.Getgid() + 1) - for _, conf := range configs(all...) 
{ - t.Logf("Running test with conf: %+v", conf) - - spec := testutil.NewSpecWithArgs("sleep", "100") - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("sleep", "100") + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - }, - { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "exe", - }, - } - if err := waitForProcessList(cont, expectedPL[:1]); err != nil { - t.Fatalf("Failed to wait for sleep to start, err: %v", err) - } + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{1}, + }, + { + UID: uid, + PID: 2, + PPID: 0, + C: 0, + Cmd: "exe", + Threads: []kernel.ThreadID{2}, + }, + } + if err := waitForProcessList(cont, expectedPL[:1]); err != nil { + t.Fatalf("Failed to wait for sleep to start, err: %v", err) + } - // Create an executable that can't be run with the specified UID:GID. - // This shouldn't be callable within the container until we add the - // CAP_DAC_OVERRIDE capability to skip the access check. - exePath := filepath.Join(rootDir, "exe") - if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil { - t.Fatalf("couldn't create executable: %v", err) - } - defer os.Remove(exePath) - - // Need to traverse the intermediate directory. - os.Chmod(rootDir, 0755) - - execArgs := &control.ExecArgs{ - Filename: exePath, - Argv: []string{exePath}, - WorkingDirectory: "/", - KUID: uid, - KGID: gid, - Capabilities: &auth.TaskCapabilities{}, - } + // Create an executable that can't be run with the specified UID:GID. + // This shouldn't be callable within the container until we add the + // CAP_DAC_OVERRIDE capability to skip the access check. + exePath := filepath.Join(rootDir, "exe") + if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil { + t.Fatalf("couldn't create executable: %v", err) + } + defer os.Remove(exePath) + + // Need to traverse the intermediate directory. + os.Chmod(rootDir, 0755) + + execArgs := &control.ExecArgs{ + Filename: exePath, + Argv: []string{exePath}, + WorkingDirectory: "/", + KUID: uid, + KGID: gid, + Capabilities: &auth.TaskCapabilities{}, + } - // "exe" should fail because we don't have the necessary permissions. 
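
The failing-then-succeeding exec pair above turns on a single field: adding CAP_DAC_OVERRIDE to the effective capability set lets the task bypass the 0770 mode bits that block the mismatched KUID/KGID. Isolated into a constructor, with the fields exactly as used in the test and imports from the gVisor tree:

    package sketch

    import (
    	"gvisor.dev/gvisor/pkg/abi/linux"
    	"gvisor.dev/gvisor/pkg/sentry/control"
    	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
    )

    // withDACOverride returns ExecArgs that run cmd as uid:gid with
    // CAP_DAC_OVERRIDE in the effective set, mirroring the second exec
    // above that is expected to succeed despite the file mode.
    func withDACOverride(cmd string, uid auth.KUID, gid auth.KGID) *control.ExecArgs {
    	return &control.ExecArgs{
    		Filename:         cmd,
    		Argv:             []string{cmd},
    		WorkingDirectory: "/",
    		KUID:             uid,
    		KGID:             gid,
    		Capabilities: &auth.TaskCapabilities{
    			EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
    		},
    	}
    }
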
- if _, err := cont.executeSync(execArgs); err == nil { - t.Fatalf("container executed without error, but an error was expected") - } + // "exe" should fail because we don't have the necessary permissions. + if _, err := cont.executeSync(execArgs); err == nil { + t.Fatalf("container executed without error, but an error was expected") + } - // Now we run with the capability enabled and should succeed. - execArgs.Capabilities = &auth.TaskCapabilities{ - EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), - } - // "exe" should not fail this time. - if _, err := cont.executeSync(execArgs); err != nil { - t.Fatalf("container failed to exec %v: %v", args, err) - } + // Now we run with the capability enabled and should succeed. + execArgs.Capabilities = &auth.TaskCapabilities{ + EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), + } + // "exe" should not fail this time. + if _, err := cont.executeSync(execArgs); err != nil { + t.Fatalf("container failed to exec %v: %v", args, err) + } + }) } } // TestRunNonRoot checks that sandbox can be configured when running as // non-privileged user. func TestRunNonRoot(t *testing.T) { - for _, conf := range configs(noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - - spec := testutil.NewSpecWithArgs("/bin/true") - - // Set a random user/group with no access to "blocked" dir. - spec.Process.User.UID = 343 - spec.Process.User.GID = 2401 - spec.Process.Capabilities = nil + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("/bin/true") + + // Set a random user/group with no access to "blocked" dir. + spec.Process.User.UID = 343 + spec.Process.User.GID = 2401 + spec.Process.Capabilities = nil + + // User running inside container can't list '$TMP/blocked' and would fail to + // mount it. + dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + if err := os.Chmod(dir, 0700); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", dir, err) + } + dir = path.Join(dir, "test") + if err := os.Mkdir(dir, 0755); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", dir, err) + } - // User running inside container can't list '$TMP/blocked' and would fail to - // mount it. - dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - if err := os.Chmod(dir, 0700); err != nil { - t.Fatalf("os.MkDir(%q) failed: %v", dir, err) - } - dir = path.Join(dir, "test") - if err := os.Mkdir(dir, 0755); err != nil { - t.Fatalf("os.MkDir(%q) failed: %v", dir, err) - } + src, err := ioutil.TempDir(testutil.TmpDir(), "src") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } - src, err := ioutil.TempDir(testutil.TmpDir(), "src") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: src, + Type: "bind", + }) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: dir, - Source: src, - Type: "bind", + if err := run(spec, conf); err != nil { + t.Fatalf("error running sandbox: %v", err) + } }) - - if err := run(spec, conf); err != nil { - t.Fatalf("error running sandbox: %v", err) - } } } // TestMountNewDir checks that runsc will create destination directory if it // doesn't exit. 
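
Both TestRunNonRoot above and TestMountNewDir below exercise bind mounts injected through the OCI spec, and the comment just above records the behavior under test: runsc creates a missing mount destination rather than failing. The spec plumbing, as a small helper over the runtime-spec types (the "ro" option is the one TestReadonlyMount uses later):

    package sketch

    import specs "github.com/opencontainers/runtime-spec/specs-go"

    // bindMount appends a host directory into the container spec the way
    // these tests do; runsc creates the destination if it doesn't exist.
    func bindMount(spec *specs.Spec, hostDir, containerDir string, readonly bool) {
    	m := specs.Mount{
    		Destination: containerDir,
    		Source:      hostDir,
    		Type:        "bind",
    	}
    	if readonly {
    		m.Options = []string{"ro"}
    	}
    	spec.Mounts = append(spec.Mounts, m)
    }
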
func TestMountNewDir(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) + for name, conf := range configsWithVFS2(t, []configOption{overlay}) { + t.Run(name, func(t *testing.T) { + root, err := ioutil.TempDir(testutil.TmpDir(), "root") + if err != nil { + t.Fatal("ioutil.TempDir() failed:", err) + } - root, err := ioutil.TempDir(testutil.TmpDir(), "root") - if err != nil { - t.Fatal("ioutil.TempDir() failed:", err) - } + srcDir := path.Join(root, "src", "dir", "anotherdir") + if err := os.MkdirAll(srcDir, 0755); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err) + } - srcDir := path.Join(root, "src", "dir", "anotherdir") - if err := os.MkdirAll(srcDir, 0755); err != nil { - t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err) - } + mountDir := path.Join(root, "dir", "anotherdir") - mountDir := path.Join(root, "dir", "anotherdir") + spec := testutil.NewSpecWithArgs("/bin/ls", mountDir) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: mountDir, + Source: srcDir, + Type: "bind", + }) - spec := testutil.NewSpecWithArgs("/bin/ls", mountDir) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: mountDir, - Source: srcDir, - Type: "bind", + if err := run(spec, conf); err != nil { + t.Fatalf("error running sandbox: %v", err) + } }) - - if err := run(spec, conf); err != nil { - t.Fatalf("error running sandbox: %v", err) - } } } func TestReadonlyRoot(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - spec := testutil.NewSpecWithArgs("/bin/touch", "/foo") - spec.Root.Readonly = true - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configsWithVFS2(t, []configOption{overlay}) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("/bin/touch", "/foo") + spec.Root.Readonly = true + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create, start and wait for the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create, start and wait for the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { - t.Fatalf("container failed, waitStatus: %v", ws) - } + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { + t.Fatalf("container failed, waitStatus: %v", ws) + } + }) } } func TestUIDMap(t *testing.T) { - for _, conf := range configs(noOverlay...) 
{ - t.Logf("Running test with conf: %+v", conf) - testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(testDir) - testFile := path.Join(testDir, "testfile") - - spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile") - uid := os.Getuid() - gid := os.Getgid() - spec.Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - {Type: specs.UserNamespace}, - {Type: specs.PIDNamespace}, - {Type: specs.MountNamespace}, - }, - UIDMappings: []specs.LinuxIDMapping{ - { - ContainerID: 0, - HostID: uint32(uid), - Size: 1, + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + defer os.RemoveAll(testDir) + testFile := path.Join(testDir, "testfile") + + spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile") + uid := os.Getuid() + gid := os.Getgid() + spec.Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + {Type: specs.UserNamespace}, + {Type: specs.PIDNamespace}, + {Type: specs.MountNamespace}, }, - }, - GIDMappings: []specs.LinuxIDMapping{ - { - ContainerID: 0, - HostID: uint32(gid), - Size: 1, + UIDMappings: []specs.LinuxIDMapping{ + { + ContainerID: 0, + HostID: uint32(uid), + Size: 1, + }, }, - }, - } + GIDMappings: []specs.LinuxIDMapping{ + { + ContainerID: 0, + HostID: uint32(gid), + Size: 1, + }, + }, + } - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp", - Source: testDir, - Type: "bind", - }) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp", + Source: testDir, + Type: "bind", + }) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create, start and wait for the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create, start and wait for the container. 
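
The LinuxIDMapping entries above are the whole trick of TestUIDMap: a single-entry map makes container root the invoking host user, so a file the container touches lands on the host owned by that user, which the final Stat asserts. The spec construction, extracted (types from runtime-spec, as in the test):

    package sketch

    import (
    	"os"

    	specs "github.com/opencontainers/runtime-spec/specs-go"
    )

    // selfRootMapping builds the one-entry UID/GID maps used above:
    // container root (ID 0) maps to the invoking user, inside fresh user,
    // PID, and mount namespaces.
    func selfRootMapping() *specs.Linux {
    	return &specs.Linux{
    		Namespaces: []specs.LinuxNamespace{
    			{Type: specs.UserNamespace},
    			{Type: specs.PIDNamespace},
    			{Type: specs.MountNamespace},
    		},
    		UIDMappings: []specs.LinuxIDMapping{
    			{ContainerID: 0, HostID: uint32(os.Getuid()), Size: 1},
    		},
    		GIDMappings: []specs.LinuxIDMapping{
    			{ContainerID: 0, HostID: uint32(os.Getgid()), Size: 1},
    		},
    	}
    }
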
+ args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if !ws.Exited() || ws.ExitStatus() != 0 { - t.Fatalf("container failed, waitStatus: %v", ws) - } - st := syscall.Stat_t{} - if err := syscall.Stat(testFile, &st); err != nil { - t.Fatalf("error stat /testfile: %v", err) - } + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if !ws.Exited() || ws.ExitStatus() != 0 { + t.Fatalf("container failed, waitStatus: %v", ws) + } + st := syscall.Stat_t{} + if err := syscall.Stat(testFile, &st); err != nil { + t.Fatalf("error stat /testfile: %v", err) + } - if st.Uid != uint32(uid) || st.Gid != uint32(gid) { - t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid) - } + if st.Uid != uint32(uid) || st.Gid != uint32(gid) { + t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid) + } + }) } } func TestReadonlyMount(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount") - spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: dir, - Source: dir, - Type: "bind", - Options: []string{"ro"}, - }) - spec.Root.Readonly = false - - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configsWithVFS2(t, []configOption{overlay}) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount") + spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: dir, + Type: "bind", + Options: []string{"ro"}, + }) + spec.Root.Readonly = false + + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create, start and wait for the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create, start and wait for the container. 
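+			// touch is expected to fail here: the bind mount above is
+			// read-only, so the wait status checked below is 1, i.e. EPERM.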
+ args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { - t.Fatalf("container failed, waitStatus: %v", ws) - } + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { + t.Fatalf("container failed, waitStatus: %v", ws) + } + }) } } // TestAbbreviatedIDs checks that runsc supports using abbreviated container // IDs in place of full IDs. func TestAbbreviatedIDs(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + doAbbreviatedIDsTest(t, false) +} + +func TestAbbreviatedIDsVFS2(t *testing.T) { + doAbbreviatedIDsTest(t, true) +} + +func doAbbreviatedIDsTest(t *testing.T, vfs2 bool) { + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() - conf := testutil.TestConfigWithRoot(rootDir) + conf := testutil.TestConfig(t) + conf.RootDir = rootDir + conf.VFS2 = vfs2 cids := []string{ - "foo-" + testutil.UniqueContainerID(), - "bar-" + testutil.UniqueContainerID(), - "baz-" + testutil.UniqueContainerID(), + "foo-" + testutil.RandomContainerID(), + "bar-" + testutil.RandomContainerID(), + "baz-" + testutil.RandomContainerID(), } for _, cid := range cids { spec := testutil.NewSpecWithArgs("sleep", "100") - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ @@ -1604,18 +1610,27 @@ func TestAbbreviatedIDs(t *testing.T) { } func TestGoferExits(t *testing.T) { + doGoferExitTest(t, false) +} + +func TestGoferExitsVFS2(t *testing.T) { + doGoferExitTest(t, true) +} + +func doGoferExitTest(t *testing.T, vfs2 bool) { spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + conf.VFS2 = vfs2 + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. 
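+	// The gofer is the separate host process that serves the container's
+	// filesystem to the sandbox; the test ensures it exits once the sandbox
+	// process itself is gone.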
args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1644,7 +1659,7 @@ func TestGoferExits(t *testing.T) { } func TestRootNotMount(t *testing.T) { - appSym, err := testutil.FindFile("runsc/container/test_app/test_app") + appSym, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -1674,27 +1689,26 @@ func TestRootNotMount(t *testing.T) { spec.Root.Readonly = true spec.Mounts = nil - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) if err := run(spec, conf); err != nil { t.Fatalf("error running sandbox: %v", err) } } func TestUserLog(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } // sched_rr_get_interval = 148 - not implemented in gvisor. spec := testutil.NewSpecWithArgs(app, "syscall", "--syscall=148") - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() dir, err := ioutil.TempDir(testutil.TmpDir(), "user_log_test") if err != nil { @@ -1704,7 +1718,7 @@ func TestUserLog(t *testing.T) { // Create, start and wait for the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, UserLog: userLog, @@ -1728,72 +1742,79 @@ func TestUserLog(t *testing.T) { } func TestWaitOnExitedSandbox(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - // Run a shell that sleeps for 1 second and then exits with a - // non-zero code. - const wantExit = 17 - cmd := fmt.Sprintf("sleep 1; exit %d", wantExit) - spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configsWithVFS2(t, all) { + t.Run(name, func(t *testing.T) { + // Run a shell that sleeps for 1 second and then exits with a + // non-zero code. + const wantExit = 17 + cmd := fmt.Sprintf("sleep 1; exit %d", wantExit) + spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and Start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and Start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Wait on the sandbox. This will make an RPC to the sandbox - // and get the actual exit status of the application. 
-		ws, err := c.Wait()
-		if err != nil {
-			t.Fatalf("error waiting on container: %v", err)
-		}
-		if got := ws.ExitStatus(); got != wantExit {
-			t.Errorf("got exit status %d, want %d", got, wantExit)
-		}
+			// Wait on the sandbox. This will make an RPC to the sandbox
+			// and get the actual exit status of the application.
+			ws, err := c.Wait()
+			if err != nil {
+				t.Fatalf("error waiting on container: %v", err)
+			}
+			if got := ws.ExitStatus(); got != wantExit {
+				t.Errorf("got exit status %d, want %d", got, wantExit)
+			}

-		// Now the sandbox has exited, but the zombie sandbox process
-		// still exists. Calling Wait() now will return the sandbox
-		// exit status.
-		ws, err = c.Wait()
-		if err != nil {
-			t.Fatalf("error waiting on container: %v", err)
-		}
-		if got := ws.ExitStatus(); got != wantExit {
-			t.Errorf("got exit status %d, want %d", got, wantExit)
-		}
+			// Now the sandbox has exited, but the zombie sandbox process
+			// still exists. Calling Wait() now will return the sandbox
+			// exit status.
+			ws, err = c.Wait()
+			if err != nil {
+				t.Fatalf("error waiting on container: %v", err)
+			}
+			if got := ws.ExitStatus(); got != wantExit {
+				t.Errorf("got exit status %d, want %d", got, wantExit)
+			}
+		})
 	}
 }

 func TestDestroyNotStarted(t *testing.T) {
+	doDestroyNotStartedTest(t, false)
+}
+
+func TestDestroyNotStartedVFS2(t *testing.T) {
+	doDestroyNotStartedTest(t, true)
+}
+
+func doDestroyNotStartedTest(t *testing.T, vfs2 bool) {
 	spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
-	conf := testutil.TestConfig()
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	conf := testutil.TestConfig(t)
+	conf.VFS2 = vfs2
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()

 	// Create the container and check that it can be destroyed.
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 	}
@@ -1808,19 +1829,27 @@ func TestDestroyNotStarted(t *testing.T) {

 // TestDestroyStarting attempts to force a race between start and destroy.
 func TestDestroyStarting(t *testing.T) {
+	doDestroyStartingTest(t, false)
+}
+
+func TestDestroyStartingVFS2(t *testing.T) {
+	doDestroyStartingTest(t, true)
+}
+
+func doDestroyStartingTest(t *testing.T, vfs2 bool) {
 	for i := 0; i < 10; i++ {
 		spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
-		conf := testutil.TestConfig()
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+		conf := testutil.TestConfig(t)
+		conf.VFS2 = vfs2
+		rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 		if err != nil {
 			t.Fatalf("error setting up container: %v", err)
 		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+		defer cleanup()

 		// Create the container and check that it can be destroyed.
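+		// Each of the ten iterations builds a fresh container so that Start
+		// and Destroy can race, per the doc comment above.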
args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1855,23 +1884,23 @@ func TestDestroyStarting(t *testing.T) { } func TestCreateWorkingDir(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - dir := path.Join(tmpDir, "new/working/dir") + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + dir := path.Join(tmpDir, "new/working/dir") - // touch will fail if the directory doesn't exist. - spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) - spec.Process.Cwd = dir - spec.Root.Readonly = true + // touch will fail if the directory doesn't exist. + spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) + spec.Process.Cwd = dir + spec.Root.Readonly = true - if err := run(spec, conf); err != nil { - t.Fatalf("Error running container: %v", err) - } + if err := run(spec, conf); err != nil { + t.Fatalf("Error running container: %v", err) + } + }) } } @@ -1928,16 +1957,15 @@ func TestMountPropagation(t *testing.T) { }, } - conf := testutil.TestConfig() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1979,87 +2007,87 @@ func TestMountPropagation(t *testing.T) { } func TestMountSymlink(t *testing.T) { - for _, conf := range configs(overlay) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + defer os.RemoveAll(dir) - source := path.Join(dir, "source") - target := path.Join(dir, "target") - for _, path := range []string{source, target} { - if err := os.MkdirAll(path, 0777); err != nil { - t.Fatalf("os.MkdirAll(): %v", err) + source := path.Join(dir, "source") + target := path.Join(dir, "target") + for _, path := range []string{source, target} { + if err := os.MkdirAll(path, 0777); err != nil { + t.Fatalf("os.MkdirAll(): %v", err) + } } - } - f, err := os.Create(path.Join(source, "file")) - if err != nil { - t.Fatalf("os.Create(): %v", err) - } - f.Close() + f, err := os.Create(path.Join(source, "file")) + if err != nil { + t.Fatalf("os.Create(): %v", err) + } + f.Close() - link := path.Join(dir, "link") - if err := os.Symlink(target, link); err != nil { - t.Fatalf("os.Symlink(%q, %q): %v", target, link, err) - } + link := path.Join(dir, "link") + if err := os.Symlink(target, link); err != nil { + t.Fatalf("os.Symlink(%q, %q): %v", target, link, err) + } - spec := testutil.NewSpecWithArgs("/bin/sleep", "1000") + spec := testutil.NewSpecWithArgs("/bin/sleep", "1000") - // Mount to a symlink to ensure the mount code will 
follow it and mount - // at the symlink target. - spec.Mounts = append(spec.Mounts, specs.Mount{ - Type: "bind", - Destination: link, - Source: source, - }) + // Mount to a symlink to ensure the mount code will follow it and mount + // at the symlink target. + spec.Mounts = append(spec.Mounts, specs.Mount{ + Type: "bind", + Destination: link, + Source: source, + }) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("creating container: %v", err) - } - defer cont.Destroy() + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("creating container: %v", err) + } + defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("starting container: %v", err) - } + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } - // Check that symlink was resolved and mount was created where the symlink - // is pointing to. - file := path.Join(target, "file") - execArgs := &control.ExecArgs{ - Filename: "/usr/bin/test", - Argv: []string{"test", "-f", file}, - } - if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { - t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) - } + // Check that symlink was resolved and mount was created where the symlink + // is pointing to. + file := path.Join(target, "file") + execArgs := &control.ExecArgs{ + Filename: "/usr/bin/test", + Argv: []string{"test", "-f", file}, + } + if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { + t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) + } + }) } } // Check that --net-raw disables the CAP_NET_RAW capability. func TestNetRaw(t *testing.T) { capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10) - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } for _, enableRaw := range []bool{true, false} { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.EnableRaw = enableRaw test := "--enabled" @@ -2076,7 +2104,7 @@ func TestNetRaw(t *testing.T) { // TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works. func TestOverlayfsStaleRead(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.OverlayfsStaleRead = true in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in") @@ -2095,7 +2123,7 @@ func TestOverlayfsStaleRead(t *testing.T) { defer out.Close() const want = "foobar" - cmd := fmt.Sprintf("cat %q && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name()) + cmd := fmt.Sprintf("cat %q >&2 && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name()) spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd) if err := run(spec, conf); err != nil { t.Fatalf("Error running container: %v", err) @@ -2111,6 +2139,94 @@ func TestOverlayfsStaleRead(t *testing.T) { } } +// TestTTYField checks TTY field returned by container.Processes(). 
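+// A process with no controlling terminal reports "?" (as ps does), while one
+// started under test_app's pty-runner reports its terminal, e.g. "pts/0".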
+func TestTTYField(t *testing.T) { + stop := testutil.StartReaper() + defer stop() + + testApp, err := testutil.FindFile("test/cmd/test_app/test_app") + if err != nil { + t.Fatal("error finding test_app:", err) + } + + testCases := []struct { + name string + useTTY bool + wantTTYField string + }{ + { + name: "no tty", + useTTY: false, + wantTTYField: "?", + }, + { + name: "tty used", + useTTY: true, + wantTTYField: "pts/0", + }, + } + + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + conf := testutil.TestConfig(t) + + // We will run /bin/sleep, possibly with an open TTY. + cmd := []string{"/bin/sleep", "10000"} + if test.useTTY { + // Run inside the "pty-runner". + cmd = append([]string{testApp, "pty-runner"}, cmd...) + } + + spec := testutil.NewSpecWithArgs(cmd...) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() + + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + + // Wait for sleep to be running, and check the TTY + // field. + var gotTTYField string + cb := func() error { + ps, err := c.Processes() + if err != nil { + err = fmt.Errorf("error getting process data from container: %v", err) + return &backoff.PermanentError{Err: err} + } + for _, p := range ps { + if strings.Contains(p.Cmd, "sleep") { + gotTTYField = p.TTY + return nil + } + } + return fmt.Errorf("sleep not running") + } + if err := testutil.Poll(cb, 30*time.Second); err != nil { + t.Fatalf("error waiting for sleep process: %v", err) + } + + if gotTTYField != test.wantTTYField { + t.Errorf("tty field got %q, want %q", gotTTYField, test.wantTTYField) + } + }) + } +} + // executeSync synchronously executes a new process. func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) { pid, err := cont.Execute(args) diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 9e02a825e..e3704b453 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -22,7 +22,6 @@ import ( "path" "path/filepath" "strings" - "sync" "syscall" "testing" "time" @@ -30,15 +29,16 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" ) func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { var specs []*specs.Spec var ids []string - rootID := testutil.UniqueContainerID() + rootID := testutil.RandomContainerID() for i, cmd := range cmds { spec := testutil.NewSpecWithArgs(cmd...) 
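+		// The spec at index 0 becomes the pod's sandbox (root) container; the
+		// rest are annotated below as sub-containers belonging to rootID.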
@@ -52,7 +52,7 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeContainer, specutils.ContainerdSandboxIDAnnotation: rootID, } - ids = append(ids, testutil.UniqueContainerID()) + ids = append(ids, testutil.RandomContainerID()) } specs = append(specs, spec) } @@ -60,33 +60,33 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { } func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*Container, func(), error) { - // Setup root dir if one hasn't been provided. if len(conf.RootDir) == 0 { - rootDir, err := testutil.SetupRootDir() - if err != nil { - return nil, nil, fmt.Errorf("error creating root dir: %v", err) - } - conf.RootDir = rootDir + panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.") } - var containers []*Container - var bundles []string - cleanup := func() { + var ( + containers []*Container + cleanups []func() + ) + cleanups = append(cleanups, func() { for _, c := range containers { c.Destroy() } - for _, b := range bundles { - os.RemoveAll(b) + }) + cleanupAll := func() { + for _, c := range cleanups { + c() } - os.RemoveAll(conf.RootDir) } + localClean := specutils.MakeCleanup(cleanupAll) + defer localClean.Clean() + for i, spec := range specs { - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { - cleanup() return nil, nil, fmt.Errorf("error setting up container: %v", err) } - bundles = append(bundles, bundleDir) + cleanups = append(cleanups, cleanup) args := Args{ ID: ids[i], @@ -95,17 +95,17 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C } cont, err := New(conf, args) if err != nil { - cleanup() return nil, nil, fmt.Errorf("error creating container: %v", err) } containers = append(containers, cont) if err := cont.Start(conf); err != nil { - cleanup() return nil, nil, fmt.Errorf("error starting container: %v", err) } } - return containers, cleanup, nil + + localClean.Release() + return containers, cleanupAll, nil } type execDesc struct { @@ -129,11 +129,11 @@ func execMany(execs []execDesc) error { func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) { for _, spec := range pod { - spec.Annotations[path.Join(boot.MountPrefix, name, "source")] = mount.Source - spec.Annotations[path.Join(boot.MountPrefix, name, "type")] = mount.Type - spec.Annotations[path.Join(boot.MountPrefix, name, "share")] = "pod" + spec.Annotations[boot.MountPrefix+name+".source"] = mount.Source + spec.Annotations[boot.MountPrefix+name+".type"] = mount.Type + spec.Annotations[boot.MountPrefix+name+".share"] = "pod" if len(mount.Options) > 0 { - spec.Annotations[path.Join(boot.MountPrefix, name, "options")] = strings.Join(mount.Options, ",") + spec.Annotations[boot.MountPrefix+name+".options"] = strings.Join(mount.Options, ",") } } } @@ -141,140 +141,169 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) { // TestMultiContainerSanity checks that it is possible to run 2 dead-simple // containers in the same sandbox. func TestMultiContainerSanity(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) + for name, conf := range configs(t, all...) 
{ + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir - // Setup the containers. - sleep := []string{"sleep", "100"} - specs, ids := createSpecs(sleep, sleep) - containers, cleanup, err := startContainers(conf, specs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + // Setup the containers. + sleep := []string{"sleep", "100"} + specs, ids := createSpecs(sleep, sleep) + containers, cleanup, err := startContainers(conf, specs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep"}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } - expectedPL = []*control.Process{ - {PID: 2, Cmd: "sleep"}, - } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Check via ps that multiple processes are running. + expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + }) } } // TestMultiPIDNS checks that it is possible to run 2 dead-simple // containers in the same sandbox with different pidns. func TestMultiPIDNS(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - // Setup the containers. - sleep := []string{"sleep", "100"} - testSpecs, ids := createSpecs(sleep, sleep) - testSpecs[1].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep) + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + }, }, - }, - } + } - containers, cleanup, err := startContainers(conf, testSpecs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep"}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } - expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep"}, - } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Check via ps that multiple processes are running. 
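+			// Container 1 was given its own PID namespace above, so its sleep
+			// also shows up as PID 1 instead of continuing the sandbox numbering.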
+ expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + }) } } // TestMultiPIDNSPath checks the pidns path. func TestMultiPIDNSPath(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - // Setup the containers. - sleep := []string{"sleep", "100"} - testSpecs, ids := createSpecs(sleep, sleep, sleep) - testSpecs[0].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - Path: "/proc/1/ns/pid", + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep, sleep) + testSpecs[0].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, }, - }, - } - testSpecs[1].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - Path: "/proc/1/ns/pid", + } + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, }, - }, - } - testSpecs[2].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - Path: "/proc/2/ns/pid", + } + testSpecs[2].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/2/ns/pid", + }, }, - }, - } + } - containers, cleanup, err := startContainers(conf, testSpecs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep"}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } - if err := waitForProcessList(containers[2], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Check via ps that multiple processes are running. 
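+			// Containers 0 and 1 joined the same pidns path, so their sleeps
+			// get PIDs 1 and 2 there; container 2 is alone in its own pidns
+			// and sees its sleep as PID 1.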
+			expectedPL := []*control.Process{
+				{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
+			}
+			if err := waitForProcessList(containers[0], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
+			if err := waitForProcessList(containers[2], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}

-		expectedPL = []*control.Process{
-			{PID: 2, Cmd: "sleep"},
-		}
-		if err := waitForProcessList(containers[1], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
+			expectedPL = []*control.Process{
+				{PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}},
+			}
+			if err := waitForProcessList(containers[1], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
+		})
 	}
 }

 func TestMultiContainerWait(t *testing.T) {
+	rootDir, cleanup, err := testutil.SetupRootDir()
+	if err != nil {
+		t.Fatalf("error creating root dir: %v", err)
+	}
+	defer cleanup()
+
+	conf := testutil.TestConfig(t)
+	conf.RootDir = rootDir
+
 	// The first container should run the entire duration of the test.
 	cmd1 := []string{"sleep", "100"}
 	// We'll wait on the second container, which is much shorter lived.
 	cmd2 := []string{"sleep", "1"}
 	specs, ids := createSpecs(cmd1, cmd2)
-	conf := testutil.TestConfig()
 	containers, cleanup, err := startContainers(conf, specs, ids)
 	if err != nil {
 		t.Fatalf("error starting containers: %v", err)
@@ -283,7 +312,7 @@ func TestMultiContainerWait(t *testing.T) {

 	// Check via ps that multiple processes are running.
 	expectedPL := []*control.Process{
-		{PID: 2, Cmd: "sleep"},
+		{PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}},
 	}
 	if err := waitForProcessList(containers[1], expectedPL); err != nil {
 		t.Errorf("failed to wait for sleep to start: %v", err)
@@ -328,7 +357,7 @@ func TestMultiContainerWait(t *testing.T) {

 	// After Wait returns, ensure that the root container is running and
 	// the child has finished.
 	expectedPL = []*control.Process{
-		{PID: 1, Cmd: "sleep"},
+		{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
 	}
 	if err := waitForProcessList(containers[0], expectedPL); err != nil {
 		t.Errorf("failed to wait for %q to start: %v", strings.Join(containers[0].Spec.Process.Args, " "), err)
@@ -338,18 +367,20 @@ func TestMultiContainerWait(t *testing.T) {

 // TestExecWait ensures that we can wait on containers and individual processes in the
 // sandbox that have already exited.
 func TestExecWait(t *testing.T) {
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
+
+	conf := testutil.TestConfig(t)
+	conf.RootDir = rootDir

 	// The first container should run the entire duration of the test.
 	cmd1 := []string{"sleep", "100"}
 	// We'll wait on the second container, which is much shorter lived.
 	cmd2 := []string{"sleep", "1"}
 	specs, ids := createSpecs(cmd1, cmd2)
-	conf := testutil.TestConfig()
 	containers, cleanup, err := startContainers(conf, specs, ids)
 	if err != nil {
 		t.Fatalf("error starting containers: %v", err)
@@ -358,7 +389,7 @@ func TestExecWait(t *testing.T) {

 	// Check via ps that process is running.
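+	// Threads lists the kernel thread IDs behind each expected entry; a
+	// single-threaded sleep has exactly one, matching its PID.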
expectedPL := []*control.Process{ - {PID: 2, Cmd: "sleep"}, + {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, } if err := waitForProcessList(containers[1], expectedPL); err != nil { t.Fatalf("failed to wait for sleep to start: %v", err) @@ -393,7 +424,7 @@ func TestExecWait(t *testing.T) { // Wait for the exec'd process to exit. expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep"}, + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, } if err := waitForProcessList(containers[0], expectedPL); err != nil { t.Fatalf("failed to wait for second container to stop: %v", err) @@ -432,7 +463,15 @@ func TestMultiContainerMount(t *testing.T) { }) // Setup the containers. - conf := testutil.TestConfig() + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + + conf := testutil.TestConfig(t) + conf.RootDir = rootDir + containers, cleanup, err := startContainers(conf, sps, ids) if err != nil { t.Fatalf("error starting containers: %v", err) @@ -451,161 +490,184 @@ func TestMultiContainerMount(t *testing.T) { // TestMultiContainerSignal checks that it is possible to signal individual // containers without killing the entire sandbox. func TestMultiContainerSignal(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir - // Setup the containers. - sleep := []string{"sleep", "100"} - specs, ids := createSpecs(sleep, sleep) - containers, cleanup, err := startContainers(conf, specs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + // Setup the containers. + sleep := []string{"sleep", "100"} + specs, ids := createSpecs(sleep, sleep) + containers, cleanup, err := startContainers(conf, specs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that container 1 process is running. - expectedPL := []*control.Process{ - {PID: 2, Cmd: "sleep"}, - } + // Check via ps that container 1 process is running. + expectedPL := []*control.Process{ + {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - // Kill process 2. - if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil { - t.Errorf("failed to kill process 2: %v", err) - } + // Kill process 2. + if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil { + t.Errorf("failed to kill process 2: %v", err) + } - // Make sure process 1 is still running. - expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep"}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Make sure process 1 is still running. + expectedPL = []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - // goferPid is reset when container is destroyed. 
- goferPid := containers[1].GoferPid + // goferPid is reset when container is destroyed. + goferPid := containers[1].GoferPid - // Destroy container and ensure container's gofer process has exited. - if err := containers[1].Destroy(); err != nil { - t.Errorf("failed to destroy container: %v", err) - } - _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) { - cpid, err := syscall.Wait4(goferPid, nil, 0, nil) - return uintptr(cpid), 0, err - }) - if err != syscall.ECHILD { - t.Errorf("error waiting for gofer to exit: %v", err) - } - // Make sure process 1 is still running. - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Destroy container and ensure container's gofer process has exited. + if err := containers[1].Destroy(); err != nil { + t.Errorf("failed to destroy container: %v", err) + } + _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) { + cpid, err := syscall.Wait4(goferPid, nil, 0, nil) + return uintptr(cpid), 0, err + }) + if err != syscall.ECHILD { + t.Errorf("error waiting for gofer to exit: %v", err) + } + // Make sure process 1 is still running. + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - // Now that process 2 is gone, ensure we get an error trying to - // signal it again. - if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil { - t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID) - } + // Now that process 2 is gone, ensure we get an error trying to + // signal it again. + if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil { + t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID) + } - // Kill process 1. - if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil { - t.Errorf("failed to kill process 1: %v", err) - } + // Kill process 1. + if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil { + t.Errorf("failed to kill process 1: %v", err) + } - // Ensure that container's gofer and sandbox process are no more. - err = blockUntilWaitable(containers[0].GoferPid) - if err != nil && err != syscall.ECHILD { - t.Errorf("error waiting for gofer to exit: %v", err) - } + // Ensure that container's gofer and sandbox process are no more. + err = blockUntilWaitable(containers[0].GoferPid) + if err != nil && err != syscall.ECHILD { + t.Errorf("error waiting for gofer to exit: %v", err) + } - err = blockUntilWaitable(containers[0].Sandbox.Pid) - if err != nil && err != syscall.ECHILD { - t.Errorf("error waiting for sandbox to exit: %v", err) - } + err = blockUntilWaitable(containers[0].Sandbox.Pid) + if err != nil && err != syscall.ECHILD { + t.Errorf("error waiting for sandbox to exit: %v", err) + } - // The sentry should be gone, so signaling should yield an error. - if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil { - t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID) - } + // The sentry should be gone, so signaling should yield an error. 
+			if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil {
+				t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID)
+			}

-		if err := containers[0].Destroy(); err != nil {
-			t.Errorf("failed to destroy container: %v", err)
-		}
+			if err := containers[0].Destroy(); err != nil {
+				t.Errorf("failed to destroy container: %v", err)
+			}
+		})
 	}
 }

 // TestMultiContainerDestroy checks that containers are properly cleaned up when
 // they are destroyed.
 func TestMultiContainerDestroy(t *testing.T) {
-	app, err := testutil.FindFile("runsc/container/test_app/test_app")
+	app, err := testutil.FindFile("test/cmd/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
 	}

-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		// First container will remain intact while the second container is killed.
-		podSpecs, ids := createSpecs(
-			[]string{"sleep", "100"},
-			[]string{app, "fork-bomb"})
-
-		// Run the fork bomb in a PID namespace to prevent processes to be
-		// re-parented to PID=1 in the root container.
-		podSpecs[1].Linux = &specs.Linux{
-			Namespaces: []specs.LinuxNamespace{{Type: "pid"}},
-		}
-		containers, cleanup, err := startContainers(conf, podSpecs, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+	for name, conf := range configs(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// First container will remain intact while the second container is killed.
+			podSpecs, ids := createSpecs(
+				[]string{"sleep", "100"},
+				[]string{app, "fork-bomb"})
+
+			// Run the fork bomb in a PID namespace to prevent processes from being
+			// re-parented to PID=1 in the root container.
+			podSpecs[1].Linux = &specs.Linux{
+				Namespaces: []specs.LinuxNamespace{{Type: "pid"}},
+			}
+			containers, cleanup, err := startContainers(conf, podSpecs, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()

-		// Exec more processes to ensure signal all works for exec'd processes too.
-		args := &control.ExecArgs{
-			Filename: app,
-			Argv:     []string{app, "fork-bomb"},
-		}
-		if _, err := containers[1].Execute(args); err != nil {
-			t.Fatalf("error exec'ing: %v", err)
-		}
+			// Exec more processes to ensure signal all works for exec'd processes too.
+			args := &control.ExecArgs{
+				Filename: app,
+				Argv:     []string{app, "fork-bomb"},
+			}
+			if _, err := containers[1].Execute(args); err != nil {
+				t.Fatalf("error exec'ing: %v", err)
+			}

-		// Let it brew...
-		time.Sleep(500 * time.Millisecond)
+			// Let it brew...
+			time.Sleep(500 * time.Millisecond)

-		if err := containers[1].Destroy(); err != nil {
-			t.Fatalf("error destroying container: %v", err)
-		}
+			if err := containers[1].Destroy(); err != nil {
+				t.Fatalf("error destroying container: %v", err)
+			}

-		// Check that destroy killed all processes belonging to the container and
-		// waited for them to exit before returning.
-		pss, err := containers[0].Sandbox.Processes("")
-		if err != nil {
-			t.Fatalf("error getting process data from sandbox: %v", err)
-		}
-		expectedPL := []*control.Process{{PID: 1, Cmd: "sleep"}}
-		if !procListsEqual(pss, expectedPL) {
-			t.Errorf("container got process list: %s, want: %s", procListToString(pss), procListToString(expectedPL))
-		}
+			// Check that destroy killed all processes belonging to the container and
+			// waited for them to exit before returning.
+			pss, err := containers[0].Sandbox.Processes("")
+			if err != nil {
+				t.Fatalf("error getting process data from sandbox: %v", err)
+			}
+			expectedPL := []*control.Process{{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}}
+			if r, err := procListsEqual(pss, expectedPL); !r {
+				t.Errorf("container got process list: %s, want: %s: error: %v",
+					procListToString(pss), procListToString(expectedPL), err)
+			}

-		// Check that cont.Destroy is safe to call multiple times.
-		if err := containers[1].Destroy(); err != nil {
-			t.Errorf("error destroying container: %v", err)
-		}
+			// Check that cont.Destroy is safe to call multiple times.
+			if err := containers[1].Destroy(); err != nil {
+				t.Errorf("error destroying container: %v", err)
+			}
+		})
 	}
 }

 func TestMultiContainerProcesses(t *testing.T) {
+	rootDir, cleanup, err := testutil.SetupRootDir()
+	if err != nil {
+		t.Fatalf("error creating root dir: %v", err)
+	}
+	defer cleanup()
+
+	conf := testutil.TestConfig(t)
+	conf.RootDir = rootDir
+
 	// Note: use curly braces to keep 'sh' process around. Otherwise, shell
 	// will just execve into 'sleep' and both containers will look the
 	// same.
 	specs, ids := createSpecs(
 		[]string{"sleep", "100"},
 		[]string{"sh", "-c", "{ sleep 100; }"})
-	conf := testutil.TestConfig()
 	containers, cleanup, err := startContainers(conf, specs, ids)
 	if err != nil {
 		t.Fatalf("error starting containers: %v", err)
@@ -614,7 +676,7 @@ func TestMultiContainerProcesses(t *testing.T) {

 	// Check root's container process list doesn't include other containers.
 	expectedPL0 := []*control.Process{
-		{PID: 1, Cmd: "sleep"},
+		{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
 	}
 	if err := waitForProcessList(containers[0], expectedPL0); err != nil {
 		t.Errorf("failed to wait for process to start: %v", err)
@@ -622,8 +684,8 @@ func TestMultiContainerProcesses(t *testing.T) {

 	// Same for the other container.
 	expectedPL1 := []*control.Process{
-		{PID: 2, Cmd: "sh"},
-		{PID: 3, PPID: 2, Cmd: "sleep"},
+		{PID: 2, Cmd: "sh", Threads: []kernel.ThreadID{2}},
+		{PID: 3, PPID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{3}},
 	}
 	if err := waitForProcessList(containers[1], expectedPL1); err != nil {
 		t.Errorf("failed to wait for process to start: %v", err)
@@ -637,7 +699,7 @@ func TestMultiContainerProcesses(t *testing.T) {

 	if _, err := containers[1].Execute(args); err != nil {
 		t.Fatalf("error exec'ing: %v", err)
 	}
-	expectedPL1 = append(expectedPL1, &control.Process{PID: 4, Cmd: "sleep"})
+	expectedPL1 = append(expectedPL1, &control.Process{PID: 4, Cmd: "sleep", Threads: []kernel.ThreadID{4}})
 	if err := waitForProcessList(containers[1], expectedPL1); err != nil {
 		t.Errorf("failed to wait for process to start: %v", err)
 	}
@@ -650,13 +712,22 @@ func TestMultiContainerProcesses(t *testing.T) {

 // TestMultiContainerKillAll checks that all processes that belong to a container
 // are killed when SIGKILL is sent to *all* processes in that container.
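+// Each container runs test_app task-tree, which forks a tree of processes;
+// with width 2, a run at depth d produces 2^(d+1)-1 processes in total, which
+// is where the math.Pow process counts below come from.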
 func TestMultiContainerKillAll(t *testing.T) {
+	rootDir, cleanup, err := testutil.SetupRootDir()
+	if err != nil {
+		t.Fatalf("error creating root dir: %v", err)
+	}
+	defer cleanup()
+
+	conf := testutil.TestConfig(t)
+	conf.RootDir = rootDir
+
 	for _, tc := range []struct {
 		killContainer bool
 	}{
 		{killContainer: true},
 		{killContainer: false},
 	} {
-		app, err := testutil.FindFile("runsc/container/test_app/test_app")
+		app, err := testutil.FindFile("test/cmd/test_app/test_app")
 		if err != nil {
 			t.Fatal("error finding test_app:", err)
 		}
@@ -665,7 +736,6 @@ func TestMultiContainerKillAll(t *testing.T) {
 		specs, ids := createSpecs(
 			[]string{app, "task-tree", "--depth=2", "--width=2"},
 			[]string{app, "task-tree", "--depth=4", "--width=2"})
-		conf := testutil.TestConfig()
 		containers, cleanup, err := startContainers(conf, specs, ids)
 		if err != nil {
 			t.Fatalf("error starting containers: %v", err)
@@ -675,11 +745,11 @@ func TestMultiContainerKillAll(t *testing.T) {
 		// Wait until all processes are created.
 		rootProcCount := int(math.Pow(2, 3) - 1)
 		if err := waitForProcessCount(containers[0], rootProcCount); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waiting for processes: %v", err)
 		}
 		procCount := int(math.Pow(2, 5) - 1)
 		if err := waitForProcessCount(containers[1], procCount); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waiting for processes: %v", err)
 		}

 		// Exec more processes to ensure signal works for exec'd processes too.
@@ -693,7 +763,7 @@ func TestMultiContainerKillAll(t *testing.T) {
 		// Wait for these new processes to start.
 		procCount += int(math.Pow(2, 3) - 1)
 		if err := waitForProcessCount(containers[1], procCount); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waiting for processes: %v", err)
 		}

 		if tc.killContainer {
@@ -726,11 +796,11 @@ func TestMultiContainerKillAll(t *testing.T) {
 		// Check that all processes are gone.
 		if err := waitForProcessCount(containers[1], 0); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waiting for processes: %v", err)
 		}
 		// Check that root container was not affected.
 		if err := waitForProcessCount(containers[0], rootProcCount); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waiting for processes: %v", err)
 		}
 	}
 }

@@ -739,25 +809,18 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
 	specs, ids := createSpecs(
 		[]string{"/bin/sleep", "100"},
 		[]string{"/bin/sleep", "100"})
-	rootDir, err := testutil.SetupRootDir()
-	if err != nil {
-		t.Fatalf("error creating root dir: %v", err)
-	}
-	defer os.RemoveAll(rootDir)
-
-	conf := testutil.TestConfigWithRoot(rootDir)
-
-	// Create and start root container.
-	rootBundleDir, err := testutil.SetupBundleDir(specs[0])
+	conf := testutil.TestConfig(t)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootBundleDir)
+	defer cleanup()

 	rootArgs := Args{
 		ID:        ids[0],
 		Spec:      specs[0],
-		BundleDir: rootBundleDir,
+		BundleDir: bundleDir,
 	}
 	root, err := New(conf, rootArgs)
 	if err != nil {
@@ -769,11 +832,11 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
 	}

 	// Create and destroy sub-container.
-	bundleDir, err := testutil.SetupBundleDir(specs[1])
+	bundleDir, cleanupSub, err := testutil.SetupBundleDir(specs[1])
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(bundleDir)
+	defer cleanupSub()

 	args := Args{
 		ID:        ids[1],
@@ -800,25 +863,17 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
 	}
 	specs, ids := createSpecs(cmds...)
- rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - - conf := testutil.TestConfigWithRoot(rootDir) - - // Create and start root container. - rootBundleDir, err := testutil.SetupBundleDir(specs[0]) + conf := testutil.TestConfig(t) + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootBundleDir) + defer cleanup() rootArgs := Args{ ID: ids[0], Spec: specs[0], - BundleDir: rootBundleDir, + BundleDir: bundleDir, } root, err := New(conf, rootArgs) if err != nil { @@ -835,16 +890,16 @@ func TestMultiContainerDestroyStarting(t *testing.T) { continue // skip root container } - bundleDir, err := testutil.SetupBundleDir(specs[i]) + bundleDir, cleanup, err := testutil.SetupBundleDir(specs[i]) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() rootArgs := Args{ ID: ids[i], Spec: specs[i], - BundleDir: rootBundleDir, + BundleDir: bundleDir, } cont, err := New(conf, rootArgs) if err != nil { @@ -886,9 +941,17 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) { script := fmt.Sprintf("if [ -f %q ]; then exit 1; else touch %q; fi", filename, filename) cmd := []string{"sh", "-c", script} + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + + conf := testutil.TestConfig(t) + conf.RootDir = rootDir + // Make sure overlay is enabled, and none of the root filesystems are // read-only, otherwise we won't be able to create the file. - conf := testutil.TestConfig() conf.Overlay = true specs, ids := createSpecs(cmdRoot, cmd, cmd) for _, s := range specs { @@ -918,7 +981,7 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) { // TestMultiContainerContainerDestroyStress tests that IO operations continue // to work after containers have been stopped and gofers killed. func TestMultiContainerContainerDestroyStress(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -941,26 +1004,20 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { } allSpecs, allIDs := createSpecs(cmds...) - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - // Split up the specs and IDs. rootSpec := allSpecs[0] rootID := allIDs[0] childrenSpecs := allSpecs[1:] childrenIDs := allIDs[1:] - bundleDir, err := testutil.SetupBundleDir(rootSpec) + conf := testutil.TestConfig(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(rootSpec, conf) if err != nil { - t.Fatalf("error setting up bundle dir: %v", err) + t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() // Start root container. 
- conf := testutil.TestConfigWithRoot(rootDir) rootArgs := Args{ ID: rootID, Spec: rootSpec, @@ -984,11 +1041,11 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { var children []*Container for j, spec := range specs { - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() args := Args{ ID: ids[j], @@ -1026,282 +1083,309 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { // Test that pod shared mounts are properly mounted in 2 containers and that // changes from one container is reflected in the other. func TestMultiContainerSharedMount(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - // Setup the containers. - sleep := []string{"sleep", "100"} - podSpec, ids := createSpecs(sleep, sleep) - mnt0 := specs.Mount{ - Destination: "/mydir/test", - Source: "/some/dir", - Type: "tmpfs", - Options: nil, - } - podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: nil, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) - mnt1 := mnt0 - mnt1.Destination = "/mydir2/test2" - podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) - createSharedMount(mnt0, "test-mount", podSpec...) + createSharedMount(mnt0, "test-mount", podSpec...) 
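+			// createSharedMount stamps each spec with boot.MountPrefix
+			// annotations (source, type, share=pod), so the sandbox backs both
+			// mount points with the same shared tmpfs.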
- containers, cleanup, err := startContainers(conf, podSpec, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - file0 := path.Join(mnt0.Destination, "abc") - file1 := path.Join(mnt1.Destination, "abc") - execs := []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, - desc: "directory is mounted in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, - desc: "directory is mounted in container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/touch", file0}, - desc: "create file in container0", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-f", file0}, - desc: "file appears in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-f", file1}, - desc: "file appears in container1", - }, - { - c: containers[1], - cmd: []string{"/bin/rm", file1}, - desc: "file removed from container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "!", "-f", file0}, - desc: "file removed from container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "!", "-f", file1}, - desc: "file removed from container1", - }, - { - c: containers[1], - cmd: []string{"/bin/mkdir", file1}, - desc: "create directory in container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-d", file0}, - desc: "dir appears in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-d", file1}, - desc: "dir appears in container1", - }, - { - c: containers[0], - cmd: []string{"/bin/rmdir", file0}, - desc: "create directory in container0", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "!", "-d", file0}, - desc: "dir removed from container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "!", "-d", file1}, - desc: "dir removed from container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, + desc: "directory is mounted in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, + desc: "directory is mounted in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + desc: "create file in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file appears in container1", + }, + { + c: containers[1], + cmd: []string{"/bin/rm", file1}, + desc: "file removed from container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-f", file0}, + desc: "file removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-f", file1}, + desc: "file removed from container1", + }, + { + c: containers[1], + cmd: []string{"/bin/mkdir", file1}, + desc: "create directory in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", file0}, + desc: "dir appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", file1}, + desc: "dir appears in container1", + }, + { + c: containers[0], + 
cmd:  []string{"/bin/rmdir", file0},
+					desc: "remove directory from container0",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "!", "-d", file0},
+					desc: "dir removed from container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "!", "-d", file1},
+					desc: "dir removed from container1",
+				},
+			}
+			if err := execMany(execs); err != nil {
+				t.Fatal(err.Error())
+			}
+		})
 	}
 }
 
 // Test that pod mounts are mounted as readonly when requested.
 func TestMultiContainerSharedMountReadonly(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		// Setup the containers.
-		sleep := []string{"sleep", "100"}
-		podSpec, ids := createSpecs(sleep, sleep)
-		mnt0 := specs.Mount{
-			Destination: "/mydir/test",
-			Source:      "/some/dir",
-			Type:        "tmpfs",
-			Options:     []string{"ro"},
-		}
-		podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+	for name, conf := range configs(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// Setup the containers.
+			sleep := []string{"sleep", "100"}
+			podSpec, ids := createSpecs(sleep, sleep)
+			mnt0 := specs.Mount{
+				Destination: "/mydir/test",
+				Source:      "/some/dir",
+				Type:        "tmpfs",
+				Options:     []string{"ro"},
+			}
+			podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
 
-		mnt1 := mnt0
-		mnt1.Destination = "/mydir2/test2"
-		podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+			mnt1 := mnt0
+			mnt1.Destination = "/mydir2/test2"
+			podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
 
-		createSharedMount(mnt0, "test-mount", podSpec...)
+			createSharedMount(mnt0, "test-mount", podSpec...)
 
-		containers, cleanup, err := startContainers(conf, podSpec, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+			containers, cleanup, err := startContainers(conf, podSpec, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
 
-		file0 := path.Join(mnt0.Destination, "abc")
-		file1 := path.Join(mnt1.Destination, "abc")
-		execs := []execDesc{
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "-d", mnt0.Destination},
-				desc: "directory is mounted in container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "-d", mnt1.Destination},
-				desc: "directory is mounted in container1",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/touch", file0},
-				want: 1,
-				desc: "fails to write to container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/touch", file1},
-				want: 1,
-				desc: "fails to write to container1",
-			},
-		}
-		if err := execMany(execs); err != nil {
-			t.Fatal(err.Error())
-		}
+			file0 := path.Join(mnt0.Destination, "abc")
+			file1 := path.Join(mnt1.Destination, "abc")
+			execs := []execDesc{
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "-d", mnt0.Destination},
+					desc: "directory is mounted in container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "-d", mnt1.Destination},
+					desc: "directory is mounted in container1",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/touch", file0},
+					want: 1,
+					desc: "fails to write to container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/touch", file1},
+					want: 1,
+					desc: "fails to write to container1",
+				},
+			}
+			if err := execMany(execs); err != nil {
+				t.Fatal(err.Error())
+			}
+		})
 	}
 }
 
 // Test that shared pod mounts continue to work after container is restarted.
func TestMultiContainerSharedMountRestart(t *testing.T) { - for _, conf := range configs(all...) { - t.Logf("Running test with conf: %+v", conf) - - // Setup the containers. - sleep := []string{"sleep", "100"} - podSpec, ids := createSpecs(sleep, sleep) - mnt0 := specs.Mount{ - Destination: "/mydir/test", - Source: "/some/dir", - Type: "tmpfs", - Options: nil, - } - podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: nil, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) - mnt1 := mnt0 - mnt1.Destination = "/mydir2/test2" - podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) - createSharedMount(mnt0, "test-mount", podSpec...) + createSharedMount(mnt0, "test-mount", podSpec...) - containers, cleanup, err := startContainers(conf, podSpec, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - file0 := path.Join(mnt0.Destination, "abc") - file1 := path.Join(mnt1.Destination, "abc") - execs := []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/touch", file0}, - desc: "create file in container0", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-f", file0}, - desc: "file appears in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-f", file1}, - desc: "file appears in container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + desc: "create file in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file appears in container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } - containers[1].Destroy() + containers[1].Destroy() - bundleDir, err := testutil.SetupBundleDir(podSpec[1]) - if err != nil { - t.Fatalf("error restarting container: %v", err) - } - defer os.RemoveAll(bundleDir) + bundleDir, cleanup, err := testutil.SetupBundleDir(podSpec[1]) + if err != nil { + t.Fatalf("error restarting container: %v", err) + } + defer cleanup() - args := Args{ - ID: ids[1], - Spec: podSpec[1], - BundleDir: bundleDir, - } - containers[1], err = New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - if err := containers[1].Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + args := Args{ + ID: ids[1], + Spec: podSpec[1], + BundleDir: bundleDir, + } + containers[1], err = New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + if err := containers[1].Start(conf); err != nil { + t.Fatalf("error starting container: 
%v", err) + } - execs = []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-f", file0}, - desc: "file is still in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-f", file1}, - desc: "file is still in container1", - }, - { - c: containers[1], - cmd: []string{"/bin/rm", file1}, - desc: "file removed from container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "!", "-f", file0}, - desc: "file removed from container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "!", "-f", file1}, - desc: "file removed from container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + execs = []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file is still in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file is still in container1", + }, + { + c: containers[1], + cmd: []string{"/bin/rm", file1}, + desc: "file removed from container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-f", file0}, + desc: "file removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-f", file1}, + desc: "file removed from container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + }) } } // Test that unsupported pod mounts options are ignored when matching master and // slave mounts. func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { - conf := testutil.TestConfig() - t.Logf("Running test with conf: %+v", conf) + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + + conf := testutil.TestConfig(t) + conf.RootDir = rootDir // Setup the containers. sleep := []string{"/bin/sleep", "100"} @@ -1347,7 +1431,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { // Test that one container can send an FD to another container, even though // they have distinct MountNamespaces. func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -1376,6 +1460,15 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { Type: "tmpfs", } + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + + conf := testutil.TestConfig(t) + conf.RootDir = rootDir + // Create the specs. specs, ids := createSpecs( []string{"sleep", "1000"}, @@ -1386,7 +1479,6 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { specs[1].Mounts = append(specs[2].Mounts, sharedMnt, writeableMnt) specs[2].Mounts = append(specs[1].Mounts, sharedMnt) - conf := testutil.TestConfig() containers, cleanup, err := startContainers(conf, specs, ids) if err != nil { t.Fatalf("error starting containers: %v", err) @@ -1405,9 +1497,17 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { // Test that container is destroyed when Gofer is killed. 
func TestMultiContainerGoferKilled(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + + conf := testutil.TestConfig(t) + conf.RootDir = rootDir + sleep := []string{"sleep", "100"} specs, ids := createSpecs(sleep, sleep, sleep) - conf := testutil.TestConfig() containers, cleanup, err := startContainers(conf, specs, ids) if err != nil { t.Fatalf("error starting containers: %v", err) @@ -1417,7 +1517,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { // Ensure container is running c := containers[2] expectedPL := []*control.Process{ - {PID: 3, Cmd: "sleep"}, + {PID: 3, Cmd: "sleep", Threads: []kernel.ThreadID{3}}, } if err := waitForProcessList(c, expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) @@ -1445,7 +1545,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { continue // container[2] has been killed. } pl := []*control.Process{ - {PID: kernel.ThreadID(i + 1), Cmd: "sleep"}, + {PID: kernel.ThreadID(i + 1), Cmd: "sleep", Threads: []kernel.ThreadID{kernel.ThreadID(i + 1)}}, } if err := waitForProcessList(c, pl); err != nil { t.Errorf("Container %q was affected by another container: %v", c.ID, err) @@ -1465,7 +1565,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { // Wait until sandbox stops. waitForProcessList will loop until sandbox exits // and RPC errors out. impossiblePL := []*control.Process{ - {PID: 100, Cmd: "non-existent-process"}, + {PID: 100, Cmd: "non-existent-process", Threads: []kernel.ThreadID{100}}, } if err := waitForProcessList(c, impossiblePL); err == nil { t.Fatalf("Sandbox was not killed after gofer death") @@ -1483,7 +1583,15 @@ func TestMultiContainerGoferKilled(t *testing.T) { func TestMultiContainerLoadSandbox(t *testing.T) { sleep := []string{"sleep", "100"} specs, ids := createSpecs(sleep, sleep, sleep) - conf := testutil.TestConfig() + + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + + conf := testutil.TestConfig(t) + conf.RootDir = rootDir // Create containers for the sandbox. wants, cleanup, err := startContainers(conf, specs, ids) @@ -1509,7 +1617,7 @@ func TestMultiContainerLoadSandbox(t *testing.T) { } // Create a valid but empty container directory. 
- randomCID := testutil.UniqueContainerID() + randomCID := testutil.RandomContainerID() dir = filepath.Join(conf.RootDir, randomCID) if err := os.MkdirAll(dir, 0755); err != nil { t.Fatalf("os.MkdirAll(%q)=%v", dir, err) @@ -1576,7 +1684,15 @@ func TestMultiContainerRunNonRoot(t *testing.T) { Type: "bind", }) - conf := testutil.TestConfig() + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + + conf := testutil.TestConfig(t) + conf.RootDir = rootDir + pod, cleanup, err := startContainers(conf, podSpecs, ids) if err != nil { t.Fatalf("error starting containers: %v", err) diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go index dc4194134..bac177a88 100644 --- a/runsc/container/shared_volume_test.go +++ b/runsc/container/shared_volume_test.go @@ -24,16 +24,15 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" - "gvisor.dev/gvisor/runsc/testutil" ) // TestSharedVolume checks that modifications to a volume mount are propagated // into and out of the sandbox. func TestSharedVolume(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.FileAccess = boot.FileAccessShared - t.Logf("Running test with conf: %+v", conf) // Main process just sleeps. We will use "exec" to probe the state of // the filesystem. @@ -44,16 +43,15 @@ func TestSharedVolume(t *testing.T) { t.Fatalf("TempDir failed: %v", err) } - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -190,9 +188,8 @@ func checkFile(c *Container, filename string, want []byte) error { // TestSharedVolumeFile tests that changes to file content outside the sandbox // is reflected inside. func TestSharedVolumeFile(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.FileAccess = boot.FileAccessShared - t.Logf("Running test with conf: %+v", conf) // Main process just sleeps. We will use "exec" to probe the state of // the filesystem. @@ -203,16 +200,15 @@ func TestSharedVolumeFile(t *testing.T) { t.Fatalf("TempDir failed: %v", err) } - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go new file mode 100644 index 000000000..17a251530 --- /dev/null +++ b/runsc/container/state_file.go @@ -0,0 +1,185 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"github.com/gofrs/flock"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sync"
+)
+
+const stateFileExtension = ".state"
+
+// StateFile handles loading and saving container state safely across multiple
+// processes. It uses a lock file to synchronize operations.
+//
+// The lock file is located at: "${s.RootDir}/${s.ID}.lock".
+// The state file is located at: "${s.RootDir}/${s.ID}.state".
+type StateFile struct {
+	// RootDir is the directory containing the container metadata file.
+	RootDir string `json:"rootDir"`
+
+	// ID is the container ID.
+	ID string `json:"id"`
+
+	//
+	// Fields below this line are not saved in the state file and will not
+	// be preserved across commands.
+	//
+
+	once  sync.Once
+	flock *flock.Flock
+}
+
+// List returns all container IDs in the given root directory.
+func List(rootDir string) ([]string, error) {
+	log.Debugf("List containers %q", rootDir)
+	list, err := filepath.Glob(filepath.Join(rootDir, "*"+stateFileExtension))
+	if err != nil {
+		return nil, err
+	}
+	var out []string
+	for _, path := range list {
+		// Filter out files that do not belong to a container.
+		fileName := filepath.Base(path)
+		if len(fileName) < len(stateFileExtension) {
+			panic(fmt.Sprintf("invalid file match %q", path))
+		}
+		// Remove the extension.
+		cid := fileName[:len(fileName)-len(stateFileExtension)]
+		if validateID(cid) == nil {
+			out = append(out, cid)
+		}
+	}
+	return out, nil
+}
+
+// lock globally locks all state file operations for the container.
+func (s *StateFile) lock() error {
+	s.once.Do(func() {
+		s.flock = flock.NewFlock(s.lockPath())
+	})
+
+	if err := s.flock.Lock(); err != nil {
+		return fmt.Errorf("acquiring lock on %q: %v", s.flock, err)
+	}
+	return nil
+}
+
+// lockForNew acquires the lock and checks that the state file doesn't already
+// exist, ensuring that two concurrent creations cannot race to create
+// containers with the same ID.
+func (s *StateFile) lockForNew() error {
+	if err := s.lock(); err != nil {
+		return err
+	}
+
+	// Check if the container already exists by looking for the metadata file.
+	if _, err := os.Stat(s.statePath()); err == nil {
+		s.unlock()
+		return fmt.Errorf("container already exists")
+	} else if !os.IsNotExist(err) {
+		s.unlock()
+		return fmt.Errorf("looking for existing container: %v", err)
+	}
+	return nil
+}
+
+// unlock globally unlocks all state file operations for the container.
+func (s *StateFile) unlock() error {
+	if !s.flock.Locked() {
+		panic("unlock called without lock held")
+	}
+
+	if err := s.flock.Unlock(); err != nil {
+		log.Warningf("Error releasing lock on %q: %v", s.flock, err)
+		return fmt.Errorf("releasing lock on %q: %v", s.flock, err)
+	}
+	return nil
+}
+
+// saveLocked saves 'v' to the state file.
+//
+// Preconditions: lock() must have been called before.
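+//
+// For illustration, a hypothetical caller creating a new container would use
+// the sequence below (rootDir, cid, and metadata are placeholder names, not
+// part of this change):
+//
+//	s := StateFile{RootDir: rootDir, ID: cid}
+//	if err := s.lockForNew(); err != nil {
+//		return err
+//	}
+//	defer s.unlock()
+//	return s.saveLocked(&metadata)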
+func (s *StateFile) saveLocked(v interface{}) error { + if !s.flock.Locked() { + panic("saveLocked called without lock held") + } + + meta, err := json.Marshal(v) + if err != nil { + return err + } + if err := ioutil.WriteFile(s.statePath(), meta, 0640); err != nil { + return fmt.Errorf("writing json file: %v", err) + } + return nil +} + +func (s *StateFile) load(v interface{}) error { + if err := s.lock(); err != nil { + return err + } + defer s.unlock() + + metaBytes, err := ioutil.ReadFile(s.statePath()) + if err != nil { + return err + } + return json.Unmarshal(metaBytes, &v) +} + +func (s *StateFile) close() error { + if s.flock == nil { + return nil + } + if s.flock.Locked() { + panic("Closing locked file") + } + return s.flock.Close() +} + +func buildStatePath(rootDir, id string) string { + return filepath.Join(rootDir, id+stateFileExtension) +} + +// statePath is the full path to the state file. +func (s *StateFile) statePath() string { + return buildStatePath(s.RootDir, s.ID) +} + +// lockPath is the full path to the lock file. +func (s *StateFile) lockPath() string { + return filepath.Join(s.RootDir, s.ID+".lock") +} + +// destroy deletes all state created by the stateFile. It may be called with the +// lock file held. In that case, the lock file must still be unlocked and +// properly closed after destroy returns. +func (s *StateFile) destroy() error { + if err := os.Remove(s.statePath()); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Remove(s.lockPath()); err != nil && !os.IsNotExist(err) { + return err + } + return nil +} diff --git a/runsc/container/test_app/BUILD b/runsc/container/test_app/BUILD deleted file mode 100644 index 9bf9e6e9d..000000000 --- a/runsc/container/test_app/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") - -package(licenses = ["notice"]) - -go_binary( - name = "test_app", - testonly = 1, - srcs = [ - "fds.go", - "test_app.go", - ], - pure = "on", - visibility = ["//runsc/container:__pkg__"], - deps = [ - "//pkg/unet", - "//runsc/testutil", - "@com_github_google_subcommands//:go_default_library", - ], -) diff --git a/runsc/container/test_app/fds.go b/runsc/container/test_app/fds.go deleted file mode 100644 index a90cc1662..000000000 --- a/runsc/container/test_app/fds.go +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "context" - "io/ioutil" - "log" - "os" - "time" - - "flag" - "github.com/google/subcommands" - "gvisor.dev/gvisor/pkg/unet" - "gvisor.dev/gvisor/runsc/testutil" -) - -const fileContents = "foobarbaz" - -// fdSender will open a file and send the FD over a unix domain socket. -type fdSender struct { - socketPath string -} - -// Name implements subcommands.Command.Name. -func (*fdSender) Name() string { - return "fd_sender" -} - -// Synopsis implements subcommands.Command.Synopsys. 
-func (*fdSender) Synopsis() string { - return "creates a file and sends the FD over the socket" -} - -// Usage implements subcommands.Command.Usage. -func (*fdSender) Usage() string { - return "fd_sender <flags>" -} - -// SetFlags implements subcommands.Command.SetFlags. -func (fds *fdSender) SetFlags(f *flag.FlagSet) { - f.StringVar(&fds.socketPath, "socket", "", "path to socket") -} - -// Execute implements subcommands.Command.Execute. -func (fds *fdSender) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if fds.socketPath == "" { - log.Fatalf("socket flag must be set") - } - - dir, err := ioutil.TempDir("", "") - if err != nil { - log.Fatalf("TempDir failed: %v", err) - } - - fileToSend, err := ioutil.TempFile(dir, "") - if err != nil { - log.Fatalf("TempFile failed: %v", err) - } - defer fileToSend.Close() - - if _, err := fileToSend.WriteString(fileContents); err != nil { - log.Fatalf("Write(%q) failed: %v", fileContents, err) - } - - // Receiver may not be started yet, so try connecting in a poll loop. - var s *unet.Socket - if err := testutil.Poll(func() error { - var err error - s, err = unet.Connect(fds.socketPath, true /* SEQPACKET, so we can send empty message with FD */) - return err - }, 10*time.Second); err != nil { - log.Fatalf("Error connecting to socket %q: %v", fds.socketPath, err) - } - defer s.Close() - - w := s.Writer(true) - w.ControlMessage.PackFDs(int(fileToSend.Fd())) - if _, err := w.WriteVec([][]byte{[]byte{'a'}}); err != nil { - log.Fatalf("Error sending FD %q over socket %q: %v", fileToSend.Fd(), fds.socketPath, err) - } - - log.Print("FD SENDER exiting successfully") - return subcommands.ExitSuccess -} - -// fdReceiver receives an FD from a unix domain socket and does things to it. -type fdReceiver struct { - socketPath string -} - -// Name implements subcommands.Command.Name. -func (*fdReceiver) Name() string { - return "fd_receiver" -} - -// Synopsis implements subcommands.Command.Synopsys. -func (*fdReceiver) Synopsis() string { - return "reads an FD from a unix socket, and then does things to it" -} - -// Usage implements subcommands.Command.Usage. -func (*fdReceiver) Usage() string { - return "fd_receiver <flags>" -} - -// SetFlags implements subcommands.Command.SetFlags. -func (fdr *fdReceiver) SetFlags(f *flag.FlagSet) { - f.StringVar(&fdr.socketPath, "socket", "", "path to socket") -} - -// Execute implements subcommands.Command.Execute. 
-func (fdr *fdReceiver) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if fdr.socketPath == "" { - log.Fatalf("Flags cannot be empty, given: socket: %q", fdr.socketPath) - } - - ss, err := unet.BindAndListen(fdr.socketPath, true /* packet */) - if err != nil { - log.Fatalf("BindAndListen(%q) failed: %v", fdr.socketPath, err) - } - defer ss.Close() - - var s *unet.Socket - c := make(chan error, 1) - go func() { - var err error - s, err = ss.Accept() - c <- err - }() - - select { - case err := <-c: - if err != nil { - log.Fatalf("Accept() failed: %v", err) - } - case <-time.After(10 * time.Second): - log.Fatalf("Timeout waiting for accept") - } - - r := s.Reader(true) - r.EnableFDs(1) - b := [][]byte{{'a'}} - if n, err := r.ReadVec(b); n != 1 || err != nil { - log.Fatalf("ReadVec got n=%d err %v (wanted 0, nil)", n, err) - } - - fds, err := r.ExtractFDs() - if err != nil { - log.Fatalf("ExtractFD() got err %v", err) - } - if len(fds) != 1 { - log.Fatalf("ExtractFD() got %d FDs, wanted 1", len(fds)) - } - fd := fds[0] - - file := os.NewFile(uintptr(fd), "received file") - defer file.Close() - if _, err := file.Seek(0, os.SEEK_SET); err != nil { - log.Fatalf("Seek(0, 0) failed: %v", err) - } - - got, err := ioutil.ReadAll(file) - if err != nil { - log.Fatalf("ReadAll failed: %v", err) - } - if string(got) != fileContents { - log.Fatalf("ReadAll got %q want %q", string(got), fileContents) - } - - log.Print("FD RECEIVER exiting successfully") - return subcommands.ExitSuccess -} diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go deleted file mode 100644 index 913d781c6..000000000 --- a/runsc/container/test_app/test_app.go +++ /dev/null @@ -1,354 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary test_app is like a swiss knife for tests that need to run anything -// inside the sandbox. New functionality can be added with new commands. -package main - -import ( - "context" - "fmt" - "io/ioutil" - "log" - "net" - "os" - "os/exec" - "regexp" - "strconv" - sys "syscall" - "time" - - "flag" - "github.com/google/subcommands" - "gvisor.dev/gvisor/runsc/testutil" -) - -func main() { - subcommands.Register(subcommands.HelpCommand(), "") - subcommands.Register(subcommands.FlagsCommand(), "") - subcommands.Register(new(capability), "") - subcommands.Register(new(fdReceiver), "") - subcommands.Register(new(fdSender), "") - subcommands.Register(new(forkBomb), "") - subcommands.Register(new(reaper), "") - subcommands.Register(new(syscall), "") - subcommands.Register(new(taskTree), "") - subcommands.Register(new(uds), "") - - flag.Parse() - - exitCode := subcommands.Execute(context.Background()) - os.Exit(int(exitCode)) -} - -type uds struct { - fileName string - socketPath string -} - -// Name implements subcommands.Command.Name. -func (*uds) Name() string { - return "uds" -} - -// Synopsis implements subcommands.Command.Synopsys. 
-func (*uds) Synopsis() string { - return "creates unix domain socket client and server. Client sends a contant flow of sequential numbers. Server prints them to --file" -} - -// Usage implements subcommands.Command.Usage. -func (*uds) Usage() string { - return "uds <flags>" -} - -// SetFlags implements subcommands.Command.SetFlags. -func (c *uds) SetFlags(f *flag.FlagSet) { - f.StringVar(&c.fileName, "file", "", "name of output file") - f.StringVar(&c.socketPath, "socket", "", "path to socket") -} - -// Execute implements subcommands.Command.Execute. -func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if c.fileName == "" || c.socketPath == "" { - log.Fatalf("Flags cannot be empty, given: fileName: %q, socketPath: %q", c.fileName, c.socketPath) - return subcommands.ExitFailure - } - outputFile, err := os.OpenFile(c.fileName, os.O_WRONLY|os.O_CREATE, 0666) - if err != nil { - log.Fatal("error opening output file:", err) - } - - defer os.Remove(c.socketPath) - - listener, err := net.Listen("unix", c.socketPath) - if err != nil { - log.Fatal("error listening on socket %q:", c.socketPath, err) - } - - go server(listener, outputFile) - for i := 0; ; i++ { - conn, err := net.Dial("unix", c.socketPath) - if err != nil { - log.Fatal("error dialing:", err) - } - if _, err := conn.Write([]byte(strconv.Itoa(i))); err != nil { - log.Fatal("error writing:", err) - } - conn.Close() - time.Sleep(100 * time.Millisecond) - } -} - -func server(listener net.Listener, out *os.File) { - buf := make([]byte, 16) - - for { - c, err := listener.Accept() - if err != nil { - log.Fatal("error accepting connection:", err) - } - nr, err := c.Read(buf) - if err != nil { - log.Fatal("error reading from buf:", err) - } - data := buf[0:nr] - fmt.Fprint(out, string(data)+"\n") - } -} - -type taskTree struct { - depth int - width int - pause bool -} - -// Name implements subcommands.Command. -func (*taskTree) Name() string { - return "task-tree" -} - -// Synopsis implements subcommands.Command. -func (*taskTree) Synopsis() string { - return "creates a tree of tasks" -} - -// Usage implements subcommands.Command. -func (*taskTree) Usage() string { - return "task-tree <flags>" -} - -// SetFlags implements subcommands.Command. -func (c *taskTree) SetFlags(f *flag.FlagSet) { - f.IntVar(&c.depth, "depth", 1, "number of levels to create") - f.IntVar(&c.width, "width", 1, "number of tasks at each level") - f.BoolVar(&c.pause, "pause", false, "whether the tasks should pause perpetually") -} - -// Execute implements subcommands.Command. -func (c *taskTree) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - stop := testutil.StartReaper() - defer stop() - - if c.depth == 0 { - log.Printf("Child sleeping, PID: %d\n", os.Getpid()) - select {} - } - log.Printf("Parent %d sleeping, PID: %d\n", c.depth, os.Getpid()) - - var cmds []*exec.Cmd - for i := 0; i < c.width; i++ { - cmd := exec.Command( - "/proc/self/exe", c.Name(), - "--depth", strconv.Itoa(c.depth-1), - "--width", strconv.Itoa(c.width), - "--pause", strconv.FormatBool(c.pause)) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - if err := cmd.Start(); err != nil { - log.Fatal("failed to call self:", err) - } - cmds = append(cmds, cmd) - } - - for _, c := range cmds { - c.Wait() - } - - if c.pause { - select {} - } - - return subcommands.ExitSuccess -} - -type forkBomb struct { - delay time.Duration -} - -// Name implements subcommands.Command. 
-func (*forkBomb) Name() string { - return "fork-bomb" -} - -// Synopsis implements subcommands.Command. -func (*forkBomb) Synopsis() string { - return "creates child process until the end of times" -} - -// Usage implements subcommands.Command. -func (*forkBomb) Usage() string { - return "fork-bomb <flags>" -} - -// SetFlags implements subcommands.Command. -func (c *forkBomb) SetFlags(f *flag.FlagSet) { - f.DurationVar(&c.delay, "delay", 100*time.Millisecond, "amount of time to delay creation of child") -} - -// Execute implements subcommands.Command. -func (c *forkBomb) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - time.Sleep(c.delay) - - cmd := exec.Command("/proc/self/exe", c.Name()) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - log.Fatal("failed to call self:", err) - } - return subcommands.ExitSuccess -} - -type reaper struct{} - -// Name implements subcommands.Command. -func (*reaper) Name() string { - return "reaper" -} - -// Synopsis implements subcommands.Command. -func (*reaper) Synopsis() string { - return "reaps all children in a loop" -} - -// Usage implements subcommands.Command. -func (*reaper) Usage() string { - return "reaper <flags>" -} - -// SetFlags implements subcommands.Command. -func (*reaper) SetFlags(*flag.FlagSet) {} - -// Execute implements subcommands.Command. -func (c *reaper) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - stop := testutil.StartReaper() - defer stop() - select {} -} - -type syscall struct { - sysno uint64 -} - -// Name implements subcommands.Command. -func (*syscall) Name() string { - return "syscall" -} - -// Synopsis implements subcommands.Command. -func (*syscall) Synopsis() string { - return "syscall makes a syscall" -} - -// Usage implements subcommands.Command. -func (*syscall) Usage() string { - return "syscall <flags>" -} - -// SetFlags implements subcommands.Command. -func (s *syscall) SetFlags(f *flag.FlagSet) { - f.Uint64Var(&s.sysno, "syscall", 0, "syscall to call") -} - -// Execute implements subcommands.Command. -func (s *syscall) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if _, _, errno := sys.Syscall(uintptr(s.sysno), 0, 0, 0); errno != 0 { - fmt.Printf("syscall(%d, 0, 0...) failed: %v\n", s.sysno, errno) - } else { - fmt.Printf("syscall(%d, 0, 0...) success\n", s.sysno) - } - return subcommands.ExitSuccess -} - -type capability struct { - enabled uint64 - disabled uint64 -} - -// Name implements subcommands.Command. -func (*capability) Name() string { - return "capability" -} - -// Synopsis implements subcommands.Command. -func (*capability) Synopsis() string { - return "checks if effective capabilities are set/unset" -} - -// Usage implements subcommands.Command. -func (*capability) Usage() string { - return "capability [--enabled=number] [--disabled=number]" -} - -// SetFlags implements subcommands.Command. -func (c *capability) SetFlags(f *flag.FlagSet) { - f.Uint64Var(&c.enabled, "enabled", 0, "") - f.Uint64Var(&c.disabled, "disabled", 0, "") -} - -// Execute implements subcommands.Command. 
-func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if c.enabled == 0 && c.disabled == 0 { - fmt.Println("One of the flags must be set") - return subcommands.ExitUsageError - } - - status, err := ioutil.ReadFile("/proc/self/status") - if err != nil { - fmt.Printf("Error reading %q: %v\n", "proc/self/status", err) - return subcommands.ExitFailure - } - re := regexp.MustCompile("CapEff:\t([0-9a-f]+)\n") - matches := re.FindStringSubmatch(string(status)) - if matches == nil || len(matches) != 2 { - fmt.Printf("Effective capabilities not found in\n%s\n", status) - return subcommands.ExitFailure - } - caps, err := strconv.ParseUint(matches[1], 16, 64) - if err != nil { - fmt.Printf("failed to convert capabilities %q: %v\n", matches[1], err) - return subcommands.ExitFailure - } - - if c.enabled != 0 && (caps&c.enabled) != c.enabled { - fmt.Printf("Missing capabilities, want: %#x: got: %#x\n", c.enabled, caps) - return subcommands.ExitFailure - } - if c.disabled != 0 && (caps&c.disabled) != 0 { - fmt.Printf("Extra capabilities found, dont_want: %#x: got: %#x\n", c.disabled, caps) - return subcommands.ExitFailure - } - - return subcommands.ExitSuccess -} diff --git a/runsc/criutil/BUILD b/runsc/criutil/BUILD deleted file mode 100644 index 558133a0e..000000000 --- a/runsc/criutil/BUILD +++ /dev/null @@ -1,12 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "criutil", - testonly = 1, - srcs = ["criutil.go"], - importpath = "gvisor.dev/gvisor/runsc/criutil", - visibility = ["//:sandbox"], - deps = ["//runsc/testutil"], -) diff --git a/runsc/criutil/criutil.go b/runsc/criutil/criutil.go deleted file mode 100644 index 773f5a1c4..000000000 --- a/runsc/criutil/criutil.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package criutil contains utility functions for interacting with the -// Container Runtime Interface (CRI), principally via the crictl command line -// tool. This requires critools to be installed on the local system. -package criutil - -import ( - "encoding/json" - "fmt" - "os" - "os/exec" - "strings" - "time" - - "gvisor.dev/gvisor/runsc/testutil" -) - -const endpointPrefix = "unix://" - -// Crictl contains information required to run the crictl utility. -type Crictl struct { - executable string - timeout time.Duration - imageEndpoint string - runtimeEndpoint string -} - -// NewCrictl returns a Crictl configured with a timeout and an endpoint over -// which it will talk to containerd. -func NewCrictl(timeout time.Duration, endpoint string) *Crictl { - // Bazel doesn't pass PATH through, assume the location of crictl - // unless specified by environment variable. 
- executable := os.Getenv("CRICTL_PATH") - if executable == "" { - executable = "/usr/local/bin/crictl" - } - return &Crictl{ - executable: executable, - timeout: timeout, - imageEndpoint: endpointPrefix + endpoint, - runtimeEndpoint: endpointPrefix + endpoint, - } -} - -// Pull pulls an container image. It corresponds to `crictl pull`. -func (cc *Crictl) Pull(imageName string) error { - _, err := cc.run("pull", imageName) - return err -} - -// RunPod creates a sandbox. It corresponds to `crictl runp`. -func (cc *Crictl) RunPod(sbSpecFile string) (string, error) { - podID, err := cc.run("runp", sbSpecFile) - if err != nil { - return "", fmt.Errorf("runp failed: %v", err) - } - // Strip the trailing newline from crictl output. - return strings.TrimSpace(podID), nil -} - -// Create creates a container within a sandbox. It corresponds to `crictl -// create`. -func (cc *Crictl) Create(podID, contSpecFile, sbSpecFile string) (string, error) { - podID, err := cc.run("create", podID, contSpecFile, sbSpecFile) - if err != nil { - return "", fmt.Errorf("create failed: %v", err) - } - // Strip the trailing newline from crictl output. - return strings.TrimSpace(podID), nil -} - -// Start starts a container. It corresponds to `crictl start`. -func (cc *Crictl) Start(contID string) (string, error) { - output, err := cc.run("start", contID) - if err != nil { - return "", fmt.Errorf("start failed: %v", err) - } - return output, nil -} - -// Stop stops a container. It corresponds to `crictl stop`. -func (cc *Crictl) Stop(contID string) error { - _, err := cc.run("stop", contID) - return err -} - -// Exec execs a program inside a container. It corresponds to `crictl exec`. -func (cc *Crictl) Exec(contID string, args ...string) (string, error) { - a := []string{"exec", contID} - a = append(a, args...) - output, err := cc.run(a...) - if err != nil { - return "", fmt.Errorf("exec failed: %v", err) - } - return output, nil -} - -// Rm removes a container. It corresponds to `crictl rm`. -func (cc *Crictl) Rm(contID string) error { - _, err := cc.run("rm", contID) - return err -} - -// StopPod stops a pod. It corresponds to `crictl stopp`. -func (cc *Crictl) StopPod(podID string) error { - _, err := cc.run("stopp", podID) - return err -} - -// containsConfig is a minimal copy of -// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto -// It only contains fields needed for testing. -type containerConfig struct { - Status containerStatus -} - -type containerStatus struct { - Network containerNetwork -} - -type containerNetwork struct { - IP string -} - -// PodIP returns a pod's IP address. -func (cc *Crictl) PodIP(podID string) (string, error) { - output, err := cc.run("inspectp", podID) - if err != nil { - return "", err - } - conf := &containerConfig{} - if err := json.Unmarshal([]byte(output), conf); err != nil { - return "", fmt.Errorf("failed to unmarshal JSON: %v, %s", err, output) - } - if conf.Status.Network.IP == "" { - return "", fmt.Errorf("no IP found in config: %s", output) - } - return conf.Status.Network.IP, nil -} - -// RmPod removes a container. It corresponds to `crictl rmp`. -func (cc *Crictl) RmPod(podID string) error { - _, err := cc.run("rmp", podID) - return err -} - -// StartContainer pulls the given image ands starts the container in the -// sandbox with the given podID. -func (cc *Crictl) StartContainer(podID, image, sbSpec, contSpec string) (string, error) { - // Write the specs to files that can be read by crictl. 
- sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec) - if err != nil { - return "", fmt.Errorf("failed to write sandbox spec: %v", err) - } - contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec) - if err != nil { - return "", fmt.Errorf("failed to write container spec: %v", err) - } - - return cc.startContainer(podID, image, sbSpecFile, contSpecFile) -} - -func (cc *Crictl) startContainer(podID, image, sbSpecFile, contSpecFile string) (string, error) { - if err := cc.Pull(image); err != nil { - return "", fmt.Errorf("failed to pull %s: %v", image, err) - } - - contID, err := cc.Create(podID, contSpecFile, sbSpecFile) - if err != nil { - return "", fmt.Errorf("failed to create container in pod %q: %v", podID, err) - } - - if _, err := cc.Start(contID); err != nil { - return "", fmt.Errorf("failed to start container %q in pod %q: %v", contID, podID, err) - } - - return contID, nil -} - -// StopContainer stops and deletes the container with the given container ID. -func (cc *Crictl) StopContainer(contID string) error { - if err := cc.Stop(contID); err != nil { - return fmt.Errorf("failed to stop container %q: %v", contID, err) - } - - if err := cc.Rm(contID); err != nil { - return fmt.Errorf("failed to remove container %q: %v", contID, err) - } - - return nil -} - -// StartPodAndContainer pulls an image, then starts a sandbox and container in -// that sandbox. It returns the pod ID and container ID. -func (cc *Crictl) StartPodAndContainer(image, sbSpec, contSpec string) (string, string, error) { - // Write the specs to files that can be read by crictl. - sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec) - if err != nil { - return "", "", fmt.Errorf("failed to write sandbox spec: %v", err) - } - contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec) - if err != nil { - return "", "", fmt.Errorf("failed to write container spec: %v", err) - } - - podID, err := cc.RunPod(sbSpecFile) - if err != nil { - return "", "", err - } - - contID, err := cc.startContainer(podID, image, sbSpecFile, contSpecFile) - - return podID, contID, err -} - -// StopPodAndContainer stops a container and pod. -func (cc *Crictl) StopPodAndContainer(podID, contID string) error { - if err := cc.StopContainer(contID); err != nil { - return fmt.Errorf("failed to stop container %q in pod %q: %v", contID, podID, err) - } - - if err := cc.StopPod(podID); err != nil { - return fmt.Errorf("failed to stop pod %q: %v", podID, err) - } - - if err := cc.RmPod(podID); err != nil { - return fmt.Errorf("failed to remove pod %q: %v", podID, err) - } - - return nil -} - -// run runs crictl with the given args and returns an error if it takes longer -// than cc.Timeout to run. -func (cc *Crictl) run(args ...string) (string, error) { - defaultArgs := []string{ - "--image-endpoint", cc.imageEndpoint, - "--runtime-endpoint", cc.runtimeEndpoint, - } - cmd := exec.Command(cc.executable, append(defaultArgs, args...)...) - - // Run the command with a timeout. 
- done := make(chan string) - errCh := make(chan error) - go func() { - output, err := cmd.CombinedOutput() - if err != nil { - errCh <- fmt.Errorf("error: \"%v\", output: %s", err, string(output)) - return - } - done <- string(output) - }() - select { - case output := <-done: - return output, nil - case err := <-errCh: - return "", err - case <-time.After(cc.timeout): - if err := testutil.KillCommand(cmd); err != nil { - return "", fmt.Errorf("timed out, then couldn't kill process %+v: %v", cmd, err) - } - return "", fmt.Errorf("timed out: %+v", cmd) - } -} diff --git a/runsc/debian/description b/runsc/debian/description index 6e3b1b2c0..9e8e08805 100644 --- a/runsc/debian/description +++ b/runsc/debian/description @@ -1,5 +1 @@ -gVisor is a user-space kernel, written in Go, that implements a substantial -portion of the Linux system surface. It includes an Open Container Initiative -(OCI) runtime called runsc that provides an isolation boundary between the -application and the host kernel. The runsc runtime integrates with Docker and -Kubernetes, making it simple to run sandboxed containers. +gVisor container sandbox runtime diff --git a/runsc/dockerutil/BUILD b/runsc/dockerutil/BUILD deleted file mode 100644 index 0e0423504..000000000 --- a/runsc/dockerutil/BUILD +++ /dev/null @@ -1,15 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "dockerutil", - testonly = 1, - srcs = ["dockerutil.go"], - importpath = "gvisor.dev/gvisor/runsc/dockerutil", - visibility = ["//:sandbox"], - deps = [ - "//runsc/testutil", - "@com_github_kr_pty//:go_default_library", - ], -) diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go deleted file mode 100644 index 57f6ae8de..000000000 --- a/runsc/dockerutil/dockerutil.go +++ /dev/null @@ -1,467 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package dockerutil is a collection of utility functions, primarily for -// testing. -package dockerutil - -import ( - "encoding/json" - "flag" - "fmt" - "io/ioutil" - "log" - "os" - "os/exec" - "path" - "regexp" - "strconv" - "strings" - "syscall" - "time" - - "github.com/kr/pty" - "gvisor.dev/gvisor/runsc/testutil" -) - -var ( - runtime = flag.String("runtime", "runsc", "specify which runtime to use") - config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths") -) - -// EnsureSupportedDockerVersion checks if correct docker is installed. 
-func EnsureSupportedDockerVersion() { - cmd := exec.Command("docker", "version") - out, err := cmd.CombinedOutput() - if err != nil { - log.Fatalf("Error running %q: %v", "docker version", err) - } - re := regexp.MustCompile(`Version:\s+(\d+)\.(\d+)\.\d.*`) - matches := re.FindStringSubmatch(string(out)) - if len(matches) != 3 { - log.Fatalf("Invalid docker output: %s", out) - } - major, _ := strconv.Atoi(matches[1]) - minor, _ := strconv.Atoi(matches[2]) - if major < 17 || (major == 17 && minor < 9) { - log.Fatalf("Docker version 17.09.0 or greater is required, found: %02d.%02d", major, minor) - } -} - -// RuntimePath returns the binary path for the current runtime. -func RuntimePath() (string, error) { - // Read the configuration data; the file must exist. - configBytes, err := ioutil.ReadFile(*config) - if err != nil { - return "", err - } - - // Unmarshal the configuration. - c := make(map[string]interface{}) - if err := json.Unmarshal(configBytes, &c); err != nil { - return "", err - } - - // Decode the expected configuration. - r, ok := c["runtimes"] - if !ok { - return "", fmt.Errorf("no runtimes declared: %v", c) - } - rs, ok := r.(map[string]interface{}) - if !ok { - // The runtimes are not a map. - return "", fmt.Errorf("unexpected format: %v", c) - } - r, ok = rs[*runtime] - if !ok { - // The expected runtime is not declared. - return "", fmt.Errorf("runtime %q not found: %v", *runtime, c) - } - rs, ok = r.(map[string]interface{}) - if !ok { - // The runtime is not a map. - return "", fmt.Errorf("unexpected format: %v", c) - } - p, ok := rs["path"].(string) - if !ok { - // The runtime does not declare a path. - return "", fmt.Errorf("unexpected format: %v", c) - } - return p, nil -} - -// MountMode describes if the mount should be ro or rw. -type MountMode int - -const ( - // ReadOnly is what the name says. - ReadOnly MountMode = iota - // ReadWrite is what the name says. - ReadWrite -) - -// String returns the mount mode argument for this MountMode. -func (m MountMode) String() string { - switch m { - case ReadOnly: - return "ro" - case ReadWrite: - return "rw" - } - panic(fmt.Sprintf("invalid mode: %d", m)) -} - -// MountArg formats the volume argument to mount in the container. -func MountArg(source, target string, mode MountMode) string { - return fmt.Sprintf("-v=%s:%s:%v", source, target, mode) -} - -// LinkArg formats the link argument. -func LinkArg(source *Docker, target string) string { - return fmt.Sprintf("--link=%s:%s", source.Name, target) -} - -// PrepareFiles creates temp directory to copy files there. The sandbox doesn't -// have access to files in the test dir. -func PrepareFiles(names ...string) (string, error) { - dir, err := ioutil.TempDir("", "image-test") - if err != nil { - return "", fmt.Errorf("ioutil.TempDir failed: %v", err) - } - if err := os.Chmod(dir, 0777); err != nil { - return "", fmt.Errorf("os.Chmod(%q, 0777) failed: %v", dir, err) - } - for _, name := range names { - src := getLocalPath(name) - dst := path.Join(dir, name) - if err := testutil.Copy(src, dst); err != nil { - return "", fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err) - } - } - return dir, nil -} - -func getLocalPath(file string) string { - return path.Join(".", file) -} - -// do executes docker command. -func do(args ...string) (string, error) { - log.Printf("Running: docker %s\n", args) - cmd := exec.Command("docker", args...) 
- out, err := cmd.CombinedOutput() - if err != nil { - return "", fmt.Errorf("error executing docker %s: %v\nout: %s", args, err, out) - } - return string(out), nil -} - -// doWithPty executes docker command with stdio attached to a pty. -func doWithPty(args ...string) (*exec.Cmd, *os.File, error) { - log.Printf("Running with pty: docker %s\n", args) - cmd := exec.Command("docker", args...) - ptmx, err := pty.Start(cmd) - if err != nil { - return nil, nil, fmt.Errorf("error executing docker %s with a pty: %v", args, err) - } - return cmd, ptmx, nil -} - -// Pull pulls a docker image. This is used in tests to isolate the -// time to pull the image off the network from the time to actually -// start the container, to avoid timeouts over slow networks. -func Pull(image string) error { - _, err := do("pull", image) - return err -} - -// Docker contains the name and the runtime of a docker container. -type Docker struct { - Runtime string - Name string -} - -// MakeDocker sets up the struct for a Docker container. -// Names of containers will be unique. -func MakeDocker(namePrefix string) Docker { - return Docker{ - Name: testutil.RandomName(namePrefix), - Runtime: *runtime, - } -} - -// logDockerID logs a container id, which is needed to find container runsc logs. -func (d *Docker) logDockerID() { - id, err := d.ID() - if err != nil { - log.Printf("%v\n", err) - } - log.Printf("Name: %s ID: %v\n", d.Name, id) -} - -// Create calls 'docker create' with the arguments provided. -func (d *Docker) Create(args ...string) error { - a := []string{"create", "--runtime", d.Runtime, "--name", d.Name} - a = append(a, args...) - _, err := do(a...) - if err == nil { - d.logDockerID() - } - return err -} - -// Start calls 'docker start'. -func (d *Docker) Start() error { - if _, err := do("start", d.Name); err != nil { - return fmt.Errorf("error starting container %q: %v", d.Name, err) - } - return nil -} - -// Stop calls 'docker stop'. -func (d *Docker) Stop() error { - if _, err := do("stop", d.Name); err != nil { - return fmt.Errorf("error stopping container %q: %v", d.Name, err) - } - return nil -} - -// Run calls 'docker run' with the arguments provided. The container starts -// running in the background and the call returns immediately. -func (d *Docker) Run(args ...string) error { - a := d.runArgs("-d") - a = append(a, args...) - _, err := do(a...) - if err == nil { - d.logDockerID() - } - return err -} - -// RunWithPty is like Run but with an attached pty. -func (d *Docker) RunWithPty(args ...string) (*exec.Cmd, *os.File, error) { - a := d.runArgs("-it") - a = append(a, args...) - return doWithPty(a...) -} - -// RunFg calls 'docker run' with the arguments provided in the foreground. It -// blocks until the container exits and returns the output. -func (d *Docker) RunFg(args ...string) (string, error) { - a := d.runArgs(args...) - out, err := do(a...) - if err == nil { - d.logDockerID() - } - return string(out), err -} - -func (d *Docker) runArgs(args ...string) []string { - // Environment variable RUNSC_TEST_NAME is picked up by the runtime and added - // to the log name, so one can easily identify the corresponding logs for - // this test. - rv := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-e", "RUNSC_TEST_NAME=" + d.Name} - return append(rv, args...) -} - -// Logs calls 'docker logs'. -func (d *Docker) Logs() (string, error) { - return do("logs", d.Name) -} - -// Exec calls 'docker exec' with the arguments provided. 
-func (d *Docker) Exec(args ...string) (string, error) { - return d.ExecWithFlags(nil, args...) -} - -// ExecWithFlags calls 'docker exec <flags> name <args>'. -func (d *Docker) ExecWithFlags(flags []string, args ...string) (string, error) { - a := []string{"exec"} - a = append(a, flags...) - a = append(a, d.Name) - a = append(a, args...) - return do(a...) -} - -// ExecAsUser calls 'docker exec' as the given user with the arguments -// provided. -func (d *Docker) ExecAsUser(user string, args ...string) (string, error) { - a := []string{"exec", "--user", user, d.Name} - a = append(a, args...) - return do(a...) -} - -// ExecWithTerminal calls 'docker exec -it' with the arguments provided and -// attaches a pty to stdio. -func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) { - a := []string{"exec", "-it", d.Name} - a = append(a, args...) - return doWithPty(a...) -} - -// Pause calls 'docker pause'. -func (d *Docker) Pause() error { - if _, err := do("pause", d.Name); err != nil { - return fmt.Errorf("error pausing container %q: %v", d.Name, err) - } - return nil -} - -// Unpause calls 'docker pause'. -func (d *Docker) Unpause() error { - if _, err := do("unpause", d.Name); err != nil { - return fmt.Errorf("error unpausing container %q: %v", d.Name, err) - } - return nil -} - -// Checkpoint calls 'docker checkpoint'. -func (d *Docker) Checkpoint(name string) error { - if _, err := do("checkpoint", "create", d.Name, name); err != nil { - return fmt.Errorf("error pausing container %q: %v", d.Name, err) - } - return nil -} - -// Restore calls 'docker start --checkname [name]'. -func (d *Docker) Restore(name string) error { - if _, err := do("start", "--checkpoint", name, d.Name); err != nil { - return fmt.Errorf("error starting container %q: %v", d.Name, err) - } - return nil -} - -// Remove calls 'docker rm'. -func (d *Docker) Remove() error { - if _, err := do("rm", d.Name); err != nil { - return fmt.Errorf("error deleting container %q: %v", d.Name, err) - } - return nil -} - -// CleanUp kills and deletes the container (best effort). -func (d *Docker) CleanUp() { - d.logDockerID() - if _, err := do("kill", d.Name); err != nil { - if strings.Contains(err.Error(), "is not running") { - // Nothing to kill. Don't log the error in this case. - } else { - log.Printf("error killing container %q: %v", d.Name, err) - } - } - if err := d.Remove(); err != nil { - log.Print(err) - } -} - -// FindPort returns the host port that is mapped to 'sandboxPort'. This calls -// docker to allocate a free port in the host and prevent conflicts. -func (d *Docker) FindPort(sandboxPort int) (int, error) { - format := fmt.Sprintf(`{{ (index (index .NetworkSettings.Ports "%d/tcp") 0).HostPort }}`, sandboxPort) - out, err := do("inspect", "-f", format, d.Name) - if err != nil { - return -1, fmt.Errorf("error retrieving port: %v", err) - } - port, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - return -1, fmt.Errorf("error parsing port %q: %v", out, err) - } - return port, nil -} - -// SandboxPid returns the PID to the sandbox process. -func (d *Docker) SandboxPid() (int, error) { - out, err := do("inspect", "-f={{.State.Pid}}", d.Name) - if err != nil { - return -1, fmt.Errorf("error retrieving pid: %v", err) - } - pid, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - return -1, fmt.Errorf("error parsing pid %q: %v", out, err) - } - return pid, nil -} - -// ID returns the container ID. 
-func (d *Docker) ID() (string, error) { - out, err := do("inspect", "-f={{.Id}}", d.Name) - if err != nil { - return "", fmt.Errorf("error retrieving ID: %v", err) - } - return strings.TrimSpace(string(out)), nil -} - -// Wait waits for container to exit, up to the given timeout. Returns error if -// wait fails or timeout is hit. Returns the application return code otherwise. -// Note that the application may have failed even if err == nil, always check -// the exit code. -func (d *Docker) Wait(timeout time.Duration) (syscall.WaitStatus, error) { - timeoutChan := time.After(timeout) - waitChan := make(chan (syscall.WaitStatus)) - errChan := make(chan (error)) - - go func() { - out, err := do("wait", d.Name) - if err != nil { - errChan <- fmt.Errorf("error waiting for container %q: %v", d.Name, err) - } - exit, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - errChan <- fmt.Errorf("error parsing exit code %q: %v", out, err) - } - waitChan <- syscall.WaitStatus(uint32(exit)) - }() - - select { - case ws := <-waitChan: - return ws, nil - case err := <-errChan: - return syscall.WaitStatus(1), err - case <-timeoutChan: - return syscall.WaitStatus(1), fmt.Errorf("timeout waiting for container %q", d.Name) - } -} - -// WaitForOutput calls 'docker logs' to retrieve containers output and searches -// for the given pattern. -func (d *Docker) WaitForOutput(pattern string, timeout time.Duration) (string, error) { - matches, err := d.WaitForOutputSubmatch(pattern, timeout) - if err != nil { - return "", err - } - if len(matches) == 0 { - return "", nil - } - return matches[0], nil -} - -// WaitForOutputSubmatch calls 'docker logs' to retrieve containers output and -// searches for the given pattern. It returns any regexp submatches as well. -func (d *Docker) WaitForOutputSubmatch(pattern string, timeout time.Duration) ([]string, error) { - re := regexp.MustCompile(pattern) - var out string - for exp := time.Now().Add(timeout); time.Now().Before(exp); { - var err error - out, err = d.Logs() - if err != nil { - return nil, err - } - if matches := re.FindStringSubmatch(out); matches != nil { - // Success! - return matches, nil - } - time.Sleep(100 * time.Millisecond) - } - return nil, fmt.Errorf("timeout waiting for output %q: %s", re.String(), out) -} diff --git a/runsc/flag/BUILD b/runsc/flag/BUILD new file mode 100644 index 000000000..5cb7604a8 --- /dev/null +++ b/runsc/flag/BUILD @@ -0,0 +1,9 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "flag", + srcs = ["flag.go"], + visibility = ["//:sandbox"], +) diff --git a/runsc/flag/flag.go b/runsc/flag/flag.go new file mode 100644 index 000000000..0ca4829d7 --- /dev/null +++ b/runsc/flag/flag.go @@ -0,0 +1,33 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
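The new runsc/flag package that begins just below is a thin shim: it re-exports the standard library's flag package under a gVisor-owned import path, so call sites can switch imports without further changes. A minimal, hypothetical caller (the "verbose" flag is made up for illustration; runsc's real flags are registered in runsc/main.go):

    package main

    import (
        "fmt"

        "gvisor.dev/gvisor/runsc/flag"
    )

    // Registered through the shim exactly as with the stdlib flag package.
    var verbose = flag.Bool("verbose", false, "enable verbose output")

    func main() {
        flag.Parse()
        if *verbose {
            fmt.Println("verbose mode enabled")
        }
    }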
+ +package flag + +import ( + "flag" +) + +type FlagSet = flag.FlagSet + +var ( + NewFlagSet = flag.NewFlagSet + String = flag.String + Bool = flag.Bool + Int = flag.Int + Uint = flag.Uint + CommandLine = flag.CommandLine + Parse = flag.Parse +) + +const ContinueOnError = flag.ContinueOnError diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index 80a4aa2fe..64a406ae2 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -6,17 +6,17 @@ go_library( name = "fsgofer", srcs = [ "fsgofer.go", + "fsgofer_amd64_unsafe.go", + "fsgofer_arm64_unsafe.go", "fsgofer_unsafe.go", ], - importpath = "gvisor.dev/gvisor/runsc/fsgofer", - visibility = [ - "//runsc:__subpackages__", - ], + visibility = ["//runsc:__subpackages__"], deps = [ "//pkg/abi/linux", "//pkg/fd", "//pkg/log", "//pkg/p9", + "//pkg/sync", "//pkg/syserr", "//runsc/specutils", "@org_golang_x_sys//unix:go_default_library", @@ -27,7 +27,7 @@ go_test( name = "fsgofer_test", size = "small", srcs = ["fsgofer_test.go"], - embed = [":fsgofer"], + library = ":fsgofer", deps = [ "//pkg/log", "//pkg/p9", diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD index 02168ad1b..82b48ef32 100644 --- a/runsc/fsgofer/filter/BUILD +++ b/runsc/fsgofer/filter/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,12 +6,13 @@ go_library( name = "filter", srcs = [ "config.go", + "config_amd64.go", + "config_arm64.go", "extra_filters.go", "extra_filters_msan.go", "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.dev/gvisor/runsc/fsgofer/filter", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index 2ea95f8fb..1dce36965 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -25,11 +25,7 @@ import ( // allowedSyscalls is the set of syscalls executed by the gofer. var allowedSyscalls = seccomp.SyscallRules{ - syscall.SYS_ACCEPT: {}, - syscall.SYS_ARCH_PRCTL: []seccomp.Rule{ - {seccomp.AllowValue(linux.ARCH_GET_FS)}, - {seccomp.AllowValue(linux.ARCH_SET_FS)}, - }, + syscall.SYS_ACCEPT: {}, syscall.SYS_CLOCK_GETTIME: {}, syscall.SYS_CLONE: []seccomp.Rule{ { @@ -132,6 +128,18 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_MADVISE: {}, unix.SYS_MEMFD_CREATE: {}, /// Used by flipcall.PacketWindowAllocator.Init(). syscall.SYS_MKDIRAT: {}, + // Used by the Go runtime as a temporarily workaround for a Linux + // 5.2-5.4 bug. + // + // See src/runtime/os_linux_x86.go. + // + // TODO(b/148688965): Remove once this is gone from Go. + syscall.SYS_MLOCK: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(4096), + }, + }, syscall.SYS_MMAP: []seccomp.Rule{ { seccomp.AllowAny{}, @@ -155,7 +163,6 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_MPROTECT: {}, syscall.SYS_MUNMAP: {}, syscall.SYS_NANOSLEEP: {}, - syscall.SYS_NEWFSTATAT: {}, syscall.SYS_OPENAT: {}, syscall.SYS_PPOLL: {}, syscall.SYS_PREAD64: {}, diff --git a/runsc/fsgofer/filter/config_amd64.go b/runsc/fsgofer/filter/config_amd64.go new file mode 100644 index 000000000..a4b28cb8b --- /dev/null +++ b/runsc/fsgofer/filter/config_amd64.go @@ -0,0 +1,33 @@ +// Copyright 2019 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package filter + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/seccomp" +) + +func init() { + allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{ + {seccomp.AllowValue(linux.ARCH_GET_FS)}, + {seccomp.AllowValue(linux.ARCH_SET_FS)}, + } + + allowedSyscalls[syscall.SYS_NEWFSTATAT] = []seccomp.Rule{} +} diff --git a/runsc/fsgofer/filter/config_arm64.go b/runsc/fsgofer/filter/config_arm64.go new file mode 100644 index 000000000..d2697deb7 --- /dev/null +++ b/runsc/fsgofer/filter/config_arm64.go @@ -0,0 +1,27 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package filter + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/seccomp" +) + +func init() { + allowedSyscalls[syscall.SYS_FSTATAT] = []seccomp.Rule{} +} diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 3fceecb3d..1942f50d7 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -29,7 +29,6 @@ import ( "path/filepath" "runtime" "strconv" - "sync" "syscall" "golang.org/x/sys/unix" @@ -37,6 +36,7 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/runsc/specutils" ) @@ -199,6 +199,7 @@ func (a *attachPoint) makeQID(stat syscall.Stat_t) p9.QID { // The reason that the file is not opened initially as read-write is for better // performance with 'overlay2' storage driver. overlay2 eagerly copies the // entire file up when it's opened in write mode, and would perform badly when +// multiple files are only being opened for read (esp. startup). type localFile struct { p9.DefaultWalkGetAttr @@ -366,23 +367,24 @@ func fchown(fd int, uid p9.UID, gid p9.GID) error { } // Open implements p9.File. -func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { +func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { if l.isOpen() { panic(fmt.Sprintf("attempting to open already opened file: %q", l.hostPath)) } // Check if control file can be used or if a new open must be created. 
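The reopen path below goes through reopenProcFd rather than opening l.hostPath again. That helper is not shown in this diff; presumably it reopens the existing descriptor through the /proc/self/fd indirection, which keeps working even if the path has since been renamed and, per the comment below, is the practical fallback because overlay2 does not support name_to_handle_at/open_by_handle_at. A sketch of the technique (assumed implementation, inside package fsgofer; imports: fmt, syscall, gvisor.dev/gvisor/pkg/fd):

    // reopen returns a new descriptor for the same file as f, opened with
    // different flags, without touching the file's path.
    func reopen(f *fd.FD, flags int) (*fd.FD, error) {
        newFD, err := syscall.Open(fmt.Sprintf("/proc/self/fd/%d", f.FD()), flags, 0)
        if err != nil {
            return nil, err
        }
        return fd.New(newFD), nil
    }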
var newFile *fd.FD - if mode == p9.ReadOnly { - log.Debugf("Open reusing control file, mode: %v, %q", mode, l.hostPath) + if flags == p9.ReadOnly { + log.Debugf("Open reusing control file, flags: %v, %q", flags, l.hostPath) newFile = l.file } else { // Ideally reopen would call name_to_handle_at (with empty name) and // open_by_handle_at to reopen the file without using 'hostPath'. However, // name_to_handle_at and open_by_handle_at aren't supported by overlay2. - log.Debugf("Open reopening file, mode: %v, %q", mode, l.hostPath) + log.Debugf("Open reopening file, flags: %v, %q", flags, l.hostPath) var err error - newFile, err = reopenProcFd(l.file, openFlags|mode.OSFlags()) + // Constrain open flags to the open mode and O_TRUNC. + newFile, err = reopenProcFd(l.file, openFlags|(flags.OSFlags()&(syscall.O_ACCMODE|syscall.O_TRUNC))) if err != nil { return nil, p9.QID{}, 0, extractErrno(err) } @@ -409,7 +411,7 @@ func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { } l.file = newFile } - l.mode = mode + l.mode = flags & p9.OpenFlagsModeMask return fd, l.attachPoint.makeQID(stat), 0, nil } @@ -601,7 +603,7 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) Mode: p9.FileMode(stat.Mode), UID: p9.UID(stat.Uid), GID: p9.GID(stat.Gid), - NLink: stat.Nlink, + NLink: uint64(stat.Nlink), RDev: stat.Rdev, Size: uint64(stat.Size), BlockSize: uint64(stat.Blksize), @@ -765,6 +767,22 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { return err } +func (*localFile) GetXattr(string, uint64) (string, error) { + return "", syscall.EOPNOTSUPP +} + +func (*localFile) SetXattr(string, string, uint32) error { + return syscall.EOPNOTSUPP +} + +func (*localFile) ListXattr(uint64) (map[string]struct{}, error) { + return nil, syscall.EOPNOTSUPP +} + +func (*localFile) RemoveXattr(string) error { + return syscall.EOPNOTSUPP +} + // Allocate implements p9.File. func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error { if !l.isOpen() { @@ -778,7 +796,7 @@ func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error } // Rename implements p9.File; this should never be called. -func (l *localFile) Rename(p9.File, string) error { +func (*localFile) Rename(p9.File, string) error { panic("rename called directly") } @@ -955,14 +973,14 @@ func (l *localFile) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) { } func (l *localFile) readDirent(f int, offset uint64, count uint32, skip uint64) ([]p9.Dirent, error) { + var dirents []p9.Dirent + // Limit 'count' to cap the slice size that is returned. const maxCount = 100000 if count > maxCount { count = maxCount } - dirents := make([]p9.Dirent, 0, count) - // Pre-allocate buffers that will be reused to get partial results. direntsBuf := make([]byte, 8192) names := make([]string, 0, 100) diff --git a/runsc/fsgofer/fsgofer_amd64_unsafe.go b/runsc/fsgofer/fsgofer_amd64_unsafe.go new file mode 100644 index 000000000..5d4aab597 --- /dev/null +++ b/runsc/fsgofer/fsgofer_amd64_unsafe.go @@ -0,0 +1,49 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package fsgofer + +import ( + "syscall" + "unsafe" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserr" +) + +func statAt(dirFd int, name string) (syscall.Stat_t, error) { + nameBytes, err := syscall.BytePtrFromString(name) + if err != nil { + return syscall.Stat_t{}, err + } + namePtr := unsafe.Pointer(nameBytes) + + var stat syscall.Stat_t + statPtr := unsafe.Pointer(&stat) + + if _, _, errno := syscall.Syscall6( + syscall.SYS_NEWFSTATAT, + uintptr(dirFd), + uintptr(namePtr), + uintptr(statPtr), + linux.AT_SYMLINK_NOFOLLOW, + 0, + 0); errno != 0 { + + return syscall.Stat_t{}, syserr.FromHost(errno).ToError() + } + return stat, nil +} diff --git a/runsc/fsgofer/fsgofer_arm64_unsafe.go b/runsc/fsgofer/fsgofer_arm64_unsafe.go new file mode 100644 index 000000000..8041fd352 --- /dev/null +++ b/runsc/fsgofer/fsgofer_arm64_unsafe.go @@ -0,0 +1,49 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
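The arm64 file that follows mirrors the amd64 one above: both define the same statAt helper behind build tags, and only the syscall number differs (SYS_NEWFSTATAT on amd64, SYS_FSTATAT on arm64; both reach the kernel's fstatat entry point). For comparison, the golang.org/x/sys/unix wrapper performs the same operation portably (illustration only, not how the gofer calls it):

    package main

    import (
        "fmt"

        "golang.org/x/sys/unix"
    )

    func main() {
        // Stat a path without following symlinks, as the gofer's statAt does.
        var stat unix.Stat_t
        if err := unix.Fstatat(unix.AT_FDCWD, "/etc/hosts", &stat, unix.AT_SYMLINK_NOFOLLOW); err != nil {
            panic(err)
        }
        fmt.Printf("inode %d, mode %#o\n", stat.Ino, stat.Mode)
    }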
+
+// +build arm64
+
+package fsgofer
+
+import (
+ "syscall"
+ "unsafe"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserr"
+)
+
+func statAt(dirFd int, name string) (syscall.Stat_t, error) {
+ nameBytes, err := syscall.BytePtrFromString(name)
+ if err != nil {
+ return syscall.Stat_t{}, err
+ }
+ namePtr := unsafe.Pointer(nameBytes)
+
+ var stat syscall.Stat_t
+ statPtr := unsafe.Pointer(&stat)
+
+ if _, _, errno := syscall.Syscall6(
+ syscall.SYS_FSTATAT,
+ uintptr(dirFd),
+ uintptr(namePtr),
+ uintptr(statPtr),
+ linux.AT_SYMLINK_NOFOLLOW,
+ 0,
+ 0); errno != 0 {
+
+ return syscall.Stat_t{}, syserr.FromHost(errno).ToError()
+ }
+ return stat, nil
+}
diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go
index ff2556aee..542b54365 100644
--- a/runsc/fsgofer/fsgofer_unsafe.go
+++ b/runsc/fsgofer/fsgofer_unsafe.go
@@ -18,34 +18,9 @@ import (
 "syscall"
 "unsafe"
- "gvisor.dev/gvisor/pkg/abi/linux"
 "gvisor.dev/gvisor/pkg/syserr"
)
-func statAt(dirFd int, name string) (syscall.Stat_t, error) {
- nameBytes, err := syscall.BytePtrFromString(name)
- if err != nil {
- return syscall.Stat_t{}, err
- }
- namePtr := unsafe.Pointer(nameBytes)
-
- var stat syscall.Stat_t
- statPtr := unsafe.Pointer(&stat)
-
- if _, _, errno := syscall.Syscall6(
- syscall.SYS_NEWFSTATAT,
- uintptr(dirFd),
- uintptr(namePtr),
- uintptr(statPtr),
- linux.AT_SYMLINK_NOFOLLOW,
- 0,
- 0); errno != 0 {
-
- return syscall.Stat_t{}, syserr.FromHost(errno).ToError()
- }
- return stat, nil
-}
-
func utimensat(dirFd int, name string, times [2]syscall.Timespec, flags int) error {
 // utimensat(2) doesn't accept empty name, instead name must be nil to make it
 // operate directly on 'dirFd' unlike other *at syscalls.
diff --git a/runsc/main.go b/runsc/main.go
index ae906c661..2baba90f8 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -26,8 +26,7 @@ import (
 "path/filepath"
 "strings"
 "syscall"
-
- "flag"
+ "time"
 "github.com/google/subcommands"
 "gvisor.dev/gvisor/pkg/log"
@@ -35,6 +34,7 @@ import (
 "gvisor.dev/gvisor/pkg/sentry/platform"
 "gvisor.dev/gvisor/runsc/boot"
 "gvisor.dev/gvisor/runsc/cmd"
+ "gvisor.dev/gvisor/runsc/flag"
 "gvisor.dev/gvisor/runsc/specutils"
)
@@ -46,15 +46,19 @@ var (
 logFormat = flag.String("log-format", "text", "log format: text (default), json, or json-k8s.")
 debug = flag.Bool("debug", false, "enable debug logging.")
 showVersion = flag.Bool("version", false, "show version and exit.")
+ // TODO(gvisor.dev/issue/193): support systemd cgroups
+ systemdCgroup = flag.Bool("systemd-cgroup", false, "Use systemd for cgroups. NOT SUPPORTED.")
 // These flags are unique to runsc, and are used to configure parts of the
 // system that are not covered by the runtime spec.
 // Debugging flags.
 debugLog = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
+ panicLog = flag.String("panic-log", "", "file path where panic reports and other Go runtime messages are written.")
 logPackets = flag.Bool("log-packets", false, "enable network packet logging.")
 logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.")
 debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. 
If set, the 'debug-log-dir' flag is ignored.") + panicLogFD = flag.Int("panic-log-fd", -1, "file descriptor to write Go's runtime messages.") debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.") alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr.") @@ -79,6 +83,8 @@ var ( numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.") rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.") referenceLeakMode = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.") + cpuNumFromQuota = flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)") + vfs2Enabled = flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. This uses the new experimental VFS layer.") // Test flags, not to be used outside tests, ever. testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.") @@ -113,8 +119,8 @@ func main() { subcommands.Register(new(cmd.Resume), "") subcommands.Register(new(cmd.Run), "") subcommands.Register(new(cmd.Spec), "") - subcommands.Register(new(cmd.Start), "") subcommands.Register(new(cmd.State), "") + subcommands.Register(new(cmd.Start), "") subcommands.Register(new(cmd.Wait), "") // Register internal commands with the internal group name. This causes @@ -124,6 +130,7 @@ func main() { subcommands.Register(new(cmd.Boot), internalGroup) subcommands.Register(new(cmd.Debug), internalGroup) subcommands.Register(new(cmd.Gofer), internalGroup) + subcommands.Register(new(cmd.Statefile), internalGroup) // All subcommands must be registered before flag parsing. flag.Parse() @@ -136,6 +143,12 @@ func main() { os.Exit(0) } + // TODO(gvisor.dev/issue/193): support systemd cgroups + if *systemdCgroup { + fmt.Fprintln(os.Stderr, "systemd cgroup flag passed, but systemd cgroups not supported. See gvisor.dev/issue/193") + os.Exit(1) + } + var errorLogger io.Writer if *logFD > -1 { errorLogger = os.NewFile(uintptr(*logFD), "error log file") @@ -196,6 +209,7 @@ func main() { LogFilename: *logFilename, LogFormat: *logFormat, DebugLog: *debugLog, + PanicLog: *panicLog, DebugLogFormat: *debugLogFormat, FileAccess: fsAccess, FSGoferHostUDS: *fsGoferHostUDS, @@ -216,6 +230,8 @@ func main() { AlsoLogToStderr: *alsoLogToStderr, ReferenceLeakMode: refsLeakMode, OverlayfsStaleRead: *overlayfsStaleRead, + CPUNumFromQuota: *cpuNumFromQuota, + VFS2: *vfs2Enabled, TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot, TestOnlyTestNameEnv: *testOnlyTestNameEnv, @@ -229,26 +245,24 @@ func main() { log.SetLevel(log.Debug) } + // Logging will include the local date and time via the time package. + // + // On first use, time.Local initializes the local time zone, which + // involves opening tzdata files on the host. Since this requires + // opening host files, it must be done before syscall filter + // installation. + // + // Generally there will be a log message before filter installation + // that will force initialization, but force initialization here in + // case that does not occur. 
+ _ = time.Local.String()
+
 subcommand := flag.CommandLine.Arg(0)
 var e log.Emitter
 if *debugLogFD > -1 {
 f := os.NewFile(uintptr(*debugLogFD), "debug log file")
- // Quick sanity check to make sure no other commands get passed
- // a log fd (they should use log dir instead).
- if subcommand != "boot" && subcommand != "gofer" {
- cmd.Fatalf("flag --debug-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand)
- }
-
- // If we are the boot process, then we own our stdio FDs and can do what we
- // want with them. Since Docker and Containerd both eat boot's stderr, we
- // dup our stderr to the provided log FD so that panics will appear in the
- // logs, rather than just disappear.
- if err := syscall.Dup3(int(f.Fd()), int(os.Stderr.Fd()), 0); err != nil {
- cmd.Fatalf("error dup'ing fd %d to stderr: %v", f.Fd(), err)
- }
-
 e = newEmitter(*debugLogFormat, f)
 } else if *debugLog != "" {
@@ -264,8 +278,26 @@ func main() {
 e = newEmitter("text", ioutil.Discard)
 }
- if *alsoLogToStderr {
- e = log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)}
+ if *panicLogFD > -1 || *debugLogFD > -1 {
+ fd := *panicLogFD
+ if fd < 0 {
+ fd = *debugLogFD
+ }
+ // Quick sanity check to make sure no other commands get passed
+ // a log fd (they should use log dir instead).
+ if subcommand != "boot" && subcommand != "gofer" {
+ cmd.Fatalf("flags --debug-log-fd and --panic-log-fd should only be passed to the 'boot' and 'gofer' commands, but were passed to %q", subcommand)
+ }
+
+ // If we are the boot process, then we own our stdio FDs and can do what we
+ // want with them. Since Docker and Containerd both eat boot's stderr, we
+ // dup our stderr to the provided log FD so that panics will appear in the
+ // logs, rather than just disappear.
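The dup below covers only the child side of this plumbing; the parent side lives in runsc/sandbox (later in this diff), where donated files are appended to cmd.ExtraFiles and the i-th entry becomes descriptor 3+i in the child. A simplified sketch of the convention (names and paths are illustrative; imports: fmt, os, os/exec):

    logFile, err := os.Create("/tmp/panic.log") // illustrative path
    if err != nil {
        panic(err)
    }
    cmd := exec.Command("/usr/local/bin/runsc", "boot") // stand-in invocation
    cmd.ExtraFiles = append(cmd.ExtraFiles, logFile)
    fd := 3 + len(cmd.ExtraFiles) - 1 // ExtraFiles[i] becomes fd 3+i in the child
    cmd.Args = append(cmd.Args, fmt.Sprintf("--panic-log-fd=%d", fd))
    // The child then adopts the descriptor, as in the code just below:
    //   f := os.NewFile(uintptr(fd), "panic log file")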
+ if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { + cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err) + } + } else if *alsoLogToStderr { + e = &log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)} } log.SetTarget(e) @@ -281,6 +313,7 @@ func main() { log.Infof("\t\tFileAccess: %v, overlay: %t", conf.FileAccess, conf.Overlay) log.Infof("\t\tNetwork: %v, logging: %t", conf.Network, conf.LogPackets) log.Infof("\t\tStrace: %t, max size: %d, syscalls: %s", conf.Strace, conf.StraceLogSize, conf.StraceSyscalls) + log.Infof("\t\tVFS2 enabled: %v", conf.VFS2) log.Infof("***************************") if *testOnlyAllowRunAsCurrentUserWithoutChroot { @@ -310,11 +343,11 @@ func main() { func newEmitter(format string, logFile io.Writer) log.Emitter { switch format { case "text": - return &log.GoogleEmitter{&log.Writer{Next: logFile}} + return log.GoogleEmitter{&log.Writer{Next: logFile}} case "json": - return &log.JSONEmitter{log.Writer{Next: logFile}} + return log.JSONEmitter{&log.Writer{Next: logFile}} case "json-k8s": - return &log.K8sJSONEmitter{log.Writer{Next: logFile}} + return log.K8sJSONEmitter{&log.Writer{Next: logFile}} } cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format) panic("unreachable") diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index 27459e6d1..c95d50294 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "network_unsafe.go", "sandbox.go", ], - importpath = "gvisor.dev/gvisor/runsc/sandbox", visibility = [ "//runsc:__subpackages__", ], @@ -19,6 +18,8 @@ go_library( "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/platform", + "//pkg/sync", + "//pkg/tcpip/header", "//pkg/tcpip/stack", "//pkg/urpc", "//runsc/boot", diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index d42de0176..bc093fba5 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -21,13 +21,13 @@ import ( "path/filepath" "runtime" "strconv" - "strings" "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" @@ -74,30 +74,8 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi } func createDefaultLoopbackInterface(conn *urpc.Client) error { - link := boot.LoopbackLink{ - Name: "lo", - Addresses: []net.IP{ - net.IP("\x7f\x00\x00\x01"), - net.IPv6loopback, - }, - Routes: []boot.Route{ - { - Destination: net.IPNet{ - - IP: net.IPv4(0x7f, 0, 0, 0), - Mask: net.IPv4Mask(0xff, 0, 0, 0), - }, - }, - { - Destination: net.IPNet{ - IP: net.IPv6loopback, - Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)), - }, - }, - }, - } if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{ - LoopbackLinks: []boot.LoopbackLink{link}, + LoopbackLinks: []boot.LoopbackLink{boot.DefaultLoopbackLink}, }, nil); err != nil { return fmt.Errorf("creating loopback link and routes: %v", err) } @@ -173,46 +151,49 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG return fmt.Errorf("fetching interface addresses for %q: %v", iface.Name, err) } - // We build our own loopback devices. + // We build our own loopback device. 
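boot.DefaultLoopbackLink itself is defined in runsc/boot and is not shown in this diff; presumably it captures the same structure as the literal removed from createDefaultLoopbackInterface above, that is, 127.0.0.1/8 and ::1 each with a catch-all route. A sketch of the assumed definition (inside package boot; net.CIDRMask(128, 128) is the all-ones IPv6 mask the old code built with strings.Repeat):

    var DefaultLoopbackLink = LoopbackLink{
        Name: "lo",
        Addresses: []net.IP{
            net.IP("\x7f\x00\x00\x01"), // 127.0.0.1
            net.IPv6loopback,
        },
        Routes: []Route{
            {Destination: net.IPNet{IP: net.IPv4(0x7f, 0, 0, 0), Mask: net.IPv4Mask(0xff, 0, 0, 0)}},
            {Destination: net.IPNet{IP: net.IPv6loopback, Mask: net.CIDRMask(128, 128)}},
        },
    }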
if iface.Flags&net.FlagLoopback != 0 { - links, err := loopbackLinks(iface, allAddrs) + link, err := loopbackLink(iface, allAddrs) if err != nil { - return fmt.Errorf("getting loopback routes and links for iface %q: %v", iface.Name, err) + return fmt.Errorf("getting loopback link for iface %q: %v", iface.Name, err) } - args.LoopbackLinks = append(args.LoopbackLinks, links...) + args.LoopbackLinks = append(args.LoopbackLinks, link) continue } - // Keep only IPv4 addresses. - var ip4addrs []*net.IPNet + var ipAddrs []*net.IPNet for _, ifaddr := range allAddrs { ipNet, ok := ifaddr.(*net.IPNet) if !ok { return fmt.Errorf("address is not IPNet: %+v", ifaddr) } - if ipNet.IP.To4() == nil { - log.Warningf("IPv6 is not supported, skipping: %v", ipNet) - continue - } - ip4addrs = append(ip4addrs, ipNet) + ipAddrs = append(ipAddrs, ipNet) } - if len(ip4addrs) == 0 { - log.Warningf("No IPv4 address found for interface %q, skipping", iface.Name) + if len(ipAddrs) == 0 { + log.Warningf("No usable IP addresses found for interface %q, skipping", iface.Name) continue } // Scrape the routes before removing the address, since that // will remove the routes as well. - routes, def, err := routesForIface(iface) + routes, defv4, defv6, err := routesForIface(iface) if err != nil { return fmt.Errorf("getting routes for interface %q: %v", iface.Name, err) } - if def != nil { - if !args.DefaultGateway.Route.Empty() { - return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, def, args.DefaultGateway) + if defv4 != nil { + if !args.Defaultv4Gateway.Route.Empty() { + return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, defv4, args.Defaultv4Gateway) + } + args.Defaultv4Gateway.Route = *defv4 + args.Defaultv4Gateway.Name = iface.Name + } + + if defv6 != nil { + if !args.Defaultv6Gateway.Route.Empty() { + return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, defv6, args.Defaultv6Gateway) } - args.DefaultGateway.Route = *def - args.DefaultGateway.Name = iface.Name + args.Defaultv6Gateway.Route = *defv6 + args.Defaultv6Gateway.Name = iface.Name } link := boot.FDBasedLink{ @@ -247,6 +228,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG } args.FilePayload.Files = append(args.FilePayload.Files, socketEntry.deviceFile) } + if link.GSOMaxSize == 0 && softwareGSO { // Hardware GSO is disabled. Let's enable software GSO. link.GSOMaxSize = stack.SoftwareGSOMaxSize @@ -255,7 +237,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG // Collect the addresses for the interface, enable forwarding, // and remove them from the host. - for _, addr := range ip4addrs { + for _, addr := range ipAddrs { link.Addresses = append(link.Addresses, addr.IP) // Steal IP address from NIC. @@ -316,81 +298,96 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) ( } } - // Use SO_RCVBUFFORCE because on linux the receive buffer for an - // AF_PACKET socket is capped by "net.core.rmem_max". rmem_max - // defaults to a unusually low value of 208KB. This is too low - // for gVisor to be able to receive packets at high throughputs - // without incurring packet drops. - const rcvBufSize = 4 << 20 // 4MB. + // Use SO_RCVBUFFORCE/SO_SNDBUFFORCE because on linux the receive/send buffer + // for an AF_PACKET socket is capped by "net.core.rmem_max/wmem_max". 
+ // wmem_max/rmem_max default to an unusually low value of 208KB. This is too low
+ // for gVisor to be able to receive packets at high throughputs without
+ // incurring packet drops.
+ const bufSize = 4 << 20 // 4MB.
+
+ if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, bufSize); err != nil {
+ return nil, fmt.Errorf("failed to increase socket rcv buffer to %d: %v", bufSize, err)
+ }
- if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, rcvBufSize); err != nil {
- return nil, fmt.Errorf("failed to increase socket rcv buffer to %d: %v", rcvBufSize, err)
+ if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUFFORCE, bufSize); err != nil {
+ return nil, fmt.Errorf("failed to increase socket snd buffer to %d: %v", bufSize, err)
 }
+
 return &socketEntry{deviceFile, gsoMaxSize}, nil
}
-// loopbackLinks collects the links for a loopback interface.
-func loopbackLinks(iface net.Interface, addrs []net.Addr) ([]boot.LoopbackLink, error) {
- var links []boot.LoopbackLink
+// loopbackLink returns the link with addresses and routes for a loopback
+// interface.
+func loopbackLink(iface net.Interface, addrs []net.Addr) (boot.LoopbackLink, error) {
+ link := boot.LoopbackLink{
+ Name: iface.Name,
+ }
 for _, addr := range addrs {
 ipNet, ok := addr.(*net.IPNet)
 if !ok {
- return nil, fmt.Errorf("address is not IPNet: %+v", addr)
+ return boot.LoopbackLink{}, fmt.Errorf("address is not IPNet: %+v", addr)
 }
 dst := *ipNet
 dst.IP = dst.IP.Mask(dst.Mask)
- links = append(links, boot.LoopbackLink{
- Name: iface.Name,
- Addresses: []net.IP{ipNet.IP},
- Routes: []boot.Route{{
- Destination: dst,
- }},
+ link.Addresses = append(link.Addresses, ipNet.IP)
+ link.Routes = append(link.Routes, boot.Route{
+ Destination: dst,
 })
 }
- return links, nil
+ return link, nil
}
// routesForIface iterates over all routes for the given interface and converts
-// them to boot.Routes.
-func routesForIface(iface net.Interface) ([]boot.Route, *boot.Route, error) {
+// them to boot.Routes. It also returns the default v4/v6 routes, if found.
+func routesForIface(iface net.Interface) ([]boot.Route, *boot.Route, *boot.Route, error) {
 link, err := netlink.LinkByIndex(iface.Index)
 if err != nil {
- return nil, nil, err
+ return nil, nil, nil, err
 }
 rs, err := netlink.RouteList(link, netlink.FAMILY_ALL)
 if err != nil {
- return nil, nil, fmt.Errorf("getting routes from %q: %v", iface.Name, err)
+ return nil, nil, nil, fmt.Errorf("getting routes from %q: %v", iface.Name, err)
 }
- var def *boot.Route
+ var defv4, defv6 *boot.Route
 var routes []boot.Route
 for _, r := range rs {
 // Is it a default route?
 if r.Dst == nil {
 if r.Gw == nil {
- return nil, nil, fmt.Errorf("default route with no gateway %q: %+v", iface.Name, r)
- }
- if r.Gw.To4() == nil {
- log.Warningf("IPv6 is not supported, skipping default route: %v", r)
- continue
- }
- if def != nil {
- return nil, nil, fmt.Errorf("more than one default route found %q, def: %+v, route: %+v", iface.Name, def, r)
+ return nil, nil, nil, fmt.Errorf("default route with no gateway %q: %+v", iface.Name, r)
 }
 // Create a catch all route to the gateway. 
- def = &boot.Route{ - Destination: net.IPNet{ - IP: net.IPv4zero, - Mask: net.IPMask(net.IPv4zero), - }, - Gateway: r.Gw, + switch len(r.Gw) { + case header.IPv4AddressSize: + if defv4 != nil { + return nil, nil, nil, fmt.Errorf("more than one default route found %q, def: %+v, route: %+v", iface.Name, defv4, r) + } + defv4 = &boot.Route{ + Destination: net.IPNet{ + IP: net.IPv4zero, + Mask: net.IPMask(net.IPv4zero), + }, + Gateway: r.Gw, + } + case header.IPv6AddressSize: + if defv6 != nil { + return nil, nil, nil, fmt.Errorf("more than one default route found %q, def: %+v, route: %+v", iface.Name, defv6, r) + } + + defv6 = &boot.Route{ + Destination: net.IPNet{ + IP: net.IPv6zero, + Mask: net.IPMask(net.IPv6zero), + }, + Gateway: r.Gw, + } + default: + return nil, nil, nil, fmt.Errorf("unexpected address size for gateway: %+v for route: %+v", r.Gw, r) } continue } - if r.Dst.IP.To4() == nil { - log.Warningf("IPv6 is not supported, skipping route: %v", r) - continue - } + dst := *r.Dst dst.IP = dst.IP.Mask(dst.Mask) routes = append(routes, boot.Route{ @@ -398,7 +395,7 @@ func routesForIface(iface net.Interface) ([]boot.Route, *boot.Route, error) { Gateway: r.Gw, }) } - return routes, def, nil + return routes, defv4, defv6, nil } // removeAddress removes IP address from network device. It's equivalent to: diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index ee9327fc8..e4ec16e2f 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -18,10 +18,12 @@ package sandbox import ( "context" "fmt" + "io" + "math" "os" "os/exec" "strconv" - "sync" + "strings" "syscall" "time" @@ -33,6 +35,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" @@ -141,7 +144,19 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) { // Wait until the sandbox has booted. b := make([]byte, 1) if l, err := clientSyncFile.Read(b); err != nil || l != 1 { - return nil, fmt.Errorf("waiting for sandbox to start: %v", err) + err := fmt.Errorf("waiting for sandbox to start: %v", err) + // If the sandbox failed to start, it may be because the binary + // permissions were incorrect. Check the bits and return a more helpful + // error message. + // + // NOTE: The error message is checked because error types are lost over + // rpc calls. + if strings.Contains(err.Error(), io.EOF.Error()) { + if permsErr := checkBinaryPermissions(conf); permsErr != nil { + return nil, fmt.Errorf("%v: %v", err, permsErr) + } + } + return nil, err } c.Release() @@ -368,8 +383,24 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF cmd.Args = append(cmd.Args, "--debug-log-fd="+strconv.Itoa(nextFD)) nextFD++ } + if conf.PanicLog != "" { + test := "" + if len(conf.TestOnlyTestNameEnv) != 0 { + // Fetch test name if one is provided and the test only flag was set. 
+ if t, ok := specutils.EnvVar(args.Spec.Process.Env, conf.TestOnlyTestNameEnv); ok { + test = t + } + } - cmd.Args = append(cmd.Args, "--panic-signal="+strconv.Itoa(int(syscall.SIGTERM))) + panicLogFile, err := specutils.DebugLogFile(conf.PanicLog, "panic", test) + if err != nil { + return fmt.Errorf("opening debug log file in %q: %v", conf.PanicLog, err) + } + defer panicLogFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, panicLogFile) + cmd.Args = append(cmd.Args, "--panic-log-fd="+strconv.Itoa(nextFD)) + nextFD++ + } // Add the "boot" command to the args. // @@ -415,9 +446,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } - // If the platform needs a device FD we must pass it in. - if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil { + gPlatform, err := platform.Lookup(conf.Platform) + if err != nil { return err + } + + if deviceFile, err := gPlatform.OpenDevice(); err != nil { + return fmt.Errorf("opening device file for platform %q: %v", gPlatform, err) } else if deviceFile != nil { defer deviceFile.Close() cmd.ExtraFiles = append(cmd.ExtraFiles, deviceFile) @@ -425,6 +460,12 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } + // TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff + // isn't set. + if conf.Platform == "kvm" { + cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1") + } + // The current process' stdio must be passed to the application via the // --stdio-fds flag. The stdio of the sandbox process itself must not // be connected to the same FDs, otherwise we risk leaking sandbox @@ -502,7 +543,7 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF {Type: specs.UTSNamespace}, } - if conf.Platform == platforms.Ptrace { + if gPlatform.Requirements().RequiresCurrentPIDNS { // TODO(b/75837838): Also set a new PID namespace so that we limit // access to other host processes. log.Infof("Sandbox will be started in the current PID namespace") @@ -563,45 +604,32 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace}) cmd.Args = append(cmd.Args, "--setup-root") + const nobody = 65534 if conf.Rootless { - log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid()) + log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid()) cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ { - ContainerID: 0, + ContainerID: nobody, HostID: os.Getuid(), Size: 1, }, } cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ { - ContainerID: 0, + ContainerID: nobody, HostID: os.Getgid(), Size: 1, }, } - cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0} } else { // Map nobody in the new namespace to nobody in the parent namespace. // // A sandbox process will construct an empty - // root for itself, so it has to have the CAP_SYS_ADMIN - // capability. - // - // FIXME(b/122554829): The current implementations of - // os/exec doesn't allow to set ambient capabilities if - // a process is started in a new user namespace. As a - // workaround, we start the sandbox process with the 0 - // UID and then it constructs a chroot and sets UID to - // nobody. 
https://github.com/golang/go/issues/2315
- const nobody = 65534
+ // root for itself, so it has to have
+ // CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
 cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
 {
- ContainerID: 0,
- HostID: nobody - 1,
- Size: 1,
- },
- {
 ContainerID: nobody,
 HostID: nobody,
 Size: 1,
@@ -614,11 +642,11 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 Size: 1,
 },
 }
-
- // Set credentials to run as user and group nobody.
- cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
 }
+
+ // Set credentials to run as user and group nobody.
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
+
+ cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
 } else {
 return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
 }
@@ -631,6 +659,26 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 if err != nil {
 return fmt.Errorf("getting cpu count from cgroups: %v", err)
 }
+ if conf.CPUNumFromQuota {
+ // Dropping below 2 CPUs can trigger applications to disable
+ // locks that can lead to hard-to-debug errors, so just
+ // leave two cores as a reasonable default.
+ const minCPUs = 2
+
+ quota, err := s.Cgroup.CPUQuota()
+ if err != nil {
+ return fmt.Errorf("getting cpu quota from cgroups: %v", err)
+ }
+ if n := int(math.Ceil(quota)); n > 0 {
+ if n < minCPUs {
+ n = minCPUs
+ }
+ if n < cpuNum {
+ // Only lower the cpu number.
+ cpuNum = n
+ }
+ }
+ }
 cmd.Args = append(cmd.Args, "--cpu-num", strconv.Itoa(cpuNum))
 mem, err := s.Cgroup.MemoryLimit()
@@ -656,6 +704,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 nextFD++
 }
+ if args.Attached {
+ // Kill sandbox if parent process exits in attached mode.
+ cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
+ // Tells boot that any process it creates must have pdeathsig set.
+ cmd.Args = append(cmd.Args, "--attached")
+ }
+
 // Add container as the last argument.
 cmd.Args = append(cmd.Args, s.ID)
@@ -664,15 +719,22 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 log.Debugf("Donating FD %d: %q", i+3, f.Name())
 }
- if args.Attached {
- // Kill sandbox if parent process exits in attached mode.
- cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
- }
-
 log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
 log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr)
 if err := specutils.StartInNS(cmd, nss); err != nil {
- return fmt.Errorf("Sandbox: %v", err)
+ err := fmt.Errorf("starting sandbox: %v", err)
+ // If the sandbox failed to start, it may be because the binary
+ // permissions were incorrect. Check the bits and return a more helpful
+ // error message.
+ //
+ // NOTE: The error message is checked because error types are lost over
+ // rpc calls.
+ if strings.Contains(err.Error(), syscall.EACCES.Error()) {
+ if permsErr := checkBinaryPermissions(conf); permsErr != nil {
+ return fmt.Errorf("%v: %v", err, permsErr)
+ }
+ }
+ return err
 }
 s.child = true
 s.Pid = cmd.Process.Pid
@@ -951,6 +1013,66 @@ func (s *Sandbox) StopCPUProfile() error {
 return nil
}
+// GoroutineProfile writes a goroutine profile to the given file. 
+func (s *Sandbox) GoroutineProfile(f *os.File) error { + log.Debugf("Goroutine profile %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.GoroutineProfile, &opts, nil); err != nil { + return fmt.Errorf("getting sandbox %q goroutine profile: %v", s.ID, err) + } + return nil +} + +// BlockProfile writes a block profile to the given file. +func (s *Sandbox) BlockProfile(f *os.File) error { + log.Debugf("Block profile %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.BlockProfile, &opts, nil); err != nil { + return fmt.Errorf("getting sandbox %q block profile: %v", s.ID, err) + } + return nil +} + +// MutexProfile writes a mutex profile to the given file. +func (s *Sandbox) MutexProfile(f *os.File) error { + log.Debugf("Mutex profile %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.MutexProfile, &opts, nil); err != nil { + return fmt.Errorf("getting sandbox %q mutex profile: %v", s.ID, err) + } + return nil +} + // StartTrace start trace writing to the given file. func (s *Sandbox) StartTrace(f *os.File) error { log.Debugf("Trace start %q", s.ID) @@ -1004,16 +1126,22 @@ func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error { // DestroyContainer destroys the given container. If it is the root container, // then the entire sandbox is destroyed. func (s *Sandbox) DestroyContainer(cid string) error { + if err := s.destroyContainer(cid); err != nil { + // If the sandbox isn't running, the container has already been destroyed, + // ignore the error in this case. + if s.IsRunning() { + return err + } + } + return nil +} + +func (s *Sandbox) destroyContainer(cid string) error { if s.IsRootContainer(cid) { log.Debugf("Destroying root container %q by destroying sandbox", cid) return s.destroy() } - if !s.IsRunning() { - // Sandbox isn't running anymore, container is already destroyed. - return nil - } - log.Debugf("Destroying container %q in sandbox %q", cid, s.ID) conn, err := s.sandboxConnect() if err != nil { @@ -1069,3 +1197,31 @@ func deviceFileForPlatform(name string) (*os.File, error) { } return f, nil } + +// checkBinaryPermissions verifies that the required binary bits are set on +// the runsc executable. +func checkBinaryPermissions(conf *boot.Config) error { + // All platforms need the other exe bit + neededBits := os.FileMode(0001) + if conf.Platform == platforms.Ptrace { + // Ptrace needs the other read bit + neededBits |= os.FileMode(0004) + } + + exePath, err := os.Executable() + if err != nil { + return fmt.Errorf("getting exe path: %v", err) + } + + // Check the permissions of the runsc binary and print an error if it + // doesn't match expectations. 
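A worked example of the bit arithmetic used by this check (scenario assumed for illustration): with the ptrace platform, neededBits is 0005 (the world-execute bit from the initialization above plus world-read), so a runsc binary installed with mode 0755 passes, while one installed with mode 0700 trips the FAQ-linked error below:

    package main

    import (
        "fmt"
        "os"
    )

    func main() {
        needed := os.FileMode(0005) // other-exec | other-read (ptrace case)
        fmt.Println(os.FileMode(0755).Perm()&needed == needed) // true
        fmt.Println(os.FileMode(0700).Perm()&needed == needed) // false
    }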
+ info, err := os.Stat(exePath) + if err != nil { + return fmt.Errorf("stat file: %v", err) + } + + if info.Mode().Perm()&neededBits != neededBits { + return fmt.Errorf(specutils.FaqErrorMsg("runsc-perms", fmt.Sprintf("%s does not have the correct permissions", exePath))) + } + return nil +} diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD index 205638803..4ccd77f63 100644 --- a/runsc/specutils/BUILD +++ b/runsc/specutils/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +10,6 @@ go_library( "namespace.go", "specutils.go", ], - importpath = "gvisor.dev/gvisor/runsc/specutils", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", @@ -28,6 +27,6 @@ go_test( name = "specutils_test", size = "small", srcs = ["specutils_test.go"], - embed = [":specutils"], + library = ":specutils", deps = ["@com_github_opencontainers_runtime-spec//specs-go:go_default_library"], ) diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index c7dd3051c..60bb7b7ee 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -252,6 +252,9 @@ func MaybeRunAsRoot() error { }, Credential: &syscall.Credential{Uid: 0, Gid: 0}, GidMappingsEnableSetgroups: false, + + // Make sure child is killed when the parent terminates. + Pdeathsig: syscall.SIGKILL, } cmd.Env = os.Environ() diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index d3c2e4e78..837d5e238 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -92,6 +92,12 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile) } + // PR_SET_NO_NEW_PRIVS is assumed to always be set. + // See kernel.Task.updateCredsForExecLocked. + if !spec.Process.NoNewPrivileges { + log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.") + } + // TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox. if spec.Linux != nil && spec.Linux.Seccomp != nil { log.Warningf("Seccomp spec is being ignored") @@ -528,3 +534,8 @@ func EnvVar(env []string, name string) (string, bool) { } return "", false } + +// FaqErrorMsg returns an error message pointing to the FAQ. +func FaqErrorMsg(anchor, msg string) string { + return fmt.Sprintf("%s; see https://gvisor.dev/faq#%s for more details", msg, anchor) +} diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD deleted file mode 100644 index c96ca2eb6..000000000 --- a/runsc/testutil/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "testutil", - testonly = 1, - srcs = ["testutil.go"], - importpath = "gvisor.dev/gvisor/runsc/testutil", - visibility = ["//:sandbox"], - deps = [ - "//pkg/log", - "//runsc/boot", - "//runsc/specutils", - "@com_github_cenkalti_backoff//:go_default_library", - "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", - ], -) diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go deleted file mode 100644 index 26467bdc7..000000000 --- a/runsc/testutil/testutil.go +++ /dev/null @@ -1,483 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package testutil contains utility functions for runsc tests. -package testutil - -import ( - "bufio" - "context" - "debug/elf" - "encoding/base32" - "encoding/json" - "flag" - "fmt" - "io" - "io/ioutil" - "math" - "math/rand" - "net/http" - "os" - "os/exec" - "os/signal" - "path/filepath" - "strconv" - "strings" - "sync" - "sync/atomic" - "syscall" - "time" - - "github.com/cenkalti/backoff" - specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/runsc/boot" - "gvisor.dev/gvisor/runsc/specutils" -) - -var ( - checkpoint = flag.Bool("checkpoint", true, "control checkpoint/restore support") -) - -func init() { - rand.Seed(time.Now().UnixNano()) -} - -// IsCheckpointSupported returns the relevant command line flag. -func IsCheckpointSupported() bool { - return *checkpoint -} - -// TmpDir returns the absolute path to a writable directory that can be used as -// scratch by the test. -func TmpDir() string { - dir := os.Getenv("TEST_TMPDIR") - if dir == "" { - dir = "/tmp" - } - return dir -} - -// ConfigureExePath configures the executable for runsc in the test environment. -func ConfigureExePath() error { - path, err := FindFile("runsc/runsc") - if err != nil { - return err - } - specutils.ExePath = path - return nil -} - -// FindFile searchs for a file inside the test run environment. It returns the -// full path to the file. It fails if none or more than one file is found. -func FindFile(path string) (string, error) { - wd, err := os.Getwd() - if err != nil { - return "", err - } - - // The test root is demarcated by a path element called "__main__". Search for - // it backwards from the working directory. - root := wd - for { - dir, name := filepath.Split(root) - if name == "__main__" { - break - } - if len(dir) == 0 { - return "", fmt.Errorf("directory __main__ not found in %q", wd) - } - // Remove ending slash to loop around. - root = dir[:len(dir)-1] - } - - // Annoyingly, bazel adds the build type to the directory path for go - // binaries, but not for c++ binaries. We use two different patterns to - // to find our file. - patterns := []string{ - // Try the obvious path first. - filepath.Join(root, path), - // If it was a go binary, use a wildcard to match the build - // type. The pattern is: /test-path/__main__/directories/*/file. - filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)), - } - - for _, p := range patterns { - matches, err := filepath.Glob(p) - if err != nil { - // "The only possible returned error is ErrBadPattern, - // when pattern is malformed." -godoc - return "", fmt.Errorf("error globbing %q: %v", p, err) - } - switch len(matches) { - case 0: - // Try the next pattern. - case 1: - // We found it. - return matches[0], nil - default: - return "", fmt.Errorf("more than one match found for %q: %s", path, matches) - } - } - return "", fmt.Errorf("file %q not found", path) -} - -// TestConfig returns the default configuration to use in tests. Note that -// 'RootDir' must be set by caller if required. 
-func TestConfig() *boot.Config { - return &boot.Config{ - Debug: true, - LogFormat: "text", - DebugLogFormat: "text", - AlsoLogToStderr: true, - LogPackets: true, - Network: boot.NetworkNone, - Strace: true, - Platform: "ptrace", - FileAccess: boot.FileAccessExclusive, - TestOnlyAllowRunAsCurrentUserWithoutChroot: true, - NumNetworkChannels: 1, - } -} - -// TestConfigWithRoot returns the default configuration to use in tests. -func TestConfigWithRoot(rootDir string) *boot.Config { - conf := TestConfig() - conf.RootDir = rootDir - return conf -} - -// NewSpecWithArgs creates a simple spec with the given args suitable for use -// in tests. -func NewSpecWithArgs(args ...string) *specs.Spec { - return &specs.Spec{ - // The host filesystem root is the container root. - Root: &specs.Root{ - Path: "/", - Readonly: true, - }, - Process: &specs.Process{ - Args: args, - Env: []string{ - "PATH=" + os.Getenv("PATH"), - }, - Capabilities: specutils.AllCapabilities(), - }, - Mounts: []specs.Mount{ - // Root is readonly, but many tests want to write to tmpdir. - // This creates a writable mount inside the root. Also, when tmpdir points - // to "/tmp", it makes the the actual /tmp to be mounted and not a tmpfs - // inside the sentry. - { - Type: "bind", - Destination: TmpDir(), - Source: TmpDir(), - }, - }, - Hostname: "runsc-test-hostname", - } -} - -// SetupRootDir creates a root directory for containers. -func SetupRootDir() (string, error) { - rootDir, err := ioutil.TempDir(TmpDir(), "containers") - if err != nil { - return "", fmt.Errorf("error creating root dir: %v", err) - } - return rootDir, nil -} - -// SetupContainer creates a bundle and root dir for the container, generates a -// test config, and writes the spec to config.json in the bundle dir. -func SetupContainer(spec *specs.Spec, conf *boot.Config) (rootDir, bundleDir string, err error) { - rootDir, err = SetupRootDir() - if err != nil { - return "", "", err - } - conf.RootDir = rootDir - bundleDir, err = SetupBundleDir(spec) - return rootDir, bundleDir, err -} - -// SetupBundleDir creates a bundle dir and writes the spec to config.json. -func SetupBundleDir(spec *specs.Spec) (bundleDir string, err error) { - bundleDir, err = ioutil.TempDir(TmpDir(), "bundle") - if err != nil { - return "", fmt.Errorf("error creating bundle dir: %v", err) - } - - if err = writeSpec(bundleDir, spec); err != nil { - return "", fmt.Errorf("error writing spec: %v", err) - } - return bundleDir, nil -} - -// writeSpec writes the spec to disk in the given directory. -func writeSpec(dir string, spec *specs.Spec) error { - b, err := json.Marshal(spec) - if err != nil { - return err - } - return ioutil.WriteFile(filepath.Join(dir, "config.json"), b, 0755) -} - -// UniqueContainerID generates a unique container id for each test. -// -// The container id is used to create an abstract unix domain socket, which must -// be unique. While the container forbids creating two containers with the same -// name, sometimes between test runs the socket does not get cleaned up quickly -// enough, causing container creation to fail. -func UniqueContainerID() string { - // Read 20 random bytes. - b := make([]byte, 20) - // "[Read] always returns len(p) and a nil error." --godoc - if _, err := rand.Read(b); err != nil { - panic("rand.Read failed: " + err.Error()) - } - // base32 encode the random bytes, so that the name is a valid - // container id and can be used as a socket name in the filesystem. 
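As the comment notes, the encoding choice matters here: standard base64 can emit '/' and '+', which are awkward in socket names, while base32 stays alphanumeric. Twenty random bytes are 160 bits, which encode to exactly 32 base32 characters with no '=' padding. A quick illustration (fragment; uses math/rand as the removed code did, plus encoding/base32 and fmt):

    b := make([]byte, 20)
    rand.Read(b) // "always returns len(p) and a nil error" per godoc
    fmt.Println(len(base32.StdEncoding.EncodeToString(b))) // 32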
-// UniqueContainerID generates a unique container id for each test.
-//
-// The container id is used to create an abstract unix domain socket, which
-// must be unique. While the runtime forbids creating two containers with the
-// same name, sometimes between test runs the socket does not get cleaned up
-// quickly enough, causing container creation to fail.
-func UniqueContainerID() string {
-    // Read 20 random bytes.
-    b := make([]byte, 20)
-    // "[Read] always returns len(p) and a nil error." --godoc
-    if _, err := rand.Read(b); err != nil {
-        panic("rand.Read failed: " + err.Error())
-    }
-    // base32 encode the random bytes, so that the name is a valid
-    // container id and can be used as a socket name in the filesystem.
-    return fmt.Sprintf("test-container-%s", base32.StdEncoding.EncodeToString(b))
-}
-
-// Copy copies a file from src to dst.
-func Copy(src, dst string) error {
-    in, err := os.Open(src)
-    if err != nil {
-        return err
-    }
-    defer in.Close()
-
-    out, err := os.Create(dst)
-    if err != nil {
-        return err
-    }
-    defer out.Close()
-
-    _, err = io.Copy(out, in)
-    return err
-}
-
-// Poll is a shorthand function to poll for something with a given timeout.
-func Poll(cb func() error, timeout time.Duration) error {
-    ctx, cancel := context.WithTimeout(context.Background(), timeout)
-    defer cancel()
-    b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
-    return backoff.Retry(cb, b)
-}
-
-// WaitForHTTP tries GET requests on a port until the call succeeds or the
-// timeout expires.
-func WaitForHTTP(port int, timeout time.Duration) error {
-    cb := func() error {
-        c := &http.Client{
-            // Calculate the timeout to allow a minimum of 5 attempts.
-            Timeout: timeout / 5,
-        }
-        url := fmt.Sprintf("http://localhost:%d/", port)
-        resp, err := c.Get(url)
-        if err != nil {
-            log.Infof("Waiting %s: %v", url, err)
-            return err
-        }
-        resp.Body.Close()
-        return nil
-    }
-    return Poll(cb, timeout)
-}
-
-// Reaper reaps child processes.
-type Reaper struct {
-    // mu protects ch, which will be nil if the reaper is not running.
-    mu sync.Mutex
-    ch chan os.Signal
-}
-
-// Start starts reaping child processes.
-func (r *Reaper) Start() {
-    r.mu.Lock()
-    defer r.mu.Unlock()
-
-    if r.ch != nil {
-        panic("reaper.Start called on a running reaper")
-    }
-
-    r.ch = make(chan os.Signal, 1)
-    signal.Notify(r.ch, syscall.SIGCHLD)
-
-    go func() {
-        for {
-            r.mu.Lock()
-            ch := r.ch
-            r.mu.Unlock()
-            if ch == nil {
-                return
-            }
-
-            _, ok := <-ch
-            if !ok {
-                // Channel closed.
-                return
-            }
-            for {
-                cpid, _ := syscall.Wait4(-1, nil, syscall.WNOHANG, nil)
-                if cpid < 1 {
-                    break
-                }
-            }
-        }
-    }()
-}
-
-// Stop stops reaping child processes.
-func (r *Reaper) Stop() {
-    r.mu.Lock()
-    defer r.mu.Unlock()
-
-    if r.ch == nil {
-        panic("reaper.Stop called on a stopped reaper")
-    }
-
-    signal.Stop(r.ch)
-    close(r.ch)
-    r.ch = nil
-}
-
-// StartReaper is a helper that starts a new Reaper and returns a function to
-// stop it.
-func StartReaper() func() {
-    r := &Reaper{}
-    r.Start()
-    return r.Stop
-}
-
-// WaitUntilRead reads from the given reader until the wanted string is found
-// or until it times out.
-func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error {
-    sc := bufio.NewScanner(r)
-    if split != nil {
-        sc.Split(split)
-    }
-    // done must be accessed atomically. A value greater than 0 indicates
-    // that the read loop can exit.
-    var done uint32
-    doneCh := make(chan struct{})
-    go func() {
-        for sc.Scan() {
-            t := sc.Text()
-            if strings.Contains(t, want) {
-                atomic.StoreUint32(&done, 1)
-                close(doneCh)
-                break
-            }
-            if atomic.LoadUint32(&done) > 0 {
-                break
-            }
-        }
-    }()
-    select {
-    case <-time.After(timeout):
-        atomic.StoreUint32(&done, 1)
-        return fmt.Errorf("timeout waiting to read %q", want)
-    case <-doneCh:
-        return nil
-    }
-}
-
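A minimal sketch of the intended call pattern for the reaper and polling helpers above. Again this is illustrative only: the test body, the port, and the timeout are assumptions, and the import path reflects the pre-change package location.

package container_test

import (
    "testing"
    "time"

    "gvisor.dev/gvisor/runsc/testutil"
)

func TestHTTPServer(t *testing.T) {
    // Reap child processes for the duration of the test. StartReaper
    // returns the corresponding Stop function, so a deferred double
    // call starts the reaper now and stops it on exit.
    defer testutil.StartReaper()()

    // ... start a child process that serves HTTP on port 8080 ...

    // Retry GETs every 100ms until the server responds or 10s elapse.
    if err := testutil.WaitForHTTP(8080, 10*time.Second); err != nil {
        t.Fatalf("server never came up: %v", err)
    }
}
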
-// KillCommand kills the process running cmd, unless it hasn't been started.
-// It returns an error if the process cannot be killed, unless the reason is
-// that the process has already exited.
-func KillCommand(cmd *exec.Cmd) error {
-    if cmd.Process == nil {
-        return nil
-    }
-    if err := cmd.Process.Kill(); err != nil {
-        if !strings.Contains(err.Error(), "process already finished") {
-            return fmt.Errorf("failed to kill process %v: %v", cmd, err)
-        }
-    }
-    return nil
-}
-
-// WriteTmpFile writes text to a temporary file, closes the file, and returns
-// the name of the file.
-func WriteTmpFile(pattern, text string) (string, error) {
-    file, err := ioutil.TempFile(TmpDir(), pattern)
-    if err != nil {
-        return "", err
-    }
-    defer file.Close()
-    if _, err := file.Write([]byte(text)); err != nil {
-        return "", err
-    }
-    return file.Name(), nil
-}
-
-// RandomName creates a name with a 6-digit random number appended to it.
-func RandomName(prefix string) string {
-    return fmt.Sprintf("%s-%06d", prefix, rand.Int31n(1000000))
-}
-
-// IsStatic returns true iff the given file is a static binary.
-func IsStatic(filename string) (bool, error) {
-    f, err := elf.Open(filename)
-    if err != nil {
-        return false, err
-    }
-    for _, prog := range f.Progs {
-        if prog.Type == elf.PT_INTERP {
-            return false, nil // Has interpreter.
-        }
-    }
-    return true, nil
-}
-
-// TestBoundsForShard calculates the beginning and end indices for the test
-// based on the TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. The
-// returned ints are the beginning (inclusive) and end (exclusive) of the
-// subslice corresponding to the shard. If either of the env vars is not
-// present, the function returns bounds that include all tests. If there are
-// more shards than there are tests, the returned range may be empty.
-func TestBoundsForShard(numTests int) (int, int, error) {
-    var (
-        begin = 0
-        end   = numTests
-    )
-    indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS")
-    if indexStr == "" || totalStr == "" {
-        return begin, end, nil
-    }
-
-    // Parse index and total to ints.
-    shardIndex, err := strconv.Atoi(indexStr)
-    if err != nil {
-        return 0, 0, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err)
-    }
-    shardTotal, err := strconv.Atoi(totalStr)
-    if err != nil {
-        return 0, 0, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err)
-    }
-
-    // Calculate! For example, 10 tests over 3 shards gives
-    // shardSize = ceil(10/3) = 4 and the ranges [0,4), [4,8), [8,10).
-    shardSize := int(math.Ceil(float64(numTests) / float64(shardTotal)))
-    begin = shardIndex * shardSize
-    end = (shardIndex + 1) * shardSize
-    if begin > numTests {
-        // Nothing to run.
-        return 0, 0, nil
-    }
-    if end > numTests {
-        end = numTests
-    }
-    return begin, end, nil
-}
diff --git a/runsc/version_test.sh b/runsc/version_test.sh
index cc0ca3f05..747350654 100755
--- a/runsc/version_test.sh
+++ b/runsc/version_test.sh
@@ -16,7 +16,7 @@
 set -euf -x -o pipefail
-readonly runsc="${TEST_SRCDIR}/__main__/runsc/linux_amd64_pure_stripped/runsc"
+readonly runsc="$1"
 readonly version=$($runsc --version)
 # Version should not match VERSION, which is the default and which will