summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/BUILD9
-rw-r--r--runsc/boot/BUILD12
-rw-r--r--runsc/boot/compat.go16
-rw-r--r--runsc/boot/compat_amd64.go2
-rw-r--r--runsc/boot/compat_test.go2
-rw-r--r--runsc/boot/config.go47
-rw-r--r--runsc/boot/controller.go45
-rw-r--r--runsc/boot/debug.go2
-rw-r--r--runsc/boot/events.go4
-rw-r--r--runsc/boot/fds.go35
-rw-r--r--runsc/boot/filter/BUILD4
-rw-r--r--runsc/boot/filter/config.go29
-rw-r--r--runsc/boot/filter/extra_filters.go4
-rw-r--r--runsc/boot/filter/extra_filters_msan.go2
-rw-r--r--runsc/boot/filter/extra_filters_race.go3
-rw-r--r--runsc/boot/filter/filter.go19
-rw-r--r--runsc/boot/fs.go324
-rw-r--r--runsc/boot/fs_test.go193
-rw-r--r--runsc/boot/limits.go4
-rw-r--r--runsc/boot/loader.go208
-rw-r--r--runsc/boot/loader_test.go22
-rw-r--r--runsc/boot/network.go22
-rw-r--r--runsc/boot/platforms/BUILD16
-rw-r--r--runsc/boot/platforms/platforms.go30
-rw-r--r--runsc/boot/pprof.go18
-rw-r--r--runsc/boot/strace.go2
-rw-r--r--runsc/boot/user.go146
-rw-r--r--runsc/boot/user_test.go253
-rw-r--r--runsc/cgroup/BUILD2
-rw-r--r--runsc/cgroup/cgroup.go4
-rw-r--r--runsc/cmd/BUILD3
-rw-r--r--runsc/cmd/boot.go31
-rw-r--r--runsc/cmd/capability.go2
-rw-r--r--runsc/cmd/capability_test.go19
-rw-r--r--runsc/cmd/checkpoint.go15
-rw-r--r--runsc/cmd/chroot.go4
-rw-r--r--runsc/cmd/cmd.go4
-rw-r--r--runsc/cmd/create.go24
-rw-r--r--runsc/cmd/debug.go99
-rw-r--r--runsc/cmd/delete.go6
-rw-r--r--runsc/cmd/delete_test.go2
-rw-r--r--runsc/cmd/do.go64
-rw-r--r--runsc/cmd/error.go2
-rw-r--r--runsc/cmd/events.go6
-rw-r--r--runsc/cmd/exec.go22
-rw-r--r--runsc/cmd/exec_test.go8
-rw-r--r--runsc/cmd/gofer.go18
-rw-r--r--runsc/cmd/kill.go4
-rw-r--r--runsc/cmd/list.go4
-rw-r--r--runsc/cmd/pause.go4
-rw-r--r--runsc/cmd/ps.go6
-rw-r--r--runsc/cmd/restore.go27
-rw-r--r--runsc/cmd/resume.go4
-rw-r--r--runsc/cmd/run.go25
-rw-r--r--runsc/cmd/start.go4
-rw-r--r--runsc/cmd/state.go6
-rw-r--r--runsc/cmd/syscalls.go4
-rw-r--r--runsc/cmd/wait.go4
-rw-r--r--runsc/console/BUILD6
-rw-r--r--runsc/container/BUILD3
-rw-r--r--runsc/container/console_test.go37
-rw-r--r--runsc/container/container.go125
-rw-r--r--runsc/container/container_test.go307
-rw-r--r--runsc/container/hook.go2
-rw-r--r--runsc/container/multi_container_test.go361
-rw-r--r--runsc/container/shared_volume_test.go22
-rw-r--r--runsc/container/test_app/test_app.go2
-rw-r--r--runsc/fsgofer/BUILD2
-rw-r--r--runsc/fsgofer/filter/BUILD2
-rw-r--r--runsc/fsgofer/filter/config.go4
-rw-r--r--runsc/fsgofer/filter/extra_filters.go4
-rw-r--r--runsc/fsgofer/filter/extra_filters_msan.go4
-rw-r--r--runsc/fsgofer/filter/extra_filters_race.go4
-rw-r--r--runsc/fsgofer/filter/filter.go2
-rw-r--r--runsc/fsgofer/fsgofer.go41
-rw-r--r--runsc/fsgofer/fsgofer_test.go8
-rw-r--r--runsc/fsgofer/fsgofer_unsafe.go4
-rw-r--r--runsc/main.go76
-rw-r--r--runsc/sandbox/BUILD5
-rw-r--r--runsc/sandbox/network.go10
-rw-r--r--runsc/sandbox/sandbox.go232
-rw-r--r--runsc/specutils/BUILD7
-rw-r--r--runsc/specutils/fs.go42
-rw-r--r--runsc/specutils/namespace.go56
-rw-r--r--runsc/specutils/specutils.go8
-rw-r--r--runsc/test/BUILD0
-rw-r--r--runsc/test/build_defs.bzl19
-rw-r--r--runsc/test/image/BUILD7
-rw-r--r--runsc/test/image/image_test.go9
-rw-r--r--runsc/test/integration/BUILD7
-rw-r--r--runsc/test/integration/exec_test.go4
-rw-r--r--runsc/test/integration/integration_test.go12
-rw-r--r--runsc/test/integration/regression_test.go2
-rw-r--r--runsc/test/root/BUILD2
-rw-r--r--runsc/test/root/cgroup_test.go4
-rw-r--r--runsc/test/root/chroot_test.go4
-rw-r--r--runsc/test/root/crictl_test.go6
-rw-r--r--runsc/test/root/testdata/BUILD2
-rw-r--r--runsc/test/testutil/BUILD3
-rw-r--r--runsc/test/testutil/docker.go10
-rw-r--r--runsc/test/testutil/testutil.go55
101 files changed, 2634 insertions, 788 deletions
diff --git a/runsc/BUILD b/runsc/BUILD
index 3d6c92e4c..6b8c92706 100644
--- a/runsc/BUILD
+++ b/runsc/BUILD
@@ -16,6 +16,7 @@ go_binary(
x_defs = {"main.version": "{VERSION}"},
deps = [
"//pkg/log",
+ "//pkg/sentry/platform",
"//runsc/boot",
"//runsc/cmd",
"//runsc/specutils",
@@ -47,6 +48,7 @@ go_binary(
x_defs = {"main.version": "{VERSION}"},
deps = [
"//pkg/log",
+ "//pkg/sentry/platform",
"//runsc/boot",
"//runsc/cmd",
"//runsc/specutils",
@@ -82,7 +84,7 @@ pkg_tar(
genrule(
name = "deb-version",
outs = ["version.txt"],
- cmd = "$(location :runsc) -version | head -n 1 | sed 's/^[^0-9]*//' > $@",
+ cmd = "$(location :runsc) -version | grep 'runsc version' | sed 's/^[^0-9]*//' > $@",
stamp = 1,
tools = [":runsc"],
)
@@ -96,6 +98,11 @@ pkg_deb(
maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>",
package = "runsc",
postinst = "debian/postinst.sh",
+ tags = [
+ # TODO(b/135475885): pkg_deb requires python2:
+ # https://github.com/bazelbuild/bazel/issues/8443
+ "manual",
+ ],
version_file = ":version.txt",
visibility = [
"//visibility:public",
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index ac28c4339..5025401dd 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -16,9 +16,11 @@ go_library(
"limits.go",
"loader.go",
"network.go",
+ "pprof.go",
"strace.go",
+ "user.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/boot",
+ importpath = "gvisor.dev/gvisor/runsc/boot",
visibility = [
"//runsc:__subpackages__",
"//test:__subpackages__",
@@ -32,6 +34,7 @@ go_library(
"//pkg/log",
"//pkg/memutil",
"//pkg/rand",
+ "//pkg/refs",
"//pkg/sentry/arch",
"//pkg/sentry/arch:registers_go_proto",
"//pkg/sentry/context",
@@ -49,13 +52,10 @@ go_library(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel:uncaught_signal_go_proto",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/limits",
"//pkg/sentry/loader",
"//pkg/sentry/pgalloc",
"//pkg/sentry/platform",
- "//pkg/sentry/platform/kvm",
- "//pkg/sentry/platform/ptrace",
"//pkg/sentry/sighandling",
"//pkg/sentry/socket/epsocket",
"//pkg/sentry/socket/hostinet",
@@ -68,6 +68,7 @@ go_library(
"//pkg/sentry/time",
"//pkg/sentry/unimpl:unimplemented_syscall_go_proto",
"//pkg/sentry/usage",
+ "//pkg/sentry/usermem",
"//pkg/sentry/watchdog",
"//pkg/syserror",
"//pkg/tcpip",
@@ -83,6 +84,7 @@ go_library(
"//pkg/tcpip/transport/udp",
"//pkg/urpc",
"//runsc/boot/filter",
+ "//runsc/boot/platforms",
"//runsc/specutils",
"@com_github_golang_protobuf//proto:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
@@ -94,7 +96,9 @@ go_test(
size = "small",
srcs = [
"compat_test.go",
+ "fs_test.go",
"loader_test.go",
+ "user_test.go",
],
embed = [":boot"],
deps = [
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index c369e4d64..07e35ab10 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -21,14 +21,14 @@ import (
"syscall"
"github.com/golang/protobuf/proto"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
- ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/strace"
- spb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+ ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
+ spb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
)
func initCompatLogs(fd int) error {
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 99df5e614..43cd0db94 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -17,7 +17,7 @@ package boot
import (
"fmt"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
)
// reportLimit is the max number of events that should be reported per tracker.
diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go
index ccec3d20c..388298d8d 100644
--- a/runsc/boot/compat_test.go
+++ b/runsc/boot/compat_test.go
@@ -17,7 +17,7 @@ package boot
import (
"testing"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
)
func TestOnceTracker(t *testing.T) {
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 8564c502d..6f1eb9a41 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -19,43 +19,9 @@ import (
"strconv"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
)
-// PlatformType tells which platform to use.
-type PlatformType int
-
-const (
- // PlatformPtrace runs the sandbox with the ptrace platform.
- PlatformPtrace PlatformType = iota
-
- // PlatformKVM runs the sandbox with the KVM platform.
- PlatformKVM
-)
-
-// MakePlatformType converts type from string.
-func MakePlatformType(s string) (PlatformType, error) {
- switch s {
- case "ptrace":
- return PlatformPtrace, nil
- case "kvm":
- return PlatformKVM, nil
- default:
- return 0, fmt.Errorf("invalid platform type %q", s)
- }
-}
-
-func (p PlatformType) String() string {
- switch p {
- case PlatformPtrace:
- return "ptrace"
- case PlatformKVM:
- return "kvm"
- default:
- return fmt.Sprintf("unknown(%d)", p)
- }
-}
-
// FileAccessType tells how the filesystem is accessed.
type FileAccessType int
@@ -187,7 +153,7 @@ type Config struct {
LogPackets bool
// Platform is the platform to run on.
- Platform PlatformType
+ Platform string
// Strace indicates that strace should be enabled.
Strace bool
@@ -226,6 +192,12 @@ type Config struct {
// to the same underlying network device. This allows netstack to better
// scale for high throughput use cases.
NumNetworkChannels int
+
+ // Rootless allows the sandbox to be started with a user that is not root.
+ // Defense in depth measures are weaker with rootless. Specifically, the
+ // sandbox and Gofer process run as root inside a user namespace with root
+ // mapped to the caller's user.
+ Rootless bool
}
// ToFlags returns a slice of flags that correspond to the given Config.
@@ -241,7 +213,7 @@ func (c *Config) ToFlags() []string {
"--overlay=" + strconv.FormatBool(c.Overlay),
"--network=" + c.Network.String(),
"--log-packets=" + strconv.FormatBool(c.LogPackets),
- "--platform=" + c.Platform.String(),
+ "--platform=" + c.Platform,
"--strace=" + strconv.FormatBool(c.Strace),
"--strace-syscalls=" + strings.Join(c.StraceSyscalls, ","),
"--strace-log-size=" + strconv.Itoa(int(c.StraceLogSize)),
@@ -250,6 +222,7 @@ func (c *Config) ToFlags() []string {
"--profile=" + strconv.FormatBool(c.ProfileEnable),
"--net-raw=" + strconv.FormatBool(c.EnableRaw),
"--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
+ "--rootless=" + strconv.FormatBool(c.Rootless),
}
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
// Only include if set since it is never to be used by users.
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index a277145b1..d79aaff60 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -22,17 +22,17 @@ import (
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/state"
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/state"
+ "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/urpc"
)
const (
@@ -96,8 +96,10 @@ const (
// SandboxStacks collects sandbox stacks for debugging.
SandboxStacks = "debug.Stacks"
+)
- // Profiling related commands (see pprof.go for more details).
+// Profiling related commands (see pprof.go for more details).
+const (
StartCPUProfile = "Profile.StartCPUProfile"
StopCPUProfile = "Profile.StopCPUProfile"
HeapProfile = "Profile.HeapProfile"
@@ -105,6 +107,11 @@ const (
StopTrace = "Profile.StopTrace"
)
+// Logging related commands (see logging.go for more details).
+const (
+ ChangeLogging = "Logging.Change"
+)
+
// ControlSocketAddr generates an abstract unix socket name for the given ID.
func ControlSocketAddr(id string) string {
return fmt.Sprintf("\x00runsc-sandbox.%s", id)
@@ -143,6 +150,7 @@ func newController(fd int, l *Loader) (*controller, error) {
}
srv.Register(&debug{})
+ srv.Register(&control.Logging{})
if l.conf.ProfileEnable {
srv.Register(&control.Profile{})
}
@@ -340,7 +348,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
cm.l.k = k
// Set up the restore environment.
- mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k)
+ mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k, cm.l.mountHints)
renv, err := mntr.createRestoreEnvironment(cm.l.conf)
if err != nil {
return fmt.Errorf("creating RestoreEnvironment: %v", err)
@@ -359,6 +367,17 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
return fmt.Errorf("file cannot be empty")
}
+ if cm.l.conf.ProfileEnable {
+ // initializePProf opens /proc/self/maps, so has to be
+ // called before installing seccomp filters.
+ initializePProf()
+ }
+
+ // Seccomp filters have to be applied before parsing the state file.
+ if err := cm.l.installSeccompFilters(); err != nil {
+ return err
+ }
+
// Load the state.
loadOpts := state.LoadOpts{Source: specFile}
if err := loadOpts.Load(k, networkStack); err != nil {
diff --git a/runsc/boot/debug.go b/runsc/boot/debug.go
index 79f7387ac..1fb32c527 100644
--- a/runsc/boot/debug.go
+++ b/runsc/boot/debug.go
@@ -15,7 +15,7 @@
package boot
import (
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
type debug struct {
diff --git a/runsc/boot/events.go b/runsc/boot/events.go
index ffd99f5e9..422f4da00 100644
--- a/runsc/boot/events.go
+++ b/runsc/boot/events.go
@@ -15,8 +15,8 @@
package boot
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
)
// Event struct for encoding the event data to JSON. Corresponds to runc's
diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go
index 0811e10f4..e5de1f3d7 100644
--- a/runsc/boot/fds.go
+++ b/runsc/boot/fds.go
@@ -17,36 +17,27 @@ package boot
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
-// createFDMap creates an FD map that contains stdin, stdout, and stderr. If
-// console is true, then ioctl calls will be passed through to the host FD.
+// createFDTable creates an FD table that contains stdin, stdout, and stderr.
+// If console is true, then ioctl calls will be passed through to the host FD.
// Upon success, createFDMap dups then closes stdioFDs.
-func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) {
+func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) {
if len(stdioFDs) != 3 {
return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
}
k := kernel.KernelFromContext(ctx)
- fdm := k.NewFDMap()
- defer fdm.DecRef()
+ fdTable := k.NewFDTable()
+ defer fdTable.DecRef()
mounter := fs.FileOwnerFromContext(ctx)
- // Maps sandbox FD to host FD.
- fdMap := map[int]int{
- 0: stdioFDs[0],
- 1: stdioFDs[1],
- 2: stdioFDs[2],
- }
-
var ttyFile *fs.File
- for appFD, hostFD := range fdMap {
+ for appFD, hostFD := range stdioFDs {
var appFile *fs.File
if console && appFD < 3 {
@@ -80,11 +71,11 @@ func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs
}
// Add the file to the FD map.
- if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil {
+ if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
return nil, err
}
}
- fdm.IncRef()
- return fdm, nil
+ fdTable.IncRef()
+ return fdTable, nil
}
diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD
index 3b6020cf3..f5509b6b7 100644
--- a/runsc/boot/filter/BUILD
+++ b/runsc/boot/filter/BUILD
@@ -11,7 +11,7 @@ go_library(
"extra_filters_race.go",
"filter.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/boot/filter",
+ importpath = "gvisor.dev/gvisor/runsc/boot/filter",
visibility = [
"//runsc/boot:__subpackages__",
],
@@ -20,8 +20,6 @@ go_library(
"//pkg/log",
"//pkg/seccomp",
"//pkg/sentry/platform",
- "//pkg/sentry/platform/kvm",
- "//pkg/sentry/platform/ptrace",
"//pkg/tcpip/link/fdbased",
"@org_golang_x_sys//unix:go_default_library",
],
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index ef2dbfad2..0ee5b8bbd 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -19,9 +19,9 @@ import (
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
)
// allowedSyscalls is the set of syscalls executed by the Sentry to the host OS.
@@ -437,29 +437,6 @@ func hostInetFilters() seccomp.SyscallRules {
}
}
-// ptraceFilters returns syscalls made exclusively by the ptrace platform.
-func ptraceFilters() seccomp.SyscallRules {
- return seccomp.SyscallRules{
- unix.SYS_GETCPU: {},
- unix.SYS_SCHED_SETAFFINITY: {},
- syscall.SYS_PTRACE: {},
- syscall.SYS_TGKILL: {},
- syscall.SYS_WAIT4: {},
- }
-}
-
-// kvmFilters returns syscalls made exclusively by the KVM platform.
-func kvmFilters() seccomp.SyscallRules {
- return seccomp.SyscallRules{
- syscall.SYS_ARCH_PRCTL: {},
- syscall.SYS_IOCTL: {},
- syscall.SYS_MMAP: {},
- syscall.SYS_RT_SIGSUSPEND: {},
- syscall.SYS_RT_SIGTIMEDWAIT: {},
- 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host.
- }
-}
-
func controlServerFilters(fd int) seccomp.SyscallRules {
return seccomp.SyscallRules{
syscall.SYS_ACCEPT: []seccomp.Rule{
diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go
index 5c5ec4e06..e28d4b8d6 100644
--- a/runsc/boot/filter/extra_filters.go
+++ b/runsc/boot/filter/extra_filters.go
@@ -17,11 +17,11 @@
package filter
import (
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by
-// Go intrumentation tools, e.g. -race, -msan.
+// Go instrumentation tools, e.g. -race, -msan.
// Returns empty when disabled.
func instrumentationFilters() seccomp.SyscallRules {
return nil
diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go
index ac5a0f1aa..5e5a3c998 100644
--- a/runsc/boot/filter/extra_filters_msan.go
+++ b/runsc/boot/filter/extra_filters_msan.go
@@ -19,7 +19,7 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by MSAN.
diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go
index ba3c1ce87..9ff80276a 100644
--- a/runsc/boot/filter/extra_filters_race.go
+++ b/runsc/boot/filter/extra_filters_race.go
@@ -19,7 +19,7 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by TSAN.
@@ -33,6 +33,7 @@ func instrumentationFilters() seccomp.SyscallRules {
syscall.SYS_MUNLOCK: {},
syscall.SYS_NANOSLEEP: {},
syscall.SYS_OPEN: {},
+ syscall.SYS_OPENAT: {},
syscall.SYS_SET_ROBUST_LIST: {},
// Used within glibc's malloc.
syscall.SYS_TIME: {},
diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go
index 17479e0dd..e80c171b3 100644
--- a/runsc/boot/filter/filter.go
+++ b/runsc/boot/filter/filter.go
@@ -18,13 +18,9 @@
package filter
import (
- "fmt"
-
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
)
// Options are seccomp filter related options.
@@ -53,14 +49,7 @@ func Install(opt Options) error {
s.Merge(profileFilters())
}
- switch p := opt.Platform.(type) {
- case *ptrace.PTrace:
- s.Merge(ptraceFilters())
- case *kvm.KVM:
- s.Merge(kvmFilters())
- default:
- return fmt.Errorf("unknown platform type %T", p)
- }
+ s.Merge(opt.Platform.SyscallFilters())
return seccomp.Install(s)
}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 939f2419c..d3e3196fd 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -18,29 +18,30 @@ import (
"fmt"
"path"
"path/filepath"
+ "sort"
"strconv"
"strings"
"syscall"
// Include filesystem types that OCI spec might mount.
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/dev"
+ "gvisor.dev/gvisor/pkg/sentry/fs/gofer"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/proc"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/sys"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/tty"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -50,6 +51,9 @@ const (
// Device name for root mount.
rootDevice = "9pfs-/"
+ // MountPrefix is the annotation prefix for mount hints.
+ MountPrefix = "gvisor.dev/spec/mount"
+
// ChildContainersDir is the directory where child container root
// filesystems are mounted.
ChildContainersDir = "/__runsc_containers__"
@@ -72,7 +76,7 @@ func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string,
tmpFS := mustFindFilesystem("tmpfs")
if !fs.IsDir(lower.StableAttr) {
// Create overlay on top of mount file, e.g. /etc/hostname.
- msrc := fs.NewCachingMountSource(tmpFS, upperFlags)
+ msrc := fs.NewCachingMountSource(ctx, tmpFS, upperFlags)
return fs.NewOverlayRootFile(ctx, msrc, lower, upperFlags)
}
@@ -222,7 +226,11 @@ func mustFindFilesystem(name string) fs.Filesystem {
// addSubmountOverlay overlays the inode over a ramfs tree containing the given
// paths.
func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) {
- msrc := fs.NewPseudoMountSource()
+ // Construct a ramfs tree of mount points. The contents never
+ // change, so this can be fully caching. There's no real
+ // filesystem backing this tree, so we set the filesystem to
+ // nil.
+ msrc := fs.NewCachingMountSource(ctx, nil, fs.MountSourceFlags{})
mountTree, err := ramfs.MakeDirectoryTree(ctx, msrc, submounts)
if err != nil {
return nil, fmt.Errorf("creating mount tree: %v", err)
@@ -292,6 +300,174 @@ func (f *fdDispenser) empty() bool {
return len(f.fds) == 0
}
+type shareType int
+
+const (
+ invalid shareType = iota
+
+ // container shareType indicates that the mount is used by a single container.
+ container
+
+ // pod shareType indicates that the mount is used by more than one container
+ // inside the pod.
+ pod
+
+ // shared shareType indicates that the mount can also be shared with a process
+ // outside the pod, e.g. NFS.
+ shared
+)
+
+func parseShare(val string) (shareType, error) {
+ switch val {
+ case "container":
+ return container, nil
+ case "pod":
+ return pod, nil
+ case "shared":
+ return shared, nil
+ default:
+ return 0, fmt.Errorf("invalid share value %q", val)
+ }
+}
+
+func (s shareType) String() string {
+ switch s {
+ case invalid:
+ return "invalid"
+ case container:
+ return "container"
+ case pod:
+ return "pod"
+ case shared:
+ return "shared"
+ default:
+ return fmt.Sprintf("invalid share value %d", s)
+ }
+}
+
+// mountHint represents extra information about mounts that are provided via
+// annotations. They can override mount type, and provide sharing information
+// so that mounts can be correctly shared inside the pod.
+type mountHint struct {
+ name string
+ share shareType
+ mount specs.Mount
+
+ // root is the inode where the volume is mounted. For mounts with 'pod' share
+ // the volume is mounted once and then bind mounted inside the containers.
+ root *fs.Inode
+}
+
+func (m *mountHint) setField(key, val string) error {
+ switch key {
+ case "source":
+ if len(val) == 0 {
+ return fmt.Errorf("source cannot be empty")
+ }
+ m.mount.Source = val
+ case "type":
+ return m.setType(val)
+ case "share":
+ share, err := parseShare(val)
+ if err != nil {
+ return err
+ }
+ m.share = share
+ case "options":
+ return m.setOptions(val)
+ default:
+ return fmt.Errorf("invalid mount annotation: %s=%s", key, val)
+ }
+ return nil
+}
+
+func (m *mountHint) setType(val string) error {
+ switch val {
+ case "tmpfs", "bind":
+ m.mount.Type = val
+ default:
+ return fmt.Errorf("invalid type %q", val)
+ }
+ return nil
+}
+
+func (m *mountHint) setOptions(val string) error {
+ opts := strings.Split(val, ",")
+ if err := specutils.ValidateMountOptions(opts); err != nil {
+ return err
+ }
+ // Sort options so they can be compared with container mount options later on.
+ sort.Strings(opts)
+ m.mount.Options = opts
+ return nil
+}
+
+func (m *mountHint) isSupported() bool {
+ return m.mount.Type == tmpfs && m.share == pod
+}
+
+// podMountHints contains a collection of mountHints for the pod.
+type podMountHints struct {
+ mounts map[string]*mountHint
+}
+
+func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
+ mnts := make(map[string]*mountHint)
+ for k, v := range spec.Annotations {
+ // Look for 'gvisor.dev/spec/mount' annotations and parse them.
+ if strings.HasPrefix(k, MountPrefix) {
+ parts := strings.Split(k, "/")
+ if len(parts) != 5 {
+ return nil, fmt.Errorf("invalid mount annotation: %s=%s", k, v)
+ }
+ name := parts[3]
+ if len(name) == 0 || path.Clean(name) != name {
+ return nil, fmt.Errorf("invalid mount name: %s", name)
+ }
+ mnt := mnts[name]
+ if mnt == nil {
+ mnt = &mountHint{name: name}
+ mnts[name] = mnt
+ }
+ if err := mnt.setField(parts[4], v); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ // Validate all hints after done parsing.
+ for name, m := range mnts {
+ log.Infof("Mount annotation found, name: %s, source: %q, type: %s, share: %v", name, m.mount.Source, m.mount.Type, m.share)
+ if m.share == invalid {
+ return nil, fmt.Errorf("share field for %q has not been set", m.name)
+ }
+ if len(m.mount.Source) == 0 {
+ return nil, fmt.Errorf("source field for %q has not been set", m.name)
+ }
+ if len(m.mount.Type) == 0 {
+ return nil, fmt.Errorf("type field for %q has not been set", m.name)
+ }
+
+ // Check for duplicate mount sources.
+ for name2, m2 := range mnts {
+ if name != name2 && m.mount.Source == m2.mount.Source {
+ return nil, fmt.Errorf("mounts %q and %q have the same mount source %q", m.name, m2.name, m.mount.Source)
+ }
+ }
+ }
+
+ return &podMountHints{mounts: mnts}, nil
+}
+
+func (p *podMountHints) findMount(mount specs.Mount) *mountHint {
+ for _, m := range p.mounts {
+ if m.mount.Source == mount.Source {
+ return m
+ }
+ }
+ return nil
+}
+
type containerMounter struct {
// cid is the container ID. May be set to empty for the root container.
cid string
@@ -306,15 +482,18 @@ type containerMounter struct {
fds fdDispenser
k *kernel.Kernel
+
+ hints *podMountHints
}
-func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel) *containerMounter {
+func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
return &containerMounter{
cid: cid,
root: spec.Root,
mounts: compileMounts(spec),
fds: fdDispenser{fds: goferFDs},
k: k,
+ hints: hints,
}
}
@@ -476,6 +655,15 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error
// 'setMountNS' is called after namespace is created. It must set the mount NS
// to 'rootCtx'.
func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx context.Context, conf *Config, setMountNS func(*fs.MountNamespace)) error {
+ for _, hint := range c.hints.mounts {
+ log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
+ inode, err := c.mountSharedMaster(rootCtx, conf, hint)
+ if err != nil {
+ return fmt.Errorf("mounting shared master %q: %v", hint.name, err)
+ }
+ hint.root = inode
+ }
+
// Create a tmpfs mount where we create and mount a root filesystem for
// each child container.
c.mounts = append(c.mounts, specs.Mount{
@@ -498,21 +686,57 @@ func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx c
return c.mountSubmounts(rootCtx, conf, mns, root)
}
+// mountSharedMaster mounts the master of a volume that is shared among
+// containers in a pod. It returns the root mount's inode.
+//
+// An error is returned when the mount name and options cannot be resolved,
+// when the hint's mount type maps to no filesystem name, or when mounting the
+// filesystem or adding the overlay fails.
+func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config, hint *mountHint) (*fs.Inode, error) {
+ // Map mount type to filesystem name, and parse out the options that we are
+ // capable of dealing with.
+ fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount)
+ if err != nil {
+ return nil, err
+ }
+ if len(fsName) == 0 {
+ return nil, fmt.Errorf("mount type not supported %q", hint.mount.Type)
+ }
+
+ // Mount with revalidate because it's shared among containers.
+ opts = append(opts, "cache=revalidate")
+
+ // All filesystem names should have been mapped to something we know.
+ filesystem := mustFindFilesystem(fsName)
+
+ mf := mountFlags(hint.mount.Options)
+ if useOverlay {
+ // All writes go to upper, be paranoid and make lower readonly.
+ mf.ReadOnly = true
+ }
+
+ inode, err := filesystem.Mount(ctx, mountDevice(hint.mount), mf, strings.Join(opts, ","), nil)
+ if err != nil {
+ return nil, fmt.Errorf("creating mount %q: %v", hint.name, err)
+ }
+
+ if useOverlay {
+ // The inode returned to the caller is the overlay, not the lower mount.
+ log.Debugf("Adding overlay on top of shared mount %q", hint.name)
+ inode, err = addOverlay(ctx, conf, inode, hint.mount.Type, mf)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return inode, nil
+}
+
// createRootMount creates the root filesystem.
func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*fs.Inode, error) {
// First construct the filesystem from the spec.Root.
mf := fs.MountSourceFlags{ReadOnly: c.root.Readonly || conf.Overlay}
- var (
- rootInode *fs.Inode
- err error
- )
-
fd := c.fds.remove()
log.Infof("Mounting root over 9P, ioFD: %d", fd)
p9FS := mustFindFilesystem("9p")
opts := p9MountOptions(fd, conf.FileAccess)
- rootInode, err = p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
+ rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
if err != nil {
return nil, fmt.Errorf("creating root mount point: %v", err)
}
@@ -579,8 +803,14 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error {
for _, m := range c.mounts {
- if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
- return fmt.Errorf("mount submount %q: %v", m.Destination, err)
+ if hint := c.hints.findMount(m); hint != nil && hint.isSupported() {
+ if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil {
+ return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err)
+ }
+ } else {
+ if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
+ return fmt.Errorf("mount submount %q: %v", m.Destination, err)
+ }
}
}
@@ -653,6 +883,40 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
return nil
}
+// mountSharedSubmount binds mount to a previously mounted volume that is shared
+// among containers in the same pod.
+//
+// mount is the container's mount and source is the pod-level hint whose root
+// inode gets mounted at mount.Destination. An error is returned when the two
+// option sets differ, when the destination cannot be found in mns, or when the
+// mount itself fails.
+func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount specs.Mount, source *mountHint) error {
+ // For now enforce that all options are the same. Once bind mount is properly
+ // supported, then we should ensure the master is less restrictive than the
+ // container, e.g. master can be 'rw' while container mounts as 'ro'.
+ if len(mount.Options) != len(source.mount.Options) {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options)
+ }
+ // Sort a copy: mount.Options shares its backing array with the caller's
+ // mount, which must not be reordered as a side effect of this check.
+ // NOTE(review): the comparison relies on the hint's options already being
+ // stored in sorted order -- confirm against newPodMountHints.
+ opts := append([]string(nil), mount.Options...)
+ sort.Strings(opts)
+ for i, opt := range opts {
+ if opt != source.mount.Options[i] {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options)
+ }
+ }
+
+ maxTraversals := uint(0)
+ target, err := mns.FindInode(ctx, root, root, mount.Destination, &maxTraversals)
+ if err != nil {
+ return fmt.Errorf("can't find mount destination %q: %v", mount.Destination, err)
+ }
+ defer target.DecRef()
+
+ // Take a ref on the inode that is about to be (re)-mounted.
+ source.root.IncRef()
+ if err := mns.Mount(ctx, target, source.root); err != nil {
+ // The mount did not take ownership of the ref; give it back.
+ source.root.DecRef()
+ return fmt.Errorf("bind mount %q error: %v", mount.Destination, err)
+ }
+
+ log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name)
+ return nil
+}
+
// addRestoreMount adds a mount to the MountSources map used for restoring a
// checkpointed container.
func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount) error {
@@ -678,8 +942,8 @@ func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnviron
return nil
}
-// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding the mounts
-// to the environment.
+// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding
+// the mounts to the environment.
func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEnvironment, error) {
renv := &fs.RestoreEnvironment{
MountSources: make(map[string][]fs.MountArgs),
@@ -730,7 +994,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
// the host /tmp, but this is a nice optimization, and fixes some apps that call
// mknod in /tmp. It's unsafe to mount tmpfs if:
-// 1. /tmp is mounted explictly: we should not override user's wish
+// 1. /tmp is mounted explicitly: we should not override user's wish
// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp
//
// Note that when there are submounts inside of '/tmp', directories for the
diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go
new file mode 100644
index 000000000..49ab34b33
--- /dev/null
+++ b/runsc/boot/fs_test.go
@@ -0,0 +1,193 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "path"
+ "reflect"
+ "strings"
+ "testing"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// TestPodMountHintsHappy checks that well-formed mount annotations are parsed
+// into hints carrying the expected name, source, type, share and options.
+func TestPodMountHintsHappy(t *testing.T) {
+ spec := &specs.Spec{
+ Annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+
+ path.Join(MountPrefix, "mount2", "source"): "bar",
+ path.Join(MountPrefix, "mount2", "type"): "bind",
+ path.Join(MountPrefix, "mount2", "share"): "container",
+ path.Join(MountPrefix, "mount2", "options"): "rw,private",
+ },
+ }
+ podHints, err := newPodMountHints(spec)
+ if err != nil {
+ // podHints is nil on failure; stop here instead of dereferencing it below.
+ t.Fatalf("newPodMountHints failed: %v", err)
+ }
+
+ // Check that fields were set correctly.
+ mount1 := podHints.mounts["mount1"]
+ if want := "mount1"; want != mount1.name {
+ t.Errorf("mount1 name, want: %q, got: %q", want, mount1.name)
+ }
+ if want := "foo"; want != mount1.mount.Source {
+ t.Errorf("mount1 source, want: %q, got: %q", want, mount1.mount.Source)
+ }
+ if want := "tmpfs"; want != mount1.mount.Type {
+ t.Errorf("mount1 type, want: %q, got: %q", want, mount1.mount.Type)
+ }
+ if want := pod; want != mount1.share {
+ t.Errorf("mount1 share, want: %q, got: %q", want, mount1.share)
+ }
+ if want := []string(nil); !reflect.DeepEqual(want, mount1.mount.Options) {
+ t.Errorf("mount1 options, want: %q, got: %q", want, mount1.mount.Options)
+ }
+
+ mount2 := podHints.mounts["mount2"]
+ if want := "mount2"; want != mount2.name {
+ t.Errorf("mount2 name, want: %q, got: %q", want, mount2.name)
+ }
+ if want := "bar"; want != mount2.mount.Source {
+ t.Errorf("mount2 source, want: %q, got: %q", want, mount2.mount.Source)
+ }
+ if want := "bind"; want != mount2.mount.Type {
+ t.Errorf("mount2 type, want: %q, got: %q", want, mount2.mount.Type)
+ }
+ if want := container; want != mount2.share {
+ t.Errorf("mount2 share, want: %q, got: %q", want, mount2.share)
+ }
+ // Options are expected back in sorted order, not in annotation order.
+ if want := []string{"private", "rw"}; !reflect.DeepEqual(want, mount2.mount.Options) {
+ t.Errorf("mount2 options, want: %q, got: %q", want, mount2.mount.Options)
+ }
+}
+
+// TestPodMountHintsErrors checks that newPodMountHints rejects malformed mount
+// annotations: every case must fail with an error containing the expected
+// substring and must return nil hints.
+func TestPodMountHintsErrors(t *testing.T) {
+ for _, tst := range []struct {
+ name string
+ annotations map[string]string
+ // error is a substring that the returned error message must contain.
+ error string
+ }{
+ {
+ name: "too short",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1"): "foo",
+ },
+ error: "invalid mount annotation",
+ },
+ {
+ name: "no name",
+ annotations: map[string]string{
+ MountPrefix + "//source": "foo",
+ },
+ error: "invalid mount name",
+ },
+ {
+ name: "missing source",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ },
+ error: "source field",
+ },
+ {
+ name: "missing type",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ },
+ error: "type field",
+ },
+ {
+ name: "missing share",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ },
+ error: "share field",
+ },
+ {
+ name: "invalid field name",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "invalid"): "foo",
+ },
+ error: "invalid mount annotation",
+ },
+ {
+ name: "invalid source",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ },
+ error: "source cannot be empty",
+ },
+ {
+ name: "invalid type",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "invalid-type",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ },
+ error: "invalid type",
+ },
+ {
+ name: "invalid share",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "invalid-share",
+ },
+ error: "invalid share",
+ },
+ {
+ name: "invalid options",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ path.Join(MountPrefix, "mount1", "options"): "invalid-option",
+ },
+ error: "unknown mount option",
+ },
+ {
+ name: "duplicate source",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+
+ path.Join(MountPrefix, "mount2", "source"): "foo",
+ path.Join(MountPrefix, "mount2", "type"): "bind",
+ path.Join(MountPrefix, "mount2", "share"): "container",
+ },
+ error: "have the same mount source",
+ },
+ } {
+ t.Run(tst.name, func(t *testing.T) {
+ spec := &specs.Spec{Annotations: tst.annotations}
+ podHints, err := newPodMountHints(spec)
+ if err == nil || !strings.Contains(err.Error(), tst.error) {
+ t.Errorf("newPodMountHints invalid error, want: .*%s.*, got: %v", tst.error, err)
+ }
+ if podHints != nil {
+ t.Errorf("newPodMountHints must return nil on failure: %+v", podHints)
+ }
+ })
+ }
+}
diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go
index 3364aa5e6..d1c0bb9b5 100644
--- a/runsc/boot/limits.go
+++ b/runsc/boot/limits.go
@@ -20,8 +20,8 @@ import (
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
)
// Mapping from linux resource names to limits.LimitType.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 42bddb2e8..8e8c6105b 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -20,51 +20,52 @@ import (
mrand "math/rand"
"os"
"runtime"
+ "strings"
"sync"
"sync/atomic"
"syscall"
gtime "time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/memutil"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
- "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling"
- slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
- "gvisor.googlesource.com/gvisor/runsc/boot/filter"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/memutil"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/sighandling"
+ slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/runsc/boot/filter"
+ _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms.
+ "gvisor.dev/gvisor/runsc/specutils"
// Include supported socket providers.
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
)
// Loader keeps state needed to start the kernel and run the container..
@@ -117,6 +118,10 @@ type Loader struct {
//
// processes is guardded by mu.
processes map[execID]*execProcess
+
+ // mountHints provides extra information about mounts for containers that
+ // apply to the entire pod.
+ mountHints *podMountHints
}
// execID uniquely identifies a sentry process that is executed in a container.
@@ -201,7 +206,9 @@ func New(args Args) (*Loader, error) {
// Create VDSO.
//
// Pass k as the platform since it is savable, unlike the actual platform.
- vdso, err := loader.PrepareVDSO(k)
+ //
+ // FIXME(b/109889800): Use non-nil context.
+ vdso, err := loader.PrepareVDSO(nil, k)
if err != nil {
return nil, fmt.Errorf("creating vdso: %v", err)
}
@@ -255,7 +262,7 @@ func New(args Args) (*Loader, error) {
// Adjust the total memory returned by the Sentry so that applications that
// use /proc/meminfo can make allocations based on this limit.
usage.MinimumTotalMemoryBytes = args.TotalMem
- log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(2^30))
+ log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(1<<30))
}
// Initiate the Kernel object, which is required by the Context passed
@@ -299,6 +306,11 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("initializing compat logs: %v", err)
}
+ mountHints, err := newPodMountHints(args.Spec)
+ if err != nil {
+ return nil, fmt.Errorf("creating pod mount hints: %v", err)
+ }
+
eid := execID{cid: args.ID}
l := &Loader{
k: k,
@@ -311,6 +323,7 @@ func New(args Args) (*Loader, error) {
rootProcArgs: procArgs,
sandboxID: args.ID,
processes: map[execID]*execProcess{eid: {}},
+ mountHints: mountHints,
}
// We don't care about child signals; some platforms can generate a
@@ -402,19 +415,12 @@ func (l *Loader) Destroy() {
}
func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) {
- switch conf.Platform {
- case PlatformPtrace:
- log.Infof("Platform: ptrace")
- return ptrace.New()
- case PlatformKVM:
- log.Infof("Platform: kvm")
- if deviceFile == nil {
- return nil, fmt.Errorf("kvm device file must be provided")
- }
- return kvm.New(deviceFile)
- default:
- return nil, fmt.Errorf("invalid platform %v", conf.Platform)
+ p, err := platform.Lookup(conf.Platform)
+ if err != nil {
+ panic(fmt.Sprintf("invalid platform %v: %v", conf.Platform, err))
}
+ log.Infof("Platform: %s", conf.Platform)
+ return p.New(deviceFile)
}
func createMemoryFile() (*pgalloc.MemoryFile, error) {
@@ -435,6 +441,23 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
return mf, nil
}
+// installSeccompFilters installs the syscall filters built from the loader's
+// configuration. When seccomp is disabled in the configuration it only
+// reports that fact and installs nothing.
+func (l *Loader) installSeccompFilters() error {
+ if l.conf.DisableSeccomp {
+ filter.Report("syscall filter is DISABLED. Running in less secure mode.")
+ return nil
+ }
+
+ if err := filter.Install(filter.Options{
+ Platform: l.k.Platform,
+ HostNetwork: l.conf.Network == NetworkHost,
+ ProfileEnable: l.conf.ProfileEnable,
+ ControllerFD: l.ctrl.srv.FD(),
+ }); err != nil {
+ return fmt.Errorf("installing seccomp filters: %v", err)
+ }
+ return nil
+}
+
// Run runs the root container.
func (l *Loader) Run() error {
err := l.run()
@@ -470,39 +493,33 @@ func (l *Loader) run() error {
return fmt.Errorf("trying to start deleted container %q", l.sandboxID)
}
- // Finally done with all configuration. Setup filters before user code
- // is loaded.
- if l.conf.DisableSeccomp {
- filter.Report("syscall filter is DISABLED. Running in less secure mode.")
- } else {
- opts := filter.Options{
- Platform: l.k.Platform,
- HostNetwork: l.conf.Network == NetworkHost,
- ProfileEnable: l.conf.ProfileEnable,
- ControllerFD: l.ctrl.srv.FD(),
- }
- if err := filter.Install(opts); err != nil {
- return fmt.Errorf("installing seccomp filters: %v", err)
- }
- }
-
// If we are restoring, we do not want to create a process.
// l.restore is set by the container manager when a restore call is made.
if !l.restore {
+ if l.conf.ProfileEnable {
+ initializePProf()
+ }
+
+ // Finally done with all configuration. Setup filters before user code
+ // is loaded.
+ if err := l.installSeccompFilters(); err != nil {
+ return err
+ }
+
// Create the FD map, which will set stdin, stdout, and stderr. If console
// is true, then ioctl calls will be passed through to the host fd.
ctx := l.rootProcArgs.NewContext(l.k)
- fdm, err := createFDMap(ctx, l.rootProcArgs.Limits, l.console, l.stdioFDs)
+ fdTable, err := createFDTable(ctx, l.console, l.stdioFDs)
if err != nil {
return fmt.Errorf("importing fds: %v", err)
}
// CreateProcess takes a reference on FDMap if successful. We won't need
// ours either way.
- l.rootProcArgs.FDMap = fdm
+ l.rootProcArgs.FDTable = fdTable
// cid for root container can be empty. Only subcontainers need it to set
// the mount location.
- mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k)
+ mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints)
if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil {
return err
}
@@ -513,19 +530,37 @@ func (l *Loader) run() error {
return err
}
+ // Read /etc/passwd for the user's HOME directory and set the HOME
+ // environment variable as required by POSIX if it is not overridden by
+ // the user.
+ hasHomeEnvv := false
+ for _, envv := range l.rootProcArgs.Envv {
+ if strings.HasPrefix(envv, "HOME=") {
+ hasHomeEnvv = true
+ }
+ }
+ if !hasHomeEnvv {
+ homeDir, err := getExecUserHome(rootCtx, rootMns, uint32(l.rootProcArgs.Credentials.RealKUID))
+ if err != nil {
+ return fmt.Errorf("error reading exec user: %v", err)
+ }
+
+ l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir)
+ }
+
// Create the root container init task. It will begin running
// when the kernel is started.
if _, _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
return fmt.Errorf("creating init process: %v", err)
}
- // CreateProcess takes a reference on FDMap if successful.
- l.rootProcArgs.FDMap.DecRef()
+ // CreateProcess takes a reference on FDTable if successful.
+ l.rootProcArgs.FDTable.DecRef()
}
ep.tg = l.k.GlobalInit()
if l.console {
- ttyFile := l.rootProcArgs.FDMap.GetFile(0)
+ ttyFile, _ := l.rootProcArgs.FDTable.Get(0)
defer ttyFile.DecRef()
ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations)
@@ -605,13 +640,13 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
// Create the FD map, which will set stdin, stdout, and stderr.
ctx := procArgs.NewContext(l.k)
- fdm, err := createFDMap(ctx, procArgs.Limits, false, stdioFDs)
+ fdTable, err := createFDTable(ctx, false, stdioFDs)
if err != nil {
return fmt.Errorf("importing fds: %v", err)
}
- // CreateProcess takes a reference on FDMap if successful. We won't need ours
- // either way.
- procArgs.FDMap = fdm
+ // CreateProcess takes a reference on fdTable if successful. We won't
+ // need ours either way.
+ procArgs.FDTable = fdTable
// Can't take ownership away from os.File. dup them to get a new FDs.
var goferFDs []int
@@ -623,7 +658,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
goferFDs = append(goferFDs, fd)
}
- mntr := newContainerMounter(spec, cid, goferFDs, l.k)
+ mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints)
if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil {
return fmt.Errorf("configuring container FS: %v", err)
}
@@ -640,8 +675,8 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
}
l.k.StartProcess(tg)
- // CreateProcess takes a reference on FDMap if successful.
- procArgs.FDMap.DecRef()
+ // CreateProcess takes a reference on FDTable if successful.
+ procArgs.FDTable.DecRef()
l.processes[eid].tg = tg
return nil
@@ -805,9 +840,17 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
// privileges.
Raw: true,
})}
+
+ // Enable SACK Recovery.
if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
return nil, fmt.Errorf("failed to enable SACK: %v", err)
}
+
+ // Enable Receive Buffer Auto-Tuning.
+ if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
+ return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
+ }
+
return &s, nil
default:
@@ -956,3 +999,8 @@ func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host
}
return ep.tg, ep.tty, nil
}
+
+// init sets the refs leak mode to NoLeakChecking when the package is loaded.
+func init() {
+ // TODO(gvisor.dev/issue/365): Make this configurable.
+ refs.SetLeakMode(refs.NoLeakChecking)
+}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 6393cb3fb..ff713660d 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -25,18 +25,21 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/runsc/fsgofer"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/runsc/fsgofer"
)
func init() {
log.SetLevel(log.Debug)
rand.Seed(time.Now().UnixNano())
+ if err := fsgofer.OpenProcSelfFD(); err != nil {
+ panic(err)
+ }
}
func testConfig() *Config {
@@ -44,6 +47,7 @@ func testConfig() *Config {
RootDir: "unused_root_dir",
Network: NetworkNone,
DisableSeccomp: true,
+ Platform: "ptrace",
}
}
@@ -404,7 +408,7 @@ func TestCreateMountNamespace(t *testing.T) {
mns = m
ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
}
- mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil)
+ mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil, &podMountHints{})
if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err)
}
@@ -610,7 +614,7 @@ func TestRestoreEnvironment(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
conf := testConfig()
- mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil)
+ mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil, &podMountHints{})
actualRenv, err := mntr.createRestoreEnvironment(conf)
if !tc.errorExpected && err != nil {
t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 82c259f47..d3d98243d 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -19,16 +19,16 @@ import (
"net"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/urpc"
)
// Network exposes methods that can be used to configure a network stack.
@@ -56,7 +56,7 @@ type FDBasedLink struct {
Addresses []net.IP
Routes []Route
GSOMaxSize uint32
- LinkAddress []byte
+ LinkAddress net.HardwareAddr
// NumChannels controls how many underlying FD's are to be used to
// create this endpoint.
diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD
new file mode 100644
index 000000000..03391cdca
--- /dev/null
+++ b/runsc/boot/platforms/BUILD
@@ -0,0 +1,16 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "platforms",
+ srcs = ["platforms.go"],
+ importpath = "gvisor.dev/gvisor/runsc/boot/platforms",
+ visibility = [
+ "//runsc:__subpackages__",
+ ],
+ deps = [
+ "//pkg/sentry/platform/kvm",
+ "//pkg/sentry/platform/ptrace",
+ ],
+)
diff --git a/runsc/boot/platforms/platforms.go b/runsc/boot/platforms/platforms.go
new file mode 100644
index 000000000..056b46ad5
--- /dev/null
+++ b/runsc/boot/platforms/platforms.go
@@ -0,0 +1,30 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package platforms imports all available platform packages.
+package platforms
+
+import (
+ // Import platforms that runsc might use.
+ _ "gvisor.dev/gvisor/pkg/sentry/platform/kvm"
+ _ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace"
+)
+
+const (
+ // Ptrace runs the sandbox with the ptrace platform.
+ Ptrace = "ptrace"
+
+ // KVM runs the sandbox with the KVM platform.
+ KVM = "kvm"
+)
diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof.go
new file mode 100644
index 000000000..463362f02
--- /dev/null
+++ b/runsc/boot/pprof.go
@@ -0,0 +1,18 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+// initializePProf is called by Loader.run when profiling is enabled in the
+// configuration. This implementation is empty.
+func initializePProf() {
+}
diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go
index 19c7f8fbd..fbfd3b07c 100644
--- a/runsc/boot/strace.go
+++ b/runsc/boot/strace.go
@@ -15,7 +15,7 @@
package boot
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/strace"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
)
func enableStrace(conf *Config) error {
diff --git a/runsc/boot/user.go b/runsc/boot/user.go
new file mode 100644
index 000000000..d1d423a5c
--- /dev/null
+++ b/runsc/boot/user.go
@@ -0,0 +1,146 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "bufio"
+ "io"
+ "strconv"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+)
+
+type fileReader struct {
+ // Ctx is the context for the file reader.
+ Ctx context.Context
+
+ // File is the file to read from.
+ File *fs.File
+}
+
+// Read implements io.Reader.Read.
+func (r *fileReader) Read(buf []byte) (int, error) {
+ n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf))
+ return int(n), err
+}
+
+// getExecUserHome returns the home directory of the executing user, as listed
+// in the /etc/passwd file of the container filesystem.
+func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) {
+ // The default user home directory to return if no entry matching the uid
+ // is found in the /etc/passwd file in the image.
+ const defaultHome = "/"
+
+ // Open the /etc/passwd file from the dirent via the root mount namespace.
+ mnsRoot := rootMns.Root()
+ maxTraversals := uint(linux.MaxSymlinkTraversals)
+ dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals)
+ if err != nil {
+ // NOTE: Ignore errors opening the passwd file. If the passwd file
+ // doesn't exist we will return the default home directory.
+ return defaultHome, nil
+ }
+ defer dirent.DecRef()
+
+ // Check read permissions on the file.
+ if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil {
+ // NOTE: Ignore permissions errors here and return default root dir.
+ return defaultHome, nil
+ }
+
+ // Only open regular files. We don't open other files like named pipes as
+ // they may block and might present some attack surface to the container.
+ // Note that runc does not seem to do this kind of checking.
+ if !fs.IsRegular(dirent.Inode.StableAttr) {
+ return defaultHome, nil
+ }
+
+ f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false})
+ if err != nil {
+ return "", err
+ }
+ defer f.DecRef()
+
+ r := &fileReader{
+ Ctx: ctx,
+ File: f,
+ }
+
+ homeDir, err := findHomeInPasswd(uid, r, defaultHome)
+ if err != nil {
+ return "", err
+ }
+
+ return homeDir, nil
+}
+
+// findHomeInPasswd parses a passwd file and returns the given user's home
+// directory. This function does its best to replicate runc's behavior.
+//
+// passwd is consumed line by line. The first entry whose third field parses
+// as uid wins, and its sixth field is returned (an empty string if the entry
+// has fewer than six fields). If no entry matches, defaultHome is returned.
+// A non-nil error is returned only when scanning passwd itself fails.
+func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) {
+	s := bufio.NewScanner(passwd)
+
+	for s.Scan() {
+		// Bail out if the scanner has already recorded an error alongside
+		// this token rather than parsing a possibly incomplete line.
+		if err := s.Err(); err != nil {
+			return "", err
+		}
+
+		line := strings.TrimSpace(s.Text())
+		if line == "" {
+			continue
+		}
+
+		// Pull out part of passwd entry. Loosely parse the passwd entry as some
+		// passwd files could be poorly written and for compatibility with runc.
+		//
+		// Per 'man 5 passwd'
+		// /etc/passwd contains one line for each user account, with seven
+		// fields delimited by colons (“:”). These fields are:
+		//
+		// - login name
+		// - optional encrypted password
+		// - numerical user ID
+		// - numerical group ID
+		// - user name or comment field
+		// - user home directory
+		// - optional user command interpreter
+		parts := strings.Split(line, ":")
+
+		found := false
+		homeDir := ""
+		for i, p := range parts {
+			switch i {
+			case 2:
+				parsedUID, err := strconv.ParseUint(p, 10, 32)
+				if err == nil && parsedUID == uint64(uid) {
+					found = true
+				}
+			case 5:
+				homeDir = p
+			}
+		}
+		if found {
+			// NOTE: If the uid is present but the home directory is not
+			// present in the /etc/passwd entry we return an empty string. This
+			// is, for better or worse, what runc does.
+			return homeDir, nil
+		}
+	}
+
+	// Scan returns false both at end of input and when scanning fails (a read
+	// error, or a line longer than the scanner's buffer). Check the error here
+	// too, otherwise a failed read is indistinguishable from "user not found"
+	// and defaultHome would be returned silently.
+	if err := s.Err(); err != nil {
+		return "", err
+	}
+
+	return defaultHome, nil
+}
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
new file mode 100644
index 000000000..834003430
--- /dev/null
+++ b/runsc/boot/user_test.go
@@ -0,0 +1,253 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
+ "syscall"
+ "testing"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+)
+
+func setupTempDir() (string, error) {
+ tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test")
+ if err != nil {
+ return "", err
+ }
+ return tmpDir, nil
+}
+
+func setupPasswd(contents string, perms os.FileMode) func() (string, error) {
+ return func() (string, error) {
+ tmpDir, err := setupTempDir()
+ if err != nil {
+ return "", err
+ }
+
+ if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
+ return "", err
+ }
+
+ f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd"))
+ if err != nil {
+ return "", err
+ }
+ defer f.Close()
+
+ _, err = f.WriteString(contents)
+ if err != nil {
+ return "", err
+ }
+
+ err = f.Chmod(perms)
+ if err != nil {
+ return "", err
+ }
+ return tmpDir, nil
+ }
+}
+
+// TestGetExecUserHome tests the getExecUserHome function.
+func TestGetExecUserHome(t *testing.T) {
+ tests := map[string]struct {
+ uid uint32
+ createRoot func() (string, error)
+ expected string
+ }{
+ "success": {
+ uid: 1000,
+ createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666),
+ expected: "/home/adin",
+ },
+ "no_passwd": {
+ uid: 1000,
+ createRoot: setupTempDir,
+ expected: "/",
+ },
+ "no_perms": {
+ uid: 1000,
+ createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000),
+ expected: "/",
+ },
+ "directory": {
+ uid: 1000,
+ createRoot: func() (string, error) {
+ tmpDir, err := setupTempDir()
+ if err != nil {
+ return "", err
+ }
+
+ if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
+ return "", err
+ }
+
+ if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
+ return "", err
+ }
+
+ return tmpDir, nil
+ },
+ expected: "/",
+ },
+ // Currently we don't allow named pipes.
+ "named_pipe": {
+ uid: 1000,
+ createRoot: func() (string, error) {
+ tmpDir, err := setupTempDir()
+ if err != nil {
+ return "", err
+ }
+
+ if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
+ return "", err
+ }
+
+ if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
+ return "", err
+ }
+
+ return tmpDir, nil
+ },
+ expected: "/",
+ },
+ }
+
+ for name, tc := range tests {
+ t.Run(name, func(t *testing.T) {
+ tmpDir, err := tc.createRoot()
+ if err != nil {
+ t.Fatalf("failed to create root dir: %v", err)
+ }
+
+ sandEnd, cleanup, err := startGofer(tmpDir)
+ if err != nil {
+ t.Fatalf("failed to create gofer: %v", err)
+ }
+ defer cleanup()
+
+ ctx := contexttest.Context(t)
+ conf := &Config{
+ RootDir: "unused_root_dir",
+ Network: NetworkNone,
+ DisableSeccomp: true,
+ }
+
+ spec := &specs.Spec{
+ Root: &specs.Root{
+ Path: tmpDir,
+ Readonly: true,
+ },
+ // Add /proc mount as tmpfs to avoid needing a kernel.
+ Mounts: []specs.Mount{
+ {
+ Destination: "/proc",
+ Type: "tmpfs",
+ },
+ },
+ }
+
+ var mns *fs.MountNamespace
+ setMountNS := func(m *fs.MountNamespace) {
+ mns = m
+ ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
+ }
+ mntr := newContainerMounter(spec, "", []int{sandEnd}, nil, &podMountHints{})
+ if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
+ t.Fatalf("failed to create mount namespace: %v", err)
+ }
+
+ got, err := getExecUserHome(ctx, mns, tc.uid)
+ if err != nil {
+ t.Fatalf("failed to get user home: %v", err)
+ }
+
+ if got != tc.expected {
+ t.Fatalf("expected %v, got: %v", tc.expected, got)
+ }
+ })
+ }
+}
+
+// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing.
+func TestFindHomeInPasswd(t *testing.T) {
+ tests := map[string]struct {
+ uid uint32
+ passwd string
+ expected string
+ def string
+ }{
+ "empty": {
+ uid: 1000,
+ passwd: "",
+ expected: "/",
+ def: "/",
+ },
+ "whitespace": {
+ uid: 1000,
+ passwd: " ",
+ expected: "/",
+ def: "/",
+ },
+ "full": {
+ uid: 1000,
+ passwd: "adin::1000:1111::/home/adin:/bin/sh",
+ expected: "/home/adin",
+ def: "/",
+ },
+ // For better or worse, this is how runc works.
+ "partial": {
+ uid: 1000,
+ passwd: "adin::1000:1111:",
+ expected: "",
+ def: "/",
+ },
+ "multiple": {
+ uid: 1001,
+ passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh",
+ expected: "/home/ian",
+ def: "/",
+ },
+ "duplicate": {
+ uid: 1000,
+ passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh",
+ expected: "/home/adin",
+ def: "/",
+ },
+ "empty_lines": {
+ uid: 1001,
+ passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh",
+ expected: "/home/ian",
+ def: "/",
+ },
+ }
+
+ for name, tc := range tests {
+ t.Run(name, func(t *testing.T) {
+ got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def)
+ if err != nil {
+ t.Fatalf("error parsing passwd: %v", err)
+ }
+ if tc.expected != got {
+ t.Fatalf("expected %v, got: %v", tc.expected, got)
+ }
+ })
+ }
+}
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD
index 620d33a19..ab2387614 100644
--- a/runsc/cgroup/BUILD
+++ b/runsc/cgroup/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "cgroup",
srcs = ["cgroup.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/cgroup",
+ importpath = "gvisor.dev/gvisor/runsc/cgroup",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 7431b17d6..ab3a25b9b 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -30,8 +30,8 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index df6af0ced..5223b9972 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -33,7 +33,7 @@ go_library(
"syscalls.go",
"wait.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/cmd",
+ importpath = "gvisor.dev/gvisor/runsc/cmd",
visibility = [
"//runsc:__subpackages__",
],
@@ -46,6 +46,7 @@ go_library(
"//pkg/unet",
"//pkg/urpc",
"//runsc/boot",
+ "//runsc/boot/platforms",
"//runsc/console",
"//runsc/container",
"//runsc/fsgofer",
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 3a547d4aa..b40fded5b 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -24,9 +24,10 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot/platforms"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Boot implements subcommands.Command for the "boot" command which starts a
@@ -130,6 +131,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Ensure that if there is a panic, all goroutine stacks are printed.
debug.SetTraceback("all")
+ conf := args[0].(*boot.Config)
+
if b.setUpRoot {
if err := setUpChroot(b.pidns); err != nil {
Fatalf("error setting up chroot: %v", err)
@@ -143,14 +146,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
args = append(args, arg)
}
}
- // Note that we've already read the spec from the spec FD, and
- // we will read it again after the exec call. This works
- // because the ReadSpecFromFile function seeks to the beginning
- // of the file before reading.
- if err := callSelfAsNobody(args); err != nil {
- Fatalf("%v", err)
+ if !conf.Rootless {
+ // Note that we've already read the spec from the spec FD, and
+ // we will read it again after the exec call. This works
+ // because the ReadSpecFromFile function seeks to the beginning
+ // of the file before reading.
+ if err := callSelfAsNobody(args); err != nil {
+ Fatalf("%v", err)
+ }
+ panic("callSelfAsNobody must never return success")
}
- panic("callSelfAsNobody must never return success")
}
}
@@ -163,15 +168,12 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
specutils.LogSpec(spec)
- conf := args[0].(*boot.Config)
- waitStatus := args[1].(*syscall.WaitStatus)
-
if b.applyCaps {
caps := spec.Process.Capabilities
if caps == nil {
caps = &specs.LinuxCapabilities{}
}
- if conf.Platform == boot.PlatformPtrace {
+ if conf.Platform == platforms.Ptrace {
// Ptrace platform requires extra capabilities.
const c = "CAP_SYS_PTRACE"
caps.Bounding = append(caps.Bounding, c)
@@ -251,6 +253,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
ws := l.WaitExit()
log.Infof("application exiting with %+v", ws)
+ waitStatus := args[1].(*syscall.WaitStatus)
*waitStatus = syscall.WaitStatus(ws.Status())
l.Destroy()
return subcommands.ExitSuccess
diff --git a/runsc/cmd/capability.go b/runsc/cmd/capability.go
index 312e5b471..abfbb7cfc 100644
--- a/runsc/cmd/capability.go
+++ b/runsc/cmd/capability.go
@@ -19,7 +19,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
var allCapTypes = []capability.CapType{
diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go
index ee74d33d8..3ae25a257 100644
--- a/runsc/cmd/capability_test.go
+++ b/runsc/cmd/capability_test.go
@@ -21,11 +21,11 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func init() {
@@ -97,7 +97,12 @@ func TestCapabilities(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := container.Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := container.New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -116,6 +121,6 @@ func TestCapabilities(t *testing.T) {
}
func TestMain(m *testing.M) {
- testutil.RunAsRoot()
+ specutils.MaybeRunAsRoot()
os.Exit(m.Run())
}
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go
index 96d3c3378..d8b3a8573 100644
--- a/runsc/cmd/checkpoint.go
+++ b/runsc/cmd/checkpoint.go
@@ -22,10 +22,10 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// File containing the container's saved image/state within the given image-path's directory.
@@ -133,7 +133,12 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa
Fatalf("destroying container: %v", err)
}
- cont, err = container.Create(id, spec, conf, bundleDir, "", "", "")
+ contArgs := container.Args{
+ ID: id,
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err = container.New(conf, contArgs)
if err != nil {
Fatalf("restoring container: %v", err)
}
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
index 1a774db04..b5a0ce17d 100644
--- a/runsc/cmd/chroot.go
+++ b/runsc/cmd/chroot.go
@@ -20,8 +20,8 @@ import (
"path/filepath"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// mountInChroot creates the destination mount point in the given chroot and
diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go
index 5b4cc4a39..f1a4887ef 100644
--- a/runsc/cmd/cmd.go
+++ b/runsc/cmd/cmd.go
@@ -22,8 +22,8 @@ import (
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// intFlags can be used with int flags that appear multiple times.
diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go
index 8bf9b7dcf..a4e3071b3 100644
--- a/runsc/cmd/create.go
+++ b/runsc/cmd/create.go
@@ -18,9 +18,9 @@ import (
"context"
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Create implements subcommands.Command for the "create" command.
@@ -82,21 +82,33 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
id := f.Arg(0)
conf := args[0].(*boot.Config)
+ if conf.Rootless {
+ return Errorf("Rootless mode not supported with %q", c.Name())
+ }
+
bundleDir := c.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
- Fatalf("reading spec: %v", err)
+ return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
// Create the container. A new sandbox will be created for the
// container unless the metadata specifies that it should be run in an
// existing container.
- if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil {
- Fatalf("creating container: %v", err)
+ contArgs := container.Args{
+ ID: id,
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: c.consoleSocket,
+ PIDFile: c.pidFile,
+ UserLog: c.userLog,
+ }
+ if _, err := container.New(conf, contArgs); err != nil {
+ return Errorf("creating container: %v", err)
}
return subcommands.ExitSuccess
}
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 27eb51172..7313e473f 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -17,14 +17,17 @@ package cmd
import (
"context"
"os"
+ "strconv"
+ "strings"
"syscall"
"time"
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Debug implements subcommands.Command for the "debug" command.
@@ -36,6 +39,9 @@ type Debug struct {
profileCPU string
profileDelay int
trace string
+ strace string
+ logLevel string
+ logPackets string
}
// Name implements subcommands.Command.
@@ -62,6 +68,9 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile")
f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
+ f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all`)
+ f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).")
+ f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.")
}
// Execute implements subcommands.Command.Execute.
@@ -78,7 +87,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
var err error
c, err = container.Load(conf.RootDir, f.Arg(0))
if err != nil {
- Fatalf("loading container %q: %v", f.Arg(0), err)
+ return Errorf("loading container %q: %v", f.Arg(0), err)
}
} else {
if f.NArg() != 0 {
@@ -88,12 +97,12 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Go over all sandboxes and find the one that matches PID.
ids, err := container.List(conf.RootDir)
if err != nil {
- Fatalf("listing containers: %v", err)
+ return Errorf("listing containers: %v", err)
}
for _, id := range ids {
candidate, err := container.Load(conf.RootDir, id)
if err != nil {
- Fatalf("loading container %q: %v", id, err)
+ return Errorf("loading container %q: %v", id, err)
}
if candidate.SandboxPid() == d.pid {
c = candidate
@@ -101,38 +110,38 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
}
if c == nil {
- Fatalf("container with PID %d not found", d.pid)
+ return Errorf("container with PID %d not found", d.pid)
}
}
if c.Sandbox == nil || !c.Sandbox.IsRunning() {
- Fatalf("container sandbox is not running")
+ return Errorf("container sandbox is not running")
}
log.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Pid)
if d.signal > 0 {
log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid)
if err := syscall.Kill(c.Sandbox.Pid, syscall.Signal(d.signal)); err != nil {
- Fatalf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid)
+ return Errorf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid)
}
}
if d.stacks {
log.Infof("Retrieving sandbox stacks")
stacks, err := c.Sandbox.Stacks()
if err != nil {
- Fatalf("retrieving stacks: %v", err)
+ return Errorf("retrieving stacks: %v", err)
}
log.Infof(" *** Stack dump ***\n%s", stacks)
}
if d.profileHeap != "" {
f, err := os.Create(d.profileHeap)
if err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
defer f.Close()
if err := c.Sandbox.HeapProfile(f); err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
log.Infof("Heap profile written to %q", d.profileHeap)
}
@@ -142,7 +151,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
delay = true
f, err := os.Create(d.profileCPU)
if err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
defer func() {
f.Close()
@@ -152,7 +161,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
log.Infof("CPU profile written to %q", d.profileCPU)
}()
if err := c.Sandbox.StartCPUProfile(f); err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU)
}
@@ -160,7 +169,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
delay = true
f, err := os.Create(d.trace)
if err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
defer func() {
f.Close()
@@ -170,15 +179,71 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
log.Infof("Trace written to %q", d.trace)
}()
if err := c.Sandbox.StartTrace(f); err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
log.Infof("Tracing started for %d sec, writing to %q", d.profileDelay, d.trace)
}
+ if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
+ args := control.LoggingArgs{}
+ switch strings.ToLower(d.strace) {
+ case "":
+ // strace not set, nothing to do here.
+
+ case "off":
+ log.Infof("Disabling strace")
+ args.SetStrace = true
+
+ case "all":
+ log.Infof("Enabling all straces")
+ args.SetStrace = true
+ args.EnableStrace = true
+
+ default:
+ log.Infof("Enabling strace for syscalls: %s", d.strace)
+ args.SetStrace = true
+ args.EnableStrace = true
+ args.StraceWhitelist = strings.Split(d.strace, ",")
+ }
+
+ if len(d.logLevel) != 0 {
+ args.SetLevel = true
+ switch strings.ToLower(d.logLevel) {
+ case "warning", "0":
+ args.Level = log.Warning
+ case "info", "1":
+ args.Level = log.Info
+ case "debug", "2":
+ args.Level = log.Debug
+ default:
+ return Errorf("invalid log level %q", d.logLevel)
+ }
+ log.Infof("Setting log level %v", args.Level)
+ }
+
+ if len(d.logPackets) != 0 {
+ args.SetLogPackets = true
+ lp, err := strconv.ParseBool(d.logPackets)
+ if err != nil {
+ return Errorf("invalid value for log_packets %q", d.logPackets)
+ }
+ args.LogPackets = lp
+ if args.LogPackets {
+ log.Infof("Enabling packet logging")
+ } else {
+ log.Infof("Disabling packet logging")
+ }
+ }
+
+ if err := c.Sandbox.ChangeLogging(args); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Logging options changed")
+ }
+
if delay {
time.Sleep(time.Duration(d.profileDelay) * time.Second)
-
}
return subcommands.ExitSuccess
diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go
index 9039723e9..30d8164b1 100644
--- a/runsc/cmd/delete.go
+++ b/runsc/cmd/delete.go
@@ -21,9 +21,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Delete implements subcommands.Command for the "delete" command.
diff --git a/runsc/cmd/delete_test.go b/runsc/cmd/delete_test.go
index 45fc91016..cb59516a3 100644
--- a/runsc/cmd/delete_test.go
+++ b/runsc/cmd/delete_test.go
@@ -18,7 +18,7 @@ import (
"io/ioutil"
"testing"
- "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot"
)
func TestNotFound(t *testing.T) {
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index 8ea59046c..9a8a49054 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -30,19 +30,19 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Do implements subcommands.Command for the "do" command. It sets up a simple
// sandbox and executes the command inside it. See Usage() for more details.
type Do struct {
- root string
- cwd string
- ip string
- networkNamespace bool
+ root string
+ cwd string
+ ip string
+ quiet bool
}
// Name implements subcommands.Command.Name.
@@ -72,7 +72,7 @@ func (c *Do) SetFlags(f *flag.FlagSet) {
f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`)
f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory")
f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox")
- f.BoolVar(&c.networkNamespace, "netns", true, "run in a new network namespace")
+ f.BoolVar(&c.quiet, "quiet", false, "suppress runsc messages to stdout. Application output is still sent to stdout and stderr")
}
// Execute implements subcommands.Command.Execute.
@@ -85,15 +85,21 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
- // Map the entire host file system, but make it readonly with a writable
- // overlay on top (ignore --overlay option).
- conf.Overlay = true
+ if conf.Rootless {
+ if err := specutils.MaybeRunAsRoot(); err != nil {
+ return Errorf("Error executing inside namespace: %v", err)
+ }
+ // Execution will continue here if no more capabilities are needed...
+ }
hostname, err := os.Hostname()
if err != nil {
return Errorf("Error to retrieve hostname: %v", err)
}
+ // Map the entire host file system, but make it readonly with a writable
+ // overlay on top (ignore --overlay option).
+ conf.Overlay = true
absRoot, err := resolvePath(c.root)
if err != nil {
return Errorf("Error resolving root: %v", err)
@@ -119,11 +125,22 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
specutils.LogSpec(spec)
cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
- if !c.networkNamespace {
- if conf.Network != boot.NetworkHost {
- Fatalf("The current network namespace can be used only if --network=host is set", nil)
+ if conf.Network == boot.NetworkNone {
+ netns := specs.LinuxNamespace{
+ Type: specs.NetworkNamespace,
}
- } else if conf.Network != boot.NetworkNone {
+ if spec.Linux != nil {
+ panic("spec.Linux is not nil")
+ }
+ spec.Linux = &specs.Linux{Namespaces: []specs.LinuxNamespace{netns}}
+
+ } else if conf.Rootless {
+ if conf.Network == boot.NetworkSandbox {
+ c.notifyUser("*** Warning: using host network due to --rootless ***")
+ conf.Network = boot.NetworkHost
+ }
+
+ } else {
clean, err := c.setupNet(cid, spec)
if err != nil {
return Errorf("Error setting up network: %v", err)
@@ -149,7 +166,13 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
return Errorf("Error write spec: %v", err)
}
- ws, err := container.Run(cid, spec, conf, tmpDir, "", "", "", false)
+ runArgs := container.Args{
+ ID: cid,
+ Spec: spec,
+ BundleDir: tmpDir,
+ Attached: true,
+ }
+ ws, err := container.Run(conf, runArgs)
if err != nil {
return Errorf("running container: %v", err)
}
@@ -158,6 +181,13 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
return subcommands.ExitSuccess
}
+func (c *Do) notifyUser(format string, v ...interface{}) {
+ if !c.quiet {
+ fmt.Printf(format+"\n", v...)
+ }
+ log.Warningf(format, v...)
+}
+
func resolvePath(path string) (string, error) {
var err error
path, err = filepath.Abs(path)
diff --git a/runsc/cmd/error.go b/runsc/cmd/error.go
index 700b19f14..3585b5448 100644
--- a/runsc/cmd/error.go
+++ b/runsc/cmd/error.go
@@ -22,7 +22,7 @@ import (
"time"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// ErrorLogger is where error messages should be written to. These messages are
diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go
index c6bc8fc3a..3972e9224 100644
--- a/runsc/cmd/events.go
+++ b/runsc/cmd/events.go
@@ -22,9 +22,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Events implements subcommands.Command for the "events" command.
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index 0eeaaadba..e817eff77 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -30,14 +30,14 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/console"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/console"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Exec implements subcommands.Command for the "exec" command.
@@ -235,7 +235,11 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi
cmd.SysProcAttr = &syscall.SysProcAttr{
Setsid: true,
Setctty: true,
- Ctty: int(tty.Fd()),
+ // The Ctty FD must be the FD in the child process's FD
+ // table. Since we set cmd.Stdin/Stdout/Stderr to the
+ // tty FD, we can use any of 0, 1, or 2 here.
+ // See https://github.com/golang/go/issues/29458.
+ Ctty: 0,
}
}
diff --git a/runsc/cmd/exec_test.go b/runsc/cmd/exec_test.go
index 6f0f258c0..eb38a431f 100644
--- a/runsc/cmd/exec_test.go
+++ b/runsc/cmd/exec_test.go
@@ -21,10 +21,10 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/urpc"
)
func TestUser(t *testing.T) {
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index bccb29397..9faabf494 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -27,13 +27,13 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/fsgofer"
- "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/fsgofer"
+ "gvisor.dev/gvisor/runsc/fsgofer/filter"
+ "gvisor.dev/gvisor/runsc/specutils"
)
var caps = []string{
@@ -152,6 +152,10 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// modes exactly as sent by the sandbox, which will have applied its own umask.
syscall.Umask(0)
+ if err := fsgofer.OpenProcSelfFD(); err != nil {
+ Fatalf("failed to open /proc/self/fd: %v", err)
+ }
+
if err := syscall.Chroot(root); err != nil {
Fatalf("failed to chroot to %q: %v", root, err)
}
diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go
index aed5f3291..6c1f197a6 100644
--- a/runsc/cmd/kill.go
+++ b/runsc/cmd/kill.go
@@ -24,8 +24,8 @@ import (
"flag"
"github.com/google/subcommands"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Kill implements subcommands.Command for the "kill" command.
diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go
index 1f5ca2473..dd2d99a6b 100644
--- a/runsc/cmd/list.go
+++ b/runsc/cmd/list.go
@@ -25,8 +25,8 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// List implements subcommands.Command for the "list" command for the "list" command.
diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go
index 11b36aa10..9c0e92001 100644
--- a/runsc/cmd/pause.go
+++ b/runsc/cmd/pause.go
@@ -19,8 +19,8 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Pause implements subcommands.Command for the "pause" command.
diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go
index 3a3e6f17a..45c644f3f 100644
--- a/runsc/cmd/ps.go
+++ b/runsc/cmd/ps.go
@@ -20,9 +20,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// PS implements subcommands.Command for the "ps" command.
diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go
index 3ab2f5676..7be60cd7d 100644
--- a/runsc/cmd/restore.go
+++ b/runsc/cmd/restore.go
@@ -21,9 +21,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Restore implements subcommands.Command for the "restore" command.
@@ -80,25 +80,38 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
+ if conf.Rootless {
+ return Errorf("Rootless mode not supported with %q", r.Name())
+ }
+
bundleDir := r.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
- Fatalf("reading spec: %v", err)
+ return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
if r.imagePath == "" {
- Fatalf("image-path flag must be provided")
+ return Errorf("image-path flag must be provided")
}
conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName)
- ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
+ runArgs := container.Args{
+ ID: id,
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: r.consoleSocket,
+ PIDFile: r.pidFile,
+ UserLog: r.userLog,
+ Attached: !r.detach,
+ }
+ ws, err := container.Run(conf, runArgs)
if err != nil {
- Fatalf("running container: %v", err)
+ return Errorf("running container: %v", err)
}
*waitStatus = ws
diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go
index 9a2ade41e..b2df5c640 100644
--- a/runsc/cmd/resume.go
+++ b/runsc/cmd/resume.go
@@ -19,8 +19,8 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Resume implements subcommands.Command for the "resume" command.
diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go
index c228b4f93..33f4bc12b 100644
--- a/runsc/cmd/run.go
+++ b/runsc/cmd/run.go
@@ -20,9 +20,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Run implements subcommands.Command for the "run" command.
@@ -67,19 +67,32 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
+ if conf.Rootless {
+ return Errorf("Rootless mode not supported with %q", r.Name())
+ }
+
bundleDir := r.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
- Fatalf("reading spec: %v", err)
+ return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
- ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
+ runArgs := container.Args{
+ ID: id,
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: r.consoleSocket,
+ PIDFile: r.pidFile,
+ UserLog: r.userLog,
+ Attached: !r.detach,
+ }
+ ws, err := container.Run(conf, runArgs)
if err != nil {
- Fatalf("running container: %v", err)
+ return Errorf("running container: %v", err)
}
*waitStatus = ws
diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go
index 31e8f42bb..de2115dff 100644
--- a/runsc/cmd/start.go
+++ b/runsc/cmd/start.go
@@ -18,8 +18,8 @@ import (
"context"
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Start implements subcommands.Command for the "start" command.
diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go
index f0d449b19..e9f41cbd8 100644
--- a/runsc/cmd/state.go
+++ b/runsc/cmd/state.go
@@ -21,9 +21,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// State implements subcommands.Command for the "state" command.
diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go
index 9c8a66490..fb6c1ab29 100644
--- a/runsc/cmd/syscalls.go
+++ b/runsc/cmd/syscalls.go
@@ -27,7 +27,7 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
// Syscalls implements subcommands.Command for the "syscalls" command.
@@ -41,7 +41,7 @@ type Syscalls struct {
// Maps operating system to architecture to ArchInfo.
type CompatibilityInfo map[string]map[string]ArchInfo
-// ArchInfo is compatbility doc for an architecture.
+// ArchInfo is compatibility doc for an architecture.
type ArchInfo struct {
// Syscalls maps syscall number for the architecture to the doc.
Syscalls map[uintptr]SyscallDoc `json:"syscalls"`
diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go
index 58fd01974..046489687 100644
--- a/runsc/cmd/wait.go
+++ b/runsc/cmd/wait.go
@@ -22,8 +22,8 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
const (
diff --git a/runsc/console/BUILD b/runsc/console/BUILD
index 3ff9eba27..e623c1a0f 100644
--- a/runsc/console/BUILD
+++ b/runsc/console/BUILD
@@ -4,8 +4,10 @@ package(licenses = ["notice"])
go_library(
name = "console",
- srcs = ["console.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/console",
+ srcs = [
+ "console.go",
+ ],
+ importpath = "gvisor.dev/gvisor/runsc/console",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 13709a0ae..e246c38ae 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -9,7 +9,7 @@ go_library(
"hook.go",
"status.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/container",
+ importpath = "gvisor.dev/gvisor/runsc/container",
visibility = [
"//runsc:__subpackages__",
"//test:__subpackages__",
@@ -53,6 +53,7 @@ go_test(
"//pkg/unet",
"//pkg/urpc",
"//runsc/boot",
+ "//runsc/boot/platforms",
"//runsc/specutils",
"//runsc/test/testutil",
"@com_github_cenkalti_backoff//:go_default_library",
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index d016533e6..e9372989f 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -27,10 +27,10 @@ import (
"github.com/kr/pty"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// socketPath creates a path inside bundleDir and ensures that the returned
@@ -138,8 +138,13 @@ func TestConsoleSocket(t *testing.T) {
defer cleanup()
// Create the container and pass the socket name.
- id := testutil.UniqueContainerID()
- c, err := Create(id, spec, conf, bundleDir, sock, "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: sock,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -167,7 +172,12 @@ func TestJobControlSignalExec(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -186,7 +196,7 @@ func TestJobControlSignalExec(t *testing.T) {
defer ptySlave.Close()
// Exec bash and attach a terminal.
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/bin/bash",
// Don't let bash execute from profile or rc files, otherwise
// our PID counts get messed up.
@@ -198,7 +208,7 @@ func TestJobControlSignalExec(t *testing.T) {
StdioIsPty: true,
}
- pid, err := c.Execute(args)
+ pid, err := c.Execute(execArgs)
if err != nil {
t.Fatalf("error executing: %v", err)
}
@@ -296,8 +306,13 @@ func TestJobControlSignalRootContainer(t *testing.T) {
defer cleanup()
// Create the container and pass the socket name.
- id := testutil.UniqueContainerID()
- c, err := Create(id, spec, conf, bundleDir, sock, "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: sock,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 04b611b56..8320bb2ca 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -33,12 +33,12 @@ import (
"github.com/cenkalti/backoff"
"github.com/gofrs/flock"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/sandbox"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/sandbox"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -206,7 +206,7 @@ func findContainerRoot(rootDir, partialID string) (string, error) {
}
// Now see whether id could be an abbreviation of exactly 1 of the
- // container ids. If id is ambigious (it could match more than 1
+ // container ids. If id is ambiguous (it could match more than 1
// container), it is an error.
cRoot = ""
ids, err := List(rootDir)
@@ -242,16 +242,47 @@ func List(rootDir string) ([]string, error) {
return out, nil
}
-// Create creates the container in a new Sandbox process, unless the metadata
+// Args is used to configure a new container.
+type Args struct {
+ // ID is the container unique identifier.
+ ID string
+
+ // Spec is the OCI spec that describes the container.
+ Spec *specs.Spec
+
+ // BundleDir is the directory containing the container bundle.
+ BundleDir string
+
+ // ConsoleSocket is the path to a unix domain socket that will receive
+ // the console FD. It may be empty.
+ ConsoleSocket string
+
+ // PIDFile is the filename where the container's root process PID will be
+ // written to. It may be empty.
+ PIDFile string
+
+ // UserLog is the filename to send user-visible logs to. It may be empty.
+ //
+ // It only applies for the init container.
+ UserLog string
+
+ // Attached indicates that the sandbox lifecycle is attached with the caller.
+ // If the caller exits, the sandbox should exit too.
+ //
+ // It only applies for the init container.
+ Attached bool
+}
+
+// New creates the container in a new Sandbox process, unless the metadata
// indicates that an existing Sandbox should be used. The caller must call
// Destroy() on the container.
-func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile, userLog string) (*Container, error) {
- log.Debugf("Create container %q in root dir: %s", id, conf.RootDir)
- if err := validateID(id); err != nil {
+func New(conf *boot.Config, args Args) (*Container, error) {
+ log.Debugf("Create container %q in root dir: %s", args.ID, conf.RootDir)
+ if err := validateID(args.ID); err != nil {
return nil, err
}
- unlockRoot, err := maybeLockRootContainer(spec, conf.RootDir)
+ unlockRoot, err := maybeLockRootContainer(args.Spec, conf.RootDir)
if err != nil {
return nil, err
}
@@ -259,7 +290,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// Lock the container metadata file to prevent concurrent creations of
// containers with the same id.
- containerRoot := filepath.Join(conf.RootDir, id)
+ containerRoot := filepath.Join(conf.RootDir, args.ID)
unlock, err := lockContainerMetadata(containerRoot)
if err != nil {
return nil, err
@@ -269,16 +300,16 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// Check if the container already exists by looking for the metadata
// file.
if _, err := os.Stat(filepath.Join(containerRoot, metadataFilename)); err == nil {
- return nil, fmt.Errorf("container with id %q already exists", id)
+ return nil, fmt.Errorf("container with id %q already exists", args.ID)
} else if !os.IsNotExist(err) {
return nil, fmt.Errorf("looking for existing container in %q: %v", containerRoot, err)
}
c := &Container{
- ID: id,
- Spec: spec,
- ConsoleSocket: consoleSocket,
- BundleDir: bundleDir,
+ ID: args.ID,
+ Spec: args.Spec,
+ ConsoleSocket: args.ConsoleSocket,
+ BundleDir: args.BundleDir,
Root: containerRoot,
Status: Creating,
CreatedAt: time.Now(),
@@ -294,31 +325,47 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// started in an existing sandbox, we must do so. The metadata will
// indicate the ID of the sandbox, which is the same as the ID of the
// init container in the sandbox.
- if isRoot(spec) {
- log.Debugf("Creating new sandbox for container %q", id)
+ if isRoot(args.Spec) {
+ log.Debugf("Creating new sandbox for container %q", args.ID)
// Create and join cgroup before processes are created to ensure they are
- // part of the cgroup from the start (and all tneir children processes).
- cg, err := cgroup.New(spec)
+ // part of the cgroup from the start (and all their children processes).
+ cg, err := cgroup.New(args.Spec)
if err != nil {
return nil, err
}
if cg != nil {
// If there is cgroup config, install it before creating sandbox process.
- if err := cg.Install(spec.Linux.Resources); err != nil {
+ if err := cg.Install(args.Spec.Linux.Resources); err != nil {
return nil, fmt.Errorf("configuring cgroup: %v", err)
}
}
if err := runInCgroup(cg, func() error {
- ioFiles, specFile, err := c.createGoferProcess(spec, conf, bundleDir)
+ ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir)
if err != nil {
return err
}
// Start a new sandbox for this container. Any errors after this point
// must destroy the container.
- c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, cg)
- return err
+ sandArgs := &sandbox.Args{
+ ID: args.ID,
+ Spec: args.Spec,
+ BundleDir: args.BundleDir,
+ ConsoleSocket: args.ConsoleSocket,
+ UserLog: args.UserLog,
+ IOFiles: ioFiles,
+ MountsFile: specFile,
+ Cgroup: cg,
+ Attached: args.Attached,
+ }
+ sand, err := sandbox.New(conf, sandArgs)
+ if err != nil {
+ return err
+ }
+ c.Sandbox = sand
+ return nil
+
}); err != nil {
return nil, err
}
@@ -331,7 +378,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// * A container struct whose sandbox ID is equal to the above
// container/sandbox ID, but that has a different container
// ID. This is the child container.
- sbid, ok := specutils.SandboxID(spec)
+ sbid, ok := specutils.SandboxID(args.Spec)
if !ok {
return nil, fmt.Errorf("no sandbox ID found when creating container")
}
@@ -356,8 +403,8 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// Write the PID file. Containerd considers the create complete after
// this file is created, so it must be the last thing we do.
- if pidFile != "" {
- if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil {
+ if args.PIDFile != "" {
+ if err := ioutil.WriteFile(args.PIDFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil {
return nil, fmt.Errorf("error writing PID file: %v", err)
}
}
@@ -399,7 +446,7 @@ func (c *Container) Start(conf *boot.Config) error {
}
} else {
// Join cgroup to strt gofer process to ensure it's part of the cgroup from
- // the start (and all tneir children processes).
+ // the start (and all their children processes).
if err := runInCgroup(c.Sandbox.Cgroup, func() error {
// Create the gofer process.
ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir)
@@ -461,13 +508,13 @@ func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile str
}
// Run is a helper that calls Create + Start + Wait.
-func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile, userLog string, detach bool) (syscall.WaitStatus, error) {
- log.Debugf("Run container %q in root dir: %s", id, conf.RootDir)
- c, err := Create(id, spec, conf, bundleDir, consoleSocket, pidFile, userLog)
+func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) {
+ log.Debugf("Run container %q in root dir: %s", args.ID, conf.RootDir)
+ c, err := New(conf, args)
if err != nil {
return 0, fmt.Errorf("creating container: %v", err)
}
- // Clean up partially created container if an error ocurrs.
+ // Clean up partially created container if an error occurs.
// Any errors returned by Destroy() itself are ignored.
cu := specutils.MakeCleanup(func() {
c.Destroy()
@@ -476,7 +523,7 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
if conf.RestoreFile != "" {
log.Debugf("Restore: %v", conf.RestoreFile)
- if err := c.Restore(spec, conf, conf.RestoreFile); err != nil {
+ if err := c.Restore(args.Spec, conf, conf.RestoreFile); err != nil {
return 0, fmt.Errorf("starting container: %v", err)
}
} else {
@@ -484,11 +531,11 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
return 0, fmt.Errorf("starting container: %v", err)
}
}
- if detach {
- cu.Release()
- return 0, nil
+ if args.Attached {
+ return c.Wait()
}
- return c.Wait()
+ cu.Release()
+ return 0, nil
}
// Execute runs the specified command in the container. It returns the PID of
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 72c5ecbb0..c1d6ca7b8 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -17,6 +17,7 @@ package container
import (
"bytes"
"fmt"
+ "io"
"io/ioutil"
"os"
"path"
@@ -31,12 +32,14 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot/platforms"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// waitForProcessList waits for the given process list to show up in the container.
@@ -210,7 +213,13 @@ func run(spec *specs.Spec, conf *boot.Config) error {
defer os.RemoveAll(bundleDir)
// Create, start and wait for the container.
- ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "", false)
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ Attached: true,
+ }
+ ws, err := Run(conf, args)
if err != nil {
return fmt.Errorf("running container: %v", err)
}
@@ -249,7 +258,7 @@ func configs(opts ...configOption) []*boot.Config {
if testutil.RaceEnabled {
continue
}
- c.Platform = boot.PlatformKVM
+ c.Platform = platforms.KVM
case nonExclusiveFS:
c.FileAccess = boot.FileAccessShared
default:
@@ -294,15 +303,19 @@ func TestLifecycle(t *testing.T) {
},
}
// Create the container.
- id := testutil.UniqueContainerID()
- c, err := Create(id, spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
defer c.Destroy()
// Load the container from disk and check the status.
- c, err = Load(rootDir, id)
+ c, err = Load(rootDir, args.ID)
if err != nil {
t.Fatalf("error loading container: %v", err)
}
@@ -315,7 +328,7 @@ func TestLifecycle(t *testing.T) {
if err != nil {
t.Fatalf("error listing containers: %v", err)
}
- if got, want := ids, []string{id}; !reflect.DeepEqual(got, want) {
+ if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) {
t.Errorf("container list got %v, want %v", got, want)
}
@@ -325,7 +338,7 @@ func TestLifecycle(t *testing.T) {
}
// Load the container from disk and check the status.
- c, err = Load(rootDir, id)
+ c, err = Load(rootDir, args.ID)
if err != nil {
t.Fatalf("error loading container: %v", err)
}
@@ -366,7 +379,7 @@ func TestLifecycle(t *testing.T) {
wg.Wait()
// Load the container from disk and check the status.
- c, err = Load(rootDir, id)
+ c, err = Load(rootDir, args.ID)
if err != nil {
t.Fatalf("error loading container: %v", err)
}
@@ -389,7 +402,7 @@ func TestLifecycle(t *testing.T) {
}
// Loading the container by id should fail.
- if _, err = Load(rootDir, id); err == nil {
+ if _, err = Load(rootDir, args.ID); err == nil {
t.Errorf("expected loading destroyed container to fail, but it did not")
}
}
@@ -397,6 +410,46 @@ func TestLifecycle(t *testing.T) {
// Test the we can execute the application with different path formats.
func TestExePath(t *testing.T) {
+ // Create two directories that will be prepended to PATH.
+ firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first")
+ if err != nil {
+ t.Fatal(err)
+ }
+ secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // Create two minimal executables in the second path, two of which
+ // will be masked by files in first path.
+ for _, p := range []string{"unmasked", "masked1", "masked2"} {
+ path := filepath.Join(secondPath, p)
+ f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer f.Close()
+ if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil {
+ t.Fatal(err)
+ }
+ }
+
+ // Create a non-executable file in the first path which masks a healthy
+ // executable in the second.
+ nonExecutable := filepath.Join(firstPath, "masked1")
+ f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666)
+ if err != nil {
+ t.Fatal(err)
+ }
+ f2.Close()
+
+ // Create a non-regular file in the first path which masks a healthy
+ // executable in the second.
+ nonRegular := filepath.Join(firstPath, "masked2")
+ if err := os.Mkdir(nonRegular, 0777); err != nil {
+ t.Fatal(err)
+ }
+
for _, conf := range configs(overlay) {
t.Logf("Running test with conf: %+v", conf)
for _, test := range []struct {
@@ -409,14 +462,36 @@ func TestExePath(t *testing.T) {
{path: "thisfiledoesntexit", success: false},
{path: "bin/thisfiledoesntexit", success: false},
{path: "/bin/thisfiledoesntexit", success: false},
+
+ {path: "unmasked", success: true},
+ {path: filepath.Join(firstPath, "unmasked"), success: false},
+ {path: filepath.Join(secondPath, "unmasked"), success: true},
+
+ {path: "masked1", success: true},
+ {path: filepath.Join(firstPath, "masked1"), success: false},
+ {path: filepath.Join(secondPath, "masked1"), success: true},
+
+ {path: "masked2", success: true},
+ {path: filepath.Join(firstPath, "masked2"), success: false},
+ {path: filepath.Join(secondPath, "masked2"), success: true},
} {
spec := testutil.NewSpecWithArgs(test.path)
+ spec.Process.Env = []string{
+ fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")),
+ }
+
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("exec: %s, error setting up container: %v", test.path, err)
}
- ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "", false)
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ Attached: true,
+ }
+ ws, err := Run(conf, args)
os.RemoveAll(rootDir)
os.RemoveAll(bundleDir)
@@ -449,7 +524,13 @@ func TestAppExitStatus(t *testing.T) {
defer os.RemoveAll(rootDir)
defer os.RemoveAll(bundleDir)
- ws, err := Run(testutil.UniqueContainerID(), succSpec, conf, bundleDir, "", "", "", false)
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: succSpec,
+ BundleDir: bundleDir,
+ Attached: true,
+ }
+ ws, err := Run(conf, args)
if err != nil {
t.Fatalf("error running container: %v", err)
}
@@ -468,7 +549,13 @@ func TestAppExitStatus(t *testing.T) {
defer os.RemoveAll(rootDir2)
defer os.RemoveAll(bundleDir2)
- ws, err = Run(testutil.UniqueContainerID(), errSpec, conf, bundleDir2, "", "", "", false)
+ args2 := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: errSpec,
+ BundleDir: bundleDir2,
+ Attached: true,
+ }
+ ws, err = Run(conf, args2)
if err != nil {
t.Fatalf("error running container: %v", err)
}
@@ -493,7 +580,12 @@ func TestExec(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -525,7 +617,7 @@ func TestExec(t *testing.T) {
t.Error(err)
}
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/bin/sleep",
Argv: []string{"/bin/sleep", "5"},
WorkingDirectory: "/",
@@ -536,7 +628,7 @@ func TestExec(t *testing.T) {
// First, start running exec (whick blocks).
status := make(chan error, 1)
go func() {
- exitStatus, err := cont.executeSync(args)
+ exitStatus, err := cont.executeSync(execArgs)
if err != nil {
log.Debugf("error executing: %v", err)
status <- err
@@ -584,7 +676,12 @@ func TestKillPid(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -655,7 +752,12 @@ func TestCheckpointRestore(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -701,7 +803,12 @@ func TestCheckpointRestore(t *testing.T) {
defer outputFile2.Close()
// Restore into a new container.
- cont2, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args2 := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont2, err := New(conf, args2)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -740,7 +847,12 @@ func TestCheckpointRestore(t *testing.T) {
defer outputFile3.Close()
// Restore into a new container.
- cont3, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args3 := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont3, err := New(conf, args3)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -777,7 +889,7 @@ func TestUnixDomainSockets(t *testing.T) {
t.Logf("Running test with conf: %+v", conf)
// UDS path is limited to 108 chars for compatibility with older systems.
- // Use '/tmp' (instead of testutil.TmpDir) to to ensure the size limit is
+ // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is
// not exceeded. Assumes '/tmp' exists in the system.
dir, err := ioutil.TempDir("/tmp", "uds-test")
if err != nil {
@@ -819,7 +931,12 @@ func TestUnixDomainSockets(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -866,7 +983,12 @@ func TestUnixDomainSockets(t *testing.T) {
defer outputFile2.Close()
// Restore into a new container.
- contRestore, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ argsRestore := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ contRestore, err := New(conf, argsRestore)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -920,7 +1042,12 @@ func TestPauseResume(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -948,7 +1075,7 @@ func TestPauseResume(t *testing.T) {
}
script := fmt.Sprintf("while [[ -f %q ]]; do sleep 0.1; done", lock.Name())
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/bin/bash",
Argv: []string{"bash", "-c", script},
WorkingDirectory: "/",
@@ -956,7 +1083,7 @@ func TestPauseResume(t *testing.T) {
}
// First, start running exec.
- _, err = cont.Execute(args)
+ _, err = cont.Execute(execArgs)
if err != nil {
t.Fatalf("error executing: %v", err)
}
@@ -1025,7 +1152,12 @@ func TestPauseResumeStatus(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1089,7 +1221,12 @@ func TestCapabilities(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1131,7 +1268,7 @@ func TestCapabilities(t *testing.T) {
// Need to traverse the intermediate directory.
os.Chmod(rootDir, 0755)
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: exePath,
Argv: []string{exePath},
WorkingDirectory: "/",
@@ -1141,16 +1278,16 @@ func TestCapabilities(t *testing.T) {
}
// "exe" should fail because we don't have the necessary permissions.
- if _, err := cont.executeSync(args); err == nil {
+ if _, err := cont.executeSync(execArgs); err == nil {
t.Fatalf("container executed without error, but an error was expected")
}
// Now we run with the capability enabled and should succeed.
- args.Capabilities = &auth.TaskCapabilities{
+ execArgs.Capabilities = &auth.TaskCapabilities{
EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
}
// "exe" should not fail this time.
- if _, err := cont.executeSync(args); err != nil {
- t.Fatalf("container failed to exec %v: %v", args, err)
+ if _, err := cont.executeSync(execArgs); err != nil {
+ t.Fatalf("container failed to exec %v: %v", execArgs, err)
}
}
@@ -1231,7 +1368,12 @@ func TestReadonlyRoot(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create, start and wait for the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1299,7 +1441,12 @@ func TestUIDMap(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create, start and wait for the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1351,7 +1498,12 @@ func TestReadonlyMount(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create, start and wait for the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1395,7 +1547,12 @@ func TestAbbreviatedIDs(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(cid, spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: cid,
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1440,7 +1597,12 @@ func TestGoferExits(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1519,7 +1681,14 @@ func TestUserLog(t *testing.T) {
userLog := filepath.Join(dir, "user.log")
// Create, start and wait for the container.
- ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", userLog, false)
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ UserLog: userLog,
+ Attached: true,
+ }
+ ws, err := Run(conf, args)
if err != nil {
t.Fatalf("error running container: %v", err)
}
@@ -1553,7 +1722,12 @@ func TestWaitOnExitedSandbox(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and Start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1596,7 +1770,12 @@ func TestDestroyNotStarted(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create the container and check that it can be destroyed.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1618,15 +1797,19 @@ func TestDestroyStarting(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create the container and check that it can be destroyed.
- id := testutil.UniqueContainerID()
- c, err := Create(id, spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
// Container is not thread safe, so load another instance to run in
// concurrently.
- startCont, err := Load(rootDir, id)
+ startCont, err := Load(rootDir, args.ID)
if err != nil {
t.Fatalf("error loading container: %v", err)
}
@@ -1731,7 +1914,12 @@ func TestMountPropagation(t *testing.T) {
defer os.RemoveAll(rootDir)
defer os.RemoveAll(bundleDir)
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("creating container: %v", err)
}
@@ -1749,21 +1937,21 @@ func TestMountPropagation(t *testing.T) {
// Check that mount didn't propagate to private mount.
privFile := filepath.Join(priv, "mnt", "file")
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/usr/bin/test",
Argv: []string{"test", "!", "-f", privFile},
}
- if ws, err := cont.executeSync(args); err != nil || ws != 0 {
+ if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err)
}
// Check that mount propagated to slave mount.
slaveFile := filepath.Join(slave, "mnt", "file")
- args = &control.ExecArgs{
+ execArgs = &control.ExecArgs{
Filename: "/usr/bin/test",
Argv: []string{"test", "-f", slaveFile},
}
- if ws, err := cont.executeSync(args); err != nil || ws != 0 {
- t.Fatalf("exec: test -f %q, ws: %v, err: %v", privFile, ws, err)
+ if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
+ t.Fatalf("exec: test -f %q, ws: %v, err: %v", slaveFile, ws, err)
}
}
@@ -1812,7 +2000,12 @@ func TestMountSymlink(t *testing.T) {
defer os.RemoveAll(rootDir)
defer os.RemoveAll(bundleDir)
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("creating container: %v", err)
}
@@ -1825,11 +2018,11 @@ func TestMountSymlink(t *testing.T) {
// Check that symlink was resolved and mount was created where the symlink
// is pointing to.
file := path.Join(target, "file")
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/usr/bin/test",
Argv: []string{"test", "-f", file},
}
- if ws, err := cont.executeSync(args); err != nil || ws != 0 {
+ if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
}
}
@@ -1853,7 +2046,7 @@ func TestMain(m *testing.M) {
if err := testutil.ConfigureExePath(); err != nil {
panic(err.Error())
}
- testutil.RunAsRoot()
+ specutils.MaybeRunAsRoot()
os.Exit(m.Run())
}
diff --git a/runsc/container/hook.go b/runsc/container/hook.go
index acae6781e..901607aee 100644
--- a/runsc/container/hook.go
+++ b/runsc/container/hook.go
@@ -24,7 +24,7 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// This file implements hooks as defined in OCI spec:
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 4ea3c74ac..c0f9b372c 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -28,10 +28,10 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
@@ -84,7 +84,12 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
}
bundles = append(bundles, bundleDir)
- cont, err := Create(ids[i], spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: ids[i],
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
cleanup()
return nil, nil, fmt.Errorf("error creating container: %v", err)
@@ -99,6 +104,36 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
return containers, cleanup, nil
}
+// execDesc describes one command for execMany to run inside a container,
+// together with the exit status the command is expected to produce.
+type execDesc struct {
+ // c is the container the command is executed in.
+ c *Container
+ // cmd is the argv handed to the container as control.ExecArgs.Argv.
+ cmd []string
+ // want is the exit status the command must return; when left unset
+ // it is 0.
+ want int
+ // desc is a short label that execMany includes in the error reported
+ // when the exit status differs from want.
+ desc string
+}
+
+// execMany runs every command in execs, in order, inside its container. It
+// stops at the first command that cannot be executed or whose exit status
+// differs from the expected one, and returns an error describing it. It
+// returns nil when all commands behaved as expected.
+func execMany(execs []execDesc) error {
+ for _, e := range execs {
+ execArgs := &control.ExecArgs{Argv: e.cmd}
+ ws, err := e.c.executeSync(execArgs)
+ if err != nil {
+ return fmt.Errorf("error executing %+v: %v", execArgs, err)
+ }
+ if got := ws.ExitStatus(); got != e.want {
+ return fmt.Errorf("%q: exec %q got exit status: %d, want: %d", e.desc, e.cmd, got, e.want)
+ }
+ }
+ return nil
+}
+
+// createSharedMount annotates every spec in pod so that mount is declared as
+// a pod-shared mount called name. The source and type of mount, the share
+// mode "pod" and, when present, the comma-joined mount options are stored
+// as annotations keyed under boot.MountPrefix/name.
+func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
+ for _, spec := range pod {
+ // Assigning to a nil map panics, so allocate the map for specs that
+ // carry no annotations yet.
+ if spec.Annotations == nil {
+ spec.Annotations = make(map[string]string)
+ }
+ spec.Annotations[path.Join(boot.MountPrefix, name, "source")] = mount.Source
+ spec.Annotations[path.Join(boot.MountPrefix, name, "type")] = mount.Type
+ spec.Annotations[path.Join(boot.MountPrefix, name, "share")] = "pod"
+ if len(mount.Options) > 0 {
+ spec.Annotations[path.Join(boot.MountPrefix, name, "options")] = strings.Join(mount.Options, ",")
+ }
+ }
+}
+
// TestMultiContainerSanity checks that it is possible to run 2 dead-simple
// containers in the same sandbox.
func TestMultiContainerSanity(t *testing.T) {
@@ -631,7 +666,12 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
}
defer os.RemoveAll(rootBundleDir)
- root, err := Create(ids[0], specs[0], conf, rootBundleDir, "", "", "")
+ rootArgs := Args{
+ ID: ids[0],
+ Spec: specs[0],
+ BundleDir: rootBundleDir,
+ }
+ root, err := New(conf, rootArgs)
if err != nil {
t.Fatalf("error creating root container: %v", err)
}
@@ -647,7 +687,12 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
}
defer os.RemoveAll(bundleDir)
- cont, err := Create(ids[1], specs[1], conf, bundleDir, "", "", "")
+ args := Args{
+ ID: ids[1],
+ Spec: specs[1],
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -682,7 +727,12 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
}
defer os.RemoveAll(rootBundleDir)
- root, err := Create(ids[0], specs[0], conf, rootBundleDir, "", "", "")
+ rootArgs := Args{
+ ID: ids[0],
+ Spec: specs[0],
+ BundleDir: rootBundleDir,
+ }
+ root, err := New(conf, rootArgs)
if err != nil {
t.Fatalf("error creating root container: %v", err)
}
@@ -703,7 +753,12 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
}
defer os.RemoveAll(bundleDir)
- cont, err := Create(ids[i], specs[i], conf, bundleDir, "", "", "")
+ args := Args{
+ ID: ids[i],
+ Spec: specs[i],
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -777,7 +832,12 @@ func TestMultiContainerGoferStop(t *testing.T) {
// Start root container.
conf := testutil.TestConfigWithRoot(rootDir)
- root, err := Create(rootID, rootSpec, conf, bundleDir, "", "", "")
+ rootArgs := Args{
+ ID: rootID,
+ Spec: rootSpec,
+ BundleDir: bundleDir,
+ }
+ root, err := New(conf, rootArgs)
if err != nil {
t.Fatalf("error creating root container: %v", err)
}
@@ -801,7 +861,12 @@ func TestMultiContainerGoferStop(t *testing.T) {
}
defer os.RemoveAll(bundleDir)
- child, err := Create(ids[j], spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: ids[j],
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ child, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -828,3 +893,277 @@ func TestMultiContainerGoferStop(t *testing.T) {
}
}
}
+
+// TestMultiContainerSharedMount tests that pod shared mounts are properly
+// mounted in 2 containers and that changes made from one container are
+// reflected in the other.
+func TestMultiContainerSharedMount(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: nil,
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+
+ // The second container sees the same mount at a different path.
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+
+ createSharedMount(mnt0, "test-mount", podSpec...)
+
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ // file0 and file1 name the same entry of the shared mount as seen
+ // from container0 and container1 respectively.
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
+ desc: "directory is mounted in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
+ desc: "directory is mounted in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/touch", file0},
+ desc: "create file in container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ desc: "file appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ desc: "file appears in container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/rm", file1},
+ desc: "remove file from container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-f", file0},
+ desc: "file removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-f", file1},
+ desc: "file removed from container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/mkdir", file1},
+ desc: "create directory in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", file0},
+ desc: "dir appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", file1},
+ desc: "dir appears in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/bin/rmdir", file0},
+ desc: "remove directory from container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-d", file0},
+ desc: "dir removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-d", file1},
+ desc: "dir removed from container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+ }
+}
+
+// TestMultiContainerSharedMountReadonly checks that a pod shared mount
+// requested with the "ro" option is mounted in both containers and that
+// writing to it fails from either of them.
+func TestMultiContainerSharedMountReadonly(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Two containers that just sleep, each mounting the same read-only
+ // tmpfs at a different destination.
+ sleepCmd := []string{"sleep", "100"}
+ pod, ids := createSpecs(sleepCmd, sleepCmd)
+ roMount0 := specs.Mount{
+ Type: "tmpfs",
+ Source: "/some/dir",
+ Destination: "/mydir/test",
+ Options: []string{"ro"},
+ }
+ roMount1 := roMount0
+ roMount1.Destination = "/mydir2/test2"
+
+ pod[0].Mounts = append(pod[0].Mounts, roMount0)
+ pod[1].Mounts = append(pod[1].Mounts, roMount1)
+ createSharedMount(roMount0, "test-mount", pod...)
+
+ containers, cleanup, err := startContainers(conf, pod, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ // The same file inside the shared mount, as named from each container.
+ target0 := path.Join(roMount0.Destination, "abc")
+ target1 := path.Join(roMount1.Destination, "abc")
+ steps := []execDesc{
+ {
+ desc: "directory is mounted in container0",
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", roMount0.Destination},
+ },
+ {
+ desc: "directory is mounted in container1",
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", roMount1.Destination},
+ },
+ {
+ desc: "fails to write to container0",
+ c: containers[0],
+ cmd: []string{"/usr/bin/touch", target0},
+ want: 1,
+ },
+ {
+ desc: "fails to write to container1",
+ c: containers[1],
+ cmd: []string{"/usr/bin/touch", target1},
+ want: 1,
+ },
+ }
+ if err := execMany(steps); err != nil {
+ t.Fatal(err)
+ }
+ }
+}
+
+// TestMultiContainerSharedMountRestart tests that shared pod mounts continue
+// to work after a container is destroyed and created again.
+func TestMultiContainerSharedMountRestart(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: nil,
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+
+ // The second container sees the same mount at a different path.
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+
+ createSharedMount(mnt0, "test-mount", podSpec...)
+
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ // file0 and file1 name the same entry of the shared mount as seen
+ // from container0 and container1 respectively.
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/touch", file0},
+ desc: "create file in container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ desc: "file appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ desc: "file appears in container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+
+ // Destroy the second container and create it again from the same
+ // spec; the file created above must still be visible afterwards.
+ if err := containers[1].Destroy(); err != nil {
+ t.Fatalf("error destroying container: %v", err)
+ }
+
+ bundleDir, err := testutil.SetupBundleDir(podSpec[1])
+ if err != nil {
+ t.Fatalf("error setting up bundle dir: %v", err)
+ }
+ defer os.RemoveAll(bundleDir)
+
+ args := Args{
+ ID: ids[1],
+ Spec: podSpec[1],
+ BundleDir: bundleDir,
+ }
+ containers[1], err = New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ if err := containers[1].Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
+
+ execs = []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ desc: "file is still in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ desc: "file is still in container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/rm", file1},
+ desc: "remove file from container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-f", file0},
+ desc: "file removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-f", file1},
+ desc: "file removed from container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+ }
+}
diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go
index 9d5a592a5..1f90d2462 100644
--- a/runsc/container/shared_volume_test.go
+++ b/runsc/container/shared_volume_test.go
@@ -22,10 +22,10 @@ import (
"path/filepath"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// TestSharedVolume checks that modifications to a volume mount are propagated
@@ -52,7 +52,12 @@ func TestSharedVolume(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -206,7 +211,12 @@ func TestSharedVolumeFile(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go
index 62923f1ef..b7fc6498f 100644
--- a/runsc/container/test_app/test_app.go
+++ b/runsc/container/test_app/test_app.go
@@ -29,7 +29,7 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func main() {
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 4adc9c1bc..80a4aa2fe 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -8,7 +8,7 @@ go_library(
"fsgofer.go",
"fsgofer_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer",
+ importpath = "gvisor.dev/gvisor/runsc/fsgofer",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD
index 78c5b526c..e2318a978 100644
--- a/runsc/fsgofer/filter/BUILD
+++ b/runsc/fsgofer/filter/BUILD
@@ -11,7 +11,7 @@ go_library(
"extra_filters_race.go",
"filter.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter",
+ importpath = "gvisor.dev/gvisor/runsc/fsgofer/filter",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index 4faab2946..2d50774d4 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -19,8 +19,8 @@ import (
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// allowedSyscalls is the set of syscalls executed by the gofer.
diff --git a/runsc/fsgofer/filter/extra_filters.go b/runsc/fsgofer/filter/extra_filters.go
index 5c5ec4e06..e28d4b8d6 100644
--- a/runsc/fsgofer/filter/extra_filters.go
+++ b/runsc/fsgofer/filter/extra_filters.go
@@ -17,11 +17,11 @@
package filter
import (
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by
-// Go intrumentation tools, e.g. -race, -msan.
+// Go instrumentation tools, e.g. -race, -msan.
// Returns empty when disabled.
func instrumentationFilters() seccomp.SyscallRules {
return nil
diff --git a/runsc/fsgofer/filter/extra_filters_msan.go b/runsc/fsgofer/filter/extra_filters_msan.go
index 553060bc3..8c6179c8f 100644
--- a/runsc/fsgofer/filter/extra_filters_msan.go
+++ b/runsc/fsgofer/filter/extra_filters_msan.go
@@ -19,8 +19,8 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by MSAN.
diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go
index 28555f898..885c92f7a 100644
--- a/runsc/fsgofer/filter/extra_filters_race.go
+++ b/runsc/fsgofer/filter/extra_filters_race.go
@@ -19,8 +19,8 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by TSAN.
diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go
index ff8154369..65053415f 100644
--- a/runsc/fsgofer/filter/filter.go
+++ b/runsc/fsgofer/filter/filter.go
@@ -18,7 +18,7 @@
package filter
import (
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// Install installs seccomp filters.
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 2cf50290a..fe450c64f 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -28,15 +28,16 @@ import (
"path"
"path/filepath"
"runtime"
+ "strconv"
"sync"
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -223,6 +224,28 @@ type localFile struct {
lastDirentOffset uint64
}
+// procSelfFD holds the open /proc/self/fd directory that reopenProcFd uses
+// as the base for reopening file descriptors. It is set by OpenProcSelfFD.
+var procSelfFD *fd.FD
+
+// OpenProcSelfFD opens the /proc/self/fd directory and keeps it in
+// procSelfFD, where reopenProcFd uses it to reopen file descriptors. It
+// returns an error when the directory cannot be opened.
+func OpenProcSelfFD() error {
+ const flags = syscall.O_DIRECTORY | syscall.O_RDONLY
+ dirFD, err := syscall.Open("/proc/self/fd", flags, 0)
+ if err != nil {
+ return fmt.Errorf("error opening /proc/self/fd: %v", err)
+ }
+ procSelfFD = fd.New(dirFD)
+ return nil
+}
+
+// reopenProcFd opens a new file descriptor, with the given open mode, for the
+// file that f refers to. It does so by opening f's entry relative to the
+// directory held in procSelfFD, so OpenProcSelfFD must have been called
+// beforehand.
+func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) {
+ dirFD := int(procSelfFD.FD())
+ entry := strconv.Itoa(f.FD())
+ // Entries in /proc/self/fd are symbolic links (see proc(5)), so
+ // O_NOFOLLOW has to be cleared for the open to reach the target file.
+ flags := mode &^ syscall.O_NOFOLLOW
+
+ newFD, err := syscall.Openat(dirFD, entry, flags, 0)
+ if err != nil {
+ return nil, err
+ }
+ return fd.New(newFD), nil
+}
+
func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, error) {
path := path.Join(parent.hostPath, name)
f, err := openAnyFile(path, func(mode int) (*fd.FD, error) {
@@ -348,7 +371,7 @@ func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
// name_to_handle_at and open_by_handle_at aren't supported by overlay2.
log.Debugf("Open reopening file, mode: %v, %q", mode, l.hostPath)
var err error
- newFile, err = fd.Open(l.hostPath, openFlags|mode.OSFlags(), 0)
+ newFile, err = reopenProcFd(l.file, openFlags|mode.OSFlags())
if err != nil {
return nil, p9.QID{}, 0, extractErrno(err)
}
@@ -477,7 +500,7 @@ func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) {
// Duplicate current file if 'names' is empty.
if len(names) == 0 {
newFile, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) {
- return fd.Open(l.hostPath, openFlags|mode, 0)
+ return reopenProcFd(l.file, openFlags|mode)
})
if err != nil {
return nil, nil, extractErrno(err)
@@ -596,7 +619,7 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error)
}
// SetAttr implements p9.File. Due to mismatch in file API, options
-// cannot be changed atomicaly and user may see partial changes when
+// cannot be changed atomically and user may see partial changes when
// an error happens.
func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
conf := l.attachPoint.conf
@@ -635,7 +658,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
f := l.file
if l.ft == regular && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
var err error
- f, err = fd.Open(l.hostPath, openFlags|syscall.O_WRONLY, 0)
+ f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY)
if err != nil {
return extractErrno(err)
}
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index 695836927..0a162bb8a 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -22,8 +22,8 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
)
func init() {
@@ -31,6 +31,10 @@ func init() {
allConfs = append(allConfs, rwConfs...)
allConfs = append(allConfs, roConfs...)
+
+ if err := OpenProcSelfFD(); err != nil {
+ panic(err)
+ }
}
func assertPanic(t *testing.T, f func()) {
diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go
index 58af5e44d..ff2556aee 100644
--- a/runsc/fsgofer/fsgofer_unsafe.go
+++ b/runsc/fsgofer/fsgofer_unsafe.go
@@ -18,8 +18,8 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserr"
)
func statAt(dirFd int, name string) (syscall.Stat_t, error) {
diff --git a/runsc/main.go b/runsc/main.go
index a214f6ba0..bc83c57a2 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -29,10 +29,11 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cmd"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/cmd"
+ "gvisor.dev/gvisor/runsc/specutils"
)
var (
@@ -61,16 +62,19 @@ var (
straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
// Flags that control sandbox runtime behavior.
- platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
- network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
- gso = flag.Bool("gso", true, "enable generic segmenation offload")
- fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
- overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
- watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
- panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
- profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
- netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
- numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
+ platformName = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
+ network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
+ gso = flag.Bool("gso", true, "enable generic segmenation offload")
+ fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
+ overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
+ watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
+ panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
+ profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
+ netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
+ numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
+ rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
+
+ // Test flags, not to be used outside tests, ever.
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
)
@@ -136,8 +140,8 @@ func main() {
}
cmd.ErrorLogger = errorLogger
- platformType, err := boot.MakePlatformType(*platform)
- if err != nil {
+ platformType := *platformName
+ if _, err := platform.Lookup(platformType); err != nil {
cmd.Fatalf("%v", err)
}
@@ -166,26 +170,28 @@ func main() {
// Create a new Config from the flags.
conf := &boot.Config{
- RootDir: *rootDir,
- Debug: *debug,
- LogFilename: *logFilename,
- LogFormat: *logFormat,
- DebugLog: *debugLog,
- DebugLogFormat: *debugLogFormat,
- FileAccess: fsAccess,
- Overlay: *overlay,
- Network: netType,
- GSO: *gso,
- LogPackets: *logPackets,
- Platform: platformType,
- Strace: *strace,
- StraceLogSize: *straceLogSize,
- WatchdogAction: wa,
- PanicSignal: *panicSignal,
- ProfileEnable: *profile,
- EnableRaw: *netRaw,
+ RootDir: *rootDir,
+ Debug: *debug,
+ LogFilename: *logFilename,
+ LogFormat: *logFormat,
+ DebugLog: *debugLog,
+ DebugLogFormat: *debugLogFormat,
+ FileAccess: fsAccess,
+ Overlay: *overlay,
+ Network: netType,
+ GSO: *gso,
+ LogPackets: *logPackets,
+ Platform: platformType,
+ Strace: *strace,
+ StraceLogSize: *straceLogSize,
+ WatchdogAction: wa,
+ PanicSignal: *panicSignal,
+ ProfileEnable: *profile,
+ EnableRaw: *netRaw,
+ NumNetworkChannels: *numNetworkChannels,
+ Rootless: *rootless,
+
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
- NumNetworkChannels: *numNetworkChannels,
}
if len(*straceSyscalls) != 0 {
conf.StraceSyscalls = strings.Split(*straceSyscalls, ",")
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index c0de9a28f..7fdceaab6 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -9,7 +9,7 @@ go_library(
"network_unsafe.go",
"sandbox.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/sandbox",
+ importpath = "gvisor.dev/gvisor/runsc/sandbox",
visibility = [
"//runsc:__subpackages__",
],
@@ -18,9 +18,10 @@ go_library(
"//pkg/control/server",
"//pkg/log",
"//pkg/sentry/control",
- "//pkg/sentry/platform/kvm",
+ "//pkg/sentry/platform",
"//pkg/urpc",
"//runsc/boot",
+ "//runsc/boot/platforms",
"//runsc/cgroup",
"//runsc/console",
"//runsc/specutils",
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 1fd091514..a965a9dcb 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -27,10 +27,10 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -228,7 +228,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
if err != nil {
return fmt.Errorf("getting link for interface %q: %v", iface.Name, err)
}
- link.LinkAddress = []byte(ifaceLink.Attrs().HardwareAddr)
+ link.LinkAddress = ifaceLink.Attrs().HardwareAddr
log.Debugf("Setting up network channels")
// Create the socket for the device.
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 032190636..4a11f617d 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -28,16 +28,17 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/control/client"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/console"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/control/client"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot/platforms"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/console"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Sandbox wraps a sandbox process.
@@ -73,10 +74,46 @@ type Sandbox struct {
statusMu sync.Mutex
}
+// Args is used to configure a new sandbox.
+type Args struct {
+ // ID is the sandbox unique identifier.
+ ID string
+
+ // Spec is the OCI spec that describes the container.
+ Spec *specs.Spec
+
+ // BundleDir is the directory containing the container bundle.
+ BundleDir string
+
+ // ConsoleSocket is the path to a unix domain socket that will receive
+ // the console FD. It may be empty.
+ ConsoleSocket string
+
+ // UserLog is the filename to send user-visible logs to. It may be empty.
+ UserLog string
+
+ // IOFiles is the list of files that connect to a 9P endpoint for the mount
+ // points using Gofers. They must be in the same order as mounts appear in
+ // the spec.
+ IOFiles []*os.File
+
+ // MountsFile is a file containing mount information from the spec. It's
+ // equivalent to the mounts from the spec, except that all paths have been
+ // resolved to their final absolute location.
+ MountsFile *os.File
+
+ // Cgroup is the cgroup that the sandbox is part of.
+ Cgroup *cgroup.Cgroup
+
+ // Attached indicates that the sandbox lifecycle is attached to the caller.
+ // If the caller exits, the sandbox should exit too.
+ Attached bool
+}
+
// New creates the sandbox process. The caller must call Destroy() on the
// sandbox.
-func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, specFile *os.File, cg *cgroup.Cgroup) (*Sandbox, error) {
- s := &Sandbox{ID: id, Cgroup: cg}
+func New(conf *boot.Config, args *Args) (*Sandbox, error) {
+ s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
c := specutils.MakeCleanup(func() {
@@ -93,7 +130,7 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
defer clientSyncFile.Close()
// Create the sandbox process.
- err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, sandboxSyncFile)
+ err = s.createSandboxProcess(conf, args, sandboxSyncFile)
// sandboxSyncFile has to be closed to be able to detect when the sandbox
// process exits unexpectedly.
sandboxSyncFile.Close()
@@ -291,7 +328,7 @@ func (s *Sandbox) connError(err error) error {
// createSandboxProcess starts the sandbox as a subprocess by running the "boot"
// command, passing in the bundle dir.
-func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, mountsFile, startSyncFile *os.File) error {
+func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncFile *os.File) error {
// nextFD is used to get unused FDs that we can pass to the sandbox. It
// starts at 3 because 0, 1, and 2 are taken by stdin/out/err.
nextFD := 3
@@ -327,7 +364,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// Add the "boot" command to the args.
//
// All flags after this must be for the boot command
- cmd.Args = append(cmd.Args, "boot", "--bundle="+bundleDir)
+ cmd.Args = append(cmd.Args, "boot", "--bundle="+args.BundleDir)
// Create a socket for the control server and donate it to the sandbox.
addr := boot.ControlSocketAddr(s.ID)
@@ -342,12 +379,12 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD))
nextFD++
- defer mountsFile.Close()
- cmd.ExtraFiles = append(cmd.ExtraFiles, mountsFile)
+ defer args.MountsFile.Close()
+ cmd.ExtraFiles = append(cmd.ExtraFiles, args.MountsFile)
cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD))
nextFD++
- specFile, err := specutils.OpenSpec(bundleDir)
+ specFile, err := specutils.OpenSpec(args.BundleDir)
if err != nil {
return err
}
@@ -361,7 +398,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
nextFD++
// If there is a gofer, sends all socket ends to the sandbox.
- for _, f := range ioFiles {
+ for _, f := range args.IOFiles {
defer f.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
cmd.Args = append(cmd.Args, "--io-fds="+strconv.Itoa(nextFD))
@@ -389,23 +426,22 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// If the console control socket file is provided, then create a new
// pty master/slave pair and set the TTY on the sandbox process.
- if consoleSocket != "" {
+ if args.ConsoleSocket != "" {
cmd.Args = append(cmd.Args, "--console=true")
// console.NewWithSocket will send the master on the given
// socket, and return the slave.
- tty, err := console.NewWithSocket(consoleSocket)
+ tty, err := console.NewWithSocket(args.ConsoleSocket)
if err != nil {
- return fmt.Errorf("setting up console with socket %q: %v", consoleSocket, err)
+ return fmt.Errorf("setting up console with socket %q: %v", args.ConsoleSocket, err)
}
defer tty.Close()
// Set the TTY as a controlling TTY on the sandbox process.
- // Note that the Ctty field must be the FD of the TTY in the
- // *new* process, not this process. Since we are about to
- // assign the TTY to nextFD, we can use that value here.
- // stdin, we can use FD 0 here.
cmd.SysProcAttr.Setctty = true
+ // The Ctty FD must be the FD in the child process's FD table,
+ // which will be nextFD in this case.
+ // See https://github.com/golang/go/issues/29458.
cmd.SysProcAttr.Ctty = nextFD
// Pass the tty as all stdio fds to sandbox.
@@ -456,7 +492,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
{Type: specs.UTSNamespace},
}
- if conf.Platform == boot.PlatformPtrace {
+ if conf.Platform == platforms.Ptrace {
// TODO(b/75837838): Also set a new PID namespace so that we limit
// access to other host processes.
log.Infof("Sandbox will be started in the current PID namespace")
@@ -469,7 +505,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// Joins the network namespace if network is enabled. the sandbox talks
// directly to the host network, which may have been configured in the
// namespace.
- if ns, ok := specutils.GetNS(specs.NetworkNamespace, spec); ok && conf.Network != boot.NetworkNone {
+ if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != boot.NetworkNone {
log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
nss = append(nss, ns)
} else if conf.Network == boot.NetworkHost {
@@ -483,10 +519,10 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// inside the user namespace specified in the spec or the current namespace
// if none is configured.
if conf.Network == boot.NetworkHost {
- if userns, ok := specutils.GetNS(specs.UserNamespace, spec); ok {
+ if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
nss = append(nss, userns)
- specutils.SetUIDGIDMappings(cmd, spec)
+ specutils.SetUIDGIDMappings(cmd, args.Spec)
} else {
log.Infof("Sandbox will be started in the current user namespace")
}
@@ -515,46 +551,64 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
+ cmd.Args = append(cmd.Args, "--setup-root")
- // Map nobody in the new namespace to nobody in the parent namespace.
- //
- // A sandbox process will construct an empty
- // root for itself, so it has to have the CAP_SYS_ADMIN
- // capability.
- //
- // FIXME(b/122554829): The current implementations of
- // os/exec doesn't allow to set ambient capabilities if
- // a process is started in a new user namespace. As a
- // workaround, we start the sandbox process with the 0
- // UID and then it constructs a chroot and sets UID to
- // nobody. https://github.com/golang/go/issues/2315
- const nobody = 65534
- cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(0),
- HostID: int(nobody - 1),
- Size: int(1),
- },
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
- }
- cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
+ if conf.Rootless {
+ log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getuid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getgid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
+
+ } else {
+ // Map nobody in the new namespace to nobody in the parent namespace.
+ //
+ // A sandbox process will construct an empty
+ // root for itself, so it has to have the CAP_SYS_ADMIN
+ // capability.
+ //
+ // FIXME(b/122554829): The current implementations of
+ // os/exec doesn't allow to set ambient capabilities if
+ // a process is started in a new user namespace. As a
+ // workaround, we start the sandbox process with the 0
+ // UID and then it constructs a chroot and sets UID to
+ // nobody. https://github.com/golang/go/issues/2315
+ const nobody = 65534
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: nobody - 1,
+ Size: 1,
+ },
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+
+ // Set credentials to run as user and group nobody.
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
}
- // Set credentials to run as user and group nobody.
- cmd.SysProcAttr.Credential = &syscall.Credential{
- Uid: 0,
- Gid: nobody,
- }
- cmd.Args = append(cmd.Args, "--setup-root")
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
@@ -580,8 +634,8 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
}
}
- if userLog != "" {
- f, err := os.OpenFile(userLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664)
+ if args.UserLog != "" {
+ f, err := os.OpenFile(args.UserLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664)
if err != nil {
return fmt.Errorf("opening compat log file: %v", err)
}
@@ -600,6 +654,11 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
log.Debugf("Donating FD %d: %q", i+3, f.Name())
}
+ if args.Attached {
+ // Kill sandbox if parent process exits in attached mode.
+ cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
+ }
+
log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr)
if err := specutils.StartInNS(cmd, nss); err != nil {
@@ -902,7 +961,7 @@ func (s *Sandbox) StartTrace(f *os.File) error {
return nil
}
-// StopTrace stops a previously started trace..
+// StopTrace stops a previously started trace.
func (s *Sandbox) StopTrace() error {
log.Debugf("Trace stop %q", s.ID)
conn, err := s.sandboxConnect()
@@ -917,6 +976,21 @@ func (s *Sandbox) StopTrace() error {
return nil
}
+// ChangeLogging changes logging options.
+func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error {
+ log.Debugf("Change logging start %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ if err := conn.Call(boot.ChangeLogging, &args, nil); err != nil {
+ return fmt.Errorf("changing sandbox %q logging: %v", s.ID, err)
+ }
+ return nil
+}
+
// DestroyContainer destroys the given container. If it is the root container,
// then the entire sandbox is destroyed.
func (s *Sandbox) DestroyContainer(cid string) error {
@@ -973,19 +1047,15 @@ func (s *Sandbox) waitForStopped() error {
// deviceFileForPlatform opens the device file for the given platform. If the
// platform does not need a device file, then nil is returned.
-func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) {
- var (
- f *os.File
- err error
- )
- switch p {
- case boot.PlatformKVM:
- f, err = kvm.OpenDevice()
- default:
- return nil, nil
+func deviceFileForPlatform(name string) (*os.File, error) {
+ p, err := platform.Lookup(name)
+ if err != nil {
+ return nil, err
}
+
+ f, err := p.OpenDevice()
if err != nil {
return nil, fmt.Errorf("opening device file for platform %q: %v", p, err)
}
- return f, err
+ return f, nil
}
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD
index 15476de6f..fbfb8e2f8 100644
--- a/runsc/specutils/BUILD
+++ b/runsc/specutils/BUILD
@@ -9,11 +9,8 @@ go_library(
"namespace.go",
"specutils.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/specutils",
- visibility = [
- "//runsc:__subpackages__",
- "//test:__subpackages__",
- ],
+ importpath = "gvisor.dev/gvisor/runsc/specutils",
+ visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/log",
diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go
index 1f3afb4e4..138aa4dd1 100644
--- a/runsc/specutils/fs.go
+++ b/runsc/specutils/fs.go
@@ -16,6 +16,7 @@ package specutils
import (
"fmt"
+ "math/bits"
"path"
"syscall"
@@ -86,7 +87,7 @@ func OptionsToFlags(opts []string) uint32 {
// PropOptionsToFlags converts propagation mount options to syscall flags.
// Propagation options cannot be set other with other options and must be
-// handled separatedly.
+// handled separately.
func PropOptionsToFlags(opts []string) uint32 {
return optionsToFlags(opts, propOptionsMap)
}
@@ -105,22 +106,30 @@ func optionsToFlags(opts []string, source map[string]mapping) uint32 {
return rv
}
-// ValidateMount validates that spec mounts are correct.
+// validateMount validates that spec mounts are correct.
func validateMount(mnt *specs.Mount) error {
if !path.IsAbs(mnt.Destination) {
return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt)
}
-
if mnt.Type == "bind" {
- for _, o := range mnt.Options {
- if ContainsStr(invalidOptions, o) {
- return fmt.Errorf("mount option %q is not supported: %v", o, mnt)
- }
- _, ok1 := optionsMap[o]
- _, ok2 := propOptionsMap[o]
- if !ok1 && !ok2 {
- return fmt.Errorf("unknown mount option %q", o)
- }
+ return ValidateMountOptions(mnt.Options)
+ }
+ return nil
+}
+
+// ValidateMountOptions validates that mount options are correct.
+func ValidateMountOptions(opts []string) error {
+ for _, o := range opts {
+ if ContainsStr(invalidOptions, o) {
+ return fmt.Errorf("mount option %q is not supported", o)
+ }
+ _, ok1 := optionsMap[o]
+ _, ok2 := propOptionsMap[o]
+ if !ok1 && !ok2 {
+ return fmt.Errorf("unknown mount option %q", o)
+ }
+ if err := validatePropagation(o); err != nil {
+ return err
}
}
return nil
@@ -133,5 +142,14 @@ func validateRootfsPropagation(opt string) error {
if flags&(syscall.MS_SLAVE|syscall.MS_PRIVATE) == 0 {
return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt)
}
+ return validatePropagation(opt)
+}
+
+func validatePropagation(opt string) error {
+ flags := PropOptionsToFlags([]string{opt})
+ exclusive := flags & (syscall.MS_SLAVE | syscall.MS_PRIVATE | syscall.MS_SHARED | syscall.MS_UNBINDABLE)
+ if bits.OnesCount32(exclusive) > 1 {
+ return fmt.Errorf("mount propagation options are mutually exclusive: %q", opt)
+ }
return nil
}
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index 7d194335c..d441419cb 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -25,7 +25,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// nsCloneFlag returns the clone flag that can be used to set a namespace of
@@ -204,7 +204,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) {
}
}
-// HasCapabilities returns true if the user has all capabilties in 'cs'.
+// HasCapabilities returns true if the user has all capabilities in 'cs'.
func HasCapabilities(cs ...capability.Cap) bool {
caps, err := capability.NewPid2(os.Getpid())
if err != nil {
@@ -220,3 +220,55 @@ func HasCapabilities(cs ...capability.Cap) bool {
}
return true
}
+
+// MaybeRunAsRoot ensures the process runs with capabilities needed to create a
+// sandbox, e.g. CAP_SYS_ADMIN, CAP_SYS_CHROOT, etc. If capabilities are needed,
+// it will create a new user namespace and re-execute the process as root
+// inside the namespace with the same arguments and environment.
+//
+// This function returns immediately when no new capability is needed. If
+// another process is executed, this function does not return; the current
+// process exits with the same exit code as the child.
+func MaybeRunAsRoot() error {
+ if HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT, capability.CAP_SETUID, capability.CAP_SETGID) {
+ return nil
+ }
+
+ // Current process doesn't have required capabilities, create user namespace
+ // and run as root inside the namespace to acquire capabilities.
+ log.Infof("*** Re-running as root in new user namespace ***")
+
+ cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
+
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
+ // Set current user/group as root inside the namespace. Since we may not
+ // have CAP_SETUID/CAP_SETGID, just map root to the current user/group.
+ UidMappings: []syscall.SysProcIDMap{
+ {ContainerID: 0, HostID: os.Getuid(), Size: 1},
+ },
+ GidMappings: []syscall.SysProcIDMap{
+ {ContainerID: 0, HostID: os.Getgid(), Size: 1},
+ },
+ Credential: &syscall.Credential{Uid: 0, Gid: 0},
+ GidMappingsEnableSetgroups: false,
+ }
+
+ cmd.Env = os.Environ()
+ cmd.Stdin = os.Stdin
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ if err := cmd.Run(); err != nil {
+ if exit, ok := err.(*exec.ExitError); ok {
+ if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
+ os.Exit(ws.ExitStatus())
+ }
+ log.Warningf("No wait status provided, exiting with -1: %v", err)
+ os.Exit(-1)
+ }
+ return fmt.Errorf("re-executing self: %v", err)
+ }
+ // Child completed with success.
+ os.Exit(0)
+ panic("unreachable")
+}
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 2888f55db..0b40e38a3 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -29,9 +29,9 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
// ExePath must point to runsc binary, which is normally the same binary. It's
@@ -394,7 +394,7 @@ func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) er
// DebugLogFile opens a log file using 'logPattern' as location. If 'logPattern'
// ends with '/', it's used as a directory with default file name.
-// 'logPattern' can contain variables that are substitued:
+// 'logPattern' can contain variables that are substituted:
// - %TIMESTAMP%: is replaced with a timestamp using the following format:
// <yyyymmdd-hhmmss.uuuuuu>
// - %COMMAND%: is replaced with 'command'
diff --git a/runsc/test/BUILD b/runsc/test/BUILD
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/runsc/test/BUILD
diff --git a/runsc/test/build_defs.bzl b/runsc/test/build_defs.bzl
new file mode 100644
index 000000000..ac28cc037
--- /dev/null
+++ b/runsc/test/build_defs.bzl
@@ -0,0 +1,19 @@
+"""Defines a rule for runsc test targets."""
+
+load("@io_bazel_rules_go//go:def.bzl", _go_test = "go_test")
+
+# runtime_test is a macro that will create targets to run the given test target
+# with different runtime options.
+def runtime_test(**kwargs):
+ """Runs the given test target with different runtime options."""
+ name = kwargs["name"]
+ _go_test(**kwargs)
+ kwargs["name"] = name + "_hostnet"
+ kwargs["args"] = ["--runtime-type=hostnet"]
+ _go_test(**kwargs)
+ kwargs["name"] = name + "_kvm"
+ kwargs["args"] = ["--runtime-type=kvm"]
+ _go_test(**kwargs)
+ kwargs["name"] = name + "_overlay"
+ kwargs["args"] = ["--runtime-type=overlay"]
+ _go_test(**kwargs)
diff --git a/runsc/test/image/BUILD b/runsc/test/image/BUILD
index e8b629c6a..58758fde5 100644
--- a/runsc/test/image/BUILD
+++ b/runsc/test/image/BUILD
@@ -1,8 +1,9 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//runsc/test:build_defs.bzl", "runtime_test")
package(licenses = ["notice"])
-go_test(
+runtime_test(
name = "image_test",
size = "large",
srcs = [
@@ -26,5 +27,5 @@ go_test(
go_library(
name = "image",
srcs = ["image.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/image",
+ importpath = "gvisor.dev/gvisor/runsc/test/image",
)
diff --git a/runsc/test/image/image_test.go b/runsc/test/image/image_test.go
index b969731b0..ddaa2c13b 100644
--- a/runsc/test/image/image_test.go
+++ b/runsc/test/image/image_test.go
@@ -32,7 +32,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func TestHelloWorld(t *testing.T) {
@@ -209,11 +209,14 @@ func TestMysql(t *testing.T) {
}
func TestPythonHello(t *testing.T) {
- if err := testutil.Pull("google/python-hello"); err != nil {
+ // TODO(b/136503277): Once we have more complete python runtime tests,
+ // we can drop this one.
+ const img = "gcr.io/gvisor-presubmit/python-hello"
+ if err := testutil.Pull(img); err != nil {
t.Fatalf("docker pull failed: %v", err)
}
d := testutil.MakeDocker("python-hello-test")
- if err := d.Run("-p", "8080", "google/python-hello"); err != nil {
+ if err := d.Run("-p", "8080", img); err != nil {
t.Fatalf("docker run failed: %v", err)
}
defer d.CleanUp()
diff --git a/runsc/test/integration/BUILD b/runsc/test/integration/BUILD
index 04ed885c6..12065617c 100644
--- a/runsc/test/integration/BUILD
+++ b/runsc/test/integration/BUILD
@@ -1,8 +1,9 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//runsc/test:build_defs.bzl", "runtime_test")
package(licenses = ["notice"])
-go_test(
+runtime_test(
name = "integration_test",
size = "large",
srcs = [
@@ -25,5 +26,5 @@ go_test(
go_library(
name = "integration",
srcs = ["integration.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/integration",
+ importpath = "gvisor.dev/gvisor/runsc/test/integration",
)
diff --git a/runsc/test/integration/exec_test.go b/runsc/test/integration/exec_test.go
index 7c0e61ac3..993136f96 100644
--- a/runsc/test/integration/exec_test.go
+++ b/runsc/test/integration/exec_test.go
@@ -34,8 +34,8 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func TestExecCapabilities(t *testing.T) {
diff --git a/runsc/test/integration/integration_test.go b/runsc/test/integration/integration_test.go
index c51cab3ae..7cef4b9dd 100644
--- a/runsc/test/integration/integration_test.go
+++ b/runsc/test/integration/integration_test.go
@@ -32,7 +32,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// httpRequestSucceeds sends a request to a given url and checks that the status is OK.
@@ -86,16 +86,17 @@ func TestLifeCycle(t *testing.T) {
}
func TestPauseResume(t *testing.T) {
+ const img = "gcr.io/gvisor-presubmit/python-hello"
if !testutil.IsPauseResumeSupported() {
t.Log("Pause/resume is not supported, skipping test.")
return
}
- if err := testutil.Pull("google/python-hello"); err != nil {
+ if err := testutil.Pull(img); err != nil {
t.Fatal("docker pull failed:", err)
}
d := testutil.MakeDocker("pause-resume-test")
- if err := d.Run("-p", "8080", "google/python-hello"); err != nil {
+ if err := d.Run("-p", "8080", img); err != nil {
t.Fatalf("docker run failed: %v", err)
}
defer d.CleanUp()
@@ -149,15 +150,16 @@ func TestPauseResume(t *testing.T) {
}
func TestCheckpointRestore(t *testing.T) {
+ const img = "gcr.io/gvisor-presubmit/python-hello"
if !testutil.IsPauseResumeSupported() {
t.Log("Pause/resume is not supported, skipping test.")
return
}
- if err := testutil.Pull("google/python-hello"); err != nil {
+ if err := testutil.Pull(img); err != nil {
t.Fatal("docker pull failed:", err)
}
d := testutil.MakeDocker("save-restore-test")
- if err := d.Run("-p", "8080", "google/python-hello"); err != nil {
+ if err := d.Run("-p", "8080", img); err != nil {
t.Fatalf("docker run failed: %v", err)
}
defer d.CleanUp()
diff --git a/runsc/test/integration/regression_test.go b/runsc/test/integration/regression_test.go
index 80bae9970..39b30e757 100644
--- a/runsc/test/integration/regression_test.go
+++ b/runsc/test/integration/regression_test.go
@@ -18,7 +18,7 @@ import (
"strings"
"testing"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// Test that UDS can be created using overlay when parent directory is in lower
diff --git a/runsc/test/root/BUILD b/runsc/test/root/BUILD
index 7ded78baa..500ef7b8e 100644
--- a/runsc/test/root/BUILD
+++ b/runsc/test/root/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "root",
srcs = ["root.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/root",
+ importpath = "gvisor.dev/gvisor/runsc/test/root",
)
go_test(
diff --git a/runsc/test/root/cgroup_test.go b/runsc/test/root/cgroup_test.go
index edb6dee1d..5392dc6e0 100644
--- a/runsc/test/root/cgroup_test.go
+++ b/runsc/test/root/cgroup_test.go
@@ -25,8 +25,8 @@ import (
"strings"
"testing"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func verifyPid(pid int, path string) error {
diff --git a/runsc/test/root/chroot_test.go b/runsc/test/root/chroot_test.go
index da2f473b9..d0f236580 100644
--- a/runsc/test/root/chroot_test.go
+++ b/runsc/test/root/chroot_test.go
@@ -31,8 +31,8 @@ import (
"testing"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// TestChroot verifies that the sandbox is chroot'd and that mounts are cleaned
diff --git a/runsc/test/root/crictl_test.go b/runsc/test/root/crictl_test.go
index 3cc176104..515ae2df1 100644
--- a/runsc/test/root/crictl_test.go
+++ b/runsc/test/root/crictl_test.go
@@ -29,9 +29,9 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/root/testdata"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/root/testdata"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// Tests for crictl have to be run as root (rather than in a user namespace)
diff --git a/runsc/test/root/testdata/BUILD b/runsc/test/root/testdata/BUILD
index 7f272dcd3..80dc5f214 100644
--- a/runsc/test/root/testdata/BUILD
+++ b/runsc/test/root/testdata/BUILD
@@ -11,7 +11,7 @@ go_library(
"httpd_mount_paths.go",
"sandbox.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/root/testdata",
+ importpath = "gvisor.dev/gvisor/runsc/test/root/testdata",
visibility = [
"//visibility:public",
],
diff --git a/runsc/test/testutil/BUILD b/runsc/test/testutil/BUILD
index ddec81444..327e7ca4d 100644
--- a/runsc/test/testutil/BUILD
+++ b/runsc/test/testutil/BUILD
@@ -10,7 +10,7 @@ go_library(
"testutil.go",
"testutil_race.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/testutil",
+ importpath = "gvisor.dev/gvisor/runsc/test/testutil",
visibility = ["//:sandbox"],
deps = [
"//runsc/boot",
@@ -18,6 +18,5 @@ go_library(
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_kr_pty//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
- "@com_github_syndtr_gocapability//capability:go_default_library",
],
)
diff --git a/runsc/test/testutil/docker.go b/runsc/test/testutil/docker.go
index 81f5a9ef0..3f3e191b0 100644
--- a/runsc/test/testutil/docker.go
+++ b/runsc/test/testutil/docker.go
@@ -15,6 +15,7 @@
package testutil
import (
+ "flag"
"fmt"
"io/ioutil"
"log"
@@ -30,10 +31,15 @@ import (
"github.com/kr/pty"
)
+var runtimeType = flag.String("runtime-type", "", "specify which runtime to use: kvm, hostnet, overlay")
+
func getRuntime() string {
r, ok := os.LookupEnv("RUNSC_RUNTIME")
if !ok {
- return "runsc-test"
+ r = "runsc-test"
+ }
+ if *runtimeType != "" {
+ r += "-" + *runtimeType
}
return r
}
@@ -197,7 +203,7 @@ func (d *Docker) Stop() error {
}
// Run calls 'docker run' with the arguments provided. The container starts
-// running in the backgroud and the call returns immediately.
+// running in the background and the call returns immediately.
func (d *Docker) Run(args ...string) error {
a := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-d"}
a = append(a, args...)
diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go
index 727b648a6..a98675bfc 100644
--- a/runsc/test/testutil/testutil.go
+++ b/runsc/test/testutil/testutil.go
@@ -30,7 +30,6 @@ import (
"os/exec"
"os/signal"
"path/filepath"
- "runtime"
"strings"
"sync"
"sync/atomic"
@@ -39,9 +38,8 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
)
func init() {
@@ -134,6 +132,7 @@ func TestConfig() *boot.Config {
LogPackets: true,
Network: boot.NetworkNone,
Strace: true,
+ Platform: "ptrace",
FileAccess: boot.FileAccessExclusive,
TestOnlyAllowRunAsCurrentUserWithoutChroot: true,
NumNetworkChannels: 1,
@@ -284,54 +283,6 @@ func WaitForHTTP(port int, timeout time.Duration) error {
return Poll(cb, timeout)
}
-// RunAsRoot ensures the test runs with CAP_SYS_ADMIN and CAP_SYS_CHROOT. If
-// needed it will create a new user namespace and re-execute the test as root
-// inside of the namespace. This function returns when it's running as root. If
-// it needs to create another process, it will exit from there and not return.
-func RunAsRoot() {
- if specutils.HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT) {
- return
- }
-
- fmt.Println("*** Re-running test as root in new user namespace ***")
-
- // Current process doesn't have CAP_SYS_ADMIN, create user namespace and run
- // as root inside that namespace to get it.
- runtime.LockOSThread()
- defer runtime.UnlockOSThread()
-
- cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
- cmd.SysProcAttr = &syscall.SysProcAttr{
- Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
- // Set current user/group as root inside the namespace.
- UidMappings: []syscall.SysProcIDMap{
- {ContainerID: 0, HostID: os.Getuid(), Size: 1},
- },
- GidMappings: []syscall.SysProcIDMap{
- {ContainerID: 0, HostID: os.Getgid(), Size: 1},
- },
- GidMappingsEnableSetgroups: false,
- Credential: &syscall.Credential{
- Uid: 0,
- Gid: 0,
- },
- }
- cmd.Env = os.Environ()
- cmd.Stdin = os.Stdin
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- if err := cmd.Run(); err != nil {
- if exit, ok := err.(*exec.ExitError); ok {
- if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
- os.Exit(ws.ExitStatus())
- }
- os.Exit(-1)
- }
- panic(fmt.Sprint("error running child process:", err.Error()))
- }
- os.Exit(0)
-}
-
// Reaper reaps child processes.
type Reaper struct {
// mu protects ch, which will be nil if the reaper is not running.