summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/BUILD10
-rw-r--r--runsc/boot/BUILD9
-rw-r--r--runsc/boot/compat.go16
-rw-r--r--runsc/boot/compat_amd64.go2
-rw-r--r--runsc/boot/compat_test.go2
-rw-r--r--runsc/boot/config.go15
-rw-r--r--runsc/boot/controller.go35
-rw-r--r--runsc/boot/debug.go2
-rw-r--r--runsc/boot/events.go4
-rw-r--r--runsc/boot/fds.go12
-rw-r--r--runsc/boot/filter/BUILD2
-rw-r--r--runsc/boot/filter/config.go10
-rw-r--r--runsc/boot/filter/extra_filters.go2
-rw-r--r--runsc/boot/filter/extra_filters_msan.go2
-rw-r--r--runsc/boot/filter/extra_filters_race.go2
-rw-r--r--runsc/boot/filter/filter.go10
-rw-r--r--runsc/boot/fs.go313
-rw-r--r--runsc/boot/fs_test.go193
-rw-r--r--runsc/boot/limits.go4
-rw-r--r--runsc/boot/loader.go153
-rw-r--r--runsc/boot/loader_test.go18
-rw-r--r--runsc/boot/network.go61
-rw-r--r--runsc/boot/pprof.go18
-rw-r--r--runsc/boot/strace.go2
-rw-r--r--runsc/boot/user.go146
-rw-r--r--runsc/boot/user_test.go253
-rw-r--r--runsc/cgroup/BUILD2
-rw-r--r--runsc/cgroup/cgroup.go4
-rw-r--r--runsc/cmd/BUILD6
-rw-r--r--runsc/cmd/boot.go28
-rw-r--r--runsc/cmd/capability.go2
-rw-r--r--runsc/cmd/capability_test.go12
-rw-r--r--runsc/cmd/checkpoint.go8
-rw-r--r--runsc/cmd/chroot.go4
-rw-r--r--runsc/cmd/cmd.go23
-rw-r--r--runsc/cmd/create.go15
-rw-r--r--runsc/cmd/debug.go6
-rw-r--r--runsc/cmd/delete.go6
-rw-r--r--runsc/cmd/delete_test.go2
-rw-r--r--runsc/cmd/do.go47
-rw-r--r--runsc/cmd/error.go72
-rw-r--r--runsc/cmd/events.go6
-rw-r--r--runsc/cmd/exec.go44
-rw-r--r--runsc/cmd/exec_test.go8
-rw-r--r--runsc/cmd/gofer.go14
-rw-r--r--runsc/cmd/help.go126
-rw-r--r--runsc/cmd/kill.go4
-rw-r--r--runsc/cmd/list.go4
-rw-r--r--runsc/cmd/pause.go4
-rw-r--r--runsc/cmd/ps.go6
-rw-r--r--runsc/cmd/restore.go16
-rw-r--r--runsc/cmd/resume.go4
-rw-r--r--runsc/cmd/run.go14
-rw-r--r--runsc/cmd/start.go5
-rw-r--r--runsc/cmd/state.go6
-rw-r--r--runsc/cmd/syscalls.go347
-rw-r--r--runsc/cmd/wait.go4
-rw-r--r--runsc/console/BUILD2
-rw-r--r--runsc/container/BUILD2
-rw-r--r--runsc/container/console_test.go8
-rw-r--r--runsc/container/container.go12
-rw-r--r--runsc/container/container_test.go15
-rw-r--r--runsc/container/hook.go2
-rw-r--r--runsc/container/multi_container_test.go307
-rw-r--r--runsc/container/shared_volume_test.go8
-rw-r--r--runsc/container/test_app/test_app.go2
-rw-r--r--runsc/fsgofer/BUILD2
-rw-r--r--runsc/fsgofer/filter/BUILD2
-rw-r--r--runsc/fsgofer/filter/config.go4
-rw-r--r--runsc/fsgofer/filter/extra_filters.go2
-rw-r--r--runsc/fsgofer/filter/extra_filters_msan.go4
-rw-r--r--runsc/fsgofer/filter/extra_filters_race.go4
-rw-r--r--runsc/fsgofer/filter/filter.go2
-rw-r--r--runsc/fsgofer/fsgofer.go10
-rw-r--r--runsc/fsgofer/fsgofer_test.go4
-rw-r--r--runsc/fsgofer/fsgofer_unsafe.go4
-rw-r--r--runsc/main.go153
-rw-r--r--runsc/sandbox/BUILD2
-rw-r--r--runsc/sandbox/network.go129
-rw-r--r--runsc/sandbox/sandbox.go112
-rw-r--r--runsc/specutils/BUILD7
-rw-r--r--runsc/specutils/fs.go40
-rw-r--r--runsc/specutils/namespace.go54
-rw-r--r--runsc/specutils/specutils.go6
-rw-r--r--runsc/test/image/BUILD2
-rw-r--r--runsc/test/image/image_test.go2
-rw-r--r--runsc/test/integration/BUILD3
-rw-r--r--runsc/test/integration/exec_test.go27
-rw-r--r--runsc/test/integration/integration_test.go2
-rw-r--r--runsc/test/integration/regression_test.go45
-rw-r--r--runsc/test/root/BUILD2
-rw-r--r--runsc/test/root/cgroup_test.go4
-rw-r--r--runsc/test/root/chroot_test.go4
-rw-r--r--runsc/test/root/crictl_test.go6
-rw-r--r--runsc/test/root/testdata/BUILD2
-rw-r--r--runsc/test/testutil/BUILD3
-rw-r--r--runsc/test/testutil/testutil.go55
97 files changed, 2586 insertions, 615 deletions
diff --git a/runsc/BUILD b/runsc/BUILD
index af8e928c5..8a57c597b 100644
--- a/runsc/BUILD
+++ b/runsc/BUILD
@@ -1,6 +1,4 @@
-package(
- licenses = ["notice"], # Apache 2.0
-)
+package(licenses = ["notice"]) # Apache 2.0
load("@io_bazel_rules_go//go:def.bzl", "go_binary")
load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_deb", "pkg_tar")
@@ -84,8 +82,9 @@ pkg_tar(
genrule(
name = "deb-version",
outs = ["version.txt"],
- cmd = "cat bazel-out/volatile-status.txt | grep VERSION | sed 's/^[^0-9]*//' >$@",
+ cmd = "$(location :runsc) -version | grep 'runsc version' | sed 's/^[^0-9]*//' > $@",
stamp = 1,
+ tools = [":runsc"],
)
pkg_deb(
@@ -98,4 +97,7 @@ pkg_deb(
package = "runsc",
postinst = "debian/postinst.sh",
version_file = ":version.txt",
+ visibility = [
+ "//visibility:public",
+ ],
)
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index df9907e52..7ec15a524 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -16,9 +16,11 @@ go_library(
"limits.go",
"loader.go",
"network.go",
+ "pprof.go",
"strace.go",
+ "user.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/boot",
+ importpath = "gvisor.dev/gvisor/runsc/boot",
visibility = [
"//runsc:__subpackages__",
"//test:__subpackages__",
@@ -30,6 +32,7 @@ go_library(
"//pkg/cpuid",
"//pkg/eventchannel",
"//pkg/log",
+ "//pkg/memutil",
"//pkg/rand",
"//pkg/sentry/arch",
"//pkg/sentry/arch:registers_go_proto",
@@ -51,7 +54,6 @@ go_library(
"//pkg/sentry/kernel/kdefs",
"//pkg/sentry/limits",
"//pkg/sentry/loader",
- "//pkg/sentry/memutil",
"//pkg/sentry/pgalloc",
"//pkg/sentry/platform",
"//pkg/sentry/platform/kvm",
@@ -68,6 +70,7 @@ go_library(
"//pkg/sentry/time",
"//pkg/sentry/unimpl:unimplemented_syscall_go_proto",
"//pkg/sentry/usage",
+ "//pkg/sentry/usermem",
"//pkg/sentry/watchdog",
"//pkg/syserror",
"//pkg/tcpip",
@@ -94,7 +97,9 @@ go_test(
size = "small",
srcs = [
"compat_test.go",
+ "fs_test.go",
"loader_test.go",
+ "user_test.go",
],
embed = [":boot"],
deps = [
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index c369e4d64..07e35ab10 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -21,14 +21,14 @@ import (
"syscall"
"github.com/golang/protobuf/proto"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
- ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/strace"
- spb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+ ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
+ spb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
)
func initCompatLogs(fd int) error {
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 99df5e614..43cd0db94 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -17,7 +17,7 @@ package boot
import (
"fmt"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
)
// reportLimit is the max number of events that should be reported per tracker.
diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go
index ccec3d20c..388298d8d 100644
--- a/runsc/boot/compat_test.go
+++ b/runsc/boot/compat_test.go
@@ -17,7 +17,7 @@ package boot
import (
"testing"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
)
func TestOnceTracker(t *testing.T) {
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 15f624f9b..6d276f207 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -19,7 +19,7 @@ import (
"strconv"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
)
// PlatformType tells which platform to use.
@@ -221,6 +221,17 @@ type Config struct {
// user, and without chrooting the sandbox process. This can be
// necessary in test environments that have limited capabilities.
TestOnlyAllowRunAsCurrentUserWithoutChroot bool
+
+ // NumNetworkChannels controls the number of AF_PACKET sockets that map
+ // to the same underlying network device. This allows netstack to better
+ // scale for high throughput use cases.
+ NumNetworkChannels int
+
+ // Rootless allows the sandbox to be started with a user that is not root.
+ // Defense in depth measures are weaker with rootless. Specifically, the
+ // sandbox and Gofer process run as root inside a user namespace with root
+ // mapped to the caller's user.
+ Rootless bool
}
// ToFlags returns a slice of flags that correspond to the given Config.
@@ -244,6 +255,8 @@ func (c *Config) ToFlags() []string {
"--panic-signal=" + strconv.Itoa(c.PanicSignal),
"--profile=" + strconv.FormatBool(c.ProfileEnable),
"--net-raw=" + strconv.FormatBool(c.EnableRaw),
+ "--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
+ "--rootless=" + strconv.FormatBool(c.Rootless),
}
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
// Only include if set since it is never to be used by users.
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index a277145b1..7f41a9c53 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -22,17 +22,17 @@ import (
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/state"
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/state"
+ "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/urpc"
)
const (
@@ -340,7 +340,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
cm.l.k = k
// Set up the restore environment.
- mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k)
+ mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k, cm.l.mountHints)
renv, err := mntr.createRestoreEnvironment(cm.l.conf)
if err != nil {
return fmt.Errorf("creating RestoreEnvironment: %v", err)
@@ -359,6 +359,17 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
return fmt.Errorf("file cannot be empty")
}
+ if cm.l.conf.ProfileEnable {
+ // initializePProf opens /proc/self/maps, so has to be
+ // called before installing seccomp filters.
+ initializePProf()
+ }
+
+ // Seccomp filters have to be applied before parsing the state file.
+ if err := cm.l.installSeccompFilters(); err != nil {
+ return err
+ }
+
// Load the state.
loadOpts := state.LoadOpts{Source: specFile}
if err := loadOpts.Load(k, networkStack); err != nil {
diff --git a/runsc/boot/debug.go b/runsc/boot/debug.go
index 79f7387ac..1fb32c527 100644
--- a/runsc/boot/debug.go
+++ b/runsc/boot/debug.go
@@ -15,7 +15,7 @@
package boot
import (
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
type debug struct {
diff --git a/runsc/boot/events.go b/runsc/boot/events.go
index ffd99f5e9..422f4da00 100644
--- a/runsc/boot/events.go
+++ b/runsc/boot/events.go
@@ -15,8 +15,8 @@
package boot
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
)
// Event struct for encoding the event data to JSON. Corresponds to runc's
diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go
index 0811e10f4..59e1b46ec 100644
--- a/runsc/boot/fds.go
+++ b/runsc/boot/fds.go
@@ -17,12 +17,12 @@ package boot
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/kdefs"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
)
// createFDMap creates an FD map that contains stdin, stdout, and stderr. If
diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD
index 3b6020cf3..07898f3de 100644
--- a/runsc/boot/filter/BUILD
+++ b/runsc/boot/filter/BUILD
@@ -11,7 +11,7 @@ go_library(
"extra_filters_race.go",
"filter.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/boot/filter",
+ importpath = "gvisor.dev/gvisor/runsc/boot/filter",
visibility = [
"//runsc/boot:__subpackages__",
],
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 652da1cef..e4ccb40d9 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -19,9 +19,9 @@ import (
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
)
// allowedSyscalls is the set of syscalls executed by the Sentry to the host OS.
@@ -246,6 +246,10 @@ var allowedSyscalls = seccomp.SyscallRules{
},
syscall.SYS_SETITIMER: {},
syscall.SYS_SHUTDOWN: []seccomp.Rule{
+ // Used by fs/host to shutdown host sockets.
+ {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RD)},
+ {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_WR)},
+ // Used by unet to shutdown connections.
{seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
},
syscall.SYS_SIGALTSTACK: {},
diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go
index 5c5ec4e06..1056cd314 100644
--- a/runsc/boot/filter/extra_filters.go
+++ b/runsc/boot/filter/extra_filters.go
@@ -17,7 +17,7 @@
package filter
import (
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by
diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go
index ac5a0f1aa..5e5a3c998 100644
--- a/runsc/boot/filter/extra_filters_msan.go
+++ b/runsc/boot/filter/extra_filters_msan.go
@@ -19,7 +19,7 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by MSAN.
diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go
index ba3c1ce87..d5bee4453 100644
--- a/runsc/boot/filter/extra_filters_race.go
+++ b/runsc/boot/filter/extra_filters_race.go
@@ -19,7 +19,7 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by TSAN.
diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go
index 17479e0dd..468481f29 100644
--- a/runsc/boot/filter/filter.go
+++ b/runsc/boot/filter/filter.go
@@ -20,11 +20,11 @@ package filter
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/kvm"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace"
)
// Options are seccomp filter related options.
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 939f2419c..588317f4f 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -18,29 +18,30 @@ import (
"fmt"
"path"
"path/filepath"
+ "sort"
"strconv"
"strings"
"syscall"
// Include filesystem types that OCI spec might mount.
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/dev"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/proc"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/sys"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/tty"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/gofer"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -50,6 +51,9 @@ const (
// Device name for root mount.
rootDevice = "9pfs-/"
+ // MountPrefix is the annotation prefix for mount hints.
+ MountPrefix = "gvisor.dev/spec/mount"
+
// ChildContainersDir is the directory where child container root
// filesystems are mounted.
ChildContainersDir = "/__runsc_containers__"
@@ -292,6 +296,174 @@ func (f *fdDispenser) empty() bool {
return len(f.fds) == 0
}
+type shareType int
+
+const (
+ invalid shareType = iota
+
+ // container shareType indicates that the mount is used by a single container.
+ container
+
+ // pod shareType indicates that the mount is used by more than one container
+ // inside the pod.
+ pod
+
+ // shared shareType indicates that the mount can also be shared with a process
+ // outside the pod, e.g. NFS.
+ shared
+)
+
+func parseShare(val string) (shareType, error) {
+ switch val {
+ case "container":
+ return container, nil
+ case "pod":
+ return pod, nil
+ case "shared":
+ return shared, nil
+ default:
+ return 0, fmt.Errorf("invalid share value %q", val)
+ }
+}
+
+func (s shareType) String() string {
+ switch s {
+ case invalid:
+ return "invalid"
+ case container:
+ return "container"
+ case pod:
+ return "pod"
+ case shared:
+ return "shared"
+ default:
+ return fmt.Sprintf("invalid share value %d", s)
+ }
+}
+
+// mountHint represents extra information about mounts that are provided via
+// annotations. They can override mount type, and provide sharing information
+// so that mounts can be correctly shared inside the pod.
+type mountHint struct {
+ name string
+ share shareType
+ mount specs.Mount
+
+ // root is the inode where the volume is mounted. For mounts with 'pod' share
+ // the volume is mounted once and then bind mounted inside the containers.
+ root *fs.Inode
+}
+
+func (m *mountHint) setField(key, val string) error {
+ switch key {
+ case "source":
+ if len(val) == 0 {
+ return fmt.Errorf("source cannot be empty")
+ }
+ m.mount.Source = val
+ case "type":
+ return m.setType(val)
+ case "share":
+ share, err := parseShare(val)
+ if err != nil {
+ return err
+ }
+ m.share = share
+ case "options":
+ return m.setOptions(val)
+ default:
+ return fmt.Errorf("invalid mount annotation: %s=%s", key, val)
+ }
+ return nil
+}
+
+func (m *mountHint) setType(val string) error {
+ switch val {
+ case "tmpfs", "bind":
+ m.mount.Type = val
+ default:
+ return fmt.Errorf("invalid type %q", val)
+ }
+ return nil
+}
+
+func (m *mountHint) setOptions(val string) error {
+ opts := strings.Split(val, ",")
+ if err := specutils.ValidateMountOptions(opts); err != nil {
+ return err
+ }
+ // Sort options so they can be compared with container mount options later on.
+ sort.Strings(opts)
+ m.mount.Options = opts
+ return nil
+}
+
+func (m *mountHint) isSupported() bool {
+ return m.mount.Type == tmpfs && m.share == pod
+}
+
+// podMountHints contains a collection of mountHints for the pod.
+type podMountHints struct {
+ mounts map[string]*mountHint
+}
+
+func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
+ mnts := make(map[string]*mountHint)
+ for k, v := range spec.Annotations {
+ // Look for 'gvisor.dev/spec/mount' annotations and parse them.
+ if strings.HasPrefix(k, MountPrefix) {
+ parts := strings.Split(k, "/")
+ if len(parts) != 5 {
+ return nil, fmt.Errorf("invalid mount annotation: %s=%s", k, v)
+ }
+ name := parts[3]
+ if len(name) == 0 || path.Clean(name) != name {
+ return nil, fmt.Errorf("invalid mount name: %s", name)
+ }
+ mnt := mnts[name]
+ if mnt == nil {
+ mnt = &mountHint{name: name}
+ mnts[name] = mnt
+ }
+ if err := mnt.setField(parts[4], v); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ // Validate all hints after done parsing.
+ for name, m := range mnts {
+ log.Infof("Mount annotation found, name: %s, source: %q, type: %s, share: %v", name, m.mount.Source, m.mount.Type, m.share)
+ if m.share == invalid {
+ return nil, fmt.Errorf("share field for %q has not been set", m.name)
+ }
+ if len(m.mount.Source) == 0 {
+ return nil, fmt.Errorf("source field for %q has not been set", m.name)
+ }
+ if len(m.mount.Type) == 0 {
+ return nil, fmt.Errorf("type field for %q has not been set", m.name)
+ }
+
+ // Check for duplicate mount sources.
+ for name2, m2 := range mnts {
+ if name != name2 && m.mount.Source == m2.mount.Source {
+ return nil, fmt.Errorf("mounts %q and %q have the same mount source %q", m.name, m2.name, m.mount.Source)
+ }
+ }
+ }
+
+ return &podMountHints{mounts: mnts}, nil
+}
+
+func (p *podMountHints) findMount(mount specs.Mount) *mountHint {
+ for _, m := range p.mounts {
+ if m.mount.Source == mount.Source {
+ return m
+ }
+ }
+ return nil
+}
+
type containerMounter struct {
// cid is the container ID. May be set to empty for the root container.
cid string
@@ -306,15 +478,18 @@ type containerMounter struct {
fds fdDispenser
k *kernel.Kernel
+
+ hints *podMountHints
}
-func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel) *containerMounter {
+func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
return &containerMounter{
cid: cid,
root: spec.Root,
mounts: compileMounts(spec),
fds: fdDispenser{fds: goferFDs},
k: k,
+ hints: hints,
}
}
@@ -476,6 +651,15 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error
// 'setMountNS' is called after namespace is created. It must set the mount NS
// to 'rootCtx'.
func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx context.Context, conf *Config, setMountNS func(*fs.MountNamespace)) error {
+ for _, hint := range c.hints.mounts {
+ log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
+ inode, err := c.mountSharedMaster(rootCtx, conf, hint)
+ if err != nil {
+ return fmt.Errorf("mounting shared master %q: %v", hint.name, err)
+ }
+ hint.root = inode
+ }
+
// Create a tmpfs mount where we create and mount a root filesystem for
// each child container.
c.mounts = append(c.mounts, specs.Mount{
@@ -498,21 +682,57 @@ func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx c
return c.mountSubmounts(rootCtx, conf, mns, root)
}
+// mountSharedMaster mounts the master of a volume that is shared among
+// containers in a pod. It returns the root mount's inode.
+func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config, hint *mountHint) (*fs.Inode, error) {
+ // Map mount type to filesystem name, and parse out the options that we are
+ // capable of dealing with.
+ fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount)
+ if err != nil {
+ return nil, err
+ }
+ if len(fsName) == 0 {
+ return nil, fmt.Errorf("mount type not supported %q", hint.mount.Type)
+ }
+
+ // Mount with revalidate because it's shared among containers.
+ opts = append(opts, "cache=revalidate")
+
+ // All filesystem names should have been mapped to something we know.
+ filesystem := mustFindFilesystem(fsName)
+
+ mf := mountFlags(hint.mount.Options)
+ if useOverlay {
+ // All writes go to upper, be paranoid and make lower readonly.
+ mf.ReadOnly = true
+ }
+
+ inode, err := filesystem.Mount(ctx, mountDevice(hint.mount), mf, strings.Join(opts, ","), nil)
+ if err != nil {
+ return nil, fmt.Errorf("creating mount %q: %v", hint.name, err)
+ }
+
+ if useOverlay {
+ log.Debugf("Adding overlay on top of shared mount %q", hint.name)
+ inode, err = addOverlay(ctx, conf, inode, hint.mount.Type, mf)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return inode, nil
+}
+
// createRootMount creates the root filesystem.
func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*fs.Inode, error) {
// First construct the filesystem from the spec.Root.
mf := fs.MountSourceFlags{ReadOnly: c.root.Readonly || conf.Overlay}
- var (
- rootInode *fs.Inode
- err error
- )
-
fd := c.fds.remove()
log.Infof("Mounting root over 9P, ioFD: %d", fd)
p9FS := mustFindFilesystem("9p")
opts := p9MountOptions(fd, conf.FileAccess)
- rootInode, err = p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
+ rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
if err != nil {
return nil, fmt.Errorf("creating root mount point: %v", err)
}
@@ -579,8 +799,14 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error {
for _, m := range c.mounts {
- if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
- return fmt.Errorf("mount submount %q: %v", m.Destination, err)
+ if hint := c.hints.findMount(m); hint != nil && hint.isSupported() {
+ if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil {
+ return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err)
+ }
+ } else {
+ if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
+ return fmt.Errorf("mount submount %q: %v", m.Destination, err)
+ }
}
}
@@ -653,6 +879,37 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
return nil
}
+// mountSharedSubmount bind-mounts a previously mounted volume that is shared
+// among containers in the same pod.
+func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount specs.Mount, source *mountHint) error {
+ // For now enforce that all options are the same. Once bind mount is properly
+ // supported, then we should ensure the master is less restrictive than the
+ // container, e.g. master can be 'rw' while container mounts as 'ro'.
+ if len(mount.Options) != len(source.mount.Options) {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options)
+ }
+ sort.Strings(mount.Options)
+ for i, opt := range mount.Options {
+ if opt != source.mount.Options[i] {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options)
+ }
+ }
+
+ maxTraversals := uint(0)
+ target, err := mns.FindInode(ctx, root, root, mount.Destination, &maxTraversals)
+ if err != nil {
+ return fmt.Errorf("can't find mount destination %q: %v", mount.Destination, err)
+ }
+ defer target.DecRef()
+
+ if err := mns.Mount(ctx, target, source.root); err != nil {
+ return fmt.Errorf("bind mount %q error: %v", mount.Destination, err)
+ }
+
+ log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name)
+ return nil
+}
+
// addRestoreMount adds a mount to the MountSources map used for restoring a
// checkpointed container.
func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount) error {
@@ -678,8 +935,8 @@ func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnviron
return nil
}
-// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding the mounts
-// to the environment.
+// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding
+// the mounts to the environment.
func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEnvironment, error) {
renv := &fs.RestoreEnvironment{
MountSources: make(map[string][]fs.MountArgs),
@@ -730,7 +987,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
// the host /tmp, but this is a nice optimization, and fixes some apps that call
// mknod in /tmp. It's unsafe to mount tmpfs if:
-// 1. /tmp is mounted explictly: we should not override user's wish
+// 1. /tmp is mounted explicitly: we should not override user's wish
// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp
//
// Note that when there are submounts inside of '/tmp', directories for the
diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go
new file mode 100644
index 000000000..49ab34b33
--- /dev/null
+++ b/runsc/boot/fs_test.go
@@ -0,0 +1,193 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "path"
+ "reflect"
+ "strings"
+ "testing"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// TestPodMountHintsHappy verifies that well-formed mount annotations are
+// parsed into pod mount hints carrying the expected name, source, type, share
+// and options.
+func TestPodMountHintsHappy(t *testing.T) {
+	spec := &specs.Spec{
+		Annotations: map[string]string{
+			path.Join(MountPrefix, "mount1", "source"): "foo",
+			path.Join(MountPrefix, "mount1", "type"):   "tmpfs",
+			path.Join(MountPrefix, "mount1", "share"):  "pod",
+
+			path.Join(MountPrefix, "mount2", "source"):  "bar",
+			path.Join(MountPrefix, "mount2", "type"):    "bind",
+			path.Join(MountPrefix, "mount2", "share"):   "container",
+			path.Join(MountPrefix, "mount2", "options"): "rw,private",
+		},
+	}
+	podHints, err := newPodMountHints(spec)
+	if err != nil {
+		// podHints is dereferenced below, so continuing after a failure
+		// would only turn the real error into a nil-pointer panic.
+		t.Fatalf("newPodMountHints failed: %v", err)
+	}
+
+	// Check that fields were set correctly.
+	mount1 := podHints.mounts["mount1"]
+	if want := "mount1"; want != mount1.name {
+		t.Errorf("mount1 name, want: %q, got: %q", want, mount1.name)
+	}
+	if want := "foo"; want != mount1.mount.Source {
+		t.Errorf("mount1 source, want: %q, got: %q", want, mount1.mount.Source)
+	}
+	if want := "tmpfs"; want != mount1.mount.Type {
+		t.Errorf("mount1 type, want: %q, got: %q", want, mount1.mount.Type)
+	}
+	if want := pod; want != mount1.share {
+		t.Errorf("mount1 share, want: %q, got: %q", want, mount1.share)
+	}
+	// No "options" annotation was given for mount1, so none are expected.
+	if want := []string(nil); !reflect.DeepEqual(want, mount1.mount.Options) {
+		t.Errorf("mount1 options, want: %q, got: %q", want, mount1.mount.Options)
+	}
+
+	mount2 := podHints.mounts["mount2"]
+	if want := "mount2"; want != mount2.name {
+		t.Errorf("mount2 name, want: %q, got: %q", want, mount2.name)
+	}
+	if want := "bar"; want != mount2.mount.Source {
+		t.Errorf("mount2 source, want: %q, got: %q", want, mount2.mount.Source)
+	}
+	if want := "bind"; want != mount2.mount.Type {
+		t.Errorf("mount2 type, want: %q, got: %q", want, mount2.mount.Type)
+	}
+	if want := container; want != mount2.share {
+		t.Errorf("mount2 share, want: %q, got: %q", want, mount2.share)
+	}
+	// The expected order differs from the annotation ("rw,private").
+	// NOTE(review): presumably newPodMountHints sorts the options - confirm.
+	if want := []string{"private", "rw"}; !reflect.DeepEqual(want, mount2.mount.Options) {
+		t.Errorf("mount2 options, want: %q, got: %q", want, mount2.mount.Options)
+	}
+}
+
+// TestPodMountHintsErrors feeds malformed mount annotations to
+// newPodMountHints and checks that each one is rejected with an error
+// containing the expected text, and that no hints are returned.
+func TestPodMountHintsErrors(t *testing.T) {
+	// key builds the annotation key for one field of a named mount.
+	key := func(mount, field string) string {
+		return path.Join(MountPrefix, mount, field)
+	}
+
+	type testCase struct {
+		name        string
+		annotations map[string]string
+		// wantErr is a substring that the returned error must contain.
+		wantErr string
+	}
+
+	cases := []testCase{
+		{
+			name: "too short",
+			annotations: map[string]string{
+				path.Join(MountPrefix, "mount1"): "foo",
+			},
+			wantErr: "invalid mount annotation",
+		},
+		{
+			name: "no name",
+			annotations: map[string]string{
+				MountPrefix + "//source": "foo",
+			},
+			wantErr: "invalid mount name",
+		},
+		{
+			name: "missing source",
+			annotations: map[string]string{
+				key("mount1", "type"):  "tmpfs",
+				key("mount1", "share"): "pod",
+			},
+			wantErr: "source field",
+		},
+		{
+			name: "missing type",
+			annotations: map[string]string{
+				key("mount1", "source"): "foo",
+				key("mount1", "share"):  "pod",
+			},
+			wantErr: "type field",
+		},
+		{
+			name: "missing share",
+			annotations: map[string]string{
+				key("mount1", "source"): "foo",
+				key("mount1", "type"):   "tmpfs",
+			},
+			wantErr: "share field",
+		},
+		{
+			name: "invalid field name",
+			annotations: map[string]string{
+				key("mount1", "invalid"): "foo",
+			},
+			wantErr: "invalid mount annotation",
+		},
+		{
+			name: "invalid source",
+			annotations: map[string]string{
+				key("mount1", "source"): "",
+				key("mount1", "type"):   "tmpfs",
+				key("mount1", "share"):  "pod",
+			},
+			wantErr: "source cannot be empty",
+		},
+		{
+			name: "invalid type",
+			annotations: map[string]string{
+				key("mount1", "source"): "foo",
+				key("mount1", "type"):   "invalid-type",
+				key("mount1", "share"):  "pod",
+			},
+			wantErr: "invalid type",
+		},
+		{
+			name: "invalid share",
+			annotations: map[string]string{
+				key("mount1", "source"): "foo",
+				key("mount1", "type"):   "tmpfs",
+				key("mount1", "share"):  "invalid-share",
+			},
+			wantErr: "invalid share",
+		},
+		{
+			name: "invalid options",
+			annotations: map[string]string{
+				key("mount1", "source"):  "foo",
+				key("mount1", "type"):    "tmpfs",
+				key("mount1", "share"):   "pod",
+				key("mount1", "options"): "invalid-option",
+			},
+			wantErr: "unknown mount option",
+		},
+		{
+			name: "duplicate source",
+			annotations: map[string]string{
+				key("mount1", "source"): "foo",
+				key("mount1", "type"):   "tmpfs",
+				key("mount1", "share"):  "pod",
+
+				key("mount2", "source"): "foo",
+				key("mount2", "type"):   "bind",
+				key("mount2", "share"):  "container",
+			},
+			wantErr: "have the same mount source",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			hints, err := newPodMountHints(&specs.Spec{Annotations: tc.annotations})
+			if err == nil || !strings.Contains(err.Error(), tc.wantErr) {
+				t.Errorf("newPodMountHints invalid error, want: .*%s.*, got: %v", tc.wantErr, err)
+			}
+			if hints != nil {
+				t.Errorf("newPodMountHints must return nil on failure: %+v", hints)
+			}
+		})
+	}
+}
diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go
index 3364aa5e6..d1c0bb9b5 100644
--- a/runsc/boot/limits.go
+++ b/runsc/boot/limits.go
@@ -20,8 +20,8 @@ import (
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
)
// Mapping from linux resource names to limits.LimitType.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index a997776f8..38425f97d 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -20,51 +20,52 @@ import (
mrand "math/rand"
"os"
"runtime"
+ "strings"
"sync"
"sync/atomic"
"syscall"
gtime "time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
- "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling"
- slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
- "gvisor.googlesource.com/gvisor/runsc/boot/filter"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/memutil"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/kvm"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace"
+ "gvisor.dev/gvisor/pkg/sentry/sighandling"
+ slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/runsc/boot/filter"
+ "gvisor.dev/gvisor/runsc/specutils"
// Include supported socket providers.
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
)
// Loader keeps state needed to start the kernel and run the container.
@@ -117,6 +118,10 @@ type Loader struct {
//
// processes is guarded by mu.
processes map[execID]*execProcess
+
+ // mountHints provides extra information about mounts for containers that
+ // apply to the entire pod.
+ mountHints *podMountHints
}
// execID uniquely identifies a sentry process that is executed in a container.
@@ -299,6 +304,11 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("initializing compat logs: %v", err)
}
+ mountHints, err := newPodMountHints(args.Spec)
+ if err != nil {
+ return nil, fmt.Errorf("creating pod mount hints: %v", err)
+ }
+
eid := execID{cid: args.ID}
l := &Loader{
k: k,
@@ -311,6 +321,7 @@ func New(args Args) (*Loader, error) {
rootProcArgs: procArgs,
sandboxID: args.ID,
processes: map[execID]*execProcess{eid: {}},
+ mountHints: mountHints,
}
// We don't care about child signals; some platforms can generate a
@@ -424,6 +435,9 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
return nil, fmt.Errorf("error creating memfd: %v", err)
}
memfile := os.NewFile(uintptr(memfd), memfileName)
+ // We can't enable pgalloc.MemoryFileOpts.UseHostMemcgPressure even if
+ // there are memory cgroups specified, because at this point we're already
+ // in a mount namespace in which the relevant cgroupfs is not visible.
mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{})
if err != nil {
memfile.Close()
@@ -432,6 +446,23 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
return mf, nil
}
+// installSeccompFilters installs the syscall filters described by the loader
+// configuration. When seccomp is disabled in the configuration it only
+// reports that fact and installs nothing.
+func (l *Loader) installSeccompFilters() error {
+	if l.conf.DisableSeccomp {
+		filter.Report("syscall filter is DISABLED. Running in less secure mode.")
+		return nil
+	}
+
+	// The filter set depends on the platform, on whether host networking
+	// and profiling are enabled, and on the controller server's FD.
+	filterOpts := filter.Options{
+		Platform:      l.k.Platform,
+		HostNetwork:   l.conf.Network == NetworkHost,
+		ProfileEnable: l.conf.ProfileEnable,
+		ControllerFD:  l.ctrl.srv.FD(),
+	}
+	if err := filter.Install(filterOpts); err != nil {
+		return fmt.Errorf("installing seccomp filters: %v", err)
+	}
+	return nil
+}
+
// Run runs the root container.
func (l *Loader) Run() error {
err := l.run()
@@ -467,25 +498,19 @@ func (l *Loader) run() error {
return fmt.Errorf("trying to start deleted container %q", l.sandboxID)
}
- // Finally done with all configuration. Setup filters before user code
- // is loaded.
- if l.conf.DisableSeccomp {
- filter.Report("syscall filter is DISABLED. Running in less secure mode.")
- } else {
- opts := filter.Options{
- Platform: l.k.Platform,
- HostNetwork: l.conf.Network == NetworkHost,
- ProfileEnable: l.conf.ProfileEnable,
- ControllerFD: l.ctrl.srv.FD(),
- }
- if err := filter.Install(opts); err != nil {
- return fmt.Errorf("installing seccomp filters: %v", err)
- }
- }
-
// If we are restoring, we do not want to create a process.
// l.restore is set by the container manager when a restore call is made.
if !l.restore {
+ if l.conf.ProfileEnable {
+ initializePProf()
+ }
+
+ // Finally done with all configuration. Setup filters before user code
+ // is loaded.
+ if err := l.installSeccompFilters(); err != nil {
+ return err
+ }
+
// Create the FD map, which will set stdin, stdout, and stderr. If console
// is true, then ioctl calls will be passed through to the host fd.
ctx := l.rootProcArgs.NewContext(l.k)
@@ -499,7 +524,7 @@ func (l *Loader) run() error {
// cid for root container can be empty. Only subcontainers need it to set
// the mount location.
- mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k)
+ mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints)
if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil {
return err
}
@@ -510,6 +535,24 @@ func (l *Loader) run() error {
return err
}
+ // Read /etc/passwd for the user's HOME directory and set the HOME
+ // environment variable as required by POSIX if it is not overridden by
+ // the user.
+ hasHomeEnvv := false
+ for _, envv := range l.rootProcArgs.Envv {
+ if strings.HasPrefix(envv, "HOME=") {
+ hasHomeEnvv = true
+ }
+ }
+ if !hasHomeEnvv {
+ homeDir, err := getExecUserHome(rootCtx, rootMns, uint32(l.rootProcArgs.Credentials.RealKUID))
+ if err != nil {
+ return fmt.Errorf("error reading exec user: %v", err)
+ }
+
+ l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir)
+ }
+
// Create the root container init task. It will begin running
// when the kernel is started.
if _, _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
@@ -620,7 +663,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
goferFDs = append(goferFDs, fd)
}
- mntr := newContainerMounter(spec, cid, goferFDs, l.k)
+ mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints)
if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil {
return fmt.Errorf("configuring container FS: %v", err)
}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 6393cb3fb..4af45bfcc 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -25,13 +25,13 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/runsc/fsgofer"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/runsc/fsgofer"
)
func init() {
@@ -404,7 +404,7 @@ func TestCreateMountNamespace(t *testing.T) {
mns = m
ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
}
- mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil)
+ mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil, &podMountHints{})
if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err)
}
@@ -610,7 +610,7 @@ func TestRestoreEnvironment(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
conf := testConfig()
- mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil)
+ mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil, &podMountHints{})
actualRenv, err := mntr.createRestoreEnvironment(conf)
if !tc.errorExpected && err != nil {
t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 0a154d90b..d3d98243d 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -19,16 +19,16 @@ import (
"net"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/urpc"
)
// Network exposes methods that can be used to configure a network stack.
@@ -56,7 +56,11 @@ type FDBasedLink struct {
Addresses []net.IP
Routes []Route
GSOMaxSize uint32
- LinkAddress []byte
+ LinkAddress net.HardwareAddr
+
+ // NumChannels controls how many underlying FD's are to be used to
+ // create this endpoint.
+ NumChannels int
}
// LoopbackLink configures a loopback link.
@@ -68,8 +72,9 @@ type LoopbackLink struct {
// CreateLinksAndRoutesArgs are arguments to CreateLinkAndRoutes.
type CreateLinksAndRoutesArgs struct {
- // FilePayload contains the fds associated with the FDBasedLinks. The
- // two slices must have the same length.
+ // FilePayload contains the fds associated with the FDBasedLinks. The
+ // number of fd's should match the sum of the NumChannels field of the
+ // FDBasedLink entries below.
urpc.FilePayload
LoopbackLinks []LoopbackLink
@@ -95,8 +100,12 @@ func (r *Route) toTcpipRoute(id tcpip.NICID) tcpip.Route {
// CreateLinksAndRoutes creates links and routes in a network stack. It should
// only be called once.
func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct{}) error {
- if len(args.FilePayload.Files) != len(args.FDBasedLinks) {
- return fmt.Errorf("FilePayload must be same length at FDBasedLinks")
+ wantFDs := 0
+ for _, l := range args.FDBasedLinks {
+ wantFDs += l.NumChannels
+ }
+ if got := len(args.FilePayload.Files); got != wantFDs {
+ return fmt.Errorf("args.FilePayload.Files has %d FD's but we need %d entries based on FDBasedLinks", got, wantFDs)
}
var nicID tcpip.NICID
@@ -123,20 +132,26 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
}
}
- for i, link := range args.FDBasedLinks {
+ fdOffset := 0
+ for _, link := range args.FDBasedLinks {
nicID++
nicids[link.Name] = nicID
- // Copy the underlying FD.
- oldFD := args.FilePayload.Files[i].Fd()
- newFD, err := syscall.Dup(int(oldFD))
- if err != nil {
- return fmt.Errorf("failed to dup FD %v: %v", oldFD, err)
+ FDs := []int{}
+ for j := 0; j < link.NumChannels; j++ {
+ // Copy the underlying FD.
+ oldFD := args.FilePayload.Files[fdOffset].Fd()
+ newFD, err := syscall.Dup(int(oldFD))
+ if err != nil {
+ return fmt.Errorf("failed to dup FD %v: %v", oldFD, err)
+ }
+ FDs = append(FDs, newFD)
+ fdOffset++
}
mac := tcpip.LinkAddress(link.LinkAddress)
linkEP, err := fdbased.New(&fdbased.Options{
- FD: newFD,
+ FDs: FDs,
MTU: uint32(link.MTU),
EthernetHeader: true,
Address: mac,
@@ -148,7 +163,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
return err
}
- log.Infof("Enabling interface %q with id %d on addresses %+v (%v)", link.Name, nicID, link.Addresses, mac)
+ log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses, false /* loopback */); err != nil {
return err
}
diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof.go
new file mode 100644
index 000000000..463362f02
--- /dev/null
+++ b/runsc/boot/pprof.go
@@ -0,0 +1,18 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+// initializePProf has an empty body in this file and therefore does nothing
+// when called.
+// NOTE(review): presumably a hook that other builds fill in to set up
+// profiling - confirm.
+func initializePProf() {
+}
diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go
index 19c7f8fbd..fbfd3b07c 100644
--- a/runsc/boot/strace.go
+++ b/runsc/boot/strace.go
@@ -15,7 +15,7 @@
package boot
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/strace"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
)
func enableStrace(conf *Config) error {
diff --git a/runsc/boot/user.go b/runsc/boot/user.go
new file mode 100644
index 000000000..d1d423a5c
--- /dev/null
+++ b/runsc/boot/user.go
@@ -0,0 +1,146 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "bufio"
+ "io"
+ "strconv"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+)
+
+// fileReader adapts an fs.File to io.Reader by pairing it with the context
+// used for its reads.
+type fileReader struct {
+	// Ctx is the context passed to every read on File.
+	Ctx context.Context
+
+	// File is the file that Read pulls data from.
+	File *fs.File
+}
+
+// Read implements io.Reader.Read.
+func (r *fileReader) Read(buf []byte) (int, error) {
+	dst := usermem.BytesIOSequence(buf)
+	read, err := r.File.Readv(r.Ctx, dst)
+	return int(read), err
+}
+
+// getExecUserHome returns the home directory of the user identified by uid,
+// looked up in the /etc/passwd file of the container filesystem reachable
+// through rootMns.
+//
+// "/" is returned when /etc/passwd cannot be found, is not readable, is not a
+// regular file, or has no entry for uid. An error is returned only when the
+// file cannot be opened after those checks or when parsing it fails.
+func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) {
+	// The home directory to return if no user matching uid is found in the
+	// /etc/passwd file of the image.
+	const defaultHome = "/"
+
+	// Resolve /etc/passwd starting from the root of the mount namespace.
+	// NOTE(review): confirm whether the Dirent returned by Root() carries a
+	// reference that this function is expected to drop.
+	root := rootMns.Root()
+	remaining := uint(linux.MaxSymlinkTraversals)
+	passwd, err := rootMns.FindInode(ctx, root, nil, "/etc/passwd", &remaining)
+	if err != nil {
+		// NOTE: Lookup errors are deliberately ignored. A missing passwd
+		// file simply means the default home directory is used.
+		return defaultHome, nil
+	}
+	defer passwd.DecRef()
+
+	// The file must be readable.
+	if err := passwd.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil {
+		// NOTE: Permission errors are ignored as well; fall back to the
+		// default home directory.
+		return defaultHome, nil
+	}
+
+	// Only regular files are opened. Other file types such as named pipes
+	// may block and might present some attack surface to the container.
+	// Note that runc does not seem to do this kind of checking.
+	if !fs.IsRegular(passwd.Inode.StableAttr) {
+		return defaultHome, nil
+	}
+
+	file, err := passwd.Inode.GetFile(ctx, passwd, fs.FileFlags{Read: true, Directory: false})
+	if err != nil {
+		return "", err
+	}
+	defer file.DecRef()
+
+	home, err := findHomeInPasswd(uid, &fileReader{Ctx: ctx, File: file}, defaultHome)
+	if err != nil {
+		return "", err
+	}
+	return home, nil
+}
+
+// findHomeInPasswd parses a passwd file and returns the given user's home
+// directory. This function does its best to replicate runc's behavior.
+//
+// The first entry whose third field parses as uid wins. If that entry has no
+// home directory field, an empty string is returned. If no entry matches,
+// defaultHome is returned. An error is returned when reading from passwd
+// fails, including when a line exceeds the scanner's maximum token size.
+func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) {
+	s := bufio.NewScanner(passwd)
+
+	for s.Scan() {
+		line := strings.TrimSpace(s.Text())
+		if line == "" {
+			continue
+		}
+
+		// Pull out part of passwd entry. Loosely parse the passwd entry as some
+		// passwd files could be poorly written and for compatibility with runc.
+		//
+		// Per 'man 5 passwd'
+		// /etc/passwd contains one line for each user account, with seven
+		// fields delimited by colons (":"). These fields are:
+		//
+		// - login name
+		// - optional encrypted password
+		// - numerical user ID
+		// - numerical group ID
+		// - user name or comment field
+		// - user home directory
+		// - optional user command interpreter
+		parts := strings.Split(line, ":")
+		if len(parts) < 3 {
+			// No UID field, so this entry cannot match.
+			continue
+		}
+		parsedUID, err := strconv.ParseUint(parts[2], 10, 32)
+		if err != nil || parsedUID != uint64(uid) {
+			continue
+		}
+
+		// NOTE: If the uid is present but the home directory is not
+		// present in the /etc/passwd entry we return an empty string. This
+		// is, for better or worse, what runc does.
+		if len(parts) > 5 {
+			return parts[5], nil
+		}
+		return "", nil
+	}
+
+	// Scanner.Err only reports a failure once Scan has returned false, so it
+	// must be checked here; inside the loop it is always nil and read errors
+	// would be mistaken for "user not found".
+	if err := s.Err(); err != nil {
+		return "", err
+	}
+
+	return defaultHome, nil
+}
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
new file mode 100644
index 000000000..834003430
--- /dev/null
+++ b/runsc/boot/user_test.go
@@ -0,0 +1,253 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
+ "syscall"
+ "testing"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+)
+
+// setupTempDir creates a fresh directory with the "exec-user-test" prefix
+// under the system temporary directory and returns its path.
+func setupTempDir() (string, error) {
+	dir, err := ioutil.TempDir(os.TempDir(), "exec-user-test")
+	if err == nil {
+		return dir, nil
+	}
+	return "", err
+}
+
+// setupPasswd returns a function that creates a temporary root directory
+// containing etc/passwd with the given contents and permissions, and returns
+// the path of that root directory.
+func setupPasswd(contents string, perms os.FileMode) func() (string, error) {
+	return func() (string, error) {
+		root, err := setupTempDir()
+		if err != nil {
+			return "", err
+		}
+
+		etcDir := filepath.Join(root, "etc")
+		if err := os.Mkdir(etcDir, 0777); err != nil {
+			return "", err
+		}
+
+		passwd, err := os.Create(filepath.Join(root, "etc", "passwd"))
+		if err != nil {
+			return "", err
+		}
+		defer passwd.Close()
+
+		if _, err := passwd.WriteString(contents); err != nil {
+			return "", err
+		}
+
+		// Permissions are applied after writing so that restrictive modes
+		// do not get in the way of populating the file.
+		if err := passwd.Chmod(perms); err != nil {
+			return "", err
+		}
+		return root, nil
+	}
+}
+
+// TestGetExecUserHome tests the getExecUserHome function against container
+// roots whose /etc/passwd is valid, missing, unreadable, a directory or a
+// named pipe. Each case builds its root in a temporary directory, which is
+// removed again when the case finishes.
+func TestGetExecUserHome(t *testing.T) {
+	tests := map[string]struct {
+		uid        uint32
+		createRoot func() (string, error)
+		expected   string
+	}{
+		"success": {
+			uid:        1000,
+			createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666),
+			expected:   "/home/adin",
+		},
+		"no_passwd": {
+			uid:        1000,
+			createRoot: setupTempDir,
+			expected:   "/",
+		},
+		"no_perms": {
+			uid:        1000,
+			createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000),
+			expected:   "/",
+		},
+		"directory": {
+			uid: 1000,
+			createRoot: func() (string, error) {
+				tmpDir, err := setupTempDir()
+				if err != nil {
+					return "", err
+				}
+
+				if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
+					return "", err
+				}
+
+				if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
+					return "", err
+				}
+
+				return tmpDir, nil
+			},
+			expected: "/",
+		},
+		// Currently we don't allow named pipes.
+		"named_pipe": {
+			uid: 1000,
+			createRoot: func() (string, error) {
+				tmpDir, err := setupTempDir()
+				if err != nil {
+					return "", err
+				}
+
+				if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
+					return "", err
+				}
+
+				if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
+					return "", err
+				}
+
+				return tmpDir, nil
+			},
+			expected: "/",
+		},
+	}
+
+	for name, tc := range tests {
+		t.Run(name, func(t *testing.T) {
+			tmpDir, err := tc.createRoot()
+			if err != nil {
+				t.Fatalf("failed to create root dir: %v", err)
+			}
+			// Registered before the gofer cleanup so that, with defers
+			// running last-in first-out, the gofer is stopped before the
+			// directory it serves is removed.
+			defer os.RemoveAll(tmpDir)
+
+			sandEnd, cleanup, err := startGofer(tmpDir)
+			if err != nil {
+				t.Fatalf("failed to create gofer: %v", err)
+			}
+			defer cleanup()
+
+			ctx := contexttest.Context(t)
+			conf := &Config{
+				RootDir:        "unused_root_dir",
+				Network:        NetworkNone,
+				DisableSeccomp: true,
+			}
+
+			spec := &specs.Spec{
+				Root: &specs.Root{
+					Path:     tmpDir,
+					Readonly: true,
+				},
+				// Add /proc mount as tmpfs to avoid needing a kernel.
+				Mounts: []specs.Mount{
+					{
+						Destination: "/proc",
+						Type:        "tmpfs",
+					},
+				},
+			}
+
+			// setMountNS captures the mount namespace created by the mounter
+			// and registers its root in the test context.
+			var mns *fs.MountNamespace
+			setMountNS := func(m *fs.MountNamespace) {
+				mns = m
+				ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
+			}
+			mntr := newContainerMounter(spec, "", []int{sandEnd}, nil, &podMountHints{})
+			if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
+				t.Fatalf("failed to create mount namespace: %v", err)
+			}
+
+			got, err := getExecUserHome(ctx, mns, tc.uid)
+			if err != nil {
+				t.Fatalf("failed to get user home: %v", err)
+			}
+
+			if got != tc.expected {
+				t.Fatalf("expected %v, got: %v", tc.expected, got)
+			}
+		})
+	}
+}
+
+// TestFindHomeInPasswd exercises the passwd parsing done by findHomeInPasswd
+// on in-memory passwd contents.
+func TestFindHomeInPasswd(t *testing.T) {
+	type passwdCase struct {
+		uid      uint32
+		passwd   string
+		expected string
+		def      string
+	}
+
+	cases := map[string]passwdCase{
+		"empty": {
+			uid:      1000,
+			passwd:   "",
+			expected: "/",
+			def:      "/",
+		},
+		"whitespace": {
+			uid:      1000,
+			passwd:   " ",
+			expected: "/",
+			def:      "/",
+		},
+		"full": {
+			uid:      1000,
+			passwd:   "adin::1000:1111::/home/adin:/bin/sh",
+			expected: "/home/adin",
+			def:      "/",
+		},
+		// For better or worse, this is how runc works.
+		"partial": {
+			uid:      1000,
+			passwd:   "adin::1000:1111:",
+			expected: "",
+			def:      "/",
+		},
+		"multiple": {
+			uid:      1001,
+			passwd:   "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh",
+			expected: "/home/ian",
+			def:      "/",
+		},
+		"duplicate": {
+			uid:      1000,
+			passwd:   "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh",
+			expected: "/home/adin",
+			def:      "/",
+		},
+		"empty_lines": {
+			uid:      1001,
+			passwd:   "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh",
+			expected: "/home/ian",
+			def:      "/",
+		},
+	}
+
+	for label, c := range cases {
+		t.Run(label, func(t *testing.T) {
+			src := strings.NewReader(c.passwd)
+			home, err := findHomeInPasswd(c.uid, src, c.def)
+			if err != nil {
+				t.Fatalf("error parsing passwd: %v", err)
+			}
+			if home != c.expected {
+				t.Fatalf("expected %v, got: %v", c.expected, home)
+			}
+		})
+	}
+}
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD
index 620d33a19..ab2387614 100644
--- a/runsc/cgroup/BUILD
+++ b/runsc/cgroup/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "cgroup",
srcs = ["cgroup.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/cgroup",
+ importpath = "gvisor.dev/gvisor/runsc/cgroup",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 7431b17d6..ab3a25b9b 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -30,8 +30,8 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index b7551a5ab..2c8b84252 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -14,9 +14,11 @@ go_library(
"debug.go",
"delete.go",
"do.go",
+ "error.go",
"events.go",
"exec.go",
"gofer.go",
+ "help.go",
"kill.go",
"list.go",
"path.go",
@@ -28,9 +30,10 @@ go_library(
"spec.go",
"start.go",
"state.go",
+ "syscalls.go",
"wait.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/cmd",
+ importpath = "gvisor.dev/gvisor/runsc/cmd",
visibility = [
"//runsc:__subpackages__",
],
@@ -38,6 +41,7 @@ go_library(
"//pkg/log",
"//pkg/p9",
"//pkg/sentry/control",
+ "//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/unet",
"//pkg/urpc",
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 3a547d4aa..272eb14d3 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -24,9 +24,9 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Boot implements subcommands.Command for the "boot" command which starts a
@@ -130,6 +130,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Ensure that if there is a panic, all goroutine stacks are printed.
debug.SetTraceback("all")
+ conf := args[0].(*boot.Config)
+
if b.setUpRoot {
if err := setUpChroot(b.pidns); err != nil {
Fatalf("error setting up chroot: %v", err)
@@ -143,14 +145,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
args = append(args, arg)
}
}
- // Note that we've already read the spec from the spec FD, and
- // we will read it again after the exec call. This works
- // because the ReadSpecFromFile function seeks to the beginning
- // of the file before reading.
- if err := callSelfAsNobody(args); err != nil {
- Fatalf("%v", err)
+ if !conf.Rootless {
+ // Note that we've already read the spec from the spec FD, and
+ // we will read it again after the exec call. This works
+ // because the ReadSpecFromFile function seeks to the beginning
+ // of the file before reading.
+ if err := callSelfAsNobody(args); err != nil {
+ Fatalf("%v", err)
+ }
+ panic("callSelfAsNobody must never return success")
}
- panic("callSelfAsNobody must never return success")
}
}
@@ -163,9 +167,6 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
specutils.LogSpec(spec)
- conf := args[0].(*boot.Config)
- waitStatus := args[1].(*syscall.WaitStatus)
-
if b.applyCaps {
caps := spec.Process.Capabilities
if caps == nil {
@@ -251,6 +252,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
ws := l.WaitExit()
log.Infof("application exiting with %+v", ws)
+ waitStatus := args[1].(*syscall.WaitStatus)
*waitStatus = syscall.WaitStatus(ws.Status())
l.Destroy()
return subcommands.ExitSuccess
diff --git a/runsc/cmd/capability.go b/runsc/cmd/capability.go
index 312e5b471..abfbb7cfc 100644
--- a/runsc/cmd/capability.go
+++ b/runsc/cmd/capability.go
@@ -19,7 +19,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
var allCapTypes = []capability.CapType{
diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go
index ee74d33d8..79863efa3 100644
--- a/runsc/cmd/capability_test.go
+++ b/runsc/cmd/capability_test.go
@@ -21,11 +21,11 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func init() {
@@ -116,6 +116,6 @@ func TestCapabilities(t *testing.T) {
}
func TestMain(m *testing.M) {
- testutil.RunAsRoot()
+ specutils.MaybeRunAsRoot()
os.Exit(m.Run())
}
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go
index 96d3c3378..7298a0828 100644
--- a/runsc/cmd/checkpoint.go
+++ b/runsc/cmd/checkpoint.go
@@ -22,10 +22,10 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// File containing the container's saved image/state within the given image-path's directory.
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
index 1a774db04..b5a0ce17d 100644
--- a/runsc/cmd/chroot.go
+++ b/runsc/cmd/chroot.go
@@ -20,8 +20,8 @@ import (
"path/filepath"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// mountInChroot creates the destination mount point in the given chroot and
diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go
index a2fc377d1..f1a4887ef 100644
--- a/runsc/cmd/cmd.go
+++ b/runsc/cmd/cmd.go
@@ -17,34 +17,15 @@ package cmd
import (
"fmt"
- "os"
"runtime"
"strconv"
"syscall"
- "github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
)
-// Errorf logs to stderr and returns subcommands.ExitFailure.
-func Errorf(s string, args ...interface{}) subcommands.ExitStatus {
- // If runsc is being invoked by docker or cri-o, then we might not have
- // access to stderr, so we log a serious-looking warning in addition to
- // writing to stderr.
- log.Warningf("FATAL ERROR: "+s, args...)
- fmt.Fprintf(os.Stderr, s+"\n", args...)
- // Return an error that is unlikely to be used by the application.
- return subcommands.ExitFailure
-}
-
-// Fatalf logs to stderr and exits with a failure status code.
-func Fatalf(s string, args ...interface{}) {
- Errorf(s, args...)
- os.Exit(128)
-}
-
// intFlags can be used with int flags that appear multiple times.
type intFlags []int
diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go
index 629c198fd..42663c05c 100644
--- a/runsc/cmd/create.go
+++ b/runsc/cmd/create.go
@@ -16,12 +16,11 @@ package cmd
import (
"context"
-
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Create implements subcommands.Command for the "create" command.
@@ -83,13 +82,17 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
id := f.Arg(0)
conf := args[0].(*boot.Config)
+ if conf.Rootless {
+ return Errorf("Rootless mode not supported with %q", c.Name())
+ }
+
bundleDir := c.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
- Fatalf("reading spec: %v", err)
+ return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
@@ -97,7 +100,7 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
// container unless the metadata specifies that it should be run in an
// existing container.
if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil {
- Fatalf("creating container: %v", err)
+ return Errorf("creating container: %v", err)
}
return subcommands.ExitSuccess
}
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 27eb51172..30a69acf0 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -22,9 +22,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Debug implements subcommands.Command for the "debug" command.
diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go
index 9039723e9..30d8164b1 100644
--- a/runsc/cmd/delete.go
+++ b/runsc/cmd/delete.go
@@ -21,9 +21,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Delete implements subcommands.Command for the "delete" command.
diff --git a/runsc/cmd/delete_test.go b/runsc/cmd/delete_test.go
index 45fc91016..cb59516a3 100644
--- a/runsc/cmd/delete_test.go
+++ b/runsc/cmd/delete_test.go
@@ -18,7 +18,7 @@ import (
"io/ioutil"
"testing"
- "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot"
)
func TestNotFound(t *testing.T) {
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index 8ea59046c..876e674c4 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -30,19 +30,18 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Do implements subcommands.Command for the "do" command. It sets up a simple
// sandbox and executes the command inside it. See Usage() for more details.
type Do struct {
- root string
- cwd string
- ip string
- networkNamespace bool
+ root string
+ cwd string
+ ip string
}
// Name implements subcommands.Command.Name.
@@ -72,7 +71,6 @@ func (c *Do) SetFlags(f *flag.FlagSet) {
f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`)
f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory")
f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox")
- f.BoolVar(&c.networkNamespace, "netns", true, "run in a new network namespace")
}
// Execute implements subcommands.Command.Execute.
@@ -85,15 +83,21 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
- // Map the entire host file system, but make it readonly with a writable
- // overlay on top (ignore --overlay option).
- conf.Overlay = true
+ if conf.Rootless {
+ if err := specutils.MaybeRunAsRoot(); err != nil {
+ return Errorf("Error executing inside namespace: %v", err)
+ }
+ // Execution will continue here if no more capabilities are needed...
+ }
hostname, err := os.Hostname()
if err != nil {
return Errorf("Error to retrieve hostname: %v", err)
}
+ // Map the entire host file system, but make it readonly with a writable
+ // overlay on top (ignore --overlay option).
+ conf.Overlay = true
absRoot, err := resolvePath(c.root)
if err != nil {
return Errorf("Error resolving root: %v", err)
@@ -119,11 +123,22 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
specutils.LogSpec(spec)
cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
- if !c.networkNamespace {
- if conf.Network != boot.NetworkHost {
- Fatalf("The current network namespace can be used only if --network=host is set", nil)
+ if conf.Network == boot.NetworkNone {
+ netns := specs.LinuxNamespace{
+ Type: specs.NetworkNamespace,
+ }
+ if spec.Linux != nil {
+ panic("spec.Linux is not nil")
}
- } else if conf.Network != boot.NetworkNone {
+ spec.Linux = &specs.Linux{Namespaces: []specs.LinuxNamespace{netns}}
+
+ } else if conf.Rootless {
+ if conf.Network == boot.NetworkSandbox {
+ fmt.Println("*** Rootless requires changing network type to host ***")
+ conf.Network = boot.NetworkHost
+ }
+
+ } else {
clean, err := c.setupNet(cid, spec)
if err != nil {
return Errorf("Error setting up network: %v", err)
diff --git a/runsc/cmd/error.go b/runsc/cmd/error.go
new file mode 100644
index 000000000..3585b5448
--- /dev/null
+++ b/runsc/cmd/error.go
@@ -0,0 +1,72 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+ "time"
+
+ "github.com/google/subcommands"
+ "gvisor.dev/gvisor/pkg/log"
+)
+
+// ErrorLogger is where error messages should be written to. These messages are
+// consumed by containerd and show up to users of command line tools,
+// like docker/kubectl.
+var ErrorLogger io.Writer
+
+type jsonError struct {
+ Msg string `json:"msg"`
+ Level string `json:"level"`
+ Time time.Time `json:"time"`
+}
+
+// Errorf logs error to containerd log (--log), to stderr, and debug logs. It
+// returns subcommands.ExitFailure for convenience with subcommand.Execute()
+// methods:
+// return Errorf("Danger! Danger!")
+//
+func Errorf(format string, args ...interface{}) subcommands.ExitStatus {
+ // If runsc is being invoked by docker or cri-o, then we might not have
+ // access to stderr, so we log a serious-looking warning in addition to
+ // writing to stderr.
+ log.Warningf("FATAL ERROR: "+format, args...)
+ fmt.Fprintf(os.Stderr, format+"\n", args...)
+
+ j := jsonError{
+ Msg: fmt.Sprintf(format, args...),
+ Level: "error",
+ Time: time.Now(),
+ }
+ b, err := json.Marshal(j)
+ if err != nil {
+ panic(err)
+ }
+ if ErrorLogger != nil {
+ ErrorLogger.Write(b)
+ }
+
+ return subcommands.ExitFailure
+}
+
+// Fatalf logs the same way as Errorf() does, plus *exits* the process.
+func Fatalf(format string, args ...interface{}) {
+ Errorf(format, args...)
+ // Return an error that is unlikely to be used by the application.
+ os.Exit(128)
+}
diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go
index c6bc8fc3a..3972e9224 100644
--- a/runsc/cmd/events.go
+++ b/runsc/cmd/events.go
@@ -22,9 +22,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Events implements subcommands.Command for the "events" command.
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index 8cd070e61..7adc23a77 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -30,14 +30,14 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/console"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/console"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Exec implements subcommands.Command for the "exec" command.
@@ -143,13 +143,16 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// write the child's PID to the pid file. So when the container returns, the
// child process will also return and signal containerd.
if ex.detach {
- return ex.execAndWait(waitStatus)
+ return ex.execChildAndWait(waitStatus)
}
+ return ex.exec(c, e, waitStatus)
+}
+func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
// Start the new process and get its pid.
pid, err := c.Execute(e)
if err != nil {
- Fatalf("executing processes for container: %v", err)
+ return Errorf("executing processes for container: %v", err)
}
if e.StdioIsPty {
@@ -163,29 +166,29 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
if ex.internalPidFile != "" {
pidStr := []byte(strconv.Itoa(int(pid)))
if err := ioutil.WriteFile(ex.internalPidFile, pidStr, 0644); err != nil {
- Fatalf("writing internal pid file %q: %v", ex.internalPidFile, err)
+ return Errorf("writing internal pid file %q: %v", ex.internalPidFile, err)
}
}
- // Generate the pid file after the internal pid file is generated, so that users
- // can safely assume that the internal pid file is ready after `runsc exec -d`
- // returns.
+ // Generate the pid file after the internal pid file is generated, so that
+ // users can safely assume that the internal pid file is ready after
+ // `runsc exec -d` returns.
if ex.pidFile != "" {
if err := ioutil.WriteFile(ex.pidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil {
- Fatalf("writing pid file: %v", err)
+ return Errorf("writing pid file: %v", err)
}
}
// Wait for the process to exit.
ws, err := c.WaitPID(pid)
if err != nil {
- Fatalf("waiting on pid %d: %v", pid, err)
+ return Errorf("waiting on pid %d: %v", pid, err)
}
*waitStatus = ws
return subcommands.ExitSuccess
}
-func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
+func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
var args []string
for _, a := range os.Args[1:] {
if !strings.Contains(a, "detach") {
@@ -193,7 +196,7 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat
}
}
- // The command needs to write a pid file so that execAndWait can tell
+ // The command needs to write a pid file so that execChildAndWait can tell
// when it has started. If no pid-file was provided, we should use a
// filename in a temp directory.
pidFile := ex.pidFile
@@ -262,7 +265,10 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat
return false, nil
}
if err := specutils.WaitForReady(cmd.Process.Pid, 10*time.Second, ready); err != nil {
- Fatalf("unexpected error waiting for PID file, err: %v", err)
+ // Don't log fatal error here, otherwise it will override the error logged
+ // by the child process that has failed to start.
+ log.Warningf("Unexpected error waiting for PID file, err: %v", err)
+ return subcommands.ExitFailure
}
*waitStatus = 0
diff --git a/runsc/cmd/exec_test.go b/runsc/cmd/exec_test.go
index 6f0f258c0..eb38a431f 100644
--- a/runsc/cmd/exec_test.go
+++ b/runsc/cmd/exec_test.go
@@ -21,10 +21,10 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/urpc"
)
func TestUser(t *testing.T) {
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index bccb29397..52609a57a 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -27,13 +27,13 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/fsgofer"
- "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/fsgofer"
+ "gvisor.dev/gvisor/runsc/fsgofer/filter"
+ "gvisor.dev/gvisor/runsc/specutils"
)
var caps = []string{
diff --git a/runsc/cmd/help.go b/runsc/cmd/help.go
new file mode 100644
index 000000000..ff4f901cb
--- /dev/null
+++ b/runsc/cmd/help.go
@@ -0,0 +1,126 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "fmt"
+
+ "flag"
+ "github.com/google/subcommands"
+)
+
+// NewHelp returns a help command for the given commander.
+func NewHelp(cdr *subcommands.Commander) *Help {
+ return &Help{
+ cdr: cdr,
+ }
+}
+
+// Help implements subcommands.Command for the "help" command. The 'help'
+// command prints help for commands registered to a Commander but also allows for
+// registering additional help commands that print other documentation.
+type Help struct {
+ cdr *subcommands.Commander
+ commands []subcommands.Command
+ help bool
+}
+
+// Name implements subcommands.Command.Name.
+func (*Help) Name() string {
+ return "help"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Help) Synopsis() string {
+ return "Print help documentation."
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Help) Usage() string {
+ return `help [<subcommand>]:
+ With an argument, prints detailed information on the use of
+ the specified topic or subcommand. With no argument, print a list of
+ all commands and a brief description of each.
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (h *Help) SetFlags(f *flag.FlagSet) {}
+
+// Execute implements subcommands.Command.Execute.
+func (h *Help) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ switch f.NArg() {
+ case 0:
+ fmt.Fprintf(h.cdr.Output, "Usage: %s <flags> <subcommand> <subcommand args>\n\n", h.cdr.Name())
+ fmt.Fprintf(h.cdr.Output, `runsc is a command line client for running applications packaged in the Open
+Container Initiative (OCI) format. Applications run by runsc are run in an
+isolated gVisor sandbox that emulates a Linux environment.
+
+gVisor is a user-space kernel, written in Go, that implements a substantial
+portion of the Linux system call interface. It provides an additional layer
+of isolation between running applications and the host operating system.
+
+Functionality is provided by subcommands. For additonal help on individual
+subcommands use "%s %s <subcommand>".
+
+`, h.cdr.Name(), h.Name())
+ h.cdr.VisitGroups(func(g *subcommands.CommandGroup) {
+ h.cdr.ExplainGroup(h.cdr.Output, g)
+ })
+
+ fmt.Fprintf(h.cdr.Output, "Additional help topics (Use \"%s %s <topic>\" to see help on the topic):\n", h.cdr.Name(), h.Name())
+ for _, cmd := range h.commands {
+ fmt.Fprintf(h.cdr.Output, "\t%-15s %s\n", cmd.Name(), cmd.Synopsis())
+ }
+ fmt.Fprintf(h.cdr.Output, "\nUse \"%s flags\" for a list of top-level flags\n", h.cdr.Name())
+ return subcommands.ExitSuccess
+ default:
+ // Look for commands registered to the commander and print help explanation if found.
+ found := false
+ h.cdr.VisitCommands(func(g *subcommands.CommandGroup, cmd subcommands.Command) {
+ if f.Arg(0) == cmd.Name() {
+ h.cdr.ExplainCommand(h.cdr.Output, cmd)
+ found = true
+ }
+ })
+ if found {
+ return subcommands.ExitSuccess
+ }
+
+ // Next check commands registered to the help command.
+ for _, cmd := range h.commands {
+ if f.Arg(0) == cmd.Name() {
+ fs := flag.NewFlagSet(f.Arg(0), flag.ContinueOnError)
+ fs.Usage = func() { h.cdr.ExplainCommand(h.cdr.Error, cmd) }
+ cmd.SetFlags(fs)
+ if fs.Parse(f.Args()[1:]) != nil {
+ return subcommands.ExitUsageError
+ }
+ return cmd.Execute(ctx, f, args...)
+ }
+ }
+
+ fmt.Fprintf(h.cdr.Error, "Subcommand %s not understood\n", f.Arg(0))
+ }
+
+ f.Usage()
+ return subcommands.ExitUsageError
+}
+
+// Register registers a new help command.
+func (h *Help) Register(cmd subcommands.Command) {
+ h.commands = append(h.commands, cmd)
+}
diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go
index aed5f3291..6c1f197a6 100644
--- a/runsc/cmd/kill.go
+++ b/runsc/cmd/kill.go
@@ -24,8 +24,8 @@ import (
"flag"
"github.com/google/subcommands"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Kill implements subcommands.Command for the "kill" command.
diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go
index 1f5ca2473..dd2d99a6b 100644
--- a/runsc/cmd/list.go
+++ b/runsc/cmd/list.go
@@ -25,8 +25,8 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// List implements subcommands.Command for the "list" command.
diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go
index 11b36aa10..9c0e92001 100644
--- a/runsc/cmd/pause.go
+++ b/runsc/cmd/pause.go
@@ -19,8 +19,8 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Pause implements subcommands.Command for the "pause" command.
diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go
index 3a3e6f17a..45c644f3f 100644
--- a/runsc/cmd/ps.go
+++ b/runsc/cmd/ps.go
@@ -20,9 +20,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// PS implements subcommands.Command for the "ps" command.
diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go
index 3ab2f5676..a5124697d 100644
--- a/runsc/cmd/restore.go
+++ b/runsc/cmd/restore.go
@@ -21,9 +21,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Restore implements subcommands.Command for the "restore" command.
@@ -80,25 +80,29 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
+ if conf.Rootless {
+ return Errorf("Rootless mode not supported with %q", r.Name())
+ }
+
bundleDir := r.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
- Fatalf("reading spec: %v", err)
+ return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
if r.imagePath == "" {
- Fatalf("image-path flag must be provided")
+ return Errorf("image-path flag must be provided")
}
conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName)
ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
if err != nil {
- Fatalf("running container: %v", err)
+ return Errorf("running container: %v", err)
}
*waitStatus = ws
diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go
index 9a2ade41e..b2df5c640 100644
--- a/runsc/cmd/resume.go
+++ b/runsc/cmd/resume.go
@@ -19,8 +19,8 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Resume implements subcommands.Command for the "resume" command.
diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go
index c228b4f93..c1734741d 100644
--- a/runsc/cmd/run.go
+++ b/runsc/cmd/run.go
@@ -20,9 +20,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Run implements subcommands.Command for the "run" command.
@@ -67,19 +67,23 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
+ if conf.Rootless {
+ return Errorf("Rootless mode not supported with %q", r.Name())
+ }
+
bundleDir := r.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
- Fatalf("reading spec: %v", err)
+ return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
if err != nil {
- Fatalf("running container: %v", err)
+ return Errorf("running container: %v", err)
}
*waitStatus = ws
diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go
index 657726251..de2115dff 100644
--- a/runsc/cmd/start.go
+++ b/runsc/cmd/start.go
@@ -16,11 +16,10 @@ package cmd
import (
"context"
-
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Start implements subcommands.Command for the "start" command.
diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go
index f0d449b19..e9f41cbd8 100644
--- a/runsc/cmd/state.go
+++ b/runsc/cmd/state.go
@@ -21,9 +21,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// State implements subcommands.Command for the "state" command.
diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go
new file mode 100644
index 000000000..df92c126a
--- /dev/null
+++ b/runsc/cmd/syscalls.go
@@ -0,0 +1,347 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "encoding/csv"
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+ "sort"
+ "strconv"
+ "text/tabwriter"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+)
+
+// Syscalls implements subcommands.Command for the "syscalls" command.
+type Syscalls struct {
+ // output is the name of the output format, set by the -o flag.
+ output string
+ // os is the OS name to report on, set by the -os flag.
+ os string
+ // arch is the CPU architecture name to report on, set by the -arch flag.
+ arch string
+}
+
+// CompatibilityInfo holds syscall compatibility documentation. It maps an
+// operating system name to a map of architecture name to ArchInfo.
+type CompatibilityInfo map[string]map[string]ArchInfo
+
+// ArchInfo is the compatibility doc for a single architecture.
+type ArchInfo struct {
+ // Syscalls maps syscall number for the architecture to the doc.
+ Syscalls map[uintptr]SyscallDoc `json:"syscalls"`
+}
+
+// SyscallDoc represents a single item of syscall documentation.
+type SyscallDoc struct {
+ // Name is the syscall name.
+ Name string `json:"name"`
+ // num is the syscall number. It is unexported, so encoding/json leaves it
+ // out; the table and CSV writers use it for sorting and for the number column.
+ num uintptr
+
+ // Support is the string form of the syscall's support level.
+ Support string `json:"support"`
+ // Note is free-form text about the syscall; omitted from JSON when empty.
+ Note string `json:"note,omitempty"`
+ // URLs lists links attached to the syscall; omitted from JSON when empty.
+ URLs []string `json:"urls,omitempty"`
+}
+
+// outputFunc writes the given compatibility info to the writer in one
+// specific output format and reports the first error encountered.
+type outputFunc func(io.Writer, CompatibilityInfo) error
+
+var (
+ // The string name to use for printing compatibility for all OSes.
+ osAll = "all"
+
+ // The string name to use for printing compatibility for all architectures.
+ archAll = "all"
+
+ // A map of OS name to map of architecture name to syscall table.
+ // It starts out empty and is filled in by Syscalls.Execute.
+ syscallTableMap = make(map[string]map[string]*kernel.SyscallTable)
+
+ // A map of output type names to output functions. The keys are the
+ // values accepted by the -o flag.
+ outputMap = map[string]outputFunc{
+ "table": outputTable,
+ "json": outputJSON,
+ "csv": outputCSV,
+ }
+)
+
+// Name implements subcommands.Command.Name.
+// It returns the fixed command name "syscalls".
+func (*Syscalls) Name() string {
+ return "syscalls"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+// It returns a one-line description of the command.
+func (*Syscalls) Synopsis() string {
+ return "Print compatibility information for syscalls."
+}
+
+// Usage implements subcommands.Command.Usage.
+// The returned usage text is a single line terminated by a newline.
+func (*Syscalls) Usage() string {
+ return `syscalls [options] - Print compatibility information for syscalls.
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+//
+// It registers -o (output format, default "table"), -os (default osAll) and
+// -arch (default archAll), each bound to the matching field of s.
+func (s *Syscalls) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&s.output, "o", "table", "Output format (table, csv, json).")
+ f.StringVar(&s.os, "os", osAll, "The OS (e.g. linux)")
+ f.StringVar(&s.arch, "arch", archAll, "The CPU architecture (e.g. amd64).")
+}
+
+// Execute implements subcommands.Command.Execute.
+//
+// It looks up the writer for the requested output format, indexes every
+// registered syscall table by OS name and architecture name, gathers the
+// compatibility info selected by the -os and -arch flags and writes it to
+// stdout. Every failure is reported through Fatalf.
+func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+	write, found := outputMap[s.output]
+	if !found {
+		Fatalf("Unsupported output format %q", s.output)
+	}
+
+	// Index all supported syscall tables: OS name -> architecture name -> table.
+	for _, table := range kernel.SyscallTables() {
+		osKey := table.OS.String()
+		byArch, known := syscallTableMap[osKey]
+		if !known {
+			byArch = make(map[string]*kernel.SyscallTable)
+			syscallTableMap[osKey] = byArch
+		}
+		byArch[table.Arch.String()] = table
+	}
+
+	// Collect the info for the OS/architecture pairs we want to output.
+	info, err := getCompatibilityInfo(s.os, s.arch)
+	if err != nil {
+		Fatalf("%v", err)
+	}
+
+	if err = write(os.Stdout, info); err != nil {
+		Fatalf("Error writing output: %v", err)
+	}
+
+	return subcommands.ExitSuccess
+}
+
+// getCompatibilityInfo builds the compatibility info for the given OS name
+// and architecture name. The special OS name 'all' selects every OS present
+// in syscallTableMap; the architecture name is passed through unchanged to
+// addToCompatibilityInfo, which handles the special architecture name 'all'.
+// On error, the info gathered so far is returned together with the error.
+func getCompatibilityInfo(osName string, archName string) (CompatibilityInfo, error) {
+	info := make(CompatibilityInfo)
+
+	// Work out which OS names to include.
+	var selected []string
+	if osName == osAll {
+		for name := range syscallTableMap {
+			selected = append(selected, name)
+		}
+	} else {
+		selected = []string{osName}
+	}
+
+	// Each name in selected is a specific OS name from here on.
+	for _, name := range selected {
+		info[name] = make(map[string]ArchInfo)
+		if err := addToCompatibilityInfo(info, name, archName); err != nil {
+			return info, err
+		}
+	}
+
+	return info, nil
+}
+
+// addToCompatibilityInfo adds ArchInfo for the given specific OS name and
+// architecture name to info. Supports the special architecture name 'all' to
+// specify that all supported architectures for the OS should be included.
+//
+// An error is returned when no syscall tables are registered for osName or
+// when the requested architecture is not found. The info[osName] entry is
+// created if the caller has not already done so.
+func addToCompatibilityInfo(info CompatibilityInfo, osName string, archName string) error {
+	archTables, ok := syscallTableMap[osName]
+	if !ok {
+		// Ranging over the missing entry would silently produce nothing for an
+		// unknown OS when 'all' architectures are requested, so fail loudly.
+		return fmt.Errorf("syscall tables for OS %s not found", osName)
+	}
+
+	// Writing to a nil map panics; make sure the per-OS map exists.
+	if info[osName] == nil {
+		info[osName] = make(map[string]ArchInfo)
+	}
+
+	// Work out which architecture names to include.
+	archNames := []string{archName}
+	if archName == archAll {
+		archNames = make([]string, 0, len(archTables))
+		for name := range archTables {
+			archNames = append(archNames, name)
+		}
+	}
+
+	for _, name := range archNames {
+		archInfo, err := getArchInfo(osName, name)
+		if err != nil {
+			return err
+		}
+		info[osName][name] = archInfo
+	}
+
+	return nil
+}
+
+// getArchInfo returns compatibility info for a specific OS and architecture.
+//
+// The syscall table is looked up in syscallTableMap; when it is missing, an
+// ArchInfo with an empty Syscalls map is returned along with an error.
+// Otherwise every entry of the table is converted to a SyscallDoc keyed by
+// its syscall number.
+func getArchInfo(osName string, archName string) (ArchInfo, error) {
+	result := ArchInfo{Syscalls: make(map[uintptr]SyscallDoc)}
+
+	// Indexing a missing OS entry yields a nil map, which is safe to read.
+	table, found := syscallTableMap[osName][archName]
+	if !found {
+		return result, fmt.Errorf("syscall table for %s/%s not found", osName, archName)
+	}
+
+	for nr, entry := range table.Table {
+		doc := SyscallDoc{
+			Name:    entry.Name,
+			num:     nr,
+			Support: entry.SupportLevel.String(),
+			Note:    entry.Note,
+			URLs:    entry.URLs,
+		}
+		result.Syscalls[nr] = doc
+	}
+
+	return result, nil
+}
+
+// outputTable outputs the syscall info in tabular format.
+//
+// For every OS/architecture pair it writes an "os/arch:" heading to w,
+// followed by an aligned table with the columns NUM, NAME, SUPPORT and NOTE,
+// sorted by syscall number. Each URL attached to a syscall is listed on its
+// own row in the NOTE column. Pairs are emitted in sorted name order so that
+// the output is stable across runs. The first write or flush error is
+// returned.
+func outputTable(w io.Writer, info CompatibilityInfo) error {
+	tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0)
+
+	// Map iteration order is random; sort the OS names for stable output.
+	osNames := make([]string, 0, len(info))
+	for osName := range info {
+		osNames = append(osNames, osName)
+	}
+	sort.Strings(osNames)
+
+	for _, osName := range osNames {
+		osInfo := info[osName]
+
+		// Likewise sort the architecture names of this OS.
+		archNames := make([]string, 0, len(osInfo))
+		for archName := range osInfo {
+			archNames = append(archNames, archName)
+		}
+		sort.Strings(archNames)
+
+		for _, archName := range archNames {
+			archInfo := osInfo[archName]
+
+			// Print the OS/arch heading straight to w. tw is flushed at the end
+			// of every section, so the heading always precedes its own table.
+			if _, err := fmt.Fprintf(w, "%s/%s:\n\n", osName, archName); err != nil {
+				return err
+			}
+
+			// Sort the syscalls by number for output in the table.
+			sortedCalls := make([]SyscallDoc, 0, len(archInfo.Syscalls))
+			for _, sc := range archInfo.Syscalls {
+				sortedCalls = append(sortedCalls, sc)
+			}
+			sort.Slice(sortedCalls, func(i, j int) bool {
+				return sortedCalls[i].num < sortedCalls[j].num
+			})
+
+			// Write the header.
+			if _, err := fmt.Fprintf(tw, "%s\t%s\t%s\t%s\n", "NUM", "NAME", "SUPPORT", "NOTE"); err != nil {
+				return err
+			}
+
+			// Write each syscall entry.
+			for _, sc := range sortedCalls {
+				_, err := fmt.Fprintf(tw, "%s\t%s\t%s\t%s\n",
+					strconv.FormatInt(int64(sc.num), 10),
+					sc.Name,
+					sc.Support,
+					sc.Note,
+				)
+				if err != nil {
+					return err
+				}
+				// Add issue urls below the note, one row per URL.
+				for _, url := range sc.URLs {
+					if _, err := fmt.Fprintf(tw, "%s\t%s\t%s\tSee: %s\t\n", "", "", "", url); err != nil {
+						return err
+					}
+				}
+			}
+
+			if err := tw.Flush(); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// outputJSON outputs the syscall info in JSON format.
+//
+// The whole CompatibilityInfo value is encoded as one indented JSON document.
+// Only the exported, tagged fields of SyscallDoc appear in the output.
+func outputJSON(w io.Writer, info CompatibilityInfo) error {
+ e := json.NewEncoder(w)
+ e.SetIndent("", " ")
+ return e.Encode(info)
+}
+
+// numberedRow is a CSV row annotated by syscall number (used for sorting).
+type numberedRow struct {
+ // num is the syscall number the row describes.
+ num uintptr
+ // row holds the CSV fields of the row, in column order.
+ row []string
+}
+
+// outputCSV outputs the syscall info in CSV format.
+//
+// A single header row (OS, Arch, Num, Name, Support, Note) is written first,
+// followed by one row per syscall. Rows are grouped by OS and architecture in
+// sorted name order and ordered by syscall number within each group, so the
+// output is stable across runs. Each URL attached to a syscall is appended
+// to its note on a separate "See: <url>" line. The first write error is
+// returned.
+func outputCSV(w io.Writer, info CompatibilityInfo) error {
+	csvWriter := csv.NewWriter(w)
+
+	// Write the header once. The OS and Arch columns already identify the
+	// section each row belongs to, so repeating the header per section would
+	// only inject non-data rows into the middle of the file.
+	header := []string{"OS", "Arch", "Num", "Name", "Support", "Note"}
+	if err := csvWriter.Write(header); err != nil {
+		return err
+	}
+
+	// Map iteration order is random; sort the OS names for stable output.
+	osNames := make([]string, 0, len(info))
+	for osName := range info {
+		osNames = append(osNames, osName)
+	}
+	sort.Strings(osNames)
+
+	for _, osName := range osNames {
+		osInfo := info[osName]
+
+		// Likewise sort the architecture names of this OS.
+		archNames := make([]string, 0, len(osInfo))
+		for archName := range osInfo {
+			archNames = append(archNames, archName)
+		}
+		sort.Strings(archNames)
+
+		for _, archName := range archNames {
+			archInfo := osInfo[archName]
+
+			// Build one row per syscall, then sort the rows by syscall number.
+			sortedCalls := make([]numberedRow, 0, len(archInfo.Syscalls))
+			for _, sc := range archInfo.Syscalls {
+				// Add issue urls to note.
+				note := sc.Note
+				for _, url := range sc.URLs {
+					note = fmt.Sprintf("%s\nSee: %s", note, url)
+				}
+
+				sortedCalls = append(sortedCalls, numberedRow{
+					num: sc.num,
+					row: []string{
+						osName,
+						archName,
+						strconv.FormatInt(int64(sc.num), 10),
+						sc.Name,
+						sc.Support,
+						note,
+					},
+				})
+			}
+			sort.Slice(sortedCalls, func(i, j int) bool {
+				return sortedCalls[i].num < sortedCalls[j].num
+			})
+
+			// Write each syscall entry.
+			for _, sc := range sortedCalls {
+				if err := csvWriter.Write(sc.row); err != nil {
+					return err
+				}
+			}
+		}
+	}
+
+	// Flush reports no error itself; any buffered write error is picked up
+	// through Error.
+	csvWriter.Flush()
+	return csvWriter.Error()
+}
diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go
index 58fd01974..046489687 100644
--- a/runsc/cmd/wait.go
+++ b/runsc/cmd/wait.go
@@ -22,8 +22,8 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
const (
diff --git a/runsc/console/BUILD b/runsc/console/BUILD
index 3ff9eba27..2d71cd371 100644
--- a/runsc/console/BUILD
+++ b/runsc/console/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "console",
srcs = ["console.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/console",
+ importpath = "gvisor.dev/gvisor/runsc/console",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 13709a0ae..ebe77165e 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -9,7 +9,7 @@ go_library(
"hook.go",
"status.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/container",
+ importpath = "gvisor.dev/gvisor/runsc/container",
visibility = [
"//runsc:__subpackages__",
"//test:__subpackages__",
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index d016533e6..e3ca3d387 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -27,10 +27,10 @@ import (
"github.com/kr/pty"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// socketPath creates a path inside bundleDir and ensures that the returned
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 04b611b56..e67f99742 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -33,12 +33,12 @@ import (
"github.com/cenkalti/backoff"
"github.com/gofrs/flock"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/sandbox"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/sandbox"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 72c5ecbb0..0e3a736b3 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -31,12 +31,13 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// waitForProcessList waits for the given process list to show up in the container.
@@ -1853,7 +1854,7 @@ func TestMain(m *testing.M) {
if err := testutil.ConfigureExePath(); err != nil {
panic(err.Error())
}
- testutil.RunAsRoot()
+ specutils.MaybeRunAsRoot()
os.Exit(m.Run())
}
diff --git a/runsc/container/hook.go b/runsc/container/hook.go
index acae6781e..901607aee 100644
--- a/runsc/container/hook.go
+++ b/runsc/container/hook.go
@@ -24,7 +24,7 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// This file implements hooks as defined in OCI spec:
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 4ea3c74ac..83fe24d64 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -28,10 +28,10 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
@@ -99,6 +99,36 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
return containers, cleanup, nil
}
+// execDesc describes one command to execute in a container together with
+// the exit status it is expected to produce.
+type execDesc struct {
+ // c is the container the command is executed in.
+ c *Container
+ // cmd is the argv of the command to execute.
+ cmd []string
+ // want is the expected exit status; the zero value expects success (0).
+ want int
+ // desc is a human-readable description used in failure messages.
+ desc string
+}
+
+// execMany runs the given commands one after another, each in its own
+// container, and stops at the first failure. It returns an error when a
+// command cannot be executed or when its exit status differs from the
+// expected one; it returns nil when every command behaves as expected.
+func execMany(execs []execDesc) error {
+	for _, e := range execs {
+		args := &control.ExecArgs{Argv: e.cmd}
+		ws, err := e.c.executeSync(args)
+		if err != nil {
+			return fmt.Errorf("error executing %+v: %v", args, err)
+		}
+		if got := ws.ExitStatus(); got != e.want {
+			return fmt.Errorf("%q: exec %q got exit status: %d, want: %d", e.desc, e.cmd, got, e.want)
+		}
+	}
+	return nil
+}
+
+// createSharedMount annotates every spec in pod so that mount is declared as
+// a pod-shared mount called name. For each spec it sets the "source", "type"
+// and "share" ("pod") annotations under boot.MountPrefix/name, plus an
+// "options" annotation holding the comma-joined mount options when there are
+// any. The annotations map of a spec is created if it is nil.
+func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
+	for _, spec := range pod {
+		// Writing to a nil map panics; make sure the annotations map exists.
+		if spec.Annotations == nil {
+			spec.Annotations = make(map[string]string)
+		}
+		spec.Annotations[path.Join(boot.MountPrefix, name, "source")] = mount.Source
+		spec.Annotations[path.Join(boot.MountPrefix, name, "type")] = mount.Type
+		spec.Annotations[path.Join(boot.MountPrefix, name, "share")] = "pod"
+		if len(mount.Options) > 0 {
+			spec.Annotations[path.Join(boot.MountPrefix, name, "options")] = strings.Join(mount.Options, ",")
+		}
+	}
+}
+
// TestMultiContainerSanity checks that it is possible to run 2 dead-simple
// containers in the same sandbox.
func TestMultiContainerSanity(t *testing.T) {
@@ -828,3 +858,272 @@ func TestMultiContainerGoferStop(t *testing.T) {
}
}
}
+
+// Test that pod shared mounts are properly mounted in 2 containers and that
+// changes from one container are reflected in the other.
+//
+// For every configuration, two containers are started with the same tmpfs
+// mount declared as pod-shared but mounted at different destinations. A file
+// and then a directory are created and removed through one container, and
+// each change is checked from both containers.
+func TestMultiContainerSharedMount(t *testing.T) {
+	for _, conf := range configs(all...) {
+		t.Logf("Running test with conf: %+v", conf)
+
+		// Setup the containers.
+		sleep := []string{"sleep", "100"}
+		podSpec, ids := createSpecs(sleep, sleep)
+		mnt0 := specs.Mount{
+			Destination: "/mydir/test",
+			Source:      "/some/dir",
+			Type:        "tmpfs",
+			Options:     nil,
+		}
+		podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+
+		// Same mount, different destination inside the second container.
+		mnt1 := mnt0
+		mnt1.Destination = "/mydir2/test2"
+		podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+
+		createSharedMount(mnt0, "test-mount", podSpec...)
+
+		containers, cleanup, err := startContainers(conf, podSpec, ids)
+		if err != nil {
+			t.Fatalf("error starting containers: %v", err)
+		}
+		defer cleanup()
+
+		// file0 and file1 name the same entry of the shared mount as seen from
+		// container0 and container1 respectively.
+		file0 := path.Join(mnt0.Destination, "abc")
+		file1 := path.Join(mnt1.Destination, "abc")
+		execs := []execDesc{
+			{
+				c:    containers[0],
+				cmd:  []string{"/usr/bin/test", "-d", mnt0.Destination},
+				desc: "directory is mounted in container0",
+			},
+			{
+				c:    containers[1],
+				cmd:  []string{"/usr/bin/test", "-d", mnt1.Destination},
+				desc: "directory is mounted in container1",
+			},
+			{
+				c:    containers[0],
+				cmd:  []string{"/usr/bin/touch", file0},
+				desc: "create file in container0",
+			},
+			{
+				c:    containers[0],
+				cmd:  []string{"/usr/bin/test", "-f", file0},
+				desc: "file appears in container0",
+			},
+			{
+				c:    containers[1],
+				cmd:  []string{"/usr/bin/test", "-f", file1},
+				desc: "file appears in container1",
+			},
+			{
+				c:    containers[1],
+				cmd:  []string{"/bin/rm", file1},
+				desc: "remove file in container1",
+			},
+			{
+				c:    containers[0],
+				cmd:  []string{"/usr/bin/test", "!", "-f", file0},
+				desc: "file removed from container0",
+			},
+			{
+				c:    containers[1],
+				cmd:  []string{"/usr/bin/test", "!", "-f", file1},
+				desc: "file removed from container1",
+			},
+			{
+				c:    containers[1],
+				cmd:  []string{"/bin/mkdir", file1},
+				desc: "create directory in container1",
+			},
+			{
+				c:    containers[0],
+				cmd:  []string{"/usr/bin/test", "-d", file0},
+				desc: "dir appears in container0",
+			},
+			{
+				c:    containers[1],
+				cmd:  []string{"/usr/bin/test", "-d", file1},
+				desc: "dir appears in container1",
+			},
+			{
+				c:    containers[0],
+				cmd:  []string{"/bin/rmdir", file0},
+				desc: "remove directory in container0",
+			},
+			{
+				c:    containers[0],
+				cmd:  []string{"/usr/bin/test", "!", "-d", file0},
+				desc: "dir removed from container0",
+			},
+			{
+				c:    containers[1],
+				cmd:  []string{"/usr/bin/test", "!", "-d", file1},
+				desc: "dir removed from container1",
+			},
+		}
+		if err := execMany(execs); err != nil {
+			t.Fatal(err.Error())
+		}
+	}
+}
+
+// Test that pod mounts are mounted as readonly when requested.
+//
+// For every configuration, two containers are started with the same tmpfs
+// mount declared as pod-shared with the "ro" option. The mount point must
+// exist in both containers and creating a file in it must fail in both.
+func TestMultiContainerSharedMountReadonly(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: []string{"ro"},
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+
+ // Same mount, different destination inside the second container.
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+
+ createSharedMount(mnt0, "test-mount", podSpec...)
+
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
+ desc: "directory is mounted in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
+ desc: "directory is mounted in container1",
+ },
+ // The touch commands below are expected to exit with status 1.
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/touch", file0},
+ want: 1,
+ desc: "fails to write to container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/touch", file1},
+ want: 1,
+ desc: "fails to write to container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+ }
+}
+
+// Test that shared pod mounts continue to work after container is restarted.
+//
+// For every configuration, two containers share a tmpfs mount. A file is
+// created through the first container, the second container is destroyed and
+// created again with the same ID and spec, and the file must still be visible
+// in, and removable from, both containers.
+func TestMultiContainerSharedMountRestart(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: nil,
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+
+ // Same mount, different destination inside the second container.
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+
+ createSharedMount(mnt0, "test-mount", podSpec...)
+
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/touch", file0},
+ desc: "create file in container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ desc: "file appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ desc: "file appears in container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+
+ // Tear down the second container; it is recreated below.
+ // NOTE(review): the result of Destroy is not checked here - confirm
+ // whether it returns an error that should fail the test.
+ containers[1].Destroy()
+
+ bundleDir, err := testutil.SetupBundleDir(podSpec[1])
+ if err != nil {
+ t.Fatalf("error restarting container: %v", err)
+ }
+ defer os.RemoveAll(bundleDir)
+
+ // Recreate and start the second container with the same ID and spec.
+ containers[1], err = Create(ids[1], podSpec[1], conf, bundleDir, "", "", "")
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ if err := containers[1].Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
+
+ // The file created before the restart must still be there, and removing
+ // it through the restarted container must be visible in both.
+ execs = []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ desc: "file is still in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ desc: "file is still in container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/rm", file1},
+ desc: "file removed from container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-f", file0},
+ desc: "file removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-f", file1},
+ desc: "file removed from container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+ }
+}
diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go
index 9d5a592a5..51a7f99df 100644
--- a/runsc/container/shared_volume_test.go
+++ b/runsc/container/shared_volume_test.go
@@ -22,10 +22,10 @@ import (
"path/filepath"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// TestSharedVolume checks that modifications to a volume mount are propagated
diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go
index 62923f1ef..b7fc6498f 100644
--- a/runsc/container/test_app/test_app.go
+++ b/runsc/container/test_app/test_app.go
@@ -29,7 +29,7 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func main() {
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 4adc9c1bc..80a4aa2fe 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -8,7 +8,7 @@ go_library(
"fsgofer.go",
"fsgofer_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer",
+ importpath = "gvisor.dev/gvisor/runsc/fsgofer",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD
index 78c5b526c..e2318a978 100644
--- a/runsc/fsgofer/filter/BUILD
+++ b/runsc/fsgofer/filter/BUILD
@@ -11,7 +11,7 @@ go_library(
"extra_filters_race.go",
"filter.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter",
+ importpath = "gvisor.dev/gvisor/runsc/fsgofer/filter",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index 4faab2946..2d50774d4 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -19,8 +19,8 @@ import (
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// allowedSyscalls is the set of syscalls executed by the gofer.
diff --git a/runsc/fsgofer/filter/extra_filters.go b/runsc/fsgofer/filter/extra_filters.go
index 5c5ec4e06..1056cd314 100644
--- a/runsc/fsgofer/filter/extra_filters.go
+++ b/runsc/fsgofer/filter/extra_filters.go
@@ -17,7 +17,7 @@
package filter
import (
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by
diff --git a/runsc/fsgofer/filter/extra_filters_msan.go b/runsc/fsgofer/filter/extra_filters_msan.go
index 553060bc3..8c6179c8f 100644
--- a/runsc/fsgofer/filter/extra_filters_msan.go
+++ b/runsc/fsgofer/filter/extra_filters_msan.go
@@ -19,8 +19,8 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by MSAN.
diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go
index 28555f898..885c92f7a 100644
--- a/runsc/fsgofer/filter/extra_filters_race.go
+++ b/runsc/fsgofer/filter/extra_filters_race.go
@@ -19,8 +19,8 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by TSAN.
diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go
index ff8154369..65053415f 100644
--- a/runsc/fsgofer/filter/filter.go
+++ b/runsc/fsgofer/filter/filter.go
@@ -18,7 +18,7 @@
package filter
import (
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// Install installs seccomp filters.
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 2cf50290a..8f50af780 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -32,11 +32,11 @@ import (
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index 695836927..68267df1b 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -22,8 +22,8 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
)
func init() {
diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go
index 58af5e44d..ff2556aee 100644
--- a/runsc/fsgofer/fsgofer_unsafe.go
+++ b/runsc/fsgofer/fsgofer_unsafe.go
@@ -18,8 +18,8 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserr"
)
func statAt(dirFd int, name string) (syscall.Stat_t, error) {
diff --git a/runsc/main.go b/runsc/main.go
index 11bc73f75..135061cd3 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -29,10 +29,10 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cmd"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/cmd"
+ "gvisor.dev/gvisor/runsc/specutils"
)
var (
@@ -48,11 +48,12 @@ var (
// system that are not covered by the runtime spec.
// Debugging flags.
- debugLog = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
- logPackets = flag.Bool("log-packets", false, "enable network packet logging")
- logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.")
- debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.")
- debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s")
+ debugLog = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
+ logPackets = flag.Bool("log-packets", false, "enable network packet logging")
+ logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.")
+ debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.")
+ debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s")
+ alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr")
// Debugging flags: strace related
strace = flag.Bool("strace", false, "enable strace")
@@ -60,22 +61,27 @@ var (
straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
// Flags that control sandbox runtime behavior.
- platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
- network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
- gso = flag.Bool("gso", true, "enable generic segmenation offload")
- fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
- overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
- watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
- panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
- profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
- netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
-
+ platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
+ network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
+ gso = flag.Bool("gso", true, "enable generic segmenation offload")
+ fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
+ overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
+ watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
+ panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
+ profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
+ netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
+ numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
+ rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
+
+ // Test flags, not to be used outside tests, ever.
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
)
func main() {
// Help and flags commands are generated automatically.
- subcommands.Register(subcommands.HelpCommand(), "")
+ help := cmd.NewHelp(subcommands.DefaultCommander)
+ help.Register(new(cmd.Syscalls))
+ subcommands.Register(help, "")
subcommands.Register(subcommands.FlagsCommand(), "")
// Register user-facing runsc commands.
@@ -117,6 +123,22 @@ func main() {
os.Exit(0)
}
+ var errorLogger io.Writer
+ if *logFD > -1 {
+ errorLogger = os.NewFile(uintptr(*logFD), "error log file")
+
+ } else if *logFilename != "" {
+ // We must set O_APPEND and not O_TRUNC because Docker passes
+ // the same log file for all commands (and also parses these
+ // log files), so we can't destroy them on each command.
+ var err error
+ errorLogger, err = os.OpenFile(*logFilename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
+ if err != nil {
+ cmd.Fatalf("error opening log file %q: %v", *logFilename, err)
+ }
+ }
+ cmd.ErrorLogger = errorLogger
+
platformType, err := boot.MakePlatformType(*platform)
if err != nil {
cmd.Fatalf("%v", err)
@@ -141,26 +163,33 @@ func main() {
cmd.Fatalf("%v", err)
}
+ if *numNetworkChannels <= 0 {
+ cmd.Fatalf("num_network_channels must be > 0, got: %d", *numNetworkChannels)
+ }
+
// Create a new Config from the flags.
conf := &boot.Config{
- RootDir: *rootDir,
- Debug: *debug,
- LogFilename: *logFilename,
- LogFormat: *logFormat,
- DebugLog: *debugLog,
- DebugLogFormat: *debugLogFormat,
- FileAccess: fsAccess,
- Overlay: *overlay,
- Network: netType,
- GSO: *gso,
- LogPackets: *logPackets,
- Platform: platformType,
- Strace: *strace,
- StraceLogSize: *straceLogSize,
- WatchdogAction: wa,
- PanicSignal: *panicSignal,
- ProfileEnable: *profile,
- EnableRaw: *netRaw,
+ RootDir: *rootDir,
+ Debug: *debug,
+ LogFilename: *logFilename,
+ LogFormat: *logFormat,
+ DebugLog: *debugLog,
+ DebugLogFormat: *debugLogFormat,
+ FileAccess: fsAccess,
+ Overlay: *overlay,
+ Network: netType,
+ GSO: *gso,
+ LogPackets: *logPackets,
+ Platform: platformType,
+ Strace: *strace,
+ StraceLogSize: *straceLogSize,
+ WatchdogAction: wa,
+ PanicSignal: *panicSignal,
+ ProfileEnable: *profile,
+ EnableRaw: *netRaw,
+ NumNetworkChannels: *numNetworkChannels,
+ Rootless: *rootless,
+
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
}
if len(*straceSyscalls) != 0 {
@@ -174,24 +203,7 @@ func main() {
subcommand := flag.CommandLine.Arg(0)
- var logFile io.Writer = os.Stderr
- if *logFD > -1 {
- logFile = os.NewFile(uintptr(*logFD), "log file")
- } else if *logFilename != "" {
- // We must set O_APPEND and not O_TRUNC because Docker passes
- // the same log file for all commands (and also parses these
- // log files), so we can't destroy them on each command.
- f, err := os.OpenFile(*logFilename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
- if err != nil {
- cmd.Fatalf("error opening log file %q: %v", *logFilename, err)
- }
- logFile = f
- } else if subcommand == "do" {
- logFile = ioutil.Discard
- }
-
- e := newEmitter(*logFormat, logFile)
-
+ var e log.Emitter
if *debugLogFD > -1 {
f := os.NewFile(uintptr(*debugLogFD), "debug log file")
@@ -201,28 +213,31 @@ func main() {
cmd.Fatalf("flag --debug-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand)
}
- // If we are the boot process, then we own our stdio FDs and
- // can do what we want with them. Since Docker and Containerd
- // both eat boot's stderr, we dup our stderr to the provided
- // log FD so that panics will appear in the logs, rather than
- // just disappear.
+ // If we are the boot process, then we own our stdio FDs and can do what we
+ // want with them. Since Docker and Containerd both eat boot's stderr, we
+ // dup our stderr to the provided log FD so that panics will appear in the
+ // logs, rather than just disappear.
if err := syscall.Dup2(int(f.Fd()), int(os.Stderr.Fd())); err != nil {
cmd.Fatalf("error dup'ing fd %d to stderr: %v", f.Fd(), err)
}
- if logFile == os.Stderr {
- // Suppress logging to stderr when debug log is enabled. Otherwise all
- // messages will be duplicated in the debug log (see Dup2() call above).
- e = newEmitter(*debugLogFormat, f)
- } else {
- e = log.MultiEmitter{e, newEmitter(*debugLogFormat, f)}
- }
+ e = newEmitter(*debugLogFormat, f)
+
} else if *debugLog != "" {
f, err := specutils.DebugLogFile(*debugLog, subcommand)
if err != nil {
cmd.Fatalf("error opening debug log file in %q: %v", *debugLog, err)
}
- e = log.MultiEmitter{e, newEmitter(*debugLogFormat, f)}
+ e = newEmitter(*debugLogFormat, f)
+
+ } else {
+ // Stderr is reserved for the application, just discard the logs if no debug
+ // log is specified.
+ e = newEmitter("text", ioutil.Discard)
+ }
+
+ if *alsoLogToStderr {
+ e = log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)}
}
log.SetTarget(e)
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index c0de9a28f..f32da45c1 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -9,7 +9,7 @@ go_library(
"network_unsafe.go",
"sandbox.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/sandbox",
+ importpath = "gvisor.dev/gvisor/runsc/sandbox",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 0460d5f1a..a965a9dcb 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -27,10 +27,10 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -68,7 +68,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
// Build the path to the net namespace of the sandbox process.
// This is what we will copy.
nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
- if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.GSO); err != nil {
+ if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.GSO, conf.NumNetworkChannels); err != nil {
return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err)
}
case boot.NetworkHost:
@@ -138,7 +138,7 @@ func isRootNS() (bool, error) {
// createInterfacesAndRoutesFromNS scrapes the interface and routes from the
// net namespace with the given path, creates them in the sandbox, and removes
// them from the host.
-func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO bool) error {
+func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO bool, numNetworkChannels int) error {
// Join the network namespace that we will be copying.
restore, err := joinNetNS(nsPath)
if err != nil {
@@ -202,25 +202,6 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
continue
}
- // Create the socket.
- const protocol = 0x0300 // htons(ETH_P_ALL)
- fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol)
- if err != nil {
- return fmt.Errorf("unable to create raw socket: %v", err)
- }
- deviceFile := os.NewFile(uintptr(fd), "raw-device-fd")
-
- // Bind to the appropriate device.
- ll := syscall.SockaddrLinklayer{
- Protocol: protocol,
- Ifindex: iface.Index,
- Hatype: 0, // No ARP type.
- Pkttype: syscall.PACKET_OTHERHOST,
- }
- if err := syscall.Bind(fd, &ll); err != nil {
- return fmt.Errorf("unable to bind to %q: %v", iface.Name, err)
- }
-
// Scrape the routes before removing the address, since that
// will remove the routes as well.
routes, def, err := routesForIface(iface)
@@ -236,9 +217,10 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
}
link := boot.FDBasedLink{
- Name: iface.Name,
- MTU: iface.MTU,
- Routes: routes,
+ Name: iface.Name,
+ MTU: iface.MTU,
+ Routes: routes,
+ NumChannels: numNetworkChannels,
}
// Get the link for the interface.
@@ -246,32 +228,25 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
if err != nil {
return fmt.Errorf("getting link for interface %q: %v", iface.Name, err)
}
- link.LinkAddress = []byte(ifaceLink.Attrs().HardwareAddr)
+ link.LinkAddress = ifaceLink.Attrs().HardwareAddr
- if enableGSO {
- gso, err := isGSOEnabled(fd, iface.Name)
+ log.Debugf("Setting up network channels")
+ // Create the socket for the device.
+ for i := 0; i < link.NumChannels; i++ {
+ log.Debugf("Creating Channel %d", i)
+ socketEntry, err := createSocket(iface, ifaceLink, enableGSO)
if err != nil {
- return fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err)
+ return fmt.Errorf("failed to createSocket for %s : %v", iface.Name, err)
}
- if gso {
- if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil {
- return fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err)
- }
- link.GSOMaxSize = ifaceLink.Attrs().GSOMaxSize
+ if i == 0 {
+ link.GSOMaxSize = socketEntry.gsoMaxSize
} else {
- log.Infof("GSO not available in host.")
+ if link.GSOMaxSize != socketEntry.gsoMaxSize {
+ return fmt.Errorf("inconsistent gsoMaxSize %d and %d when creating multiple channels for same interface: %s",
+ link.GSOMaxSize, socketEntry.gsoMaxSize, iface.Name)
+ }
}
- }
-
- // Use SO_RCVBUFFORCE because on linux the receive buffer for an
- // AF_PACKET socket is capped by "net.core.rmem_max". rmem_max
- // defaults to a unusually low value of 208KB. This is too low
- // for gVisor to be able to receive packets at high throughputs
- // without incurring packet drops.
- const rcvBufSize = 4 << 20 // 4MB.
-
- if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, rcvBufSize); err != nil {
- return fmt.Errorf("failed to increase socket rcv buffer to %d: %v", rcvBufSize, err)
+ args.FilePayload.Files = append(args.FilePayload.Files, socketEntry.deviceFile)
}
// Collect the addresses for the interface, enable forwarding,
@@ -285,7 +260,6 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
}
}
- args.FilePayload.Files = append(args.FilePayload.Files, deviceFile)
args.FDBasedLinks = append(args.FDBasedLinks, link)
}
@@ -296,6 +270,61 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
return nil
}
+// socketEntry pairs the AF_PACKET socket created for one network channel with
+// the GSO maximum size that applies to it.
+type socketEntry struct {
+	// deviceFile wraps the raw socket FD bound to the interface.
+	deviceFile *os.File
+	// gsoMaxSize is the link's GSO max size, or 0 when GSO was not enabled on
+	// the socket.
+	gsoMaxSize uint32
+}
+
+// createSocket creates an underlying AF_PACKET socket bound to iface,
+// configures it for use by the sentry and returns a socketEntry that wraps the
+// underlying socket fd.
+//
+// When enableGSO is true and the host reports GSO for the interface,
+// PACKET_VNET_HDR is enabled on the socket and gsoMaxSize is taken from
+// ifaceLink; otherwise gsoMaxSize is 0. If any step after the socket has been
+// created fails, the socket is closed before the error is returned.
+func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) (*socketEntry, error) {
+	// Create the socket.
+	const protocol = 0x0300 // htons(ETH_P_ALL)
+	fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create raw socket: %v", err)
+	}
+	deviceFile := os.NewFile(uintptr(fd), "raw-device-fd")
+
+	// Ownership of the socket passes to the caller only on success. Close it
+	// on every error path below instead of leaving it to the finalizer.
+	success := false
+	defer func() {
+		if !success {
+			deviceFile.Close()
+		}
+	}()
+
+	// Bind to the appropriate device.
+	ll := syscall.SockaddrLinklayer{
+		Protocol: protocol,
+		Ifindex:  iface.Index,
+		Hatype:   0, // No ARP type.
+		Pkttype:  syscall.PACKET_OTHERHOST,
+	}
+	if err := syscall.Bind(fd, &ll); err != nil {
+		return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err)
+	}
+
+	gsoMaxSize := uint32(0)
+	if enableGSO {
+		gso, err := isGSOEnabled(fd, iface.Name)
+		if err != nil {
+			return nil, fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err)
+		}
+		if gso {
+			if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil {
+				return nil, fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err)
+			}
+			gsoMaxSize = ifaceLink.Attrs().GSOMaxSize
+		} else {
+			log.Infof("GSO not available in host.")
+		}
+	}
+
+	// Use SO_RCVBUFFORCE because on linux the receive buffer for an
+	// AF_PACKET socket is capped by "net.core.rmem_max". rmem_max
+	// defaults to an unusually low value of 208KB. This is too low
+	// for gVisor to be able to receive packets at high throughputs
+	// without incurring packet drops.
+	const rcvBufSize = 4 << 20 // 4MB.
+
+	if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, rcvBufSize); err != nil {
+		return nil, fmt.Errorf("failed to increase socket rcv buffer to %d: %v", rcvBufSize, err)
+	}
+
+	success = true
+	return &socketEntry{deviceFile: deviceFile, gsoMaxSize: gsoMaxSize}, nil
+}
+
// loopbackLinks collects the links for a loopback interface.
func loopbackLinks(iface net.Interface, addrs []net.Addr) ([]boot.LoopbackLink, error) {
var links []boot.LoopbackLink
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 032190636..a19b1d124 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -28,16 +28,16 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/control/client"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/console"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/control/client"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/platform/kvm"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/console"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Sandbox wraps a sandbox process.
@@ -515,46 +515,64 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
+ cmd.Args = append(cmd.Args, "--setup-root")
- // Map nobody in the new namespace to nobody in the parent namespace.
- //
- // A sandbox process will construct an empty
- // root for itself, so it has to have the CAP_SYS_ADMIN
- // capability.
- //
- // FIXME(b/122554829): The current implementations of
- // os/exec doesn't allow to set ambient capabilities if
- // a process is started in a new user namespace. As a
- // workaround, we start the sandbox process with the 0
- // UID and then it constructs a chroot and sets UID to
- // nobody. https://github.com/golang/go/issues/2315
- const nobody = 65534
- cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(0),
- HostID: int(nobody - 1),
- Size: int(1),
- },
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
- }
- cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
+ if conf.Rootless {
+ log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getuid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getgid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
+
+ } else {
+ // Map nobody in the new namespace to nobody in the parent namespace.
+ //
+ // A sandbox process will construct an empty
+ // root for itself, so it has to have the CAP_SYS_ADMIN
+ // capability.
+ //
+ // FIXME(b/122554829): The current implementations of
+ // os/exec doesn't allow to set ambient capabilities if
+ // a process is started in a new user namespace. As a
+ // workaround, we start the sandbox process with the 0
+ // UID and then it constructs a chroot and sets UID to
+ // nobody. https://github.com/golang/go/issues/2315
+ const nobody = 65534
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: nobody - 1,
+ Size: 1,
+ },
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+
+ // Set credentials to run as user and group nobody.
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
}
- // Set credentials to run as user and group nobody.
- cmd.SysProcAttr.Credential = &syscall.Credential{
- Uid: 0,
- Gid: nobody,
- }
- cmd.Args = append(cmd.Args, "--setup-root")
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD
index 15476de6f..fbfb8e2f8 100644
--- a/runsc/specutils/BUILD
+++ b/runsc/specutils/BUILD
@@ -9,11 +9,8 @@ go_library(
"namespace.go",
"specutils.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/specutils",
- visibility = [
- "//runsc:__subpackages__",
- "//test:__subpackages__",
- ],
+ importpath = "gvisor.dev/gvisor/runsc/specutils",
+ visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/log",
diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go
index 1f3afb4e4..6e6902e9f 100644
--- a/runsc/specutils/fs.go
+++ b/runsc/specutils/fs.go
@@ -16,6 +16,7 @@ package specutils
import (
"fmt"
+ "math/bits"
"path"
"syscall"
@@ -105,22 +106,30 @@ func optionsToFlags(opts []string, source map[string]mapping) uint32 {
return rv
}
-// ValidateMount validates that spec mounts are correct.
+// validateMount validates that spec mounts are correct.
func validateMount(mnt *specs.Mount) error {
if !path.IsAbs(mnt.Destination) {
return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt)
}
-
if mnt.Type == "bind" {
- for _, o := range mnt.Options {
- if ContainsStr(invalidOptions, o) {
- return fmt.Errorf("mount option %q is not supported: %v", o, mnt)
- }
- _, ok1 := optionsMap[o]
- _, ok2 := propOptionsMap[o]
- if !ok1 && !ok2 {
- return fmt.Errorf("unknown mount option %q", o)
- }
+ return ValidateMountOptions(mnt.Options)
+ }
+ return nil
+}
+
+// ValidateMountOptions validates that mount options are correct.
+func ValidateMountOptions(opts []string) error {
+ for _, o := range opts {
+ if ContainsStr(invalidOptions, o) {
+ return fmt.Errorf("mount option %q is not supported", o)
+ }
+ _, ok1 := optionsMap[o]
+ _, ok2 := propOptionsMap[o]
+ if !ok1 && !ok2 {
+ return fmt.Errorf("unknown mount option %q", o)
+ }
+ if err := validatePropagation(o); err != nil {
+ return err
}
}
return nil
@@ -133,5 +142,14 @@ func validateRootfsPropagation(opt string) error {
if flags&(syscall.MS_SLAVE|syscall.MS_PRIVATE) == 0 {
return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt)
}
+ return validatePropagation(opt)
+}
+
+func validatePropagation(opt string) error {
+ flags := PropOptionsToFlags([]string{opt})
+ exclusive := flags & (syscall.MS_SLAVE | syscall.MS_PRIVATE | syscall.MS_SHARED | syscall.MS_UNBINDABLE)
+ if bits.OnesCount32(exclusive) > 1 {
+ return fmt.Errorf("mount propagation options are mutually exclusive: %q", opt)
+ }
return nil
}
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index 7d194335c..c9ef606cb 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -25,7 +25,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// nsCloneFlag returns the clone flag that can be used to set a namespace of
@@ -220,3 +220,55 @@ func HasCapabilities(cs ...capability.Cap) bool {
}
return true
}
+
+// MaybeRunAsRoot makes sure the process holds the capabilities needed to
+// create a sandbox: CAP_SYS_ADMIN, CAP_SYS_CHROOT, CAP_SETUID and CAP_SETGID.
+//
+// When all of them are already held it returns nil immediately. Otherwise it
+// re-executes /proc/self/exe, with the same arguments and environment, as root
+// inside new user and mount namespaces. In that case the function does not
+// return on completion of the child: the current process exits with the
+// child's exit status. An error is returned only when running the child failed
+// with something other than an exit error.
+func MaybeRunAsRoot() error {
+	if HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT, capability.CAP_SETUID, capability.CAP_SETGID) {
+		return nil
+	}
+
+	// Required capabilities are missing. They are acquired by becoming root
+	// inside a freshly created user namespace.
+	log.Infof("*** Re-running as root in new user namespace ***")
+
+	// Root inside the namespace is mapped to the current user/group. Since we
+	// may not have CAP_SETUID/CAP_SETGID, only that single ID is mapped.
+	uidMap := []syscall.SysProcIDMap{{ContainerID: 0, HostID: os.Getuid(), Size: 1}}
+	gidMap := []syscall.SysProcIDMap{{ContainerID: 0, HostID: os.Getgid(), Size: 1}}
+
+	cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
+	cmd.SysProcAttr = &syscall.SysProcAttr{
+		Cloneflags:                 syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
+		UidMappings:                uidMap,
+		GidMappings:                gidMap,
+		Credential:                 &syscall.Credential{Uid: 0, Gid: 0},
+		GidMappingsEnableSetgroups: false,
+	}
+	cmd.Env = os.Environ()
+	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
+
+	runErr := cmd.Run()
+	if runErr == nil {
+		// Child completed with success.
+		os.Exit(0)
+	}
+
+	exitErr, isExit := runErr.(*exec.ExitError)
+	if !isExit {
+		return fmt.Errorf("re-executing self: %v", runErr)
+	}
+	// Propagate the child's exit status when the platform provides one.
+	if ws, hasStatus := exitErr.Sys().(syscall.WaitStatus); hasStatus {
+		os.Exit(ws.ExitStatus())
+	}
+	log.Warningf("No wait status provided, exiting with -1: %v", runErr)
+	os.Exit(-1)
+	panic("unreachable")
+}
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 2888f55db..215828120 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -29,9 +29,9 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
// ExePath must point to runsc binary, which is normally the same binary. It's
diff --git a/runsc/test/image/BUILD b/runsc/test/image/BUILD
index e8b629c6a..f3ceccb69 100644
--- a/runsc/test/image/BUILD
+++ b/runsc/test/image/BUILD
@@ -26,5 +26,5 @@ go_test(
go_library(
name = "image",
srcs = ["image.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/image",
+ importpath = "gvisor.dev/gvisor/runsc/test/image",
)
diff --git a/runsc/test/image/image_test.go b/runsc/test/image/image_test.go
index b969731b0..14cbd30c4 100644
--- a/runsc/test/image/image_test.go
+++ b/runsc/test/image/image_test.go
@@ -32,7 +32,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func TestHelloWorld(t *testing.T) {
diff --git a/runsc/test/integration/BUILD b/runsc/test/integration/BUILD
index 0c4e4fa80..45cfd98ba 100644
--- a/runsc/test/integration/BUILD
+++ b/runsc/test/integration/BUILD
@@ -8,6 +8,7 @@ go_test(
srcs = [
"exec_test.go",
"integration_test.go",
+ "regression_test.go",
],
embed = [":integration"],
tags = [
@@ -24,5 +25,5 @@ go_test(
go_library(
name = "integration",
srcs = ["integration.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/integration",
+ importpath = "gvisor.dev/gvisor/runsc/test/integration",
)
diff --git a/runsc/test/integration/exec_test.go b/runsc/test/integration/exec_test.go
index 7af064d79..993136f96 100644
--- a/runsc/test/integration/exec_test.go
+++ b/runsc/test/integration/exec_test.go
@@ -29,12 +29,13 @@ package integration
import (
"fmt"
"strconv"
+ "strings"
"syscall"
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func TestExecCapabilities(t *testing.T) {
@@ -136,3 +137,25 @@ func TestExecJobControl(t *testing.T) {
t.Errorf("ws.ExitedStatus got %d, want %d", got, want)
}
}
+
+// Test that failure to exec returns proper error message.
+func TestExecError(t *testing.T) {
+ if err := testutil.Pull("alpine"); err != nil {
+ t.Fatalf("docker pull failed: %v", err)
+ }
+ d := testutil.MakeDocker("exec-error-test")
+
+ // Start the container.
+ if err := d.Run("alpine", "sleep", "1000"); err != nil {
+ t.Fatalf("docker run failed: %v", err)
+ }
+ defer d.CleanUp()
+
+ _, err := d.Exec("no_can_find")
+ if err == nil {
+ t.Fatalf("docker exec didn't fail")
+ }
+ if want := `error finding executable "no_can_find" in PATH`; !strings.Contains(err.Error(), want) {
+ t.Fatalf("docker exec wrong error, got: %s, want: .*%s.*", err.Error(), want)
+ }
+}
diff --git a/runsc/test/integration/integration_test.go b/runsc/test/integration/integration_test.go
index c51cab3ae..55ebc2f5d 100644
--- a/runsc/test/integration/integration_test.go
+++ b/runsc/test/integration/integration_test.go
@@ -32,7 +32,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// httpRequestSucceeds sends a request to a given url and checks that the status is OK.
diff --git a/runsc/test/integration/regression_test.go b/runsc/test/integration/regression_test.go
new file mode 100644
index 000000000..39b30e757
--- /dev/null
+++ b/runsc/test/integration/regression_test.go
@@ -0,0 +1,45 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package integration
+
+import (
+ "strings"
+ "testing"
+
+ "gvisor.dev/gvisor/runsc/test/testutil"
+)
+
+// Test that UDS can be created using overlay when parent directory is in lower
+// layer only (b/134090485).
+//
+// Prerequisite: the directory where the socket file is created must not have
+// been open for write before bind(2) is called.
+func TestBindOverlay(t *testing.T) {
+	if err := testutil.Pull("ubuntu:trusty"); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := testutil.MakeDocker("bind-overlay-test")
+	// Register the cleanup before running anything: a defer placed after the
+	// t.Fatal calls below would never be registered when the test fails, and
+	// the container would be left behind.
+	defer d.CleanUp()
+
+	// Start a listener on a Unix domain socket in the background, then send
+	// one line through it; the listener prints what it receives.
+	cmd := "nc -l -U /var/run/sock& sleep 1 && echo foobar-asdf | nc -U /var/run/sock"
+	got, err := d.RunFg("ubuntu:trusty", "bash", "-c", cmd)
+	if err != nil {
+		t.Fatal("docker run failed:", err)
+	}
+
+	if want := "foobar-asdf"; !strings.Contains(got, want) {
+		t.Fatalf("docker run output is missing %q: %s", want, got)
+	}
+}
diff --git a/runsc/test/root/BUILD b/runsc/test/root/BUILD
index 7ded78baa..500ef7b8e 100644
--- a/runsc/test/root/BUILD
+++ b/runsc/test/root/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "root",
srcs = ["root.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/root",
+ importpath = "gvisor.dev/gvisor/runsc/test/root",
)
go_test(
diff --git a/runsc/test/root/cgroup_test.go b/runsc/test/root/cgroup_test.go
index edb6dee1d..5392dc6e0 100644
--- a/runsc/test/root/cgroup_test.go
+++ b/runsc/test/root/cgroup_test.go
@@ -25,8 +25,8 @@ import (
"strings"
"testing"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func verifyPid(pid int, path string) error {
diff --git a/runsc/test/root/chroot_test.go b/runsc/test/root/chroot_test.go
index da2f473b9..d0f236580 100644
--- a/runsc/test/root/chroot_test.go
+++ b/runsc/test/root/chroot_test.go
@@ -31,8 +31,8 @@ import (
"testing"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// TestChroot verifies that the sandbox is chroot'd and that mounts are cleaned
diff --git a/runsc/test/root/crictl_test.go b/runsc/test/root/crictl_test.go
index 3cc176104..515ae2df1 100644
--- a/runsc/test/root/crictl_test.go
+++ b/runsc/test/root/crictl_test.go
@@ -29,9 +29,9 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/root/testdata"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/root/testdata"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// Tests for crictl have to be run as root (rather than in a user namespace)
diff --git a/runsc/test/root/testdata/BUILD b/runsc/test/root/testdata/BUILD
index 7f272dcd3..80dc5f214 100644
--- a/runsc/test/root/testdata/BUILD
+++ b/runsc/test/root/testdata/BUILD
@@ -11,7 +11,7 @@ go_library(
"httpd_mount_paths.go",
"sandbox.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/root/testdata",
+ importpath = "gvisor.dev/gvisor/runsc/test/root/testdata",
visibility = [
"//visibility:public",
],
diff --git a/runsc/test/testutil/BUILD b/runsc/test/testutil/BUILD
index ddec81444..327e7ca4d 100644
--- a/runsc/test/testutil/BUILD
+++ b/runsc/test/testutil/BUILD
@@ -10,7 +10,7 @@ go_library(
"testutil.go",
"testutil_race.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/testutil",
+ importpath = "gvisor.dev/gvisor/runsc/test/testutil",
visibility = ["//:sandbox"],
deps = [
"//runsc/boot",
@@ -18,6 +18,5 @@ go_library(
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_kr_pty//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
- "@com_github_syndtr_gocapability//capability:go_default_library",
],
)
diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go
index 9efb1ba8e..ecab6871d 100644
--- a/runsc/test/testutil/testutil.go
+++ b/runsc/test/testutil/testutil.go
@@ -30,7 +30,6 @@ import (
"os/exec"
"os/signal"
"path/filepath"
- "runtime"
"strings"
"sync"
"sync/atomic"
@@ -39,9 +38,8 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
)
func init() {
@@ -136,6 +134,7 @@ func TestConfig() *boot.Config {
Strace: true,
FileAccess: boot.FileAccessExclusive,
TestOnlyAllowRunAsCurrentUserWithoutChroot: true,
+ NumNetworkChannels: 1,
}
}
@@ -283,54 +282,6 @@ func WaitForHTTP(port int, timeout time.Duration) error {
return Poll(cb, timeout)
}
-// RunAsRoot ensures the test runs with CAP_SYS_ADMIN and CAP_SYS_CHROOT. If
-// needed it will create a new user namespace and re-execute the test as root
-// inside of the namespace. This function returns when it's running as root. If
-// it needs to create another process, it will exit from there and not return.
-func RunAsRoot() {
- if specutils.HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT) {
- return
- }
-
- fmt.Println("*** Re-running test as root in new user namespace ***")
-
- // Current process doesn't have CAP_SYS_ADMIN, create user namespace and run
- // as root inside that namespace to get it.
- runtime.LockOSThread()
- defer runtime.UnlockOSThread()
-
- cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
- cmd.SysProcAttr = &syscall.SysProcAttr{
- Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
- // Set current user/group as root inside the namespace.
- UidMappings: []syscall.SysProcIDMap{
- {ContainerID: 0, HostID: os.Getuid(), Size: 1},
- },
- GidMappings: []syscall.SysProcIDMap{
- {ContainerID: 0, HostID: os.Getgid(), Size: 1},
- },
- GidMappingsEnableSetgroups: false,
- Credential: &syscall.Credential{
- Uid: 0,
- Gid: 0,
- },
- }
- cmd.Env = os.Environ()
- cmd.Stdin = os.Stdin
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- if err := cmd.Run(); err != nil {
- if exit, ok := err.(*exec.ExitError); ok {
- if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
- os.Exit(ws.ExitStatus())
- }
- os.Exit(-1)
- }
- panic(fmt.Sprint("error running child process:", err.Error()))
- }
- os.Exit(0)
-}
-
// Reaper reaps child processes.
type Reaper struct {
// mu protects ch, which will be nil if the reaper is not running.