Diffstat (limited to 'runsc')
101 files changed, 2634 insertions, 788 deletions
diff --git a/runsc/BUILD b/runsc/BUILD index 3d6c92e4c..6b8c92706 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -16,6 +16,7 @@ go_binary( x_defs = {"main.version": "{VERSION}"}, deps = [ "//pkg/log", + "//pkg/sentry/platform", "//runsc/boot", "//runsc/cmd", "//runsc/specutils", @@ -47,6 +48,7 @@ go_binary( x_defs = {"main.version": "{VERSION}"}, deps = [ "//pkg/log", + "//pkg/sentry/platform", "//runsc/boot", "//runsc/cmd", "//runsc/specutils", @@ -82,7 +84,7 @@ pkg_tar( genrule( name = "deb-version", outs = ["version.txt"], - cmd = "$(location :runsc) -version | head -n 1 | sed 's/^[^0-9]*//' > $@", + cmd = "$(location :runsc) -version | grep 'runsc version' | sed 's/^[^0-9]*//' > $@", stamp = 1, tools = [":runsc"], ) @@ -96,6 +98,11 @@ pkg_deb( maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>", package = "runsc", postinst = "debian/postinst.sh", + tags = [ + # TODO(b/135475885): pkg_deb requires python2: + # https://github.com/bazelbuild/bazel/issues/8443 + "manual", + ], version_file = ":version.txt", visibility = [ "//visibility:public", diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index ac28c4339..5025401dd 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -16,9 +16,11 @@ go_library( "limits.go", "loader.go", "network.go", + "pprof.go", "strace.go", + "user.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/boot", + importpath = "gvisor.dev/gvisor/runsc/boot", visibility = [ "//runsc:__subpackages__", "//test:__subpackages__", @@ -32,6 +34,7 @@ go_library( "//pkg/log", "//pkg/memutil", "//pkg/rand", + "//pkg/refs", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/context", @@ -49,13 +52,10 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel:uncaught_signal_go_proto", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/limits", "//pkg/sentry/loader", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm", - "//pkg/sentry/platform/ptrace", "//pkg/sentry/sighandling", "//pkg/sentry/socket/epsocket", "//pkg/sentry/socket/hostinet", @@ -68,6 +68,7 @@ go_library( "//pkg/sentry/time", "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", "//pkg/sentry/usage", + "//pkg/sentry/usermem", "//pkg/sentry/watchdog", "//pkg/syserror", "//pkg/tcpip", @@ -83,6 +84,7 @@ go_library( "//pkg/tcpip/transport/udp", "//pkg/urpc", "//runsc/boot/filter", + "//runsc/boot/platforms", "//runsc/specutils", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", @@ -94,7 +96,9 @@ go_test( size = "small", srcs = [ "compat_test.go", + "fs_test.go", "loader_test.go", + "user_test.go", ], embed = [":boot"], deps = [ diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index c369e4d64..07e35ab10 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -21,14 +21,14 @@ import ( "syscall" "github.com/golang/protobuf/proto" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" - ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/strace" - spb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/log" + 
"gvisor.dev/gvisor/pkg/sentry/arch" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" + "gvisor.dev/gvisor/pkg/sentry/strace" + spb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" ) func initCompatLogs(fd int) error { diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go index 99df5e614..43cd0db94 100644 --- a/runsc/boot/compat_amd64.go +++ b/runsc/boot/compat_amd64.go @@ -17,7 +17,7 @@ package boot import ( "fmt" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" ) // reportLimit is the max number of events that should be reported per tracker. diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go index ccec3d20c..388298d8d 100644 --- a/runsc/boot/compat_test.go +++ b/runsc/boot/compat_test.go @@ -17,7 +17,7 @@ package boot import ( "testing" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" ) func TestOnceTracker(t *testing.T) { diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 8564c502d..6f1eb9a41 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -19,43 +19,9 @@ import ( "strconv" "strings" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/sentry/watchdog" ) -// PlatformType tells which platform to use. -type PlatformType int - -const ( - // PlatformPtrace runs the sandbox with the ptrace platform. - PlatformPtrace PlatformType = iota - - // PlatformKVM runs the sandbox with the KVM platform. - PlatformKVM -) - -// MakePlatformType converts type from string. -func MakePlatformType(s string) (PlatformType, error) { - switch s { - case "ptrace": - return PlatformPtrace, nil - case "kvm": - return PlatformKVM, nil - default: - return 0, fmt.Errorf("invalid platform type %q", s) - } -} - -func (p PlatformType) String() string { - switch p { - case PlatformPtrace: - return "ptrace" - case PlatformKVM: - return "kvm" - default: - return fmt.Sprintf("unknown(%d)", p) - } -} - // FileAccessType tells how the filesystem is accessed. type FileAccessType int @@ -187,7 +153,7 @@ type Config struct { LogPackets bool // Platform is the platform to run on. - Platform PlatformType + Platform string // Strace indicates that strace should be enabled. Strace bool @@ -226,6 +192,12 @@ type Config struct { // to the same underlying network device. This allows netstack to better // scale for high throughput use cases. NumNetworkChannels int + + // Rootless allows the sandbox to be started with a user that is not root. + // Defense is depth measures are weaker with rootless. Specifically, the + // sandbox and Gofer process run as root inside a user namespace with root + // mapped to the caller's user. + Rootless bool } // ToFlags returns a slice of flags that correspond to the given Config. 
@@ -241,7 +213,7 @@ func (c *Config) ToFlags() []string { "--overlay=" + strconv.FormatBool(c.Overlay), "--network=" + c.Network.String(), "--log-packets=" + strconv.FormatBool(c.LogPackets), - "--platform=" + c.Platform.String(), + "--platform=" + c.Platform, "--strace=" + strconv.FormatBool(c.Strace), "--strace-syscalls=" + strings.Join(c.StraceSyscalls, ","), "--strace-log-size=" + strconv.Itoa(int(c.StraceLogSize)), @@ -250,6 +222,7 @@ func (c *Config) ToFlags() []string { "--profile=" + strconv.FormatBool(c.ProfileEnable), "--net-raw=" + strconv.FormatBool(c.EnableRaw), "--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels), + "--rootless=" + strconv.FormatBool(c.Rootless), } if c.TestOnlyAllowRunAsCurrentUserWithoutChroot { // Only include if set since it is never to be used by users. diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index a277145b1..d79aaff60 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -22,17 +22,17 @@ import ( "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/control/server" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" - "gvisor.googlesource.com/gvisor/pkg/sentry/state" - "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/epsocket" + "gvisor.dev/gvisor/pkg/sentry/state" + "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/urpc" ) const ( @@ -96,8 +96,10 @@ const ( // SandboxStacks collects sandbox stacks for debugging. SandboxStacks = "debug.Stacks" +) - // Profiling related commands (see pprof.go for more details). +// Profiling related commands (see pprof.go for more details). +const ( StartCPUProfile = "Profile.StartCPUProfile" StopCPUProfile = "Profile.StopCPUProfile" HeapProfile = "Profile.HeapProfile" @@ -105,6 +107,11 @@ const ( StopTrace = "Profile.StopTrace" ) +// Logging related commands (see logging.go for more details). +const ( + ChangeLogging = "Logging.Change" +) + // ControlSocketAddr generates an abstract unix socket name for the given ID. func ControlSocketAddr(id string) string { return fmt.Sprintf("\x00runsc-sandbox.%s", id) @@ -143,6 +150,7 @@ func newController(fd int, l *Loader) (*controller, error) { } srv.Register(&debug{}) + srv.Register(&control.Logging{}) if l.conf.ProfileEnable { srv.Register(&control.Profile{}) } @@ -340,7 +348,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { cm.l.k = k // Set up the restore environment. 
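A note on the config.go changes above: Platform is now a plain string that is resolved against the platform registry at runtime (the PlatformType enum and MakePlatformType are gone), and the new Rootless flag is passed straight through by ToFlags. A minimal, hypothetical sketch of the resulting flags; only these two fields are shown, and a real Config carries many more:

        conf := &Config{
                Platform: "kvm", // previously PlatformKVM; now any registered platform name
                Rootless: true,
        }
        // conf.ToFlags() would now include, among the other flags:
        //   --platform=kvm
        //   --rootless=true
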
- mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k) + mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k, cm.l.mountHints) renv, err := mntr.createRestoreEnvironment(cm.l.conf) if err != nil { return fmt.Errorf("creating RestoreEnvironment: %v", err) @@ -359,6 +367,17 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { return fmt.Errorf("file cannot be empty") } + if cm.l.conf.ProfileEnable { + // initializePProf opens /proc/self/maps, so has to be + // called before installing seccomp filters. + initializePProf() + } + + // Seccomp filters have to be applied before parsing the state file. + if err := cm.l.installSeccompFilters(); err != nil { + return err + } + // Load the state. loadOpts := state.LoadOpts{Source: specFile} if err := loadOpts.Load(k, networkStack); err != nil { diff --git a/runsc/boot/debug.go b/runsc/boot/debug.go index 79f7387ac..1fb32c527 100644 --- a/runsc/boot/debug.go +++ b/runsc/boot/debug.go @@ -15,7 +15,7 @@ package boot import ( - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) type debug struct { diff --git a/runsc/boot/events.go b/runsc/boot/events.go index ffd99f5e9..422f4da00 100644 --- a/runsc/boot/events.go +++ b/runsc/boot/events.go @@ -15,8 +15,8 @@ package boot import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usage" ) // Event struct for encoding the event data to JSON. Corresponds to runc's diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go index 0811e10f4..e5de1f3d7 100644 --- a/runsc/boot/fds.go +++ b/runsc/boot/fds.go @@ -17,36 +17,27 @@ package boot import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) -// createFDMap creates an FD map that contains stdin, stdout, and stderr. If -// console is true, then ioctl calls will be passed through to the host FD. +// createFDTable creates an FD table that contains stdin, stdout, and stderr. +// If console is true, then ioctl calls will be passed through to the host FD. // Upon success, createFDMap dups then closes stdioFDs. -func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) { +func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) { if len(stdioFDs) != 3 { return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) } k := kernel.KernelFromContext(ctx) - fdm := k.NewFDMap() - defer fdm.DecRef() + fdTable := k.NewFDTable() + defer fdTable.DecRef() mounter := fs.FileOwnerFromContext(ctx) - // Maps sandbox FD to host FD. - fdMap := map[int]int{ - 0: stdioFDs[0], - 1: stdioFDs[1], - 2: stdioFDs[2], - } - var ttyFile *fs.File - for appFD, hostFD := range fdMap { + for appFD, hostFD := range stdioFDs { var appFile *fs.File if console && appFD < 3 { @@ -80,11 +71,11 @@ func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs } // Add the file to the FD map. 
- if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil { + if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { return nil, err } } - fdm.IncRef() - return fdm, nil + fdTable.IncRef() + return fdTable, nil } diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index 3b6020cf3..f5509b6b7 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -11,7 +11,7 @@ go_library( "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/boot/filter", + importpath = "gvisor.dev/gvisor/runsc/boot/filter", visibility = [ "//runsc/boot:__subpackages__", ], @@ -20,8 +20,6 @@ go_library( "//pkg/log", "//pkg/seccomp", "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm", - "//pkg/sentry/platform/ptrace", "//pkg/tcpip/link/fdbased", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index ef2dbfad2..0ee5b8bbd 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -19,9 +19,9 @@ import ( "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/seccomp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" ) // allowedSyscalls is the set of syscalls executed by the Sentry to the host OS. @@ -437,29 +437,6 @@ func hostInetFilters() seccomp.SyscallRules { } } -// ptraceFilters returns syscalls made exclusively by the ptrace platform. -func ptraceFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - unix.SYS_GETCPU: {}, - unix.SYS_SCHED_SETAFFINITY: {}, - syscall.SYS_PTRACE: {}, - syscall.SYS_TGKILL: {}, - syscall.SYS_WAIT4: {}, - } -} - -// kvmFilters returns syscalls made exclusively by the KVM platform. -func kvmFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - syscall.SYS_ARCH_PRCTL: {}, - syscall.SYS_IOCTL: {}, - syscall.SYS_MMAP: {}, - syscall.SYS_RT_SIGSUSPEND: {}, - syscall.SYS_RT_SIGTIMEDWAIT: {}, - 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host. - } -} - func controlServerFilters(fd int) seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_ACCEPT: []seccomp.Rule{ diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go index 5c5ec4e06..e28d4b8d6 100644 --- a/runsc/boot/filter/extra_filters.go +++ b/runsc/boot/filter/extra_filters.go @@ -17,11 +17,11 @@ package filter import ( - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by -// Go intrumentation tools, e.g. -race, -msan. +// Go instrumentation tools, e.g. -race, -msan. // Returns empty when disabled. func instrumentationFilters() seccomp.SyscallRules { return nil diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go index ac5a0f1aa..5e5a3c998 100644 --- a/runsc/boot/filter/extra_filters_msan.go +++ b/runsc/boot/filter/extra_filters_msan.go @@ -19,7 +19,7 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by MSAN. 
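The fds.go rewrite above swaps kernel.FDMap for kernel.FDTable: createFDTable drops the LimitSet parameter and ranges over stdioFDs directly, and NewFDAt now takes a context and an int32 descriptor. On the caller side (see loader.go further down), the shape becomes the following hedged sketch, using only calls that appear elsewhere in this change; error handling and process creation are omitted:

        fdTable, err := createFDTable(ctx, console, stdioFDs) // was createFDMap(ctx, limits, console, stdioFDs)
        if err != nil {
                return fmt.Errorf("importing fds: %v", err)
        }
        procArgs.FDTable = fdTable // kernel.CreateProcess takes its own reference on success.
        if console {
                ttyFile, _ := fdTable.Get(0) // replaces GetFile(0); the second value is the FD's flags.
                defer ttyFile.DecRef()
        }
        procArgs.FDTable.DecRef() // drop the reference returned by createFDTable.
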
diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go index ba3c1ce87..9ff80276a 100644 --- a/runsc/boot/filter/extra_filters_race.go +++ b/runsc/boot/filter/extra_filters_race.go @@ -19,7 +19,7 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by TSAN. @@ -33,6 +33,7 @@ func instrumentationFilters() seccomp.SyscallRules { syscall.SYS_MUNLOCK: {}, syscall.SYS_NANOSLEEP: {}, syscall.SYS_OPEN: {}, + syscall.SYS_OPENAT: {}, syscall.SYS_SET_ROBUST_LIST: {}, // Used within glibc's malloc. syscall.SYS_TIME: {}, diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index 17479e0dd..e80c171b3 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -18,13 +18,9 @@ package filter import ( - "fmt" - - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/seccomp" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/sentry/platform" ) // Options are seccomp filter related options. @@ -53,14 +49,7 @@ func Install(opt Options) error { s.Merge(profileFilters()) } - switch p := opt.Platform.(type) { - case *ptrace.PTrace: - s.Merge(ptraceFilters()) - case *kvm.KVM: - s.Merge(kvmFilters()) - default: - return fmt.Errorf("unknown platform type %T", p) - } + s.Merge(opt.Platform.SyscallFilters()) return seccomp.Install(s) } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 939f2419c..d3e3196fd 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -18,29 +18,30 @@ import ( "fmt" "path" "path/filepath" + "sort" "strconv" "strings" "syscall" // Include filesystem types that OCI spec might mount. 
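Also worth noting before the fs.go hunks: filter.Install above no longer type-switches on ptrace vs. KVM; the ptraceFilters and kvmFilters tables moved behind a SyscallFilters method on the platform itself, and Install simply merges whatever the active platform reports. A hypothetical sketch of the platform side (the type name and rule set below are illustrative, not taken from this change):

        // fooPlatform is a made-up platform type implementing platform.Platform.
        func (*fooPlatform) SyscallFilters() seccomp.SyscallRules {
                // Extra host syscalls this platform needs beyond the common allowlist.
                return seccomp.SyscallRules{
                        syscall.SYS_IOCTL: {},
                        syscall.SYS_MMAP:  {},
                }
        }

        // In filter.Install the per-platform rules are then merged generically:
        //   s.Merge(opt.Platform.SyscallFilters())
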
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty" + _ "gvisor.dev/gvisor/pkg/sentry/fs/dev" + "gvisor.dev/gvisor/pkg/sentry/fs/gofer" + _ "gvisor.dev/gvisor/pkg/sentry/fs/host" + _ "gvisor.dev/gvisor/pkg/sentry/fs/proc" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + _ "gvisor.dev/gvisor/pkg/sentry/fs/sys" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tty" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -50,6 +51,9 @@ const ( // Device name for root mount. rootDevice = "9pfs-/" + // MountPrefix is the annotation prefix for mount hints. + MountPrefix = "gvisor.dev/spec/mount" + // ChildContainersDir is the directory where child container root // filesystems are mounted. ChildContainersDir = "/__runsc_containers__" @@ -72,7 +76,7 @@ func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, tmpFS := mustFindFilesystem("tmpfs") if !fs.IsDir(lower.StableAttr) { // Create overlay on top of mount file, e.g. /etc/hostname. - msrc := fs.NewCachingMountSource(tmpFS, upperFlags) + msrc := fs.NewCachingMountSource(ctx, tmpFS, upperFlags) return fs.NewOverlayRootFile(ctx, msrc, lower, upperFlags) } @@ -222,7 +226,11 @@ func mustFindFilesystem(name string) fs.Filesystem { // addSubmountOverlay overlays the inode over a ramfs tree containing the given // paths. func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) { - msrc := fs.NewPseudoMountSource() + // Construct a ramfs tree of mount points. The contents never + // change, so this can be fully caching. There's no real + // filesystem backing this tree, so we set the filesystem to + // nil. + msrc := fs.NewCachingMountSource(ctx, nil, fs.MountSourceFlags{}) mountTree, err := ramfs.MakeDirectoryTree(ctx, msrc, submounts) if err != nil { return nil, fmt.Errorf("creating mount tree: %v", err) @@ -292,6 +300,174 @@ func (f *fdDispenser) empty() bool { return len(f.fds) == 0 } +type shareType int + +const ( + invalid shareType = iota + + // container shareType indicates that the mount is used by a single container. + container + + // pod shareType indicates that the mount is used by more than one container + // inside the pod. + pod + + // shared shareType indicates that the mount can also be shared with a process + // outside the pod, e.g. NFS. 
+ shared +) + +func parseShare(val string) (shareType, error) { + switch val { + case "container": + return container, nil + case "pod": + return pod, nil + case "shared": + return shared, nil + default: + return 0, fmt.Errorf("invalid share value %q", val) + } +} + +func (s shareType) String() string { + switch s { + case invalid: + return "invalid" + case container: + return "container" + case pod: + return "pod" + case shared: + return "shared" + default: + return fmt.Sprintf("invalid share value %d", s) + } +} + +// mountHint represents extra information about mounts that are provided via +// annotations. They can override mount type, and provide sharing information +// so that mounts can be correctly shared inside the pod. +type mountHint struct { + name string + share shareType + mount specs.Mount + + // root is the inode where the volume is mounted. For mounts with 'pod' share + // the volume is mounted once and then bind mounted inside the containers. + root *fs.Inode +} + +func (m *mountHint) setField(key, val string) error { + switch key { + case "source": + if len(val) == 0 { + return fmt.Errorf("source cannot be empty") + } + m.mount.Source = val + case "type": + return m.setType(val) + case "share": + share, err := parseShare(val) + if err != nil { + return err + } + m.share = share + case "options": + return m.setOptions(val) + default: + return fmt.Errorf("invalid mount annotation: %s=%s", key, val) + } + return nil +} + +func (m *mountHint) setType(val string) error { + switch val { + case "tmpfs", "bind": + m.mount.Type = val + default: + return fmt.Errorf("invalid type %q", val) + } + return nil +} + +func (m *mountHint) setOptions(val string) error { + opts := strings.Split(val, ",") + if err := specutils.ValidateMountOptions(opts); err != nil { + return err + } + // Sort options so it can be compared with container mount options later on. + sort.Strings(opts) + m.mount.Options = opts + return nil +} + +func (m *mountHint) isSupported() bool { + return m.mount.Type == tmpfs && m.share == pod +} + +// podMountHints contains a collection of mountHints for the pod. +type podMountHints struct { + mounts map[string]*mountHint +} + +func newPodMountHints(spec *specs.Spec) (*podMountHints, error) { + mnts := make(map[string]*mountHint) + for k, v := range spec.Annotations { + // Look for 'gvisor.dev/spec/mount' annotations and parse them. + if strings.HasPrefix(k, MountPrefix) { + parts := strings.Split(k, "/") + if len(parts) != 5 { + return nil, fmt.Errorf("invalid mount annotation: %s=%s", k, v) + } + name := parts[3] + if len(name) == 0 || path.Clean(name) != name { + return nil, fmt.Errorf("invalid mount name: %s", name) + } + mnt := mnts[name] + if mnt == nil { + mnt = &mountHint{name: name} + mnts[name] = mnt + } + if err := mnt.setField(parts[4], v); err != nil { + return nil, err + } + } + } + + // Validate all hints after done parsing. + for name, m := range mnts { + log.Infof("Mount annotation found, name: %s, source: %q, type: %s, share: %v", name, m.mount.Source, m.mount.Type, m.share) + if m.share == invalid { + return nil, fmt.Errorf("share field for %q has not been set", m.name) + } + if len(m.mount.Source) == 0 { + return nil, fmt.Errorf("source field for %q has not been set", m.name) + } + if len(m.mount.Type) == 0 { + return nil, fmt.Errorf("type field for %q has not been set", m.name) + } + + // Check for duplicate mount sources. 
+ for name2, m2 := range mnts { + if name != name2 && m.mount.Source == m2.mount.Source { + return nil, fmt.Errorf("mounts %q and %q have the same mount source %q", m.name, m2.name, m.mount.Source) + } + } + } + + return &podMountHints{mounts: mnts}, nil +} + +func (p *podMountHints) findMount(mount specs.Mount) *mountHint { + for _, m := range p.mounts { + if m.mount.Source == mount.Source { + return m + } + } + return nil +} + type containerMounter struct { // cid is the container ID. May be set to empty for the root container. cid string @@ -306,15 +482,18 @@ type containerMounter struct { fds fdDispenser k *kernel.Kernel + + hints *podMountHints } -func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel) *containerMounter { +func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter { return &containerMounter{ cid: cid, root: spec.Root, mounts: compileMounts(spec), fds: fdDispenser{fds: goferFDs}, k: k, + hints: hints, } } @@ -476,6 +655,15 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error // 'setMountNS' is called after namespace is created. It must set the mount NS // to 'rootCtx'. func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx context.Context, conf *Config, setMountNS func(*fs.MountNamespace)) error { + for _, hint := range c.hints.mounts { + log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type) + inode, err := c.mountSharedMaster(rootCtx, conf, hint) + if err != nil { + return fmt.Errorf("mounting shared master %q: %v", hint.name, err) + } + hint.root = inode + } + // Create a tmpfs mount where we create and mount a root filesystem for // each child container. c.mounts = append(c.mounts, specs.Mount{ @@ -498,21 +686,57 @@ func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx c return c.mountSubmounts(rootCtx, conf, mns, root) } +// mountSharedMaster mounts the master of a volume that is shared among +// containers in a pod. It returns the root mount's inode. +func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config, hint *mountHint) (*fs.Inode, error) { + // Map mount type to filesystem name, and parse out the options that we are + // capable of dealing with. + fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount) + if err != nil { + return nil, err + } + if len(fsName) == 0 { + return nil, fmt.Errorf("mount type not supported %q", hint.mount.Type) + } + + // Mount with revalidate because it's shared among containers. + opts = append(opts, "cache=revalidate") + + // All filesystem names should have been mapped to something we know. + filesystem := mustFindFilesystem(fsName) + + mf := mountFlags(hint.mount.Options) + if useOverlay { + // All writes go to upper, be paranoid and make lower readonly. + mf.ReadOnly = true + } + + inode, err := filesystem.Mount(ctx, mountDevice(hint.mount), mf, strings.Join(opts, ","), nil) + if err != nil { + return nil, fmt.Errorf("creating mount %q: %v", hint.name, err) + } + + if useOverlay { + log.Debugf("Adding overlay on top of shared mount %q", hint.name) + inode, err = addOverlay(ctx, conf, inode, hint.mount.Type, mf) + if err != nil { + return nil, err + } + } + + return inode, nil +} + // createRootMount creates the root filesystem. 
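The mount-hint machinery above is driven entirely by OCI annotations: each key has the form MountPrefix/<name>/<field> (i.e. gvisor.dev/spec/mount/<name>/{source,type,share,options}), and a hint with type tmpfs and share pod is mounted once as a "master" in setupRootContainer and then bind-mounted into each container that declares a mount with the same source. A short sketch of the entry points, mirroring fs_test.go later in this change; the "shared-tmp" name and paths are illustrative:

        spec := &specs.Spec{
                Annotations: map[string]string{
                        path.Join(MountPrefix, "shared-tmp", "source"): "/tmp/shared",
                        path.Join(MountPrefix, "shared-tmp", "type"):   "tmpfs",
                        path.Join(MountPrefix, "shared-tmp", "share"):  "pod",
                },
        }
        hints, err := newPodMountHints(spec)
        if err != nil {
                // Missing source/type/share fields or duplicate sources are rejected here.
        }
        // In mountSubmounts, a container mount with a matching source reuses the master:
        hint := hints.findMount(specs.Mount{Source: "/tmp/shared", Destination: "/tmp", Type: "tmpfs"})
        // hint != nil && hint.isSupported() => bind mount of hint.root instead of a fresh mount.
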
func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*fs.Inode, error) { // First construct the filesystem from the spec.Root. mf := fs.MountSourceFlags{ReadOnly: c.root.Readonly || conf.Overlay} - var ( - rootInode *fs.Inode - err error - ) - fd := c.fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) p9FS := mustFindFilesystem("9p") opts := p9MountOptions(fd, conf.FileAccess) - rootInode, err = p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil) + rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil) if err != nil { return nil, fmt.Errorf("creating root mount point: %v", err) } @@ -579,8 +803,14 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error { for _, m := range c.mounts { - if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil { - return fmt.Errorf("mount submount %q: %v", m.Destination, err) + if hint := c.hints.findMount(m); hint != nil && hint.isSupported() { + if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil { + return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err) + } + } else { + if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil { + return fmt.Errorf("mount submount %q: %v", m.Destination, err) + } } } @@ -653,6 +883,40 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns return nil } +// mountSharedSubmount binds mount to a previously mounted volume that is shared +// among containers in the same pod. +func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount specs.Mount, source *mountHint) error { + // For now enforce that all options are the same. Once bind mount is properly + // supported, then we should ensure the master is less restrictive than the + // container, e.g. master can be 'rw' while container mounts as 'ro'. + if len(mount.Options) != len(source.mount.Options) { + return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options) + } + sort.Strings(mount.Options) + for i, opt := range mount.Options { + if opt != source.mount.Options[i] { + return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options) + } + } + + maxTraversals := uint(0) + target, err := mns.FindInode(ctx, root, root, mount.Destination, &maxTraversals) + if err != nil { + return fmt.Errorf("can't find mount destination %q: %v", mount.Destination, err) + } + defer target.DecRef() + + // Take a ref on the inode that is about to be (re)-mounted. + source.root.IncRef() + if err := mns.Mount(ctx, target, source.root); err != nil { + source.root.DecRef() + return fmt.Errorf("bind mount %q error: %v", mount.Destination, err) + } + + log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name) + return nil +} + // addRestoreMount adds a mount to the MountSources map used for restoring a // checkpointed container. func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount) error { @@ -678,8 +942,8 @@ func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnviron return nil } -// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding the mounts -// to the environment. 
+// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding +// the mounts to the environment. func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEnvironment, error) { renv := &fs.RestoreEnvironment{ MountSources: make(map[string][]fs.MountArgs), @@ -730,7 +994,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // Technically we don't have to mount tmpfs at /tmp, as we could just rely on // the host /tmp, but this is a nice optimization, and fixes some apps that call // mknod in /tmp. It's unsafe to mount tmpfs if: -// 1. /tmp is mounted explictly: we should not override user's wish +// 1. /tmp is mounted explicitly: we should not override user's wish // 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp // // Note that when there are submounts inside of '/tmp', directories for the diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go new file mode 100644 index 000000000..49ab34b33 --- /dev/null +++ b/runsc/boot/fs_test.go @@ -0,0 +1,193 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "path" + "reflect" + "strings" + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestPodMountHintsHappy(t *testing.T) { + spec := &specs.Spec{ + Annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "source"): "foo", + path.Join(MountPrefix, "mount1", "type"): "tmpfs", + path.Join(MountPrefix, "mount1", "share"): "pod", + + path.Join(MountPrefix, "mount2", "source"): "bar", + path.Join(MountPrefix, "mount2", "type"): "bind", + path.Join(MountPrefix, "mount2", "share"): "container", + path.Join(MountPrefix, "mount2", "options"): "rw,private", + }, + } + podHints, err := newPodMountHints(spec) + if err != nil { + t.Errorf("newPodMountHints failed: %v", err) + } + + // Check that fields were set correctly. 
+ mount1 := podHints.mounts["mount1"] + if want := "mount1"; want != mount1.name { + t.Errorf("mount1 name, want: %q, got: %q", want, mount1.name) + } + if want := "foo"; want != mount1.mount.Source { + t.Errorf("mount1 source, want: %q, got: %q", want, mount1.mount.Source) + } + if want := "tmpfs"; want != mount1.mount.Type { + t.Errorf("mount1 type, want: %q, got: %q", want, mount1.mount.Type) + } + if want := pod; want != mount1.share { + t.Errorf("mount1 type, want: %q, got: %q", want, mount1.share) + } + if want := []string(nil); !reflect.DeepEqual(want, mount1.mount.Options) { + t.Errorf("mount1 type, want: %q, got: %q", want, mount1.mount.Options) + } + + mount2 := podHints.mounts["mount2"] + if want := "mount2"; want != mount2.name { + t.Errorf("mount2 name, want: %q, got: %q", want, mount2.name) + } + if want := "bar"; want != mount2.mount.Source { + t.Errorf("mount2 source, want: %q, got: %q", want, mount2.mount.Source) + } + if want := "bind"; want != mount2.mount.Type { + t.Errorf("mount2 type, want: %q, got: %q", want, mount2.mount.Type) + } + if want := container; want != mount2.share { + t.Errorf("mount2 type, want: %q, got: %q", want, mount2.share) + } + if want := []string{"private", "rw"}; !reflect.DeepEqual(want, mount2.mount.Options) { + t.Errorf("mount2 type, want: %q, got: %q", want, mount2.mount.Options) + } +} + +func TestPodMountHintsErrors(t *testing.T) { + for _, tst := range []struct { + name string + annotations map[string]string + error string + }{ + { + name: "too short", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1"): "foo", + }, + error: "invalid mount annotation", + }, + { + name: "no name", + annotations: map[string]string{ + MountPrefix + "//source": "foo", + }, + error: "invalid mount name", + }, + { + name: "missing source", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "type"): "tmpfs", + path.Join(MountPrefix, "mount1", "share"): "pod", + }, + error: "source field", + }, + { + name: "missing type", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "source"): "foo", + path.Join(MountPrefix, "mount1", "share"): "pod", + }, + error: "type field", + }, + { + name: "missing share", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "source"): "foo", + path.Join(MountPrefix, "mount1", "type"): "tmpfs", + }, + error: "share field", + }, + { + name: "invalid field name", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "invalid"): "foo", + }, + error: "invalid mount annotation", + }, + { + name: "invalid source", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "source"): "", + path.Join(MountPrefix, "mount1", "type"): "tmpfs", + path.Join(MountPrefix, "mount1", "share"): "pod", + }, + error: "source cannot be empty", + }, + { + name: "invalid type", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "source"): "foo", + path.Join(MountPrefix, "mount1", "type"): "invalid-type", + path.Join(MountPrefix, "mount1", "share"): "pod", + }, + error: "invalid type", + }, + { + name: "invalid share", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "source"): "foo", + path.Join(MountPrefix, "mount1", "type"): "tmpfs", + path.Join(MountPrefix, "mount1", "share"): "invalid-share", + }, + error: "invalid share", + }, + { + name: "invalid options", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "source"): "foo", + path.Join(MountPrefix, "mount1", "type"): "tmpfs", + path.Join(MountPrefix, 
"mount1", "share"): "pod", + path.Join(MountPrefix, "mount1", "options"): "invalid-option", + }, + error: "unknown mount option", + }, + { + name: "duplicate source", + annotations: map[string]string{ + path.Join(MountPrefix, "mount1", "source"): "foo", + path.Join(MountPrefix, "mount1", "type"): "tmpfs", + path.Join(MountPrefix, "mount1", "share"): "pod", + + path.Join(MountPrefix, "mount2", "source"): "foo", + path.Join(MountPrefix, "mount2", "type"): "bind", + path.Join(MountPrefix, "mount2", "share"): "container", + }, + error: "have the same mount source", + }, + } { + t.Run(tst.name, func(t *testing.T) { + spec := &specs.Spec{Annotations: tst.annotations} + podHints, err := newPodMountHints(spec) + if err == nil || !strings.Contains(err.Error(), tst.error) { + t.Errorf("newPodMountHints invalid error, want: .*%s.*, got: %v", tst.error, err) + } + if podHints != nil { + t.Errorf("newPodMountHints must return nil on failure: %+v", podHints) + } + }) + } +} diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go index 3364aa5e6..d1c0bb9b5 100644 --- a/runsc/boot/limits.go +++ b/runsc/boot/limits.go @@ -20,8 +20,8 @@ import ( "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/limits" ) // Mapping from linux resource names to limits.LimitType. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 42bddb2e8..8e8c6105b 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -20,51 +20,52 @@ import ( mrand "math/rand" "os" "runtime" + "strings" "sync" "sync/atomic" "syscall" gtime "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/memutil" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/loader" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" - "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling" - slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/runsc/boot/filter" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cpuid" + 
"gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/memutil" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/loader" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/sighandling" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" + "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/runsc/boot/filter" + _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms. + "gvisor.dev/gvisor/runsc/specutils" // Include supported socket providers. - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/epsocket" + "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" + _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink" + _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route" + _ "gvisor.dev/gvisor/pkg/sentry/socket/unix" ) // Loader keeps state needed to start the kernel and run the container.. @@ -117,6 +118,10 @@ type Loader struct { // // processes is guardded by mu. processes map[execID]*execProcess + + // mountHints provides extra information about mounts for containers that + // apply to the entire pod. + mountHints *podMountHints } // execID uniquely identifies a sentry process that is executed in a container. @@ -201,7 +206,9 @@ func New(args Args) (*Loader, error) { // Create VDSO. // // Pass k as the platform since it is savable, unlike the actual platform. - vdso, err := loader.PrepareVDSO(k) + // + // FIXME(b/109889800): Use non-nil context. + vdso, err := loader.PrepareVDSO(nil, k) if err != nil { return nil, fmt.Errorf("creating vdso: %v", err) } @@ -255,7 +262,7 @@ func New(args Args) (*Loader, error) { // Adjust the total memory returned by the Sentry so that applications that // use /proc/meminfo can make allocations based on this limit. 
usage.MinimumTotalMemoryBytes = args.TotalMem - log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(2^30)) + log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(1<<30)) } // Initiate the Kernel object, which is required by the Context passed @@ -299,6 +306,11 @@ func New(args Args) (*Loader, error) { return nil, fmt.Errorf("initializing compat logs: %v", err) } + mountHints, err := newPodMountHints(args.Spec) + if err != nil { + return nil, fmt.Errorf("creating pod mount hints: %v", err) + } + eid := execID{cid: args.ID} l := &Loader{ k: k, @@ -311,6 +323,7 @@ func New(args Args) (*Loader, error) { rootProcArgs: procArgs, sandboxID: args.ID, processes: map[execID]*execProcess{eid: {}}, + mountHints: mountHints, } // We don't care about child signals; some platforms can generate a @@ -402,19 +415,12 @@ func (l *Loader) Destroy() { } func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) { - switch conf.Platform { - case PlatformPtrace: - log.Infof("Platform: ptrace") - return ptrace.New() - case PlatformKVM: - log.Infof("Platform: kvm") - if deviceFile == nil { - return nil, fmt.Errorf("kvm device file must be provided") - } - return kvm.New(deviceFile) - default: - return nil, fmt.Errorf("invalid platform %v", conf.Platform) + p, err := platform.Lookup(conf.Platform) + if err != nil { + panic(fmt.Sprintf("invalid platform %v: %v", conf.Platform, err)) } + log.Infof("Platform: %s", conf.Platform) + return p.New(deviceFile) } func createMemoryFile() (*pgalloc.MemoryFile, error) { @@ -435,6 +441,23 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { return mf, nil } +func (l *Loader) installSeccompFilters() error { + if l.conf.DisableSeccomp { + filter.Report("syscall filter is DISABLED. Running in less secure mode.") + } else { + opts := filter.Options{ + Platform: l.k.Platform, + HostNetwork: l.conf.Network == NetworkHost, + ProfileEnable: l.conf.ProfileEnable, + ControllerFD: l.ctrl.srv.FD(), + } + if err := filter.Install(opts); err != nil { + return fmt.Errorf("installing seccomp filters: %v", err) + } + } + return nil +} + // Run runs the root container. func (l *Loader) Run() error { err := l.run() @@ -470,39 +493,33 @@ func (l *Loader) run() error { return fmt.Errorf("trying to start deleted container %q", l.sandboxID) } - // Finally done with all configuration. Setup filters before user code - // is loaded. - if l.conf.DisableSeccomp { - filter.Report("syscall filter is DISABLED. Running in less secure mode.") - } else { - opts := filter.Options{ - Platform: l.k.Platform, - HostNetwork: l.conf.Network == NetworkHost, - ProfileEnable: l.conf.ProfileEnable, - ControllerFD: l.ctrl.srv.FD(), - } - if err := filter.Install(opts); err != nil { - return fmt.Errorf("installing seccomp filters: %v", err) - } - } - // If we are restoring, we do not want to create a process. // l.restore is set by the container manager when a restore call is made. if !l.restore { + if l.conf.ProfileEnable { + initializePProf() + } + + // Finally done with all configuration. Setup filters before user code + // is loaded. + if err := l.installSeccompFilters(); err != nil { + return err + } + // Create the FD map, which will set stdin, stdout, and stderr. If console // is true, then ioctl calls will be passed through to the host fd. 
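The total-memory logging fix near the top of this hunk deserves a call-out: in Go, ^ is bitwise XOR rather than exponentiation, so the old divisor 2^30 evaluated to 28 and the logged figure was wildly off; 1<<30 is the intended one-GiB constant. A standalone illustration (the 4 GiB value is just an example):

        package main

        import "fmt"

        func main() {
                fmt.Println(2 ^ 30)  // 28: XOR, not a power of two.
                fmt.Println(1 << 30) // 1073741824: one GiB.

                totalMem := uint64(4 << 30)
                fmt.Printf("Setting total memory to %.2f GB\n", float64(totalMem)/(1<<30)) // 4.00 GB
        }
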
ctx := l.rootProcArgs.NewContext(l.k) - fdm, err := createFDMap(ctx, l.rootProcArgs.Limits, l.console, l.stdioFDs) + fdTable, err := createFDTable(ctx, l.console, l.stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } // CreateProcess takes a reference on FDMap if successful. We won't need // ours either way. - l.rootProcArgs.FDMap = fdm + l.rootProcArgs.FDTable = fdTable // cid for root container can be empty. Only subcontainers need it to set // the mount location. - mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k) + mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints) if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil { return err } @@ -513,19 +530,37 @@ func (l *Loader) run() error { return err } + // Read /etc/passwd for the user's HOME directory and set the HOME + // environment variable as required by POSIX if it is not overridden by + // the user. + hasHomeEnvv := false + for _, envv := range l.rootProcArgs.Envv { + if strings.HasPrefix(envv, "HOME=") { + hasHomeEnvv = true + } + } + if !hasHomeEnvv { + homeDir, err := getExecUserHome(rootCtx, rootMns, uint32(l.rootProcArgs.Credentials.RealKUID)) + if err != nil { + return fmt.Errorf("error reading exec user: %v", err) + } + + l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir) + } + // Create the root container init task. It will begin running // when the kernel is started. if _, _, err := l.k.CreateProcess(l.rootProcArgs); err != nil { return fmt.Errorf("creating init process: %v", err) } - // CreateProcess takes a reference on FDMap if successful. - l.rootProcArgs.FDMap.DecRef() + // CreateProcess takes a reference on FDTable if successful. + l.rootProcArgs.FDTable.DecRef() } ep.tg = l.k.GlobalInit() if l.console { - ttyFile := l.rootProcArgs.FDMap.GetFile(0) + ttyFile, _ := l.rootProcArgs.FDTable.Get(0) defer ttyFile.DecRef() ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations) @@ -605,13 +640,13 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file // Create the FD map, which will set stdin, stdout, and stderr. ctx := procArgs.NewContext(l.k) - fdm, err := createFDMap(ctx, procArgs.Limits, false, stdioFDs) + fdTable, err := createFDTable(ctx, false, stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } - // CreateProcess takes a reference on FDMap if successful. We won't need ours - // either way. - procArgs.FDMap = fdm + // CreateProcess takes a reference on fdTable if successful. We won't + // need ours either way. + procArgs.FDTable = fdTable // Can't take ownership away from os.File. dup them to get a new FDs. var goferFDs []int @@ -623,7 +658,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file goferFDs = append(goferFDs, fd) } - mntr := newContainerMounter(spec, cid, goferFDs, l.k) + mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints) if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil { return fmt.Errorf("configuring container FS: %v", err) } @@ -640,8 +675,8 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file } l.k.StartProcess(tg) - // CreateProcess takes a reference on FDMap if successful. - procArgs.FDMap.DecRef() + // CreateProcess takes a reference on FDTable if successful. + procArgs.FDTable.DecRef() l.processes[eid].tg = tg return nil @@ -805,9 +840,17 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) { // privileges. 
Raw: true, })} + + // Enable SACK Recovery. if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { return nil, fmt.Errorf("failed to enable SACK: %v", err) } + + // Enable Receive Buffer Auto-Tuning. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) + } + return &s, nil default: @@ -956,3 +999,8 @@ func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host } return ep.tg, ep.tty, nil } + +func init() { + // TODO(gvisor.dev/issue/365): Make this configurable. + refs.SetLeakMode(refs.NoLeakChecking) +} diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 6393cb3fb..ff713660d 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -25,18 +25,21 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/control/server" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/runsc/fsgofer" + "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/runsc/fsgofer" ) func init() { log.SetLevel(log.Debug) rand.Seed(time.Now().UnixNano()) + if err := fsgofer.OpenProcSelfFD(); err != nil { + panic(err) + } } func testConfig() *Config { @@ -44,6 +47,7 @@ func testConfig() *Config { RootDir: "unused_root_dir", Network: NetworkNone, DisableSeccomp: true, + Platform: "ptrace", } } @@ -404,7 +408,7 @@ func TestCreateMountNamespace(t *testing.T) { mns = m ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root()) } - mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil) + mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil, &podMountHints{}) if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil { t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err) } @@ -610,7 +614,7 @@ func TestRestoreEnvironment(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { conf := testConfig() - mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil) + mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil, &podMountHints{}) actualRenv, err := mntr.createRestoreEnvironment(conf) if !tc.errorExpected && err != nil { t.Fatalf("could not create restore environment for test:%s", tc.name) diff --git a/runsc/boot/network.go b/runsc/boot/network.go index 82c259f47..d3d98243d 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -19,16 +19,16 @@ import ( "net" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/log" + 
"gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/urpc" ) // Network exposes methods that can be used to configure a network stack. @@ -56,7 +56,7 @@ type FDBasedLink struct { Addresses []net.IP Routes []Route GSOMaxSize uint32 - LinkAddress []byte + LinkAddress net.HardwareAddr // NumChannels controls how many underlying FD's are to be used to // create this endpoint. diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD new file mode 100644 index 000000000..03391cdca --- /dev/null +++ b/runsc/boot/platforms/BUILD @@ -0,0 +1,16 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "platforms", + srcs = ["platforms.go"], + importpath = "gvisor.dev/gvisor/runsc/boot/platforms", + visibility = [ + "//runsc:__subpackages__", + ], + deps = [ + "//pkg/sentry/platform/kvm", + "//pkg/sentry/platform/ptrace", + ], +) diff --git a/runsc/boot/platforms/platforms.go b/runsc/boot/platforms/platforms.go new file mode 100644 index 000000000..056b46ad5 --- /dev/null +++ b/runsc/boot/platforms/platforms.go @@ -0,0 +1,30 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package platforms imports all available platform packages. +package platforms + +import ( + // Import platforms that runsc might use. + _ "gvisor.dev/gvisor/pkg/sentry/platform/kvm" + _ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" +) + +const ( + // Ptrace runs the sandbox with the ptrace platform. + Ptrace = "ptrace" + + // KVM runs the sandbox with the KVM platform. + KVM = "kvm" +) diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof.go new file mode 100644 index 000000000..463362f02 --- /dev/null +++ b/runsc/boot/pprof.go @@ -0,0 +1,18 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package boot + +func initializePProf() { +} diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go index 19c7f8fbd..fbfd3b07c 100644 --- a/runsc/boot/strace.go +++ b/runsc/boot/strace.go @@ -15,7 +15,7 @@ package boot import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/strace" + "gvisor.dev/gvisor/pkg/sentry/strace" ) func enableStrace(conf *Config) error { diff --git a/runsc/boot/user.go b/runsc/boot/user.go new file mode 100644 index 000000000..d1d423a5c --- /dev/null +++ b/runsc/boot/user.go @@ -0,0 +1,146 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "bufio" + "io" + "strconv" + "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/usermem" +) + +type fileReader struct { + // Ctx is the context for the file reader. + Ctx context.Context + + // File is the file to read from. + File *fs.File +} + +// Read implements io.Reader.Read. +func (r *fileReader) Read(buf []byte) (int, error) { + n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf)) + return int(n), err +} + +// getExecUserHome returns the home directory of the executing user read from +// /etc/passwd as read from the container filesystem. +func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) { + // The default user home directory to return if no user matching the user + // if found in the /etc/passwd found in the image. + const defaultHome = "/" + + // Open the /etc/passwd file from the dirent via the root mount namespace. + mnsRoot := rootMns.Root() + maxTraversals := uint(linux.MaxSymlinkTraversals) + dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals) + if err != nil { + // NOTE: Ignore errors opening the passwd file. If the passwd file + // doesn't exist we will return the default home directory. + return defaultHome, nil + } + defer dirent.DecRef() + + // Check read permissions on the file. + if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil { + // NOTE: Ignore permissions errors here and return default root dir. + return defaultHome, nil + } + + // Only open regular files. We don't open other files like named pipes as + // they may block and might present some attack surface to the container. + // Note that runc does not seem to do this kind of checking. + if !fs.IsRegular(dirent.Inode.StableAttr) { + return defaultHome, nil + } + + f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false}) + if err != nil { + return "", err + } + defer f.DecRef() + + r := &fileReader{ + Ctx: ctx, + File: f, + } + + homeDir, err := findHomeInPasswd(uid, r, defaultHome) + if err != nil { + return "", err + } + + return homeDir, nil +} + +// findHomeInPasswd parses a passwd file and returns the given user's home +// directory. This function does it's best to replicate the runc's behavior. 
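
For illustration, a call to the parser described above and defined just below, using a single passwd entry (the uid and entry are made up and mirror the test data further down):

// Inside package boot, with "strings" imported:
home, err := findHomeInPasswd(1000, strings.NewReader("adin::1000:1111::/home/adin:/bin/sh"), "/")
// home == "/home/adin", err == nil. A uid with no matching entry falls back
// to the default home ("/" here) passed as the last argument.
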
+func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) { + s := bufio.NewScanner(passwd) + + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // Pull out part of passwd entry. Loosely parse the passwd entry as some + // passwd files could be poorly written and for compatibility with runc. + // + // Per 'man 5 passwd' + // /etc/passwd contains one line for each user account, with seven + // fields delimited by colons (“:”). These fields are: + // + // - login name + // - optional encrypted password + // - numerical user ID + // - numerical group ID + // - user name or comment field + // - user home directory + // - optional user command interpreter + parts := strings.Split(line, ":") + + found := false + homeDir := "" + for i, p := range parts { + switch i { + case 2: + parsedUID, err := strconv.ParseUint(p, 10, 32) + if err == nil && parsedUID == uint64(uid) { + found = true + } + case 5: + homeDir = p + } + } + if found { + // NOTE: If the uid is present but the home directory is not + // present in the /etc/passwd entry we return an empty string. This + // is, for better or worse, what runc does. + return homeDir, nil + } + } + + return defaultHome, nil +} diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go new file mode 100644 index 000000000..834003430 --- /dev/null +++ b/runsc/boot/user_test.go @@ -0,0 +1,253 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "io/ioutil" + "os" + "path/filepath" + "strings" + "syscall" + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" +) + +func setupTempDir() (string, error) { + tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test") + if err != nil { + return "", err + } + return tmpDir, nil +} + +func setupPasswd(contents string, perms os.FileMode) func() (string, error) { + return func() (string, error) { + tmpDir, err := setupTempDir() + if err != nil { + return "", err + } + + if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { + return "", err + } + + f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd")) + if err != nil { + return "", err + } + defer f.Close() + + _, err = f.WriteString(contents) + if err != nil { + return "", err + } + + err = f.Chmod(perms) + if err != nil { + return "", err + } + return tmpDir, nil + } +} + +// TestGetExecUserHome tests the getExecUserHome function. 
+func TestGetExecUserHome(t *testing.T) { + tests := map[string]struct { + uid uint32 + createRoot func() (string, error) + expected string + }{ + "success": { + uid: 1000, + createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666), + expected: "/home/adin", + }, + "no_passwd": { + uid: 1000, + createRoot: setupTempDir, + expected: "/", + }, + "no_perms": { + uid: 1000, + createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000), + expected: "/", + }, + "directory": { + uid: 1000, + createRoot: func() (string, error) { + tmpDir, err := setupTempDir() + if err != nil { + return "", err + } + + if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { + return "", err + } + + if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { + return "", err + } + + return tmpDir, nil + }, + expected: "/", + }, + // Currently we don't allow named pipes. + "named_pipe": { + uid: 1000, + createRoot: func() (string, error) { + tmpDir, err := setupTempDir() + if err != nil { + return "", err + } + + if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { + return "", err + } + + if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { + return "", err + } + + return tmpDir, nil + }, + expected: "/", + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + tmpDir, err := tc.createRoot() + if err != nil { + t.Fatalf("failed to create root dir: %v", err) + } + + sandEnd, cleanup, err := startGofer(tmpDir) + if err != nil { + t.Fatalf("failed to create gofer: %v", err) + } + defer cleanup() + + ctx := contexttest.Context(t) + conf := &Config{ + RootDir: "unused_root_dir", + Network: NetworkNone, + DisableSeccomp: true, + } + + spec := &specs.Spec{ + Root: &specs.Root{ + Path: tmpDir, + Readonly: true, + }, + // Add /proc mount as tmpfs to avoid needing a kernel. + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "tmpfs", + }, + }, + } + + var mns *fs.MountNamespace + setMountNS := func(m *fs.MountNamespace) { + mns = m + ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root()) + } + mntr := newContainerMounter(spec, "", []int{sandEnd}, nil, &podMountHints{}) + if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil { + t.Fatalf("failed to create mount namespace: %v", err) + } + + got, err := getExecUserHome(ctx, mns, tc.uid) + if err != nil { + t.Fatalf("failed to get user home: %v", err) + } + + if got != tc.expected { + t.Fatalf("expected %v, got: %v", tc.expected, got) + } + }) + } +} + +// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing. +func TestFindHomeInPasswd(t *testing.T) { + tests := map[string]struct { + uid uint32 + passwd string + expected string + def string + }{ + "empty": { + uid: 1000, + passwd: "", + expected: "/", + def: "/", + }, + "whitespace": { + uid: 1000, + passwd: " ", + expected: "/", + def: "/", + }, + "full": { + uid: 1000, + passwd: "adin::1000:1111::/home/adin:/bin/sh", + expected: "/home/adin", + def: "/", + }, + // For better or worse, this is how runc works. 
+ "partial": { + uid: 1000, + passwd: "adin::1000:1111:", + expected: "", + def: "/", + }, + "multiple": { + uid: 1001, + passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh", + expected: "/home/ian", + def: "/", + }, + "duplicate": { + uid: 1000, + passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh", + expected: "/home/adin", + def: "/", + }, + "empty_lines": { + uid: 1001, + passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh", + expected: "/home/ian", + def: "/", + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def) + if err != nil { + t.Fatalf("error parsing passwd: %v", err) + } + if tc.expected != got { + t.Fatalf("expected %v, got: %v", tc.expected, got) + } + }) + } +} diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD index 620d33a19..ab2387614 100644 --- a/runsc/cgroup/BUILD +++ b/runsc/cgroup/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "cgroup", srcs = ["cgroup.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/cgroup", + importpath = "gvisor.dev/gvisor/runsc/cgroup", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 7431b17d6..ab3a25b9b 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -30,8 +30,8 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/specutils" ) const ( diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index df6af0ced..5223b9972 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -33,7 +33,7 @@ go_library( "syscalls.go", "wait.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/cmd", + importpath = "gvisor.dev/gvisor/runsc/cmd", visibility = [ "//runsc:__subpackages__", ], @@ -46,6 +46,7 @@ go_library( "//pkg/unet", "//pkg/urpc", "//runsc/boot", + "//runsc/boot/platforms", "//runsc/console", "//runsc/container", "//runsc/fsgofer", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 3a547d4aa..b40fded5b 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -24,9 +24,10 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot/platforms" + "gvisor.dev/gvisor/runsc/specutils" ) // Boot implements subcommands.Command for the "boot" command which starts a @@ -130,6 +131,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Ensure that if there is a panic, all goroutine stacks are printed. debug.SetTraceback("all") + conf := args[0].(*boot.Config) + if b.setUpRoot { if err := setUpChroot(b.pidns); err != nil { Fatalf("error setting up chroot: %v", err) @@ -143,14 +146,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) args = append(args, arg) } } - // Note that we've already read the spec from the spec FD, and - // we will read it again after the exec call. This works - // because the ReadSpecFromFile function seeks to the beginning - // of the file before reading. 
- if err := callSelfAsNobody(args); err != nil { - Fatalf("%v", err) + if !conf.Rootless { + // Note that we've already read the spec from the spec FD, and + // we will read it again after the exec call. This works + // because the ReadSpecFromFile function seeks to the beginning + // of the file before reading. + if err := callSelfAsNobody(args); err != nil { + Fatalf("%v", err) + } + panic("callSelfAsNobody must never return success") } - panic("callSelfAsNobody must never return success") } } @@ -163,15 +168,12 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } specutils.LogSpec(spec) - conf := args[0].(*boot.Config) - waitStatus := args[1].(*syscall.WaitStatus) - if b.applyCaps { caps := spec.Process.Capabilities if caps == nil { caps = &specs.LinuxCapabilities{} } - if conf.Platform == boot.PlatformPtrace { + if conf.Platform == platforms.Ptrace { // Ptrace platform requires extra capabilities. const c = "CAP_SYS_PTRACE" caps.Bounding = append(caps.Bounding, c) @@ -251,6 +253,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) ws := l.WaitExit() log.Infof("application exiting with %+v", ws) + waitStatus := args[1].(*syscall.WaitStatus) *waitStatus = syscall.WaitStatus(ws.Status()) l.Destroy() return subcommands.ExitSuccess diff --git a/runsc/cmd/capability.go b/runsc/cmd/capability.go index 312e5b471..abfbb7cfc 100644 --- a/runsc/cmd/capability.go +++ b/runsc/cmd/capability.go @@ -19,7 +19,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) var allCapTypes = []capability.CapType{ diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go index ee74d33d8..3ae25a257 100644 --- a/runsc/cmd/capability_test.go +++ b/runsc/cmd/capability_test.go @@ -21,11 +21,11 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/testutil" ) func init() { @@ -97,7 +97,12 @@ func TestCapabilities(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. 
- c, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := container.Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := container.New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -116,6 +121,6 @@ func TestCapabilities(t *testing.T) { } func TestMain(m *testing.M) { - testutil.RunAsRoot() + specutils.MaybeRunAsRoot() os.Exit(m.Run()) } diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go index 96d3c3378..d8b3a8573 100644 --- a/runsc/cmd/checkpoint.go +++ b/runsc/cmd/checkpoint.go @@ -22,10 +22,10 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // File containing the container's saved image/state within the given image-path's directory. @@ -133,7 +133,12 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa Fatalf("destroying container: %v", err) } - cont, err = container.Create(id, spec, conf, bundleDir, "", "", "") + contArgs := container.Args{ + ID: id, + Spec: spec, + BundleDir: bundleDir, + } + cont, err = container.New(conf, contArgs) if err != nil { Fatalf("restoring container: %v", err) } diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go index 1a774db04..b5a0ce17d 100644 --- a/runsc/cmd/chroot.go +++ b/runsc/cmd/chroot.go @@ -20,8 +20,8 @@ import ( "path/filepath" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/specutils" ) // mountInChroot creates the destination mount point in the given chroot and diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go index 5b4cc4a39..f1a4887ef 100644 --- a/runsc/cmd/cmd.go +++ b/runsc/cmd/cmd.go @@ -22,8 +22,8 @@ import ( "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/specutils" ) // intFlags can be used with int flags that appear multiple times. diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go index 8bf9b7dcf..a4e3071b3 100644 --- a/runsc/cmd/create.go +++ b/runsc/cmd/create.go @@ -18,9 +18,9 @@ import ( "context" "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Create implements subcommands.Command for the "create" command. @@ -82,21 +82,33 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{} id := f.Arg(0) conf := args[0].(*boot.Config) + if conf.Rootless { + return Errorf("Rootless mode not supported with %q", c.Name()) + } + bundleDir := c.bundleDir if bundleDir == "" { bundleDir = getwdOrDie() } spec, err := specutils.ReadSpec(bundleDir) if err != nil { - Fatalf("reading spec: %v", err) + return Errorf("reading spec: %v", err) } specutils.LogSpec(spec) // Create the container. 
A new sandbox will be created for the // container unless the metadata specifies that it should be run in an // existing container. - if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil { - Fatalf("creating container: %v", err) + contArgs := container.Args{ + ID: id, + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: c.consoleSocket, + PIDFile: c.pidFile, + UserLog: c.userLog, + } + if _, err := container.New(conf, contArgs); err != nil { + return Errorf("creating container: %v", err) } return subcommands.ExitSuccess } diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index 27eb51172..7313e473f 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -17,14 +17,17 @@ package cmd import ( "context" "os" + "strconv" + "strings" "syscall" "time" "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Debug implements subcommands.Command for the "debug" command. @@ -36,6 +39,9 @@ type Debug struct { profileCPU string profileDelay int trace string + strace string + logLevel string + logPackets string } // Name implements subcommands.Command. @@ -62,6 +68,9 @@ func (d *Debug) SetFlags(f *flag.FlagSet) { f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile") f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.") f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox") + f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all`) + f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).") + f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.") } // Execute implements subcommands.Command.Execute. @@ -78,7 +87,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) var err error c, err = container.Load(conf.RootDir, f.Arg(0)) if err != nil { - Fatalf("loading container %q: %v", f.Arg(0), err) + return Errorf("loading container %q: %v", f.Arg(0), err) } } else { if f.NArg() != 0 { @@ -88,12 +97,12 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Go over all sandboxes and find the one that matches PID. 
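
For orientation: the --strace, --log-level and --log-packets flags registered above are translated, in the hunks further down, into a single control.LoggingArgs request against the running sandbox. Condensed, the resulting call looks roughly like this (the field values are illustrative only):

largs := control.LoggingArgs{
	// Syscall names as given to --strace.
	SetStrace:       true,
	EnableStrace:    true,
	StraceWhitelist: []string{"openat", "read"},

	SetLevel: true,
	Level:    log.Debug,

	SetLogPackets: true,
	LogPackets:    true,
}
if err := c.Sandbox.ChangeLogging(largs); err != nil {
	return Errorf(err.Error())
}
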
ids, err := container.List(conf.RootDir) if err != nil { - Fatalf("listing containers: %v", err) + return Errorf("listing containers: %v", err) } for _, id := range ids { candidate, err := container.Load(conf.RootDir, id) if err != nil { - Fatalf("loading container %q: %v", id, err) + return Errorf("loading container %q: %v", id, err) } if candidate.SandboxPid() == d.pid { c = candidate @@ -101,38 +110,38 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } } if c == nil { - Fatalf("container with PID %d not found", d.pid) + return Errorf("container with PID %d not found", d.pid) } } if c.Sandbox == nil || !c.Sandbox.IsRunning() { - Fatalf("container sandbox is not running") + return Errorf("container sandbox is not running") } log.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Pid) if d.signal > 0 { log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid) if err := syscall.Kill(c.Sandbox.Pid, syscall.Signal(d.signal)); err != nil { - Fatalf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid) + return Errorf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid) } } if d.stacks { log.Infof("Retrieving sandbox stacks") stacks, err := c.Sandbox.Stacks() if err != nil { - Fatalf("retrieving stacks: %v", err) + return Errorf("retrieving stacks: %v", err) } log.Infof(" *** Stack dump ***\n%s", stacks) } if d.profileHeap != "" { f, err := os.Create(d.profileHeap) if err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } defer f.Close() if err := c.Sandbox.HeapProfile(f); err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } log.Infof("Heap profile written to %q", d.profileHeap) } @@ -142,7 +151,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) delay = true f, err := os.Create(d.profileCPU) if err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } defer func() { f.Close() @@ -152,7 +161,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) log.Infof("CPU profile written to %q", d.profileCPU) }() if err := c.Sandbox.StartCPUProfile(f); err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU) } @@ -160,7 +169,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) delay = true f, err := os.Create(d.trace) if err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } defer func() { f.Close() @@ -170,15 +179,71 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) log.Infof("Trace written to %q", d.trace) }() if err := c.Sandbox.StartTrace(f); err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } log.Infof("Tracing started for %d sec, writing to %q", d.profileDelay, d.trace) } + if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 { + args := control.LoggingArgs{} + switch strings.ToLower(d.strace) { + case "": + // strace not set, nothing to do here. 
+ + case "off": + log.Infof("Disabling strace") + args.SetStrace = true + + case "all": + log.Infof("Enabling all straces") + args.SetStrace = true + args.EnableStrace = true + + default: + log.Infof("Enabling strace for syscalls: %s", d.strace) + args.SetStrace = true + args.EnableStrace = true + args.StraceWhitelist = strings.Split(d.strace, ",") + } + + if len(d.logLevel) != 0 { + args.SetLevel = true + switch strings.ToLower(d.logLevel) { + case "warning", "0": + args.Level = log.Warning + case "info", "1": + args.Level = log.Info + case "debug", "2": + args.Level = log.Debug + default: + return Errorf("invalid log level %q", d.logLevel) + } + log.Infof("Setting log level %v", args.Level) + } + + if len(d.logPackets) != 0 { + args.SetLogPackets = true + lp, err := strconv.ParseBool(d.logPackets) + if err != nil { + return Errorf("invalid value for log_packets %q", d.logPackets) + } + args.LogPackets = lp + if args.LogPackets { + log.Infof("Enabling packet logging") + } else { + log.Infof("Disabling packet logging") + } + } + + if err := c.Sandbox.ChangeLogging(args); err != nil { + return Errorf(err.Error()) + } + log.Infof("Logging options changed") + } + if delay { time.Sleep(time.Duration(d.profileDelay) * time.Second) - } return subcommands.ExitSuccess diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go index 9039723e9..30d8164b1 100644 --- a/runsc/cmd/delete.go +++ b/runsc/cmd/delete.go @@ -21,9 +21,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Delete implements subcommands.Command for the "delete" command. diff --git a/runsc/cmd/delete_test.go b/runsc/cmd/delete_test.go index 45fc91016..cb59516a3 100644 --- a/runsc/cmd/delete_test.go +++ b/runsc/cmd/delete_test.go @@ -18,7 +18,7 @@ import ( "io/ioutil" "testing" - "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot" ) func TestNotFound(t *testing.T) { diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index 8ea59046c..9a8a49054 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -30,19 +30,19 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Do implements subcommands.Command for the "do" command. It sets up a simple // sandbox and executes the command inside it. See Usage() for more details. type Do struct { - root string - cwd string - ip string - networkNamespace bool + root string + cwd string + ip string + quiet bool } // Name implements subcommands.Command.Name. @@ -72,7 +72,7 @@ func (c *Do) SetFlags(f *flag.FlagSet) { f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`) f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory") f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox") - f.BoolVar(&c.networkNamespace, "netns", true, "run in a new network namespace") + f.BoolVar(&c.quiet, "quiet", false, "suppress runsc messages to stdout. 
Application output is still sent to stdout and stderr") } // Execute implements subcommands.Command.Execute. @@ -85,15 +85,21 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su conf := args[0].(*boot.Config) waitStatus := args[1].(*syscall.WaitStatus) - // Map the entire host file system, but make it readonly with a writable - // overlay on top (ignore --overlay option). - conf.Overlay = true + if conf.Rootless { + if err := specutils.MaybeRunAsRoot(); err != nil { + return Errorf("Error executing inside namespace: %v", err) + } + // Execution will continue here if no more capabilities are needed... + } hostname, err := os.Hostname() if err != nil { return Errorf("Error to retrieve hostname: %v", err) } + // Map the entire host file system, but make it readonly with a writable + // overlay on top (ignore --overlay option). + conf.Overlay = true absRoot, err := resolvePath(c.root) if err != nil { return Errorf("Error resolving root: %v", err) @@ -119,11 +125,22 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su specutils.LogSpec(spec) cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000)) - if !c.networkNamespace { - if conf.Network != boot.NetworkHost { - Fatalf("The current network namespace can be used only if --network=host is set", nil) + if conf.Network == boot.NetworkNone { + netns := specs.LinuxNamespace{ + Type: specs.NetworkNamespace, } - } else if conf.Network != boot.NetworkNone { + if spec.Linux != nil { + panic("spec.Linux is not nil") + } + spec.Linux = &specs.Linux{Namespaces: []specs.LinuxNamespace{netns}} + + } else if conf.Rootless { + if conf.Network == boot.NetworkSandbox { + c.notifyUser("*** Warning: using host network due to --rootless ***") + conf.Network = boot.NetworkHost + } + + } else { clean, err := c.setupNet(cid, spec) if err != nil { return Errorf("Error setting up network: %v", err) @@ -149,7 +166,13 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su return Errorf("Error write spec: %v", err) } - ws, err := container.Run(cid, spec, conf, tmpDir, "", "", "", false) + runArgs := container.Args{ + ID: cid, + Spec: spec, + BundleDir: tmpDir, + Attached: true, + } + ws, err := container.Run(conf, runArgs) if err != nil { return Errorf("running container: %v", err) } @@ -158,6 +181,13 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su return subcommands.ExitSuccess } +func (c *Do) notifyUser(format string, v ...interface{}) { + if !c.quiet { + fmt.Printf(format+"\n", v...) + } + log.Warningf(format, v...) +} + func resolvePath(path string) (string, error) { var err error path, err = filepath.Abs(path) diff --git a/runsc/cmd/error.go b/runsc/cmd/error.go index 700b19f14..3585b5448 100644 --- a/runsc/cmd/error.go +++ b/runsc/cmd/error.go @@ -22,7 +22,7 @@ import ( "time" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) // ErrorLogger is where error messages should be written to. 
These messages are diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go index c6bc8fc3a..3972e9224 100644 --- a/runsc/cmd/events.go +++ b/runsc/cmd/events.go @@ -22,9 +22,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Events implements subcommands.Command for the "events" command. diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index 0eeaaadba..e817eff77 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -30,14 +30,14 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/urpc" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/console" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/console" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Exec implements subcommands.Command for the "exec" command. @@ -235,7 +235,11 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi cmd.SysProcAttr = &syscall.SysProcAttr{ Setsid: true, Setctty: true, - Ctty: int(tty.Fd()), + // The Ctty FD must be the FD in the child process's FD + // table. Since we set cmd.Stdin/Stdout/Stderr to the + // tty FD, we can use any of 0, 1, or 2 here. + // See https://github.com/golang/go/issues/29458. 
+ Ctty: 0, } } diff --git a/runsc/cmd/exec_test.go b/runsc/cmd/exec_test.go index 6f0f258c0..eb38a431f 100644 --- a/runsc/cmd/exec_test.go +++ b/runsc/cmd/exec_test.go @@ -21,10 +21,10 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/urpc" ) func TestUser(t *testing.T) { diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index bccb29397..9faabf494 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -27,13 +27,13 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/fsgofer" - "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/fsgofer" + "gvisor.dev/gvisor/runsc/fsgofer/filter" + "gvisor.dev/gvisor/runsc/specutils" ) var caps = []string{ @@ -152,6 +152,10 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // modes exactly as sent by the sandbox, which will have applied its own umask. syscall.Umask(0) + if err := fsgofer.OpenProcSelfFD(); err != nil { + Fatalf("failed to open /proc/self/fd: %v", err) + } + if err := syscall.Chroot(root); err != nil { Fatalf("failed to chroot to %q: %v", root, err) } diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go index aed5f3291..6c1f197a6 100644 --- a/runsc/cmd/kill.go +++ b/runsc/cmd/kill.go @@ -24,8 +24,8 @@ import ( "flag" "github.com/google/subcommands" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Kill implements subcommands.Command for the "kill" command. diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go index 1f5ca2473..dd2d99a6b 100644 --- a/runsc/cmd/list.go +++ b/runsc/cmd/list.go @@ -25,8 +25,8 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // List implements subcommands.Command for the "list" command for the "list" command. diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go index 11b36aa10..9c0e92001 100644 --- a/runsc/cmd/pause.go +++ b/runsc/cmd/pause.go @@ -19,8 +19,8 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Pause implements subcommands.Command for the "pause" command. 
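
One note on the exec.go change above: per the referenced Go issue, SysProcAttr.Ctty must be an FD number in the child process's FD table rather than the parent's. Because stdin, stdout and stderr are all wired to the tty, any of 0, 1 or 2 works, and 0 is used. The resulting setup is roughly (a sketch; cmd and tty come from the surrounding exec code):

cmd.Stdin = tty
cmd.Stdout = tty
cmd.Stderr = tty
cmd.SysProcAttr = &syscall.SysProcAttr{
	Setsid:  true,
	Setctty: true,
	// 0 refers to the child's stdin, which is the tty.
	Ctty: 0,
}
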
diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go index 3a3e6f17a..45c644f3f 100644 --- a/runsc/cmd/ps.go +++ b/runsc/cmd/ps.go @@ -20,9 +20,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // PS implements subcommands.Command for the "ps" command. diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go index 3ab2f5676..7be60cd7d 100644 --- a/runsc/cmd/restore.go +++ b/runsc/cmd/restore.go @@ -21,9 +21,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Restore implements subcommands.Command for the "restore" command. @@ -80,25 +80,38 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{ conf := args[0].(*boot.Config) waitStatus := args[1].(*syscall.WaitStatus) + if conf.Rootless { + return Errorf("Rootless mode not supported with %q", r.Name()) + } + bundleDir := r.bundleDir if bundleDir == "" { bundleDir = getwdOrDie() } spec, err := specutils.ReadSpec(bundleDir) if err != nil { - Fatalf("reading spec: %v", err) + return Errorf("reading spec: %v", err) } specutils.LogSpec(spec) if r.imagePath == "" { - Fatalf("image-path flag must be provided") + return Errorf("image-path flag must be provided") } conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName) - ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach) + runArgs := container.Args{ + ID: id, + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: r.consoleSocket, + PIDFile: r.pidFile, + UserLog: r.userLog, + Attached: !r.detach, + } + ws, err := container.Run(conf, runArgs) if err != nil { - Fatalf("running container: %v", err) + return Errorf("running container: %v", err) } *waitStatus = ws diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go index 9a2ade41e..b2df5c640 100644 --- a/runsc/cmd/resume.go +++ b/runsc/cmd/resume.go @@ -19,8 +19,8 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Resume implements subcommands.Command for the "resume" command. diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go index c228b4f93..33f4bc12b 100644 --- a/runsc/cmd/run.go +++ b/runsc/cmd/run.go @@ -20,9 +20,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Run implements subcommands.Command for the "run" command. 
@@ -67,19 +67,32 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s conf := args[0].(*boot.Config) waitStatus := args[1].(*syscall.WaitStatus) + if conf.Rootless { + return Errorf("Rootless mode not supported with %q", r.Name()) + } + bundleDir := r.bundleDir if bundleDir == "" { bundleDir = getwdOrDie() } spec, err := specutils.ReadSpec(bundleDir) if err != nil { - Fatalf("reading spec: %v", err) + return Errorf("reading spec: %v", err) } specutils.LogSpec(spec) - ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach) + runArgs := container.Args{ + ID: id, + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: r.consoleSocket, + PIDFile: r.pidFile, + UserLog: r.userLog, + Attached: !r.detach, + } + ws, err := container.Run(conf, runArgs) if err != nil { - Fatalf("running container: %v", err) + return Errorf("running container: %v", err) } *waitStatus = ws diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go index 31e8f42bb..de2115dff 100644 --- a/runsc/cmd/start.go +++ b/runsc/cmd/start.go @@ -18,8 +18,8 @@ import ( "context" "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Start implements subcommands.Command for the "start" command. diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go index f0d449b19..e9f41cbd8 100644 --- a/runsc/cmd/state.go +++ b/runsc/cmd/state.go @@ -21,9 +21,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // State implements subcommands.Command for the "state" command. diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go index 9c8a66490..fb6c1ab29 100644 --- a/runsc/cmd/syscalls.go +++ b/runsc/cmd/syscalls.go @@ -27,7 +27,7 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) // Syscalls implements subcommands.Command for the "syscalls" command. @@ -41,7 +41,7 @@ type Syscalls struct { // Maps operating system to architecture to ArchInfo. type CompatibilityInfo map[string]map[string]ArchInfo -// ArchInfo is compatbility doc for an architecture. +// ArchInfo is compatibility doc for an architecture. type ArchInfo struct { // Syscalls maps syscall number for the architecture to the doc. 
Syscalls map[uintptr]SyscallDoc `json:"syscalls"` diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go index 58fd01974..046489687 100644 --- a/runsc/cmd/wait.go +++ b/runsc/cmd/wait.go @@ -22,8 +22,8 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) const ( diff --git a/runsc/console/BUILD b/runsc/console/BUILD index 3ff9eba27..e623c1a0f 100644 --- a/runsc/console/BUILD +++ b/runsc/console/BUILD @@ -4,8 +4,10 @@ package(licenses = ["notice"]) go_library( name = "console", - srcs = ["console.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/console", + srcs = [ + "console.go", + ], + importpath = "gvisor.dev/gvisor/runsc/console", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 13709a0ae..e246c38ae 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -9,7 +9,7 @@ go_library( "hook.go", "status.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/container", + importpath = "gvisor.dev/gvisor/runsc/container", visibility = [ "//runsc:__subpackages__", "//test:__subpackages__", @@ -53,6 +53,7 @@ go_test( "//pkg/unet", "//pkg/urpc", "//runsc/boot", + "//runsc/boot/platforms", "//runsc/specutils", "//runsc/test/testutil", "@com_github_cenkalti_backoff//:go_default_library", diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index d016533e6..e9372989f 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -27,10 +27,10 @@ import ( "github.com/kr/pty" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/pkg/urpc" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/test/testutil" ) // socketPath creates a path inside bundleDir and ensures that the returned @@ -138,8 +138,13 @@ func TestConsoleSocket(t *testing.T) { defer cleanup() // Create the container and pass the socket name. - id := testutil.UniqueContainerID() - c, err := Create(id, spec, conf, bundleDir, sock, "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: sock, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -167,7 +172,12 @@ func TestJobControlSignalExec(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -186,7 +196,7 @@ func TestJobControlSignalExec(t *testing.T) { defer ptySlave.Close() // Exec bash and attach a terminal. - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/bin/bash", // Don't let bash execute from profile or rc files, otherwise // our PID counts get messed up. 
@@ -198,7 +208,7 @@ func TestJobControlSignalExec(t *testing.T) { StdioIsPty: true, } - pid, err := c.Execute(args) + pid, err := c.Execute(execArgs) if err != nil { t.Fatalf("error executing: %v", err) } @@ -296,8 +306,13 @@ func TestJobControlSignalRootContainer(t *testing.T) { defer cleanup() // Create the container and pass the socket name. - id := testutil.UniqueContainerID() - c, err := Create(id, spec, conf, bundleDir, sock, "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: sock, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } diff --git a/runsc/container/container.go b/runsc/container/container.go index 04b611b56..8320bb2ca 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -33,12 +33,12 @@ import ( "github.com/cenkalti/backoff" "github.com/gofrs/flock" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/cgroup" - "gvisor.googlesource.com/gvisor/runsc/sandbox" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/cgroup" + "gvisor.dev/gvisor/runsc/sandbox" + "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -206,7 +206,7 @@ func findContainerRoot(rootDir, partialID string) (string, error) { } // Now see whether id could be an abbreviation of exactly 1 of the - // container ids. If id is ambigious (it could match more than 1 + // container ids. If id is ambiguous (it could match more than 1 // container), it is an error. cRoot = "" ids, err := List(rootDir) @@ -242,16 +242,47 @@ func List(rootDir string) ([]string, error) { return out, nil } -// Create creates the container in a new Sandbox process, unless the metadata +// Args is used to configure a new container. +type Args struct { + // ID is the container unique identifier. + ID string + + // Spec is the OCI spec that describes the container. + Spec *specs.Spec + + // BundleDir is the directory containing the container bundle. + BundleDir string + + // ConsoleSocket is the path to a unix domain socket that will receive + // the console FD. It may be empty. + ConsoleSocket string + + // PIDFile is the filename where the container's root process PID will be + // written to. It may be empty. + PIDFile string + + // UserLog is the filename to send user-visible logs to. It may be empty. + // + // It only applies for the init container. + UserLog string + + // Attached indicates that the sandbox lifecycle is attached with the caller. + // If the caller exits, the sandbox should exit too. + // + // It only applies for the init container. + Attached bool +} + +// New creates the container in a new Sandbox process, unless the metadata // indicates that an existing Sandbox should be used. The caller must call // Destroy() on the container. 
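
With the Args struct above and the New constructor below, callers build a single container.Args value instead of passing a long positional parameter list. From a caller's point of view the lifecycle now reads roughly as follows (a sketch; the ID, paths and error handling are placeholders):

cargs := container.Args{
	ID:        "example-id",
	Spec:      spec,
	BundleDir: "/path/to/bundle",
}
c, err := container.New(conf, cargs)
if err != nil {
	return err
}
defer c.Destroy()
if err := c.Start(conf); err != nil {
	return err
}
// Wait returns the syscall.WaitStatus of the container's init process.
ws, err := c.Wait()
if err != nil {
	return err
}
log.Infof("container exited with status %+v", ws)

container.Run(conf, cargs) wraps the same sequence and, because of the Attached field, only waits for the container and returns its exit status when the caller asked to stay attached; otherwise it returns right after starting.
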
-func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile, userLog string) (*Container, error) { - log.Debugf("Create container %q in root dir: %s", id, conf.RootDir) - if err := validateID(id); err != nil { +func New(conf *boot.Config, args Args) (*Container, error) { + log.Debugf("Create container %q in root dir: %s", args.ID, conf.RootDir) + if err := validateID(args.ID); err != nil { return nil, err } - unlockRoot, err := maybeLockRootContainer(spec, conf.RootDir) + unlockRoot, err := maybeLockRootContainer(args.Spec, conf.RootDir) if err != nil { return nil, err } @@ -259,7 +290,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // Lock the container metadata file to prevent concurrent creations of // containers with the same id. - containerRoot := filepath.Join(conf.RootDir, id) + containerRoot := filepath.Join(conf.RootDir, args.ID) unlock, err := lockContainerMetadata(containerRoot) if err != nil { return nil, err @@ -269,16 +300,16 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // Check if the container already exists by looking for the metadata // file. if _, err := os.Stat(filepath.Join(containerRoot, metadataFilename)); err == nil { - return nil, fmt.Errorf("container with id %q already exists", id) + return nil, fmt.Errorf("container with id %q already exists", args.ID) } else if !os.IsNotExist(err) { return nil, fmt.Errorf("looking for existing container in %q: %v", containerRoot, err) } c := &Container{ - ID: id, - Spec: spec, - ConsoleSocket: consoleSocket, - BundleDir: bundleDir, + ID: args.ID, + Spec: args.Spec, + ConsoleSocket: args.ConsoleSocket, + BundleDir: args.BundleDir, Root: containerRoot, Status: Creating, CreatedAt: time.Now(), @@ -294,31 +325,47 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // started in an existing sandbox, we must do so. The metadata will // indicate the ID of the sandbox, which is the same as the ID of the // init container in the sandbox. - if isRoot(spec) { - log.Debugf("Creating new sandbox for container %q", id) + if isRoot(args.Spec) { + log.Debugf("Creating new sandbox for container %q", args.ID) // Create and join cgroup before processes are created to ensure they are - // part of the cgroup from the start (and all tneir children processes). - cg, err := cgroup.New(spec) + // part of the cgroup from the start (and all their children processes). + cg, err := cgroup.New(args.Spec) if err != nil { return nil, err } if cg != nil { // If there is cgroup config, install it before creating sandbox process. - if err := cg.Install(spec.Linux.Resources); err != nil { + if err := cg.Install(args.Spec.Linux.Resources); err != nil { return nil, fmt.Errorf("configuring cgroup: %v", err) } } if err := runInCgroup(cg, func() error { - ioFiles, specFile, err := c.createGoferProcess(spec, conf, bundleDir) + ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir) if err != nil { return err } // Start a new sandbox for this container. Any errors after this point // must destroy the container. 
- c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, cg) - return err + sandArgs := &sandbox.Args{ + ID: args.ID, + Spec: args.Spec, + BundleDir: args.BundleDir, + ConsoleSocket: args.ConsoleSocket, + UserLog: args.UserLog, + IOFiles: ioFiles, + MountsFile: specFile, + Cgroup: cg, + Attached: args.Attached, + } + sand, err := sandbox.New(conf, sandArgs) + if err != nil { + return err + } + c.Sandbox = sand + return nil + }); err != nil { return nil, err } @@ -331,7 +378,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // * A container struct whose sandbox ID is equal to the above // container/sandbox ID, but that has a different container // ID. This is the child container. - sbid, ok := specutils.SandboxID(spec) + sbid, ok := specutils.SandboxID(args.Spec) if !ok { return nil, fmt.Errorf("no sandbox ID found when creating container") } @@ -356,8 +403,8 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // Write the PID file. Containerd considers the create complete after // this file is created, so it must be the last thing we do. - if pidFile != "" { - if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil { + if args.PIDFile != "" { + if err := ioutil.WriteFile(args.PIDFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil { return nil, fmt.Errorf("error writing PID file: %v", err) } } @@ -399,7 +446,7 @@ func (c *Container) Start(conf *boot.Config) error { } } else { // Join cgroup to strt gofer process to ensure it's part of the cgroup from - // the start (and all tneir children processes). + // the start (and all their children processes). if err := runInCgroup(c.Sandbox.Cgroup, func() error { // Create the gofer process. ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) @@ -461,13 +508,13 @@ func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile str } // Run is a helper that calls Create + Start + Wait. -func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile, userLog string, detach bool) (syscall.WaitStatus, error) { - log.Debugf("Run container %q in root dir: %s", id, conf.RootDir) - c, err := Create(id, spec, conf, bundleDir, consoleSocket, pidFile, userLog) +func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) { + log.Debugf("Run container %q in root dir: %s", args.ID, conf.RootDir) + c, err := New(conf, args) if err != nil { return 0, fmt.Errorf("creating container: %v", err) } - // Clean up partially created container if an error ocurrs. + // Clean up partially created container if an error occurs. // Any errors returned by Destroy() itself are ignored. cu := specutils.MakeCleanup(func() { c.Destroy() @@ -476,7 +523,7 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke if conf.RestoreFile != "" { log.Debugf("Restore: %v", conf.RestoreFile) - if err := c.Restore(spec, conf, conf.RestoreFile); err != nil { + if err := c.Restore(args.Spec, conf, conf.RestoreFile); err != nil { return 0, fmt.Errorf("starting container: %v", err) } } else { @@ -484,11 +531,11 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke return 0, fmt.Errorf("starting container: %v", err) } } - if detach { - cu.Release() - return 0, nil + if args.Attached { + return c.Wait() } - return c.Wait() + cu.Release() + return 0, nil } // Execute runs the specified command in the container. 
It returns the PID of diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 72c5ecbb0..c1d6ca7b8 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -17,6 +17,7 @@ package container import ( "bytes" "fmt" + "io" "io/ioutil" "os" "path" @@ -31,12 +32,14 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot/platforms" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/testutil" ) // waitForProcessList waits for the given process list to show up in the container. @@ -210,7 +213,13 @@ func run(spec *specs.Spec, conf *boot.Config) error { defer os.RemoveAll(bundleDir) // Create, start and wait for the container. - ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "", false) + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + Attached: true, + } + ws, err := Run(conf, args) if err != nil { return fmt.Errorf("running container: %v", err) } @@ -249,7 +258,7 @@ func configs(opts ...configOption) []*boot.Config { if testutil.RaceEnabled { continue } - c.Platform = boot.PlatformKVM + c.Platform = platforms.KVM case nonExclusiveFS: c.FileAccess = boot.FileAccessShared default: @@ -294,15 +303,19 @@ func TestLifecycle(t *testing.T) { }, } // Create the container. - id := testutil.UniqueContainerID() - c, err := Create(id, spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } defer c.Destroy() // Load the container from disk and check the status. - c, err = Load(rootDir, id) + c, err = Load(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -315,7 +328,7 @@ func TestLifecycle(t *testing.T) { if err != nil { t.Fatalf("error listing containers: %v", err) } - if got, want := ids, []string{id}; !reflect.DeepEqual(got, want) { + if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) { t.Errorf("container list got %v, want %v", got, want) } @@ -325,7 +338,7 @@ func TestLifecycle(t *testing.T) { } // Load the container from disk and check the status. - c, err = Load(rootDir, id) + c, err = Load(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -366,7 +379,7 @@ func TestLifecycle(t *testing.T) { wg.Wait() // Load the container from disk and check the status. - c, err = Load(rootDir, id) + c, err = Load(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -389,7 +402,7 @@ func TestLifecycle(t *testing.T) { } // Loading the container by id should fail. 
- if _, err = Load(rootDir, id); err == nil { + if _, err = Load(rootDir, args.ID); err == nil { t.Errorf("expected loading destroyed container to fail, but it did not") } } @@ -397,6 +410,46 @@ func TestLifecycle(t *testing.T) { // Test the we can execute the application with different path formats. func TestExePath(t *testing.T) { + // Create two directories that will be prepended to PATH. + firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first") + if err != nil { + t.Fatal(err) + } + secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second") + if err != nil { + t.Fatal(err) + } + + // Create two minimal executables in the second path, two of which + // will be masked by files in first path. + for _, p := range []string{"unmasked", "masked1", "masked2"} { + path := filepath.Join(secondPath, p) + f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777) + if err != nil { + t.Fatal(err) + } + defer f.Close() + if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil { + t.Fatal(err) + } + } + + // Create a non-executable file in the first path which masks a healthy + // executable in the second. + nonExecutable := filepath.Join(firstPath, "masked1") + f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666) + if err != nil { + t.Fatal(err) + } + f2.Close() + + // Create a non-regular file in the first path which masks a healthy + // executable in the second. + nonRegular := filepath.Join(firstPath, "masked2") + if err := os.Mkdir(nonRegular, 0777); err != nil { + t.Fatal(err) + } + for _, conf := range configs(overlay) { t.Logf("Running test with conf: %+v", conf) for _, test := range []struct { @@ -409,14 +462,36 @@ func TestExePath(t *testing.T) { {path: "thisfiledoesntexit", success: false}, {path: "bin/thisfiledoesntexit", success: false}, {path: "/bin/thisfiledoesntexit", success: false}, + + {path: "unmasked", success: true}, + {path: filepath.Join(firstPath, "unmasked"), success: false}, + {path: filepath.Join(secondPath, "unmasked"), success: true}, + + {path: "masked1", success: true}, + {path: filepath.Join(firstPath, "masked1"), success: false}, + {path: filepath.Join(secondPath, "masked1"), success: true}, + + {path: "masked2", success: true}, + {path: filepath.Join(firstPath, "masked2"), success: false}, + {path: filepath.Join(secondPath, "masked2"), success: true}, } { spec := testutil.NewSpecWithArgs(test.path) + spec.Process.Env = []string{ + fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")), + } + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("exec: %s, error setting up container: %v", test.path, err) } - ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "", false) + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + Attached: true, + } + ws, err := Run(conf, args) os.RemoveAll(rootDir) os.RemoveAll(bundleDir) @@ -449,7 +524,13 @@ func TestAppExitStatus(t *testing.T) { defer os.RemoveAll(rootDir) defer os.RemoveAll(bundleDir) - ws, err := Run(testutil.UniqueContainerID(), succSpec, conf, bundleDir, "", "", "", false) + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: succSpec, + BundleDir: bundleDir, + Attached: true, + } + ws, err := Run(conf, args) if err != nil { t.Fatalf("error running container: %v", err) } @@ -468,7 +549,13 @@ func TestAppExitStatus(t *testing.T) { defer os.RemoveAll(rootDir2) defer os.RemoveAll(bundleDir2) - ws, err = Run(testutil.UniqueContainerID(), errSpec, conf, bundleDir2, "", 
"", "", false) + args2 := Args{ + ID: testutil.UniqueContainerID(), + Spec: errSpec, + BundleDir: bundleDir2, + Attached: true, + } + ws, err = Run(conf, args2) if err != nil { t.Fatalf("error running container: %v", err) } @@ -493,7 +580,12 @@ func TestExec(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -525,7 +617,7 @@ func TestExec(t *testing.T) { t.Error(err) } - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/bin/sleep", Argv: []string{"/bin/sleep", "5"}, WorkingDirectory: "/", @@ -536,7 +628,7 @@ func TestExec(t *testing.T) { // First, start running exec (whick blocks). status := make(chan error, 1) go func() { - exitStatus, err := cont.executeSync(args) + exitStatus, err := cont.executeSync(execArgs) if err != nil { log.Debugf("error executing: %v", err) status <- err @@ -584,7 +676,12 @@ func TestKillPid(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -655,7 +752,12 @@ func TestCheckpointRestore(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -701,7 +803,12 @@ func TestCheckpointRestore(t *testing.T) { defer outputFile2.Close() // Restore into a new container. - cont2, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args2 := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont2, err := New(conf, args2) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -740,7 +847,12 @@ func TestCheckpointRestore(t *testing.T) { defer outputFile3.Close() // Restore into a new container. - cont3, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args3 := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont3, err := New(conf, args3) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -777,7 +889,7 @@ func TestUnixDomainSockets(t *testing.T) { t.Logf("Running test with conf: %+v", conf) // UDS path is limited to 108 chars for compatibility with older systems. - // Use '/tmp' (instead of testutil.TmpDir) to to ensure the size limit is + // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is // not exceeded. Assumes '/tmp' exists in the system. dir, err := ioutil.TempDir("/tmp", "uds-test") if err != nil { @@ -819,7 +931,12 @@ func TestUnixDomainSockets(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. 
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -866,7 +983,12 @@ func TestUnixDomainSockets(t *testing.T) { defer outputFile2.Close() // Restore into a new container. - contRestore, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + argsRestore := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + contRestore, err := New(conf, argsRestore) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -920,7 +1042,12 @@ func TestPauseResume(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -948,7 +1075,7 @@ func TestPauseResume(t *testing.T) { } script := fmt.Sprintf("while [[ -f %q ]]; do sleep 0.1; done", lock.Name()) - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/bin/bash", Argv: []string{"bash", "-c", script}, WorkingDirectory: "/", @@ -956,7 +1083,7 @@ func TestPauseResume(t *testing.T) { } // First, start running exec. - _, err = cont.Execute(args) + _, err = cont.Execute(execArgs) if err != nil { t.Fatalf("error executing: %v", err) } @@ -1025,7 +1152,12 @@ func TestPauseResumeStatus(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1089,7 +1221,12 @@ func TestCapabilities(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1131,7 +1268,7 @@ func TestCapabilities(t *testing.T) { // Need to traverse the intermediate directory. os.Chmod(rootDir, 0755) - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: exePath, Argv: []string{exePath}, WorkingDirectory: "/", @@ -1141,16 +1278,16 @@ func TestCapabilities(t *testing.T) { } // "exe" should fail because we don't have the necessary permissions. - if _, err := cont.executeSync(args); err == nil { + if _, err := cont.executeSync(execArgs); err == nil { t.Fatalf("container executed without error, but an error was expected") } // Now we run with the capability enabled and should succeed. - args.Capabilities = &auth.TaskCapabilities{ + execArgs.Capabilities = &auth.TaskCapabilities{ EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), } // "exe" should not fail this time. - if _, err := cont.executeSync(args); err != nil { + if _, err := cont.executeSync(execArgs); err != nil { t.Fatalf("container failed to exec %v: %v", args, err) } } @@ -1231,7 +1368,12 @@ func TestReadonlyRoot(t *testing.T) { defer os.RemoveAll(bundleDir) // Create, start and wait for the container. 
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1299,7 +1441,12 @@ func TestUIDMap(t *testing.T) { defer os.RemoveAll(bundleDir) // Create, start and wait for the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1351,7 +1498,12 @@ func TestReadonlyMount(t *testing.T) { defer os.RemoveAll(bundleDir) // Create, start and wait for the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1395,7 +1547,12 @@ func TestAbbreviatedIDs(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(cid, spec, conf, bundleDir, "", "", "") + args := Args{ + ID: cid, + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1440,7 +1597,12 @@ func TestGoferExits(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1519,7 +1681,14 @@ func TestUserLog(t *testing.T) { userLog := filepath.Join(dir, "user.log") // Create, start and wait for the container. - ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", userLog, false) + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + UserLog: userLog, + Attached: true, + } + ws, err := Run(conf, args) if err != nil { t.Fatalf("error running container: %v", err) } @@ -1553,7 +1722,12 @@ func TestWaitOnExitedSandbox(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and Start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1596,7 +1770,12 @@ func TestDestroyNotStarted(t *testing.T) { defer os.RemoveAll(bundleDir) // Create the container and check that it can be destroyed. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1618,15 +1797,19 @@ func TestDestroyStarting(t *testing.T) { defer os.RemoveAll(bundleDir) // Create the container and check that it can be destroyed. 
- id := testutil.UniqueContainerID() - c, err := Create(id, spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } // Container is not thread safe, so load another instance to run in // concurrently. - startCont, err := Load(rootDir, id) + startCont, err := Load(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -1731,7 +1914,12 @@ func TestMountPropagation(t *testing.T) { defer os.RemoveAll(rootDir) defer os.RemoveAll(bundleDir) - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("creating container: %v", err) } @@ -1749,21 +1937,21 @@ func TestMountPropagation(t *testing.T) { // Check that mount didn't propagate to private mount. privFile := filepath.Join(priv, "mnt", "file") - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/usr/bin/test", Argv: []string{"test", "!", "-f", privFile}, } - if ws, err := cont.executeSync(args); err != nil || ws != 0 { + if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err) } // Check that mount propagated to slave mount. slaveFile := filepath.Join(slave, "mnt", "file") - args = &control.ExecArgs{ + execArgs = &control.ExecArgs{ Filename: "/usr/bin/test", Argv: []string{"test", "-f", slaveFile}, } - if ws, err := cont.executeSync(args); err != nil || ws != 0 { + if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { t.Fatalf("exec: test -f %q, ws: %v, err: %v", privFile, ws, err) } } @@ -1812,7 +2000,12 @@ func TestMountSymlink(t *testing.T) { defer os.RemoveAll(rootDir) defer os.RemoveAll(bundleDir) - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("creating container: %v", err) } @@ -1825,11 +2018,11 @@ func TestMountSymlink(t *testing.T) { // Check that symlink was resolved and mount was created where the symlink // is pointing to. 
file := path.Join(target, "file") - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/usr/bin/test", Argv: []string{"test", "-f", file}, } - if ws, err := cont.executeSync(args); err != nil || ws != 0 { + if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) } } @@ -1853,7 +2046,7 @@ func TestMain(m *testing.M) { if err := testutil.ConfigureExePath(); err != nil { panic(err.Error()) } - testutil.RunAsRoot() + specutils.MaybeRunAsRoot() os.Exit(m.Run()) } diff --git a/runsc/container/hook.go b/runsc/container/hook.go index acae6781e..901607aee 100644 --- a/runsc/container/hook.go +++ b/runsc/container/hook.go @@ -24,7 +24,7 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) // This file implements hooks as defined in OCI spec: diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 4ea3c74ac..c0f9b372c 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -28,10 +28,10 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/testutil" ) func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { @@ -84,7 +84,12 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C } bundles = append(bundles, bundleDir) - cont, err := Create(ids[i], spec, conf, bundleDir, "", "", "") + args := Args{ + ID: ids[i], + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { cleanup() return nil, nil, fmt.Errorf("error creating container: %v", err) @@ -99,6 +104,36 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C return containers, cleanup, nil } +type execDesc struct { + c *Container + cmd []string + want int + desc string +} + +func execMany(execs []execDesc) error { + for _, exec := range execs { + args := &control.ExecArgs{Argv: exec.cmd} + if ws, err := exec.c.executeSync(args); err != nil { + return fmt.Errorf("error executing %+v: %v", args, err) + } else if ws.ExitStatus() != exec.want { + return fmt.Errorf("%q: exec %q got exit status: %d, want: %d", exec.desc, exec.cmd, ws.ExitStatus(), exec.want) + } + } + return nil +} + +func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) { + for _, spec := range pod { + spec.Annotations[path.Join(boot.MountPrefix, name, "source")] = mount.Source + spec.Annotations[path.Join(boot.MountPrefix, name, "type")] = mount.Type + spec.Annotations[path.Join(boot.MountPrefix, name, "share")] = "pod" + if len(mount.Options) > 0 { + spec.Annotations[path.Join(boot.MountPrefix, name, "options")] = strings.Join(mount.Options, ",") + } + } +} + // TestMultiContainerSanity checks that it is possible to run 2 dead-simple // containers in the same sandbox. 
func TestMultiContainerSanity(t *testing.T) { @@ -631,7 +666,12 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) { } defer os.RemoveAll(rootBundleDir) - root, err := Create(ids[0], specs[0], conf, rootBundleDir, "", "", "") + rootArgs := Args{ + ID: ids[0], + Spec: specs[0], + BundleDir: rootBundleDir, + } + root, err := New(conf, rootArgs) if err != nil { t.Fatalf("error creating root container: %v", err) } @@ -647,7 +687,12 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) { } defer os.RemoveAll(bundleDir) - cont, err := Create(ids[1], specs[1], conf, bundleDir, "", "", "") + args := Args{ + ID: ids[1], + Spec: specs[1], + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -682,7 +727,12 @@ func TestMultiContainerDestroyStarting(t *testing.T) { } defer os.RemoveAll(rootBundleDir) - root, err := Create(ids[0], specs[0], conf, rootBundleDir, "", "", "") + rootArgs := Args{ + ID: ids[0], + Spec: specs[0], + BundleDir: rootBundleDir, + } + root, err := New(conf, rootArgs) if err != nil { t.Fatalf("error creating root container: %v", err) } @@ -703,7 +753,12 @@ func TestMultiContainerDestroyStarting(t *testing.T) { } defer os.RemoveAll(bundleDir) - cont, err := Create(ids[i], specs[i], conf, bundleDir, "", "", "") + rootArgs := Args{ + ID: ids[i], + Spec: specs[i], + BundleDir: rootBundleDir, + } + cont, err := New(conf, rootArgs) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -777,7 +832,12 @@ func TestMultiContainerGoferStop(t *testing.T) { // Start root container. conf := testutil.TestConfigWithRoot(rootDir) - root, err := Create(rootID, rootSpec, conf, bundleDir, "", "", "") + rootArgs := Args{ + ID: rootID, + Spec: rootSpec, + BundleDir: bundleDir, + } + root, err := New(conf, rootArgs) if err != nil { t.Fatalf("error creating root container: %v", err) } @@ -801,7 +861,12 @@ func TestMultiContainerGoferStop(t *testing.T) { } defer os.RemoveAll(bundleDir) - child, err := Create(ids[j], spec, conf, bundleDir, "", "", "") + args := Args{ + ID: ids[j], + Spec: spec, + BundleDir: bundleDir, + } + child, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -828,3 +893,277 @@ func TestMultiContainerGoferStop(t *testing.T) { } } } + +// Test that pod shared mounts are properly mounted in 2 containers and that +// changes from one container is reflected in the other. +func TestMultiContainerSharedMount(t *testing.T) { + for _, conf := range configs(all...) { + t.Logf("Running test with conf: %+v", conf) + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: nil, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + + createSharedMount(mnt0, "test-mount", podSpec...) 
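The createSharedMount helper defined earlier in this file is what makes the two tmpfs mounts above refer to the same pod-level mount: it annotates every spec in the pod with the same mount name and share mode. A sketch of the keys it writes for the "test-mount" above (boot.MountPrefix is kept symbolic because its literal value is not part of this diff):

package main

import (
	"path"

	"gvisor.dev/gvisor/runsc/boot"
)

// sharedMountAnnotations mirrors what createSharedMount writes into each
// container spec for the tmpfs mount named "test-mount".
func sharedMountAnnotations() map[string]string {
	return map[string]string{
		path.Join(boot.MountPrefix, "test-mount", "source"): "/some/dir",
		path.Join(boot.MountPrefix, "test-mount", "type"):   "tmpfs",
		path.Join(boot.MountPrefix, "test-mount", "share"):  "pod",
		// "options" is added only when the mount carries options, e.g. "ro".
	}
}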
+ + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, + desc: "directory is mounted in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, + desc: "directory is mounted in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + desc: "create file in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file appears in container1", + }, + { + c: containers[1], + cmd: []string{"/bin/rm", file1}, + desc: "file removed from container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-f", file0}, + desc: "file removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-f", file1}, + desc: "file removed from container1", + }, + { + c: containers[1], + cmd: []string{"/bin/mkdir", file1}, + desc: "create directory in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", file0}, + desc: "dir appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", file1}, + desc: "dir appears in container1", + }, + { + c: containers[0], + cmd: []string{"/bin/rmdir", file0}, + desc: "create directory in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-d", file0}, + desc: "dir removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-d", file1}, + desc: "dir removed from container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + } +} + +// Test that pod mounts are mounted as readonly when requested. +func TestMultiContainerSharedMountReadonly(t *testing.T) { + for _, conf := range configs(all...) { + t.Logf("Running test with conf: %+v", conf) + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: []string{"ro"}, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + + createSharedMount(mnt0, "test-mount", podSpec...) 
+ + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, + desc: "directory is mounted in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, + desc: "directory is mounted in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + want: 1, + desc: "fails to write to container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/touch", file1}, + want: 1, + desc: "fails to write to container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + } +} + +// Test that shared pod mounts continue to work after container is restarted. +func TestMultiContainerSharedMountRestart(t *testing.T) { + for _, conf := range configs(all...) { + t.Logf("Running test with conf: %+v", conf) + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: nil, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + + createSharedMount(mnt0, "test-mount", podSpec...) + + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + desc: "create file in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file appears in container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + + containers[1].Destroy() + + bundleDir, err := testutil.SetupBundleDir(podSpec[1]) + if err != nil { + t.Fatalf("error restarting container: %v", err) + } + defer os.RemoveAll(bundleDir) + + args := Args{ + ID: ids[1], + Spec: podSpec[1], + BundleDir: bundleDir, + } + containers[1], err = New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + if err := containers[1].Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + + execs = []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file is still in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file is still in container1", + }, + { + c: containers[1], + cmd: []string{"/bin/rm", file1}, + desc: "file removed from container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-f", file0}, + desc: "file removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-f", file1}, + desc: "file removed from container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + } +} diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go index 9d5a592a5..1f90d2462 100644 --- a/runsc/container/shared_volume_test.go +++ 
b/runsc/container/shared_volume_test.go @@ -22,10 +22,10 @@ import ( "path/filepath" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/test/testutil" ) // TestSharedVolume checks that modifications to a volume mount are propagated @@ -52,7 +52,12 @@ func TestSharedVolume(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -206,7 +211,12 @@ func TestSharedVolumeFile(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go index 62923f1ef..b7fc6498f 100644 --- a/runsc/container/test_app/test_app.go +++ b/runsc/container/test_app/test_app.go @@ -29,7 +29,7 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/test/testutil" ) func main() { diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index 4adc9c1bc..80a4aa2fe 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -8,7 +8,7 @@ go_library( "fsgofer.go", "fsgofer_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer", + importpath = "gvisor.dev/gvisor/runsc/fsgofer", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD index 78c5b526c..e2318a978 100644 --- a/runsc/fsgofer/filter/BUILD +++ b/runsc/fsgofer/filter/BUILD @@ -11,7 +11,7 @@ go_library( "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter", + importpath = "gvisor.dev/gvisor/runsc/fsgofer/filter", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index 4faab2946..2d50774d4 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -19,8 +19,8 @@ import ( "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/seccomp" ) // allowedSyscalls is the set of syscalls executed by the gofer. diff --git a/runsc/fsgofer/filter/extra_filters.go b/runsc/fsgofer/filter/extra_filters.go index 5c5ec4e06..e28d4b8d6 100644 --- a/runsc/fsgofer/filter/extra_filters.go +++ b/runsc/fsgofer/filter/extra_filters.go @@ -17,11 +17,11 @@ package filter import ( - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by -// Go intrumentation tools, e.g. -race, -msan. +// Go instrumentation tools, e.g. -race, -msan. // Returns empty when disabled. 
func instrumentationFilters() seccomp.SyscallRules { return nil diff --git a/runsc/fsgofer/filter/extra_filters_msan.go b/runsc/fsgofer/filter/extra_filters_msan.go index 553060bc3..8c6179c8f 100644 --- a/runsc/fsgofer/filter/extra_filters_msan.go +++ b/runsc/fsgofer/filter/extra_filters_msan.go @@ -19,8 +19,8 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by MSAN. diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go index 28555f898..885c92f7a 100644 --- a/runsc/fsgofer/filter/extra_filters_race.go +++ b/runsc/fsgofer/filter/extra_filters_race.go @@ -19,8 +19,8 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by TSAN. diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go index ff8154369..65053415f 100644 --- a/runsc/fsgofer/filter/filter.go +++ b/runsc/fsgofer/filter/filter.go @@ -18,7 +18,7 @@ package filter import ( - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // Install installs seccomp filters. diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 2cf50290a..fe450c64f 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -28,15 +28,16 @@ import ( "path" "path/filepath" "runtime" + "strconv" "sync" "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -223,6 +224,28 @@ type localFile struct { lastDirentOffset uint64 } +var procSelfFD *fd.FD + +// OpenProcSelfFD opens the /proc/self/fd directory, which will be used to +// reopen file descriptors. +func OpenProcSelfFD() error { + d, err := syscall.Open("/proc/self/fd", syscall.O_RDONLY|syscall.O_DIRECTORY, 0) + if err != nil { + return fmt.Errorf("error opening /proc/self/fd: %v", err) + } + procSelfFD = fd.New(d) + return nil +} + +func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) { + d, err := syscall.Openat(int(procSelfFD.FD()), strconv.Itoa(f.FD()), mode&^syscall.O_NOFOLLOW, 0) + if err != nil { + return nil, err + } + + return fd.New(d), nil +} + func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, error) { path := path.Join(parent.hostPath, name) f, err := openAnyFile(path, func(mode int) (*fd.FD, error) { @@ -348,7 +371,7 @@ func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { // name_to_handle_at and open_by_handle_at aren't supported by overlay2. 
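The fsgofer hunks above introduce OpenProcSelfFD and reopenProcFd: instead of re-opening files by host path (which re-walks the path and can race with renames or symlink swaps), the gofer re-opens a descriptor it already holds through /proc/self/fd. A standalone sketch of the same technique, for illustration only:

package main

import (
	"fmt"
	"strconv"
	"syscall"
)

// reopenWithFlags reopens an already-open descriptor with different flags by
// going through /proc/self/fd, so no host path is re-resolved.
func reopenWithFlags(fd, flags int) (int, error) {
	dir, err := syscall.Open("/proc/self/fd", syscall.O_RDONLY|syscall.O_DIRECTORY, 0)
	if err != nil {
		return -1, fmt.Errorf("opening /proc/self/fd: %v", err)
	}
	defer syscall.Close(dir)

	// Entries in /proc/self/fd are symlinks to the open files, so O_NOFOLLOW
	// must be cleared for the openat to succeed.
	newFD, err := syscall.Openat(dir, strconv.Itoa(fd), flags&^syscall.O_NOFOLLOW, 0)
	if err != nil {
		return -1, err
	}
	return newFD, nil
}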
log.Debugf("Open reopening file, mode: %v, %q", mode, l.hostPath) var err error - newFile, err = fd.Open(l.hostPath, openFlags|mode.OSFlags(), 0) + newFile, err = reopenProcFd(l.file, openFlags|mode.OSFlags()) if err != nil { return nil, p9.QID{}, 0, extractErrno(err) } @@ -477,7 +500,7 @@ func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) { // Duplicate current file if 'names' is empty. if len(names) == 0 { newFile, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) { - return fd.Open(l.hostPath, openFlags|mode, 0) + return reopenProcFd(l.file, openFlags|mode) }) if err != nil { return nil, nil, extractErrno(err) @@ -596,7 +619,7 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) } // SetAttr implements p9.File. Due to mismatch in file API, options -// cannot be changed atomicaly and user may see partial changes when +// cannot be changed atomically and user may see partial changes when // an error happens. func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { conf := l.attachPoint.conf @@ -635,7 +658,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { f := l.file if l.ft == regular && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite { var err error - f, err = fd.Open(l.hostPath, openFlags|syscall.O_WRONLY, 0) + f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY) if err != nil { return extractErrno(err) } diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go index 695836927..0a162bb8a 100644 --- a/runsc/fsgofer/fsgofer_test.go +++ b/runsc/fsgofer/fsgofer_test.go @@ -22,8 +22,8 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" ) func init() { @@ -31,6 +31,10 @@ func init() { allConfs = append(allConfs, rwConfs...) allConfs = append(allConfs, roConfs...) + + if err := OpenProcSelfFD(); err != nil { + panic(err) + } } func assertPanic(t *testing.T, f func()) { diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go index 58af5e44d..ff2556aee 100644 --- a/runsc/fsgofer/fsgofer_unsafe.go +++ b/runsc/fsgofer/fsgofer_unsafe.go @@ -18,8 +18,8 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserr" ) func statAt(dirFd int, name string) (syscall.Stat_t, error) { diff --git a/runsc/main.go b/runsc/main.go index a214f6ba0..bc83c57a2 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -29,10 +29,11 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/cmd" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/cmd" + "gvisor.dev/gvisor/runsc/specutils" ) var ( @@ -61,16 +62,19 @@ var ( straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs") // Flags that control sandbox runtime behavior. - platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm") - network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. 
Using network inside the sandbox is more secure because it's isolated from the host network.") - gso = flag.Bool("gso", true, "enable generic segmenation offload") - fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") - overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") - watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") - panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") - profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") - netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.") - numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.") + platformName = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm") + network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.") + gso = flag.Bool("gso", true, "enable generic segmenation offload") + fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") + overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") + watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") + panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") + profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") + netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.") + numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.") + rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.") + + // Test flags, not to be used outside tests, ever. testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! 
This skips many security measures that isolate the host from the sandbox.") ) @@ -136,8 +140,8 @@ func main() { } cmd.ErrorLogger = errorLogger - platformType, err := boot.MakePlatformType(*platform) - if err != nil { + platformType := *platformName + if _, err := platform.Lookup(platformType); err != nil { cmd.Fatalf("%v", err) } @@ -166,26 +170,28 @@ func main() { // Create a new Config from the flags. conf := &boot.Config{ - RootDir: *rootDir, - Debug: *debug, - LogFilename: *logFilename, - LogFormat: *logFormat, - DebugLog: *debugLog, - DebugLogFormat: *debugLogFormat, - FileAccess: fsAccess, - Overlay: *overlay, - Network: netType, - GSO: *gso, - LogPackets: *logPackets, - Platform: platformType, - Strace: *strace, - StraceLogSize: *straceLogSize, - WatchdogAction: wa, - PanicSignal: *panicSignal, - ProfileEnable: *profile, - EnableRaw: *netRaw, + RootDir: *rootDir, + Debug: *debug, + LogFilename: *logFilename, + LogFormat: *logFormat, + DebugLog: *debugLog, + DebugLogFormat: *debugLogFormat, + FileAccess: fsAccess, + Overlay: *overlay, + Network: netType, + GSO: *gso, + LogPackets: *logPackets, + Platform: platformType, + Strace: *strace, + StraceLogSize: *straceLogSize, + WatchdogAction: wa, + PanicSignal: *panicSignal, + ProfileEnable: *profile, + EnableRaw: *netRaw, + NumNetworkChannels: *numNetworkChannels, + Rootless: *rootless, + TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot, - NumNetworkChannels: *numNetworkChannels, } if len(*straceSyscalls) != 0 { conf.StraceSyscalls = strings.Split(*straceSyscalls, ",") diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index c0de9a28f..7fdceaab6 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -9,7 +9,7 @@ go_library( "network_unsafe.go", "sandbox.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/sandbox", + importpath = "gvisor.dev/gvisor/runsc/sandbox", visibility = [ "//runsc:__subpackages__", ], @@ -18,9 +18,10 @@ go_library( "//pkg/control/server", "//pkg/log", "//pkg/sentry/control", - "//pkg/sentry/platform/kvm", + "//pkg/sentry/platform", "//pkg/urpc", "//runsc/boot", + "//runsc/boot/platforms", "//runsc/cgroup", "//runsc/console", "//runsc/specutils", diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index 1fd091514..a965a9dcb 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -27,10 +27,10 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/urpc" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -228,7 +228,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO if err != nil { return fmt.Errorf("getting link for interface %q: %v", iface.Name, err) } - link.LinkAddress = []byte(ifaceLink.Attrs().HardwareAddr) + link.LinkAddress = ifaceLink.Attrs().HardwareAddr log.Debugf("Setting up network channels") // Create the socket for the device. 
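The main.go hunk above stops translating the --platform flag into a fixed enum; the name is now validated with platform.Lookup against a string-keyed registry, and (as deviceFileForPlatform later in this diff shows) the registered platform is also what opens its device file. A sketch of that flow; the blank import of runsc/boot/platforms as the registration mechanism is an assumption, not something shown in this diff:

package main

import (
	"fmt"
	"os"

	"gvisor.dev/gvisor/pkg/sentry/platform"
	_ "gvisor.dev/gvisor/runsc/boot/platforms" // assumed to register "ptrace" and "kvm"
)

// openPlatformDevice resolves a platform by name and opens its device file.
// A nil file means the platform (e.g. ptrace) needs no device.
func openPlatformDevice(name string) (*os.File, error) {
	p, err := platform.Lookup(name)
	if err != nil {
		return nil, fmt.Errorf("unknown platform %q: %v", name, err)
	}
	f, err := p.OpenDevice()
	if err != nil {
		return nil, fmt.Errorf("opening device file for platform %q: %v", name, err)
	}
	return f, nil
}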
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 032190636..4a11f617d 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -28,16 +28,17 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" - "gvisor.googlesource.com/gvisor/pkg/control/client" - "gvisor.googlesource.com/gvisor/pkg/control/server" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" - "gvisor.googlesource.com/gvisor/pkg/urpc" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/cgroup" - "gvisor.googlesource.com/gvisor/runsc/console" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/control/client" + "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot/platforms" + "gvisor.dev/gvisor/runsc/cgroup" + "gvisor.dev/gvisor/runsc/console" + "gvisor.dev/gvisor/runsc/specutils" ) // Sandbox wraps a sandbox process. @@ -73,10 +74,46 @@ type Sandbox struct { statusMu sync.Mutex } +// Args is used to configure a new sandbox. +type Args struct { + // ID is the sandbox unique identifier. + ID string + + // Spec is the OCI spec that describes the container. + Spec *specs.Spec + + // BundleDir is the directory containing the container bundle. + BundleDir string + + // ConsoleSocket is the path to a unix domain socket that will receive + // the console FD. It may be empty. + ConsoleSocket string + + // UserLog is the filename to send user-visible logs to. It may be empty. + UserLog string + + // IOFiles is the list of files that connect to a 9P endpoint for the mounts + // points using Gofers. They must be in the same order as mounts appear in + // the spec. + IOFiles []*os.File + + // MountsFile is a file container mount information from the spec. It's + // equivalent to the mounts from the spec, except that all paths have been + // resolved to their final absolute location. + MountsFile *os.File + + // Gcgroup is the cgroup that the sandbox is part of. + Cgroup *cgroup.Cgroup + + // Attached indicates that the sandbox lifecycle is attached with the caller. + // If the caller exits, the sandbox should exit too. + Attached bool +} + // New creates the sandbox process. The caller must call Destroy() on the // sandbox. -func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, specFile *os.File, cg *cgroup.Cgroup) (*Sandbox, error) { - s := &Sandbox{ID: id, Cgroup: cg} +func New(conf *boot.Config, args *Args) (*Sandbox, error) { + s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup} // The Cleanup object cleans up partially created sandboxes when an error // occurs. Any errors occurring during cleanup itself are ignored. c := specutils.MakeCleanup(func() { @@ -93,7 +130,7 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke defer clientSyncFile.Close() // Create the sandbox process. - err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, sandboxSyncFile) + err = s.createSandboxProcess(conf, args, sandboxSyncFile) // sandboxSyncFile has to be closed to be able to detect when the sandbox // process exits unexpectedly. 
sandboxSyncFile.Close() @@ -291,7 +328,7 @@ func (s *Sandbox) connError(err error) error { // createSandboxProcess starts the sandbox as a subprocess by running the "boot" // command, passing in the bundle dir. -func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, mountsFile, startSyncFile *os.File) error { +func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncFile *os.File) error { // nextFD is used to get unused FDs that we can pass to the sandbox. It // starts at 3 because 0, 1, and 2 are taken by stdin/out/err. nextFD := 3 @@ -327,7 +364,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // Add the "boot" command to the args. // // All flags after this must be for the boot command - cmd.Args = append(cmd.Args, "boot", "--bundle="+bundleDir) + cmd.Args = append(cmd.Args, "boot", "--bundle="+args.BundleDir) // Create a socket for the control server and donate it to the sandbox. addr := boot.ControlSocketAddr(s.ID) @@ -342,12 +379,12 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD)) nextFD++ - defer mountsFile.Close() - cmd.ExtraFiles = append(cmd.ExtraFiles, mountsFile) + defer args.MountsFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, args.MountsFile) cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD)) nextFD++ - specFile, err := specutils.OpenSpec(bundleDir) + specFile, err := specutils.OpenSpec(args.BundleDir) if err != nil { return err } @@ -361,7 +398,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund nextFD++ // If there is a gofer, sends all socket ends to the sandbox. - for _, f := range ioFiles { + for _, f := range args.IOFiles { defer f.Close() cmd.ExtraFiles = append(cmd.ExtraFiles, f) cmd.Args = append(cmd.Args, "--io-fds="+strconv.Itoa(nextFD)) @@ -389,23 +426,22 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // If the console control socket file is provided, then create a new // pty master/slave pair and set the TTY on the sandbox process. - if consoleSocket != "" { + if args.ConsoleSocket != "" { cmd.Args = append(cmd.Args, "--console=true") // console.NewWithSocket will send the master on the given // socket, and return the slave. - tty, err := console.NewWithSocket(consoleSocket) + tty, err := console.NewWithSocket(args.ConsoleSocket) if err != nil { - return fmt.Errorf("setting up console with socket %q: %v", consoleSocket, err) + return fmt.Errorf("setting up console with socket %q: %v", args.ConsoleSocket, err) } defer tty.Close() // Set the TTY as a controlling TTY on the sandbox process. - // Note that the Ctty field must be the FD of the TTY in the - // *new* process, not this process. Since we are about to - // assign the TTY to nextFD, we can use that value here. - // stdin, we can use FD 0 here. cmd.SysProcAttr.Setctty = true + // The Ctty FD must be the FD in the child process's FD table, + // which will be nextFD in this case. + // See https://github.com/golang/go/issues/29458. cmd.SysProcAttr.Ctty = nextFD // Pass the tty as all stdio fds to sandbox. 
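The console hunk above also rewrites the comment around Ctty to match golang.org/issue/29458: SysProcAttr.Ctty must name the descriptor in the child's FD table, not the parent's. A minimal standalone sketch of that pattern, assuming the TTY is donated via ExtraFiles:

package main

import (
	"os"
	"os/exec"
	"syscall"
)

// startWithControllingTTY starts binPath with tty as its controlling terminal.
// ExtraFiles[i] becomes FD 3+i in the child, and that child-side number is
// what Ctty must refer to.
func startWithControllingTTY(binPath string, tty *os.File) (*exec.Cmd, error) {
	cmd := exec.Command(binPath)
	cmd.ExtraFiles = append(cmd.ExtraFiles, tty)
	childFD := 3 + len(cmd.ExtraFiles) - 1
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setsid:  true, // the child must lead a new session to take a controlling TTY
		Setctty: true,
		Ctty:    childFD, // child-side FD number, not int(tty.Fd())
	}
	if err := cmd.Start(); err != nil {
		return nil, err
	}
	return cmd, nil
}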
@@ -456,7 +492,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund {Type: specs.UTSNamespace}, } - if conf.Platform == boot.PlatformPtrace { + if conf.Platform == platforms.Ptrace { // TODO(b/75837838): Also set a new PID namespace so that we limit // access to other host processes. log.Infof("Sandbox will be started in the current PID namespace") @@ -469,7 +505,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // Joins the network namespace if network is enabled. the sandbox talks // directly to the host network, which may have been configured in the // namespace. - if ns, ok := specutils.GetNS(specs.NetworkNamespace, spec); ok && conf.Network != boot.NetworkNone { + if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != boot.NetworkNone { log.Infof("Sandbox will be started in the container's network namespace: %+v", ns) nss = append(nss, ns) } else if conf.Network == boot.NetworkHost { @@ -483,10 +519,10 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // inside the user namespace specified in the spec or the current namespace // if none is configured. if conf.Network == boot.NetworkHost { - if userns, ok := specutils.GetNS(specs.UserNamespace, spec); ok { + if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok { log.Infof("Sandbox will be started in container's user namespace: %+v", userns) nss = append(nss, userns) - specutils.SetUIDGIDMappings(cmd, spec) + specutils.SetUIDGIDMappings(cmd, args.Spec) } else { log.Infof("Sandbox will be started in the current user namespace") } @@ -515,46 +551,64 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund } else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) { log.Infof("Sandbox will be started in new user namespace") nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace}) + cmd.Args = append(cmd.Args, "--setup-root") - // Map nobody in the new namespace to nobody in the parent namespace. - // - // A sandbox process will construct an empty - // root for itself, so it has to have the CAP_SYS_ADMIN - // capability. - // - // FIXME(b/122554829): The current implementations of - // os/exec doesn't allow to set ambient capabilities if - // a process is started in a new user namespace. As a - // workaround, we start the sandbox process with the 0 - // UID and then it constructs a chroot and sets UID to - // nobody. https://github.com/golang/go/issues/2315 - const nobody = 65534 - cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ - { - ContainerID: int(0), - HostID: int(nobody - 1), - Size: int(1), - }, - { - ContainerID: int(nobody), - HostID: int(nobody), - Size: int(1), - }, - } - cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ - { - ContainerID: int(nobody), - HostID: int(nobody), - Size: int(1), - }, + if conf.Rootless { + log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid()) + cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ + { + ContainerID: 0, + HostID: os.Getuid(), + Size: 1, + }, + } + cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ + { + ContainerID: 0, + HostID: os.Getgid(), + Size: 1, + }, + } + cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0} + + } else { + // Map nobody in the new namespace to nobody in the parent namespace. 
+ // + // A sandbox process will construct an empty + // root for itself, so it has to have the CAP_SYS_ADMIN + // capability. + // + // FIXME(b/122554829): The current implementations of + // os/exec doesn't allow to set ambient capabilities if + // a process is started in a new user namespace. As a + // workaround, we start the sandbox process with the 0 + // UID and then it constructs a chroot and sets UID to + // nobody. https://github.com/golang/go/issues/2315 + const nobody = 65534 + cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ + { + ContainerID: 0, + HostID: nobody - 1, + Size: 1, + }, + { + ContainerID: nobody, + HostID: nobody, + Size: 1, + }, + } + cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ + { + ContainerID: nobody, + HostID: nobody, + Size: 1, + }, + } + + // Set credentials to run as user and group nobody. + cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody} } - // Set credentials to run as user and group nobody. - cmd.SysProcAttr.Credential = &syscall.Credential{ - Uid: 0, - Gid: nobody, - } - cmd.Args = append(cmd.Args, "--setup-root") } else { return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID") } @@ -580,8 +634,8 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund } } - if userLog != "" { - f, err := os.OpenFile(userLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664) + if args.UserLog != "" { + f, err := os.OpenFile(args.UserLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664) if err != nil { return fmt.Errorf("opening compat log file: %v", err) } @@ -600,6 +654,11 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund log.Debugf("Donating FD %d: %q", i+3, f.Name()) } + if args.Attached { + // Kill sandbox if parent process exits in attached mode. + cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + } + log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr) if err := specutils.StartInNS(cmd, nss); err != nil { @@ -902,7 +961,7 @@ func (s *Sandbox) StartTrace(f *os.File) error { return nil } -// StopTrace stops a previously started trace.. +// StopTrace stops a previously started trace. func (s *Sandbox) StopTrace() error { log.Debugf("Trace stop %q", s.ID) conn, err := s.sandboxConnect() @@ -917,6 +976,21 @@ func (s *Sandbox) StopTrace() error { return nil } +// ChangeLogging changes logging options. +func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error { + log.Debugf("Change logging start %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + if err := conn.Call(boot.ChangeLogging, &args, nil); err != nil { + return fmt.Errorf("changing sandbox %q logging: %v", s.ID, err) + } + return nil +} + // DestroyContainer destroys the given container. If it is the root container, // then the entire sandbox is destroyed. func (s *Sandbox) DestroyContainer(cid string) error { @@ -973,19 +1047,15 @@ func (s *Sandbox) waitForStopped() error { // deviceFileForPlatform opens the device file for the given platform. If the // platform does not need a device file, then nil is returned. 
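In the createSandboxProcess changes above, the new --rootless path maps root inside the sandbox's user namespace back to the invoking user instead of to nobody, so the sandbox gains no host privileges. A minimal standalone sketch of that mapping (the helper name is illustrative, not gVisor's):

package main

import (
	"os"
	"os/exec"
	"syscall"
)

// rootlessCommand prepares cmd to run as uid/gid 0 inside a new user
// namespace, with that root mapped back to the current host user.
func rootlessCommand(binPath string, args ...string) *exec.Cmd {
	cmd := exec.Command(binPath, args...)
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags: syscall.CLONE_NEWUSER,
		UidMappings: []syscall.SysProcIDMap{
			{ContainerID: 0, HostID: os.Getuid(), Size: 1},
		},
		GidMappings: []syscall.SysProcIDMap{
			{ContainerID: 0, HostID: os.Getgid(), Size: 1},
		},
		// Uid/Gid are namespace-local: 0 maps to the invoking user above.
		Credential: &syscall.Credential{Uid: 0, Gid: 0},
	}
	return cmd
}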
-func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) { - var ( - f *os.File - err error - ) - switch p { - case boot.PlatformKVM: - f, err = kvm.OpenDevice() - default: - return nil, nil +func deviceFileForPlatform(name string) (*os.File, error) { + p, err := platform.Lookup(name) + if err != nil { + return nil, err } + + f, err := p.OpenDevice() if err != nil { return nil, fmt.Errorf("opening device file for platform %q: %v", p, err) } - return f, err + return f, nil } diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD index 15476de6f..fbfb8e2f8 100644 --- a/runsc/specutils/BUILD +++ b/runsc/specutils/BUILD @@ -9,11 +9,8 @@ go_library( "namespace.go", "specutils.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/specutils", - visibility = [ - "//runsc:__subpackages__", - "//test:__subpackages__", - ], + importpath = "gvisor.dev/gvisor/runsc/specutils", + visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", "//pkg/log", diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go index 1f3afb4e4..138aa4dd1 100644 --- a/runsc/specutils/fs.go +++ b/runsc/specutils/fs.go @@ -16,6 +16,7 @@ package specutils import ( "fmt" + "math/bits" "path" "syscall" @@ -86,7 +87,7 @@ func OptionsToFlags(opts []string) uint32 { // PropOptionsToFlags converts propagation mount options to syscall flags. // Propagation options cannot be set other with other options and must be -// handled separatedly. +// handled separately. func PropOptionsToFlags(opts []string) uint32 { return optionsToFlags(opts, propOptionsMap) } @@ -105,22 +106,30 @@ func optionsToFlags(opts []string, source map[string]mapping) uint32 { return rv } -// ValidateMount validates that spec mounts are correct. +// validateMount validates that spec mounts are correct. func validateMount(mnt *specs.Mount) error { if !path.IsAbs(mnt.Destination) { return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt) } - if mnt.Type == "bind" { - for _, o := range mnt.Options { - if ContainsStr(invalidOptions, o) { - return fmt.Errorf("mount option %q is not supported: %v", o, mnt) - } - _, ok1 := optionsMap[o] - _, ok2 := propOptionsMap[o] - if !ok1 && !ok2 { - return fmt.Errorf("unknown mount option %q", o) - } + return ValidateMountOptions(mnt.Options) + } + return nil +} + +// ValidateMountOptions validates that mount options are correct. 
+func ValidateMountOptions(opts []string) error { + for _, o := range opts { + if ContainsStr(invalidOptions, o) { + return fmt.Errorf("mount option %q is not supported", o) + } + _, ok1 := optionsMap[o] + _, ok2 := propOptionsMap[o] + if !ok1 && !ok2 { + return fmt.Errorf("unknown mount option %q", o) + } + if err := validatePropagation(o); err != nil { + return err } } return nil @@ -133,5 +142,14 @@ func validateRootfsPropagation(opt string) error { if flags&(syscall.MS_SLAVE|syscall.MS_PRIVATE) == 0 { return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt) } + return validatePropagation(opt) +} + +func validatePropagation(opt string) error { + flags := PropOptionsToFlags([]string{opt}) + exclusive := flags & (syscall.MS_SLAVE | syscall.MS_PRIVATE | syscall.MS_SHARED | syscall.MS_UNBINDABLE) + if bits.OnesCount32(exclusive) > 1 { + return fmt.Errorf("mount propagation options are mutually exclusive: %q", opt) + } return nil } diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 7d194335c..d441419cb 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -25,7 +25,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) // nsCloneFlag returns the clone flag that can be used to set a namespace of @@ -204,7 +204,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) { } } -// HasCapabilities returns true if the user has all capabilties in 'cs'. +// HasCapabilities returns true if the user has all capabilities in 'cs'. func HasCapabilities(cs ...capability.Cap) bool { caps, err := capability.NewPid2(os.Getpid()) if err != nil { @@ -220,3 +220,55 @@ func HasCapabilities(cs ...capability.Cap) bool { } return true } + +// MaybeRunAsRoot ensures the process runs with capabilities needed to create a +// sandbox, e.g. CAP_SYS_ADMIN, CAP_SYS_CHROOT, etc. If capabilities are needed, +// it will create a new user namespace and re-execute the process as root +// inside the namespace with the same arguments and environment. +// +// This function returns immediately when no new capability is needed. If +// another process is executed, it returns straight from here with the same exit +// code as the child. +func MaybeRunAsRoot() error { + if HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT, capability.CAP_SETUID, capability.CAP_SETGID) { + return nil + } + + // Current process doesn't have required capabilities, create user namespace + // and run as root inside the namespace to acquire capabilities. + log.Infof("*** Re-running as root in new user namespace ***") + + cmd := exec.Command("/proc/self/exe", os.Args[1:]...) + + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, + // Set current user/group as root inside the namespace. Since we may not + // have CAP_SETUID/CAP_SETGID, just map root to the current user/group. 
+ UidMappings: []syscall.SysProcIDMap{ + {ContainerID: 0, HostID: os.Getuid(), Size: 1}, + }, + GidMappings: []syscall.SysProcIDMap{ + {ContainerID: 0, HostID: os.Getgid(), Size: 1}, + }, + Credential: &syscall.Credential{Uid: 0, Gid: 0}, + GidMappingsEnableSetgroups: false, + } + + cmd.Env = os.Environ() + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + if exit, ok := err.(*exec.ExitError); ok { + if ws, ok := exit.Sys().(syscall.WaitStatus); ok { + os.Exit(ws.ExitStatus()) + } + log.Warningf("No wait status provided, exiting with -1: %v", err) + os.Exit(-1) + } + return fmt.Errorf("re-executing self: %v", err) + } + // Child completed with success. + os.Exit(0) + panic("unreachable") +} diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 2888f55db..0b40e38a3 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -29,9 +29,9 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) // ExePath must point to runsc binary, which is normally the same binary. It's @@ -394,7 +394,7 @@ func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) er // DebugLogFile opens a log file using 'logPattern' as location. If 'logPattern' // ends with '/', it's used as a directory with default file name. -// 'logPattern' can contain variables that are substitued: +// 'logPattern' can contain variables that are substituted: // - %TIMESTAMP%: is replaced with a timestamp using the following format: // <yyyymmdd-hhmmss.uuuuuu> // - %COMMAND%: is replaced with 'command' diff --git a/runsc/test/BUILD b/runsc/test/BUILD new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/runsc/test/BUILD diff --git a/runsc/test/build_defs.bzl b/runsc/test/build_defs.bzl new file mode 100644 index 000000000..ac28cc037 --- /dev/null +++ b/runsc/test/build_defs.bzl @@ -0,0 +1,19 @@ +"""Defines a rule for runsc test targets.""" + +load("@io_bazel_rules_go//go:def.bzl", _go_test = "go_test") + +# runtime_test is a macro that will create targets to run the given test target +# with different runtime options. 
+def runtime_test(**kwargs): + """Runs the given test target with different runtime options.""" + name = kwargs["name"] + _go_test(**kwargs) + kwargs["name"] = name + "_hostnet" + kwargs["args"] = ["--runtime-type=hostnet"] + _go_test(**kwargs) + kwargs["name"] = name + "_kvm" + kwargs["args"] = ["--runtime-type=kvm"] + _go_test(**kwargs) + kwargs["name"] = name + "_overlay" + kwargs["args"] = ["--runtime-type=overlay"] + _go_test(**kwargs) diff --git a/runsc/test/image/BUILD b/runsc/test/image/BUILD index e8b629c6a..58758fde5 100644 --- a/runsc/test/image/BUILD +++ b/runsc/test/image/BUILD @@ -1,8 +1,9 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//runsc/test:build_defs.bzl", "runtime_test") package(licenses = ["notice"]) -go_test( +runtime_test( name = "image_test", size = "large", srcs = [ @@ -26,5 +27,5 @@ go_test( go_library( name = "image", srcs = ["image.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/image", + importpath = "gvisor.dev/gvisor/runsc/test/image", ) diff --git a/runsc/test/image/image_test.go b/runsc/test/image/image_test.go index b969731b0..ddaa2c13b 100644 --- a/runsc/test/image/image_test.go +++ b/runsc/test/image/image_test.go @@ -32,7 +32,7 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/test/testutil" ) func TestHelloWorld(t *testing.T) { @@ -209,11 +209,14 @@ func TestMysql(t *testing.T) { } func TestPythonHello(t *testing.T) { - if err := testutil.Pull("google/python-hello"); err != nil { + // TODO(b/136503277): Once we have more complete python runtime tests, + // we can drop this one. + const img = "gcr.io/gvisor-presubmit/python-hello" + if err := testutil.Pull(img); err != nil { t.Fatalf("docker pull failed: %v", err) } d := testutil.MakeDocker("python-hello-test") - if err := d.Run("-p", "8080", "google/python-hello"); err != nil { + if err := d.Run("-p", "8080", img); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() diff --git a/runsc/test/integration/BUILD b/runsc/test/integration/BUILD index 04ed885c6..12065617c 100644 --- a/runsc/test/integration/BUILD +++ b/runsc/test/integration/BUILD @@ -1,8 +1,9 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//runsc/test:build_defs.bzl", "runtime_test") package(licenses = ["notice"]) -go_test( +runtime_test( name = "integration_test", size = "large", srcs = [ @@ -25,5 +26,5 @@ go_test( go_library( name = "integration", srcs = ["integration.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/integration", + importpath = "gvisor.dev/gvisor/runsc/test/integration", ) diff --git a/runsc/test/integration/exec_test.go b/runsc/test/integration/exec_test.go index 7c0e61ac3..993136f96 100644 --- a/runsc/test/integration/exec_test.go +++ b/runsc/test/integration/exec_test.go @@ -34,8 +34,8 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/runsc/test/testutil" ) func TestExecCapabilities(t *testing.T) { diff --git a/runsc/test/integration/integration_test.go b/runsc/test/integration/integration_test.go index c51cab3ae..7cef4b9dd 100644 --- a/runsc/test/integration/integration_test.go +++ b/runsc/test/integration/integration_test.go @@ -32,7 +32,7 @@ import ( "testing" "time" - 
"gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/test/testutil" ) // httpRequestSucceeds sends a request to a given url and checks that the status is OK. @@ -86,16 +86,17 @@ func TestLifeCycle(t *testing.T) { } func TestPauseResume(t *testing.T) { + const img = "gcr.io/gvisor-presubmit/python-hello" if !testutil.IsPauseResumeSupported() { t.Log("Pause/resume is not supported, skipping test.") return } - if err := testutil.Pull("google/python-hello"); err != nil { + if err := testutil.Pull(img); err != nil { t.Fatal("docker pull failed:", err) } d := testutil.MakeDocker("pause-resume-test") - if err := d.Run("-p", "8080", "google/python-hello"); err != nil { + if err := d.Run("-p", "8080", img); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() @@ -149,15 +150,16 @@ func TestPauseResume(t *testing.T) { } func TestCheckpointRestore(t *testing.T) { + const img = "gcr.io/gvisor-presubmit/python-hello" if !testutil.IsPauseResumeSupported() { t.Log("Pause/resume is not supported, skipping test.") return } - if err := testutil.Pull("google/python-hello"); err != nil { + if err := testutil.Pull(img); err != nil { t.Fatal("docker pull failed:", err) } d := testutil.MakeDocker("save-restore-test") - if err := d.Run("-p", "8080", "google/python-hello"); err != nil { + if err := d.Run("-p", "8080", img); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() diff --git a/runsc/test/integration/regression_test.go b/runsc/test/integration/regression_test.go index 80bae9970..39b30e757 100644 --- a/runsc/test/integration/regression_test.go +++ b/runsc/test/integration/regression_test.go @@ -18,7 +18,7 @@ import ( "strings" "testing" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/test/testutil" ) // Test that UDS can be created using overlay when parent directory is in lower diff --git a/runsc/test/root/BUILD b/runsc/test/root/BUILD index 7ded78baa..500ef7b8e 100644 --- a/runsc/test/root/BUILD +++ b/runsc/test/root/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "root", srcs = ["root.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/root", + importpath = "gvisor.dev/gvisor/runsc/test/root", ) go_test( diff --git a/runsc/test/root/cgroup_test.go b/runsc/test/root/cgroup_test.go index edb6dee1d..5392dc6e0 100644 --- a/runsc/test/root/cgroup_test.go +++ b/runsc/test/root/cgroup_test.go @@ -25,8 +25,8 @@ import ( "strings" "testing" - "gvisor.googlesource.com/gvisor/runsc/cgroup" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/cgroup" + "gvisor.dev/gvisor/runsc/test/testutil" ) func verifyPid(pid int, path string) error { diff --git a/runsc/test/root/chroot_test.go b/runsc/test/root/chroot_test.go index da2f473b9..d0f236580 100644 --- a/runsc/test/root/chroot_test.go +++ b/runsc/test/root/chroot_test.go @@ -31,8 +31,8 @@ import ( "testing" "github.com/syndtr/gocapability/capability" - "gvisor.googlesource.com/gvisor/runsc/specutils" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/testutil" ) // TestChroot verifies that the sandbox is chroot'd and that mounts are cleaned diff --git a/runsc/test/root/crictl_test.go b/runsc/test/root/crictl_test.go index 3cc176104..515ae2df1 100644 --- a/runsc/test/root/crictl_test.go +++ b/runsc/test/root/crictl_test.go @@ -29,9 +29,9 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/runsc/specutils" - 
"gvisor.googlesource.com/gvisor/runsc/test/root/testdata" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/root/testdata" + "gvisor.dev/gvisor/runsc/test/testutil" ) // Tests for crictl have to be run as root (rather than in a user namespace) diff --git a/runsc/test/root/testdata/BUILD b/runsc/test/root/testdata/BUILD index 7f272dcd3..80dc5f214 100644 --- a/runsc/test/root/testdata/BUILD +++ b/runsc/test/root/testdata/BUILD @@ -11,7 +11,7 @@ go_library( "httpd_mount_paths.go", "sandbox.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/root/testdata", + importpath = "gvisor.dev/gvisor/runsc/test/root/testdata", visibility = [ "//visibility:public", ], diff --git a/runsc/test/testutil/BUILD b/runsc/test/testutil/BUILD index ddec81444..327e7ca4d 100644 --- a/runsc/test/testutil/BUILD +++ b/runsc/test/testutil/BUILD @@ -10,7 +10,7 @@ go_library( "testutil.go", "testutil_race.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/testutil", + importpath = "gvisor.dev/gvisor/runsc/test/testutil", visibility = ["//:sandbox"], deps = [ "//runsc/boot", @@ -18,6 +18,5 @@ go_library( "@com_github_cenkalti_backoff//:go_default_library", "@com_github_kr_pty//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", - "@com_github_syndtr_gocapability//capability:go_default_library", ], ) diff --git a/runsc/test/testutil/docker.go b/runsc/test/testutil/docker.go index 81f5a9ef0..3f3e191b0 100644 --- a/runsc/test/testutil/docker.go +++ b/runsc/test/testutil/docker.go @@ -15,6 +15,7 @@ package testutil import ( + "flag" "fmt" "io/ioutil" "log" @@ -30,10 +31,15 @@ import ( "github.com/kr/pty" ) +var runtimeType = flag.String("runtime-type", "", "specify which runtime to use: kvm, hostnet, overlay") + func getRuntime() string { r, ok := os.LookupEnv("RUNSC_RUNTIME") if !ok { - return "runsc-test" + r = "runsc-test" + } + if *runtimeType != "" { + r += "-" + *runtimeType } return r } @@ -197,7 +203,7 @@ func (d *Docker) Stop() error { } // Run calls 'docker run' with the arguments provided. The container starts -// running in the backgroud and the call returns immediately. +// running in the background and the call returns immediately. func (d *Docker) Run(args ...string) error { a := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-d"} a = append(a, args...) diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index 727b648a6..a98675bfc 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -30,7 +30,6 @@ import ( "os/exec" "os/signal" "path/filepath" - "runtime" "strings" "sync" "sync/atomic" @@ -39,9 +38,8 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/syndtr/gocapability/capability" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/specutils" ) func init() { @@ -134,6 +132,7 @@ func TestConfig() *boot.Config { LogPackets: true, Network: boot.NetworkNone, Strace: true, + Platform: "ptrace", FileAccess: boot.FileAccessExclusive, TestOnlyAllowRunAsCurrentUserWithoutChroot: true, NumNetworkChannels: 1, @@ -284,54 +283,6 @@ func WaitForHTTP(port int, timeout time.Duration) error { return Poll(cb, timeout) } -// RunAsRoot ensures the test runs with CAP_SYS_ADMIN and CAP_SYS_CHROOT. 
If -// needed it will create a new user namespace and re-execute the test as root -// inside of the namespace. This function returns when it's running as root. If -// it needs to create another process, it will exit from there and not return. -func RunAsRoot() { - if specutils.HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT) { - return - } - - fmt.Println("*** Re-running test as root in new user namespace ***") - - // Current process doesn't have CAP_SYS_ADMIN, create user namespace and run - // as root inside that namespace to get it. - runtime.LockOSThread() - defer runtime.UnlockOSThread() - - cmd := exec.Command("/proc/self/exe", os.Args[1:]...) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, - // Set current user/group as root inside the namespace. - UidMappings: []syscall.SysProcIDMap{ - {ContainerID: 0, HostID: os.Getuid(), Size: 1}, - }, - GidMappings: []syscall.SysProcIDMap{ - {ContainerID: 0, HostID: os.Getgid(), Size: 1}, - }, - GidMappingsEnableSetgroups: false, - Credential: &syscall.Credential{ - Uid: 0, - Gid: 0, - }, - } - cmd.Env = os.Environ() - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - if exit, ok := err.(*exec.ExitError); ok { - if ws, ok := exit.Sys().(syscall.WaitStatus); ok { - os.Exit(ws.ExitStatus()) - } - os.Exit(-1) - } - panic(fmt.Sprint("error running child process:", err.Error())) - } - os.Exit(0) -} - // Reaper reaps child processes. type Reaper struct { // mu protects ch, which will be nil if the reaper is not running. |