diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/BUILD | 11 | ||||
-rw-r--r-- | runsc/boot/compat.go | 16 | ||||
-rw-r--r-- | runsc/boot/compat_amd64.go | 2 | ||||
-rw-r--r-- | runsc/boot/compat_test.go | 2 | ||||
-rw-r--r-- | runsc/boot/config.go | 47 | ||||
-rw-r--r-- | runsc/boot/controller.go | 43 | ||||
-rw-r--r-- | runsc/boot/debug.go | 2 | ||||
-rw-r--r-- | runsc/boot/events.go | 4 | ||||
-rw-r--r-- | runsc/boot/fds.go | 35 | ||||
-rw-r--r-- | runsc/boot/filter/BUILD | 4 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 29 | ||||
-rw-r--r-- | runsc/boot/filter/extra_filters.go | 4 | ||||
-rw-r--r-- | runsc/boot/filter/extra_filters_msan.go | 2 | ||||
-rw-r--r-- | runsc/boot/filter/extra_filters_race.go | 3 | ||||
-rw-r--r-- | runsc/boot/filter/filter.go | 19 | ||||
-rw-r--r-- | runsc/boot/fs.go | 43 | ||||
-rw-r--r-- | runsc/boot/limits.go | 4 | ||||
-rw-r--r-- | runsc/boot/loader.go | 194 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 18 | ||||
-rw-r--r-- | runsc/boot/network.go | 20 | ||||
-rw-r--r-- | runsc/boot/platforms/BUILD | 16 | ||||
-rw-r--r-- | runsc/boot/platforms/platforms.go | 30 | ||||
-rw-r--r-- | runsc/boot/pprof.go | 18 | ||||
-rw-r--r-- | runsc/boot/strace.go | 2 | ||||
-rw-r--r-- | runsc/boot/user.go | 146 | ||||
-rw-r--r-- | runsc/boot/user_test.go | 253 |
26 files changed, 715 insertions, 252 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 6ba196917..5025401dd 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -16,9 +16,11 @@ go_library( "limits.go", "loader.go", "network.go", + "pprof.go", "strace.go", + "user.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/boot", + importpath = "gvisor.dev/gvisor/runsc/boot", visibility = [ "//runsc:__subpackages__", "//test:__subpackages__", @@ -32,6 +34,7 @@ go_library( "//pkg/log", "//pkg/memutil", "//pkg/rand", + "//pkg/refs", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/context", @@ -49,13 +52,10 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel:uncaught_signal_go_proto", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/limits", "//pkg/sentry/loader", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm", - "//pkg/sentry/platform/ptrace", "//pkg/sentry/sighandling", "//pkg/sentry/socket/epsocket", "//pkg/sentry/socket/hostinet", @@ -68,6 +68,7 @@ go_library( "//pkg/sentry/time", "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", "//pkg/sentry/usage", + "//pkg/sentry/usermem", "//pkg/sentry/watchdog", "//pkg/syserror", "//pkg/tcpip", @@ -83,6 +84,7 @@ go_library( "//pkg/tcpip/transport/udp", "//pkg/urpc", "//runsc/boot/filter", + "//runsc/boot/platforms", "//runsc/specutils", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", @@ -96,6 +98,7 @@ go_test( "compat_test.go", "fs_test.go", "loader_test.go", + "user_test.go", ], embed = [":boot"], deps = [ diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index c369e4d64..07e35ab10 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -21,14 +21,14 @@ import ( "syscall" "github.com/golang/protobuf/proto" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" - ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/strace" - spb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" + "gvisor.dev/gvisor/pkg/sentry/strace" + spb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" ) func initCompatLogs(fd int) error { diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go index 99df5e614..43cd0db94 100644 --- a/runsc/boot/compat_amd64.go +++ b/runsc/boot/compat_amd64.go @@ -17,7 +17,7 @@ package boot import ( "fmt" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" ) // reportLimit is the max number of events that should be reported per tracker. diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go index ccec3d20c..388298d8d 100644 --- a/runsc/boot/compat_test.go +++ b/runsc/boot/compat_test.go @@ -17,7 +17,7 @@ package boot import ( "testing" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" ) func TestOnceTracker(t *testing.T) { diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 8564c502d..6f1eb9a41 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -19,43 +19,9 @@ import ( "strconv" "strings" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/sentry/watchdog" ) -// PlatformType tells which platform to use. -type PlatformType int - -const ( - // PlatformPtrace runs the sandbox with the ptrace platform. - PlatformPtrace PlatformType = iota - - // PlatformKVM runs the sandbox with the KVM platform. - PlatformKVM -) - -// MakePlatformType converts type from string. -func MakePlatformType(s string) (PlatformType, error) { - switch s { - case "ptrace": - return PlatformPtrace, nil - case "kvm": - return PlatformKVM, nil - default: - return 0, fmt.Errorf("invalid platform type %q", s) - } -} - -func (p PlatformType) String() string { - switch p { - case PlatformPtrace: - return "ptrace" - case PlatformKVM: - return "kvm" - default: - return fmt.Sprintf("unknown(%d)", p) - } -} - // FileAccessType tells how the filesystem is accessed. type FileAccessType int @@ -187,7 +153,7 @@ type Config struct { LogPackets bool // Platform is the platform to run on. - Platform PlatformType + Platform string // Strace indicates that strace should be enabled. Strace bool @@ -226,6 +192,12 @@ type Config struct { // to the same underlying network device. This allows netstack to better // scale for high throughput use cases. NumNetworkChannels int + + // Rootless allows the sandbox to be started with a user that is not root. + // Defense is depth measures are weaker with rootless. Specifically, the + // sandbox and Gofer process run as root inside a user namespace with root + // mapped to the caller's user. + Rootless bool } // ToFlags returns a slice of flags that correspond to the given Config. @@ -241,7 +213,7 @@ func (c *Config) ToFlags() []string { "--overlay=" + strconv.FormatBool(c.Overlay), "--network=" + c.Network.String(), "--log-packets=" + strconv.FormatBool(c.LogPackets), - "--platform=" + c.Platform.String(), + "--platform=" + c.Platform, "--strace=" + strconv.FormatBool(c.Strace), "--strace-syscalls=" + strings.Join(c.StraceSyscalls, ","), "--strace-log-size=" + strconv.Itoa(int(c.StraceLogSize)), @@ -250,6 +222,7 @@ func (c *Config) ToFlags() []string { "--profile=" + strconv.FormatBool(c.ProfileEnable), "--net-raw=" + strconv.FormatBool(c.EnableRaw), "--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels), + "--rootless=" + strconv.FormatBool(c.Rootless), } if c.TestOnlyAllowRunAsCurrentUserWithoutChroot { // Only include if set since it is never to be used by users. diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 416e5355d..d79aaff60 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -22,17 +22,17 @@ import ( "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/control/server" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" - "gvisor.googlesource.com/gvisor/pkg/sentry/state" - "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/epsocket" + "gvisor.dev/gvisor/pkg/sentry/state" + "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/urpc" ) const ( @@ -96,8 +96,10 @@ const ( // SandboxStacks collects sandbox stacks for debugging. SandboxStacks = "debug.Stacks" +) - // Profiling related commands (see pprof.go for more details). +// Profiling related commands (see pprof.go for more details). +const ( StartCPUProfile = "Profile.StartCPUProfile" StopCPUProfile = "Profile.StopCPUProfile" HeapProfile = "Profile.HeapProfile" @@ -105,6 +107,11 @@ const ( StopTrace = "Profile.StopTrace" ) +// Logging related commands (see logging.go for more details). +const ( + ChangeLogging = "Logging.Change" +) + // ControlSocketAddr generates an abstract unix socket name for the given ID. func ControlSocketAddr(id string) string { return fmt.Sprintf("\x00runsc-sandbox.%s", id) @@ -143,6 +150,7 @@ func newController(fd int, l *Loader) (*controller, error) { } srv.Register(&debug{}) + srv.Register(&control.Logging{}) if l.conf.ProfileEnable { srv.Register(&control.Profile{}) } @@ -359,6 +367,17 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { return fmt.Errorf("file cannot be empty") } + if cm.l.conf.ProfileEnable { + // initializePProf opens /proc/self/maps, so has to be + // called before installing seccomp filters. + initializePProf() + } + + // Seccomp filters have to be applied before parsing the state file. + if err := cm.l.installSeccompFilters(); err != nil { + return err + } + // Load the state. loadOpts := state.LoadOpts{Source: specFile} if err := loadOpts.Load(k, networkStack); err != nil { diff --git a/runsc/boot/debug.go b/runsc/boot/debug.go index 79f7387ac..1fb32c527 100644 --- a/runsc/boot/debug.go +++ b/runsc/boot/debug.go @@ -15,7 +15,7 @@ package boot import ( - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) type debug struct { diff --git a/runsc/boot/events.go b/runsc/boot/events.go index ffd99f5e9..422f4da00 100644 --- a/runsc/boot/events.go +++ b/runsc/boot/events.go @@ -15,8 +15,8 @@ package boot import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usage" ) // Event struct for encoding the event data to JSON. Corresponds to runc's diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go index 0811e10f4..e5de1f3d7 100644 --- a/runsc/boot/fds.go +++ b/runsc/boot/fds.go @@ -17,36 +17,27 @@ package boot import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) -// createFDMap creates an FD map that contains stdin, stdout, and stderr. If -// console is true, then ioctl calls will be passed through to the host FD. +// createFDTable creates an FD table that contains stdin, stdout, and stderr. +// If console is true, then ioctl calls will be passed through to the host FD. // Upon success, createFDMap dups then closes stdioFDs. -func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) { +func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) { if len(stdioFDs) != 3 { return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) } k := kernel.KernelFromContext(ctx) - fdm := k.NewFDMap() - defer fdm.DecRef() + fdTable := k.NewFDTable() + defer fdTable.DecRef() mounter := fs.FileOwnerFromContext(ctx) - // Maps sandbox FD to host FD. - fdMap := map[int]int{ - 0: stdioFDs[0], - 1: stdioFDs[1], - 2: stdioFDs[2], - } - var ttyFile *fs.File - for appFD, hostFD := range fdMap { + for appFD, hostFD := range stdioFDs { var appFile *fs.File if console && appFD < 3 { @@ -80,11 +71,11 @@ func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs } // Add the file to the FD map. - if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil { + if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { return nil, err } } - fdm.IncRef() - return fdm, nil + fdTable.IncRef() + return fdTable, nil } diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index 3b6020cf3..f5509b6b7 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -11,7 +11,7 @@ go_library( "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/boot/filter", + importpath = "gvisor.dev/gvisor/runsc/boot/filter", visibility = [ "//runsc/boot:__subpackages__", ], @@ -20,8 +20,6 @@ go_library( "//pkg/log", "//pkg/seccomp", "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm", - "//pkg/sentry/platform/ptrace", "//pkg/tcpip/link/fdbased", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index ef2dbfad2..0ee5b8bbd 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -19,9 +19,9 @@ import ( "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/seccomp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" ) // allowedSyscalls is the set of syscalls executed by the Sentry to the host OS. @@ -437,29 +437,6 @@ func hostInetFilters() seccomp.SyscallRules { } } -// ptraceFilters returns syscalls made exclusively by the ptrace platform. -func ptraceFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - unix.SYS_GETCPU: {}, - unix.SYS_SCHED_SETAFFINITY: {}, - syscall.SYS_PTRACE: {}, - syscall.SYS_TGKILL: {}, - syscall.SYS_WAIT4: {}, - } -} - -// kvmFilters returns syscalls made exclusively by the KVM platform. -func kvmFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - syscall.SYS_ARCH_PRCTL: {}, - syscall.SYS_IOCTL: {}, - syscall.SYS_MMAP: {}, - syscall.SYS_RT_SIGSUSPEND: {}, - syscall.SYS_RT_SIGTIMEDWAIT: {}, - 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host. - } -} - func controlServerFilters(fd int) seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_ACCEPT: []seccomp.Rule{ diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go index 5c5ec4e06..e28d4b8d6 100644 --- a/runsc/boot/filter/extra_filters.go +++ b/runsc/boot/filter/extra_filters.go @@ -17,11 +17,11 @@ package filter import ( - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by -// Go intrumentation tools, e.g. -race, -msan. +// Go instrumentation tools, e.g. -race, -msan. // Returns empty when disabled. func instrumentationFilters() seccomp.SyscallRules { return nil diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go index ac5a0f1aa..5e5a3c998 100644 --- a/runsc/boot/filter/extra_filters_msan.go +++ b/runsc/boot/filter/extra_filters_msan.go @@ -19,7 +19,7 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by MSAN. diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go index ba3c1ce87..9ff80276a 100644 --- a/runsc/boot/filter/extra_filters_race.go +++ b/runsc/boot/filter/extra_filters_race.go @@ -19,7 +19,7 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by TSAN. @@ -33,6 +33,7 @@ func instrumentationFilters() seccomp.SyscallRules { syscall.SYS_MUNLOCK: {}, syscall.SYS_NANOSLEEP: {}, syscall.SYS_OPEN: {}, + syscall.SYS_OPENAT: {}, syscall.SYS_SET_ROBUST_LIST: {}, // Used within glibc's malloc. syscall.SYS_TIME: {}, diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index 17479e0dd..e80c171b3 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -18,13 +18,9 @@ package filter import ( - "fmt" - - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/seccomp" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/sentry/platform" ) // Options are seccomp filter related options. @@ -53,14 +49,7 @@ func Install(opt Options) error { s.Merge(profileFilters()) } - switch p := opt.Platform.(type) { - case *ptrace.PTrace: - s.Merge(ptraceFilters()) - case *kvm.KVM: - s.Merge(kvmFilters()) - default: - return fmt.Errorf("unknown platform type %T", p) - } + s.Merge(opt.Platform.SyscallFilters()) return seccomp.Install(s) } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 2fa0725d1..d3e3196fd 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -24,24 +24,24 @@ import ( "syscall" // Include filesystem types that OCI spec might mount. - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty" + _ "gvisor.dev/gvisor/pkg/sentry/fs/dev" + "gvisor.dev/gvisor/pkg/sentry/fs/gofer" + _ "gvisor.dev/gvisor/pkg/sentry/fs/host" + _ "gvisor.dev/gvisor/pkg/sentry/fs/proc" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + _ "gvisor.dev/gvisor/pkg/sentry/fs/sys" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tty" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -76,7 +76,7 @@ func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, tmpFS := mustFindFilesystem("tmpfs") if !fs.IsDir(lower.StableAttr) { // Create overlay on top of mount file, e.g. /etc/hostname. - msrc := fs.NewCachingMountSource(tmpFS, upperFlags) + msrc := fs.NewCachingMountSource(ctx, tmpFS, upperFlags) return fs.NewOverlayRootFile(ctx, msrc, lower, upperFlags) } @@ -226,7 +226,11 @@ func mustFindFilesystem(name string) fs.Filesystem { // addSubmountOverlay overlays the inode over a ramfs tree containing the given // paths. func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) { - msrc := fs.NewPseudoMountSource() + // Construct a ramfs tree of mount points. The contents never + // change, so this can be fully caching. There's no real + // filesystem backing this tree, so we set the filesystem to + // nil. + msrc := fs.NewCachingMountSource(ctx, nil, fs.MountSourceFlags{}) mountTree, err := ramfs.MakeDirectoryTree(ctx, msrc, submounts) if err != nil { return nil, fmt.Errorf("creating mount tree: %v", err) @@ -902,7 +906,10 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun } defer target.DecRef() + // Take a ref on the inode that is about to be (re)-mounted. + source.root.IncRef() if err := mns.Mount(ctx, target, source.root); err != nil { + source.root.DecRef() return fmt.Errorf("bind mount %q error: %v", mount.Destination, err) } diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go index 3364aa5e6..d1c0bb9b5 100644 --- a/runsc/boot/limits.go +++ b/runsc/boot/limits.go @@ -20,8 +20,8 @@ import ( "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/limits" ) // Mapping from linux resource names to limits.LimitType. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 3e6095fdc..8e8c6105b 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -20,51 +20,52 @@ import ( mrand "math/rand" "os" "runtime" + "strings" "sync" "sync/atomic" "syscall" gtime "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/memutil" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/loader" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" - "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling" - slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/runsc/boot/filter" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/memutil" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/loader" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/sighandling" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" + "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/runsc/boot/filter" + _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms. + "gvisor.dev/gvisor/runsc/specutils" // Include supported socket providers. - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/epsocket" + "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" + _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink" + _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route" + _ "gvisor.dev/gvisor/pkg/sentry/socket/unix" ) // Loader keeps state needed to start the kernel and run the container.. @@ -205,7 +206,9 @@ func New(args Args) (*Loader, error) { // Create VDSO. // // Pass k as the platform since it is savable, unlike the actual platform. - vdso, err := loader.PrepareVDSO(k) + // + // FIXME(b/109889800): Use non-nil context. + vdso, err := loader.PrepareVDSO(nil, k) if err != nil { return nil, fmt.Errorf("creating vdso: %v", err) } @@ -259,7 +262,7 @@ func New(args Args) (*Loader, error) { // Adjust the total memory returned by the Sentry so that applications that // use /proc/meminfo can make allocations based on this limit. usage.MinimumTotalMemoryBytes = args.TotalMem - log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(2^30)) + log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(1<<30)) } // Initiate the Kernel object, which is required by the Context passed @@ -412,19 +415,12 @@ func (l *Loader) Destroy() { } func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) { - switch conf.Platform { - case PlatformPtrace: - log.Infof("Platform: ptrace") - return ptrace.New() - case PlatformKVM: - log.Infof("Platform: kvm") - if deviceFile == nil { - return nil, fmt.Errorf("kvm device file must be provided") - } - return kvm.New(deviceFile) - default: - return nil, fmt.Errorf("invalid platform %v", conf.Platform) + p, err := platform.Lookup(conf.Platform) + if err != nil { + panic(fmt.Sprintf("invalid platform %v: %v", conf.Platform, err)) } + log.Infof("Platform: %s", conf.Platform) + return p.New(deviceFile) } func createMemoryFile() (*pgalloc.MemoryFile, error) { @@ -445,6 +441,23 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { return mf, nil } +func (l *Loader) installSeccompFilters() error { + if l.conf.DisableSeccomp { + filter.Report("syscall filter is DISABLED. Running in less secure mode.") + } else { + opts := filter.Options{ + Platform: l.k.Platform, + HostNetwork: l.conf.Network == NetworkHost, + ProfileEnable: l.conf.ProfileEnable, + ControllerFD: l.ctrl.srv.FD(), + } + if err := filter.Install(opts); err != nil { + return fmt.Errorf("installing seccomp filters: %v", err) + } + } + return nil +} + // Run runs the root container. func (l *Loader) Run() error { err := l.run() @@ -480,35 +493,29 @@ func (l *Loader) run() error { return fmt.Errorf("trying to start deleted container %q", l.sandboxID) } - // Finally done with all configuration. Setup filters before user code - // is loaded. - if l.conf.DisableSeccomp { - filter.Report("syscall filter is DISABLED. Running in less secure mode.") - } else { - opts := filter.Options{ - Platform: l.k.Platform, - HostNetwork: l.conf.Network == NetworkHost, - ProfileEnable: l.conf.ProfileEnable, - ControllerFD: l.ctrl.srv.FD(), - } - if err := filter.Install(opts); err != nil { - return fmt.Errorf("installing seccomp filters: %v", err) - } - } - // If we are restoring, we do not want to create a process. // l.restore is set by the container manager when a restore call is made. if !l.restore { + if l.conf.ProfileEnable { + initializePProf() + } + + // Finally done with all configuration. Setup filters before user code + // is loaded. + if err := l.installSeccompFilters(); err != nil { + return err + } + // Create the FD map, which will set stdin, stdout, and stderr. If console // is true, then ioctl calls will be passed through to the host fd. ctx := l.rootProcArgs.NewContext(l.k) - fdm, err := createFDMap(ctx, l.rootProcArgs.Limits, l.console, l.stdioFDs) + fdTable, err := createFDTable(ctx, l.console, l.stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } // CreateProcess takes a reference on FDMap if successful. We won't need // ours either way. - l.rootProcArgs.FDMap = fdm + l.rootProcArgs.FDTable = fdTable // cid for root container can be empty. Only subcontainers need it to set // the mount location. @@ -523,19 +530,37 @@ func (l *Loader) run() error { return err } + // Read /etc/passwd for the user's HOME directory and set the HOME + // environment variable as required by POSIX if it is not overridden by + // the user. + hasHomeEnvv := false + for _, envv := range l.rootProcArgs.Envv { + if strings.HasPrefix(envv, "HOME=") { + hasHomeEnvv = true + } + } + if !hasHomeEnvv { + homeDir, err := getExecUserHome(rootCtx, rootMns, uint32(l.rootProcArgs.Credentials.RealKUID)) + if err != nil { + return fmt.Errorf("error reading exec user: %v", err) + } + + l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir) + } + // Create the root container init task. It will begin running // when the kernel is started. if _, _, err := l.k.CreateProcess(l.rootProcArgs); err != nil { return fmt.Errorf("creating init process: %v", err) } - // CreateProcess takes a reference on FDMap if successful. - l.rootProcArgs.FDMap.DecRef() + // CreateProcess takes a reference on FDTable if successful. + l.rootProcArgs.FDTable.DecRef() } ep.tg = l.k.GlobalInit() if l.console { - ttyFile := l.rootProcArgs.FDMap.GetFile(0) + ttyFile, _ := l.rootProcArgs.FDTable.Get(0) defer ttyFile.DecRef() ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations) @@ -615,13 +640,13 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file // Create the FD map, which will set stdin, stdout, and stderr. ctx := procArgs.NewContext(l.k) - fdm, err := createFDMap(ctx, procArgs.Limits, false, stdioFDs) + fdTable, err := createFDTable(ctx, false, stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } - // CreateProcess takes a reference on FDMap if successful. We won't need ours - // either way. - procArgs.FDMap = fdm + // CreateProcess takes a reference on fdTable if successful. We won't + // need ours either way. + procArgs.FDTable = fdTable // Can't take ownership away from os.File. dup them to get a new FDs. var goferFDs []int @@ -650,8 +675,8 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file } l.k.StartProcess(tg) - // CreateProcess takes a reference on FDMap if successful. - procArgs.FDMap.DecRef() + // CreateProcess takes a reference on FDTable if successful. + procArgs.FDTable.DecRef() l.processes[eid].tg = tg return nil @@ -815,9 +840,17 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) { // privileges. Raw: true, })} + + // Enable SACK Recovery. if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { return nil, fmt.Errorf("failed to enable SACK: %v", err) } + + // Enable Receive Buffer Auto-Tuning. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) + } + return &s, nil default: @@ -966,3 +999,8 @@ func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host } return ep.tg, ep.tty, nil } + +func init() { + // TODO(gvisor.dev/issue/365): Make this configurable. + refs.SetLeakMode(refs.NoLeakChecking) +} diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 2f2499811..ff713660d 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -25,18 +25,21 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/control/server" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/runsc/fsgofer" + "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/runsc/fsgofer" ) func init() { log.SetLevel(log.Debug) rand.Seed(time.Now().UnixNano()) + if err := fsgofer.OpenProcSelfFD(); err != nil { + panic(err) + } } func testConfig() *Config { @@ -44,6 +47,7 @@ func testConfig() *Config { RootDir: "unused_root_dir", Network: NetworkNone, DisableSeccomp: true, + Platform: "ptrace", } } diff --git a/runsc/boot/network.go b/runsc/boot/network.go index d86803252..d3d98243d 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -19,16 +19,16 @@ import ( "net" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/urpc" ) // Network exposes methods that can be used to configure a network stack. diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD new file mode 100644 index 000000000..03391cdca --- /dev/null +++ b/runsc/boot/platforms/BUILD @@ -0,0 +1,16 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "platforms", + srcs = ["platforms.go"], + importpath = "gvisor.dev/gvisor/runsc/boot/platforms", + visibility = [ + "//runsc:__subpackages__", + ], + deps = [ + "//pkg/sentry/platform/kvm", + "//pkg/sentry/platform/ptrace", + ], +) diff --git a/runsc/boot/platforms/platforms.go b/runsc/boot/platforms/platforms.go new file mode 100644 index 000000000..056b46ad5 --- /dev/null +++ b/runsc/boot/platforms/platforms.go @@ -0,0 +1,30 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package platforms imports all available platform packages. +package platforms + +import ( + // Import platforms that runsc might use. + _ "gvisor.dev/gvisor/pkg/sentry/platform/kvm" + _ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" +) + +const ( + // Ptrace runs the sandbox with the ptrace platform. + Ptrace = "ptrace" + + // KVM runs the sandbox with the KVM platform. + KVM = "kvm" +) diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof.go new file mode 100644 index 000000000..463362f02 --- /dev/null +++ b/runsc/boot/pprof.go @@ -0,0 +1,18 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +func initializePProf() { +} diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go index 19c7f8fbd..fbfd3b07c 100644 --- a/runsc/boot/strace.go +++ b/runsc/boot/strace.go @@ -15,7 +15,7 @@ package boot import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/strace" + "gvisor.dev/gvisor/pkg/sentry/strace" ) func enableStrace(conf *Config) error { diff --git a/runsc/boot/user.go b/runsc/boot/user.go new file mode 100644 index 000000000..d1d423a5c --- /dev/null +++ b/runsc/boot/user.go @@ -0,0 +1,146 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "bufio" + "io" + "strconv" + "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/usermem" +) + +type fileReader struct { + // Ctx is the context for the file reader. + Ctx context.Context + + // File is the file to read from. + File *fs.File +} + +// Read implements io.Reader.Read. +func (r *fileReader) Read(buf []byte) (int, error) { + n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf)) + return int(n), err +} + +// getExecUserHome returns the home directory of the executing user read from +// /etc/passwd as read from the container filesystem. +func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) { + // The default user home directory to return if no user matching the user + // if found in the /etc/passwd found in the image. + const defaultHome = "/" + + // Open the /etc/passwd file from the dirent via the root mount namespace. + mnsRoot := rootMns.Root() + maxTraversals := uint(linux.MaxSymlinkTraversals) + dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals) + if err != nil { + // NOTE: Ignore errors opening the passwd file. If the passwd file + // doesn't exist we will return the default home directory. + return defaultHome, nil + } + defer dirent.DecRef() + + // Check read permissions on the file. + if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil { + // NOTE: Ignore permissions errors here and return default root dir. + return defaultHome, nil + } + + // Only open regular files. We don't open other files like named pipes as + // they may block and might present some attack surface to the container. + // Note that runc does not seem to do this kind of checking. + if !fs.IsRegular(dirent.Inode.StableAttr) { + return defaultHome, nil + } + + f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false}) + if err != nil { + return "", err + } + defer f.DecRef() + + r := &fileReader{ + Ctx: ctx, + File: f, + } + + homeDir, err := findHomeInPasswd(uid, r, defaultHome) + if err != nil { + return "", err + } + + return homeDir, nil +} + +// findHomeInPasswd parses a passwd file and returns the given user's home +// directory. This function does it's best to replicate the runc's behavior. +func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) { + s := bufio.NewScanner(passwd) + + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // Pull out part of passwd entry. Loosely parse the passwd entry as some + // passwd files could be poorly written and for compatibility with runc. + // + // Per 'man 5 passwd' + // /etc/passwd contains one line for each user account, with seven + // fields delimited by colons (“:”). These fields are: + // + // - login name + // - optional encrypted password + // - numerical user ID + // - numerical group ID + // - user name or comment field + // - user home directory + // - optional user command interpreter + parts := strings.Split(line, ":") + + found := false + homeDir := "" + for i, p := range parts { + switch i { + case 2: + parsedUID, err := strconv.ParseUint(p, 10, 32) + if err == nil && parsedUID == uint64(uid) { + found = true + } + case 5: + homeDir = p + } + } + if found { + // NOTE: If the uid is present but the home directory is not + // present in the /etc/passwd entry we return an empty string. This + // is, for better or worse, what runc does. + return homeDir, nil + } + } + + return defaultHome, nil +} diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go new file mode 100644 index 000000000..834003430 --- /dev/null +++ b/runsc/boot/user_test.go @@ -0,0 +1,253 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "io/ioutil" + "os" + "path/filepath" + "strings" + "syscall" + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" +) + +func setupTempDir() (string, error) { + tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test") + if err != nil { + return "", err + } + return tmpDir, nil +} + +func setupPasswd(contents string, perms os.FileMode) func() (string, error) { + return func() (string, error) { + tmpDir, err := setupTempDir() + if err != nil { + return "", err + } + + if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { + return "", err + } + + f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd")) + if err != nil { + return "", err + } + defer f.Close() + + _, err = f.WriteString(contents) + if err != nil { + return "", err + } + + err = f.Chmod(perms) + if err != nil { + return "", err + } + return tmpDir, nil + } +} + +// TestGetExecUserHome tests the getExecUserHome function. +func TestGetExecUserHome(t *testing.T) { + tests := map[string]struct { + uid uint32 + createRoot func() (string, error) + expected string + }{ + "success": { + uid: 1000, + createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666), + expected: "/home/adin", + }, + "no_passwd": { + uid: 1000, + createRoot: setupTempDir, + expected: "/", + }, + "no_perms": { + uid: 1000, + createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000), + expected: "/", + }, + "directory": { + uid: 1000, + createRoot: func() (string, error) { + tmpDir, err := setupTempDir() + if err != nil { + return "", err + } + + if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { + return "", err + } + + if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { + return "", err + } + + return tmpDir, nil + }, + expected: "/", + }, + // Currently we don't allow named pipes. + "named_pipe": { + uid: 1000, + createRoot: func() (string, error) { + tmpDir, err := setupTempDir() + if err != nil { + return "", err + } + + if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { + return "", err + } + + if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { + return "", err + } + + return tmpDir, nil + }, + expected: "/", + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + tmpDir, err := tc.createRoot() + if err != nil { + t.Fatalf("failed to create root dir: %v", err) + } + + sandEnd, cleanup, err := startGofer(tmpDir) + if err != nil { + t.Fatalf("failed to create gofer: %v", err) + } + defer cleanup() + + ctx := contexttest.Context(t) + conf := &Config{ + RootDir: "unused_root_dir", + Network: NetworkNone, + DisableSeccomp: true, + } + + spec := &specs.Spec{ + Root: &specs.Root{ + Path: tmpDir, + Readonly: true, + }, + // Add /proc mount as tmpfs to avoid needing a kernel. + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "tmpfs", + }, + }, + } + + var mns *fs.MountNamespace + setMountNS := func(m *fs.MountNamespace) { + mns = m + ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root()) + } + mntr := newContainerMounter(spec, "", []int{sandEnd}, nil, &podMountHints{}) + if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil { + t.Fatalf("failed to create mount namespace: %v", err) + } + + got, err := getExecUserHome(ctx, mns, tc.uid) + if err != nil { + t.Fatalf("failed to get user home: %v", err) + } + + if got != tc.expected { + t.Fatalf("expected %v, got: %v", tc.expected, got) + } + }) + } +} + +// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing. +func TestFindHomeInPasswd(t *testing.T) { + tests := map[string]struct { + uid uint32 + passwd string + expected string + def string + }{ + "empty": { + uid: 1000, + passwd: "", + expected: "/", + def: "/", + }, + "whitespace": { + uid: 1000, + passwd: " ", + expected: "/", + def: "/", + }, + "full": { + uid: 1000, + passwd: "adin::1000:1111::/home/adin:/bin/sh", + expected: "/home/adin", + def: "/", + }, + // For better or worse, this is how runc works. + "partial": { + uid: 1000, + passwd: "adin::1000:1111:", + expected: "", + def: "/", + }, + "multiple": { + uid: 1001, + passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh", + expected: "/home/ian", + def: "/", + }, + "duplicate": { + uid: 1000, + passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh", + expected: "/home/adin", + def: "/", + }, + "empty_lines": { + uid: 1001, + passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh", + expected: "/home/ian", + def: "/", + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def) + if err != nil { + t.Fatalf("error parsing passwd: %v", err) + } + if tc.expected != got { + t.Fatalf("expected %v, got: %v", tc.expected, got) + } + }) + } +} |