diff options
Diffstat (limited to 'runsc/boot/loader.go')
-rw-r--r-- | runsc/boot/loader.go | 153 |
1 files changed, 98 insertions, 55 deletions
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index a997776f8..38425f97d 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -20,51 +20,52 @@ import ( mrand "math/rand" "os" "runtime" + "strings" "sync" "sync/atomic" "syscall" gtime "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/loader" - "gvisor.googlesource.com/gvisor/pkg/sentry/memutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" - "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling" - slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/runsc/boot/filter" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/memutil" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/loader" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/kvm" + "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" + "gvisor.dev/gvisor/pkg/sentry/sighandling" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" + "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/runsc/boot/filter" + "gvisor.dev/gvisor/runsc/specutils" // Include supported socket providers. - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/epsocket" + "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" + _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink" + _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route" + _ "gvisor.dev/gvisor/pkg/sentry/socket/unix" ) // Loader keeps state needed to start the kernel and run the container.. @@ -117,6 +118,10 @@ type Loader struct { // // processes is guardded by mu. processes map[execID]*execProcess + + // mountHints provides extra information about mounts for containers that + // apply to the entire pod. + mountHints *podMountHints } // execID uniquely identifies a sentry process that is executed in a container. @@ -299,6 +304,11 @@ func New(args Args) (*Loader, error) { return nil, fmt.Errorf("initializing compat logs: %v", err) } + mountHints, err := newPodMountHints(args.Spec) + if err != nil { + return nil, fmt.Errorf("creating pod mount hints: %v", err) + } + eid := execID{cid: args.ID} l := &Loader{ k: k, @@ -311,6 +321,7 @@ func New(args Args) (*Loader, error) { rootProcArgs: procArgs, sandboxID: args.ID, processes: map[execID]*execProcess{eid: {}}, + mountHints: mountHints, } // We don't care about child signals; some platforms can generate a @@ -424,6 +435,9 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { return nil, fmt.Errorf("error creating memfd: %v", err) } memfile := os.NewFile(uintptr(memfd), memfileName) + // We can't enable pgalloc.MemoryFileOpts.UseHostMemcgPressure even if + // there are memory cgroups specified, because at this point we're already + // in a mount namespace in which the relevant cgroupfs is not visible. mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{}) if err != nil { memfile.Close() @@ -432,6 +446,23 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { return mf, nil } +func (l *Loader) installSeccompFilters() error { + if l.conf.DisableSeccomp { + filter.Report("syscall filter is DISABLED. Running in less secure mode.") + } else { + opts := filter.Options{ + Platform: l.k.Platform, + HostNetwork: l.conf.Network == NetworkHost, + ProfileEnable: l.conf.ProfileEnable, + ControllerFD: l.ctrl.srv.FD(), + } + if err := filter.Install(opts); err != nil { + return fmt.Errorf("installing seccomp filters: %v", err) + } + } + return nil +} + // Run runs the root container. func (l *Loader) Run() error { err := l.run() @@ -467,25 +498,19 @@ func (l *Loader) run() error { return fmt.Errorf("trying to start deleted container %q", l.sandboxID) } - // Finally done with all configuration. Setup filters before user code - // is loaded. - if l.conf.DisableSeccomp { - filter.Report("syscall filter is DISABLED. Running in less secure mode.") - } else { - opts := filter.Options{ - Platform: l.k.Platform, - HostNetwork: l.conf.Network == NetworkHost, - ProfileEnable: l.conf.ProfileEnable, - ControllerFD: l.ctrl.srv.FD(), - } - if err := filter.Install(opts); err != nil { - return fmt.Errorf("installing seccomp filters: %v", err) - } - } - // If we are restoring, we do not want to create a process. // l.restore is set by the container manager when a restore call is made. if !l.restore { + if l.conf.ProfileEnable { + initializePProf() + } + + // Finally done with all configuration. Setup filters before user code + // is loaded. + if err := l.installSeccompFilters(); err != nil { + return err + } + // Create the FD map, which will set stdin, stdout, and stderr. If console // is true, then ioctl calls will be passed through to the host fd. ctx := l.rootProcArgs.NewContext(l.k) @@ -499,7 +524,7 @@ func (l *Loader) run() error { // cid for root container can be empty. Only subcontainers need it to set // the mount location. - mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k) + mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints) if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil { return err } @@ -510,6 +535,24 @@ func (l *Loader) run() error { return err } + // Read /etc/passwd for the user's HOME directory and set the HOME + // environment variable as required by POSIX if it is not overridden by + // the user. + hasHomeEnvv := false + for _, envv := range l.rootProcArgs.Envv { + if strings.HasPrefix(envv, "HOME=") { + hasHomeEnvv = true + } + } + if !hasHomeEnvv { + homeDir, err := getExecUserHome(rootCtx, rootMns, uint32(l.rootProcArgs.Credentials.RealKUID)) + if err != nil { + return fmt.Errorf("error reading exec user: %v", err) + } + + l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir) + } + // Create the root container init task. It will begin running // when the kernel is started. if _, _, err := l.k.CreateProcess(l.rootProcArgs); err != nil { @@ -620,7 +663,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file goferFDs = append(goferFDs, fd) } - mntr := newContainerMounter(spec, cid, goferFDs, l.k) + mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints) if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil { return fmt.Errorf("configuring container FS: %v", err) } |