summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot/loader.go
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot/loader.go')
-rw-r--r--runsc/boot/loader.go153
1 files changed, 98 insertions, 55 deletions
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index a997776f8..38425f97d 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -20,51 +20,52 @@ import (
mrand "math/rand"
"os"
"runtime"
+ "strings"
"sync"
"sync/atomic"
"syscall"
gtime "time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
- "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling"
- slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
- "gvisor.googlesource.com/gvisor/runsc/boot/filter"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/memutil"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/kvm"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace"
+ "gvisor.dev/gvisor/pkg/sentry/sighandling"
+ slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/runsc/boot/filter"
+ "gvisor.dev/gvisor/runsc/specutils"
// Include supported socket providers.
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
)
// Loader keeps state needed to start the kernel and run the container..
@@ -117,6 +118,10 @@ type Loader struct {
//
// processes is guardded by mu.
processes map[execID]*execProcess
+
+ // mountHints provides extra information about mounts for containers that
+ // apply to the entire pod.
+ mountHints *podMountHints
}
// execID uniquely identifies a sentry process that is executed in a container.
@@ -299,6 +304,11 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("initializing compat logs: %v", err)
}
+ mountHints, err := newPodMountHints(args.Spec)
+ if err != nil {
+ return nil, fmt.Errorf("creating pod mount hints: %v", err)
+ }
+
eid := execID{cid: args.ID}
l := &Loader{
k: k,
@@ -311,6 +321,7 @@ func New(args Args) (*Loader, error) {
rootProcArgs: procArgs,
sandboxID: args.ID,
processes: map[execID]*execProcess{eid: {}},
+ mountHints: mountHints,
}
// We don't care about child signals; some platforms can generate a
@@ -424,6 +435,9 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
return nil, fmt.Errorf("error creating memfd: %v", err)
}
memfile := os.NewFile(uintptr(memfd), memfileName)
+ // We can't enable pgalloc.MemoryFileOpts.UseHostMemcgPressure even if
+ // there are memory cgroups specified, because at this point we're already
+ // in a mount namespace in which the relevant cgroupfs is not visible.
mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{})
if err != nil {
memfile.Close()
@@ -432,6 +446,23 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
return mf, nil
}
+func (l *Loader) installSeccompFilters() error {
+ if l.conf.DisableSeccomp {
+ filter.Report("syscall filter is DISABLED. Running in less secure mode.")
+ } else {
+ opts := filter.Options{
+ Platform: l.k.Platform,
+ HostNetwork: l.conf.Network == NetworkHost,
+ ProfileEnable: l.conf.ProfileEnable,
+ ControllerFD: l.ctrl.srv.FD(),
+ }
+ if err := filter.Install(opts); err != nil {
+ return fmt.Errorf("installing seccomp filters: %v", err)
+ }
+ }
+ return nil
+}
+
// Run runs the root container.
func (l *Loader) Run() error {
err := l.run()
@@ -467,25 +498,19 @@ func (l *Loader) run() error {
return fmt.Errorf("trying to start deleted container %q", l.sandboxID)
}
- // Finally done with all configuration. Setup filters before user code
- // is loaded.
- if l.conf.DisableSeccomp {
- filter.Report("syscall filter is DISABLED. Running in less secure mode.")
- } else {
- opts := filter.Options{
- Platform: l.k.Platform,
- HostNetwork: l.conf.Network == NetworkHost,
- ProfileEnable: l.conf.ProfileEnable,
- ControllerFD: l.ctrl.srv.FD(),
- }
- if err := filter.Install(opts); err != nil {
- return fmt.Errorf("installing seccomp filters: %v", err)
- }
- }
-
// If we are restoring, we do not want to create a process.
// l.restore is set by the container manager when a restore call is made.
if !l.restore {
+ if l.conf.ProfileEnable {
+ initializePProf()
+ }
+
+ // Finally done with all configuration. Setup filters before user code
+ // is loaded.
+ if err := l.installSeccompFilters(); err != nil {
+ return err
+ }
+
// Create the FD map, which will set stdin, stdout, and stderr. If console
// is true, then ioctl calls will be passed through to the host fd.
ctx := l.rootProcArgs.NewContext(l.k)
@@ -499,7 +524,7 @@ func (l *Loader) run() error {
// cid for root container can be empty. Only subcontainers need it to set
// the mount location.
- mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k)
+ mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints)
if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil {
return err
}
@@ -510,6 +535,24 @@ func (l *Loader) run() error {
return err
}
+ // Read /etc/passwd for the user's HOME directory and set the HOME
+ // environment variable as required by POSIX if it is not overridden by
+ // the user.
+ hasHomeEnvv := false
+ for _, envv := range l.rootProcArgs.Envv {
+ if strings.HasPrefix(envv, "HOME=") {
+ hasHomeEnvv = true
+ }
+ }
+ if !hasHomeEnvv {
+ homeDir, err := getExecUserHome(rootCtx, rootMns, uint32(l.rootProcArgs.Credentials.RealKUID))
+ if err != nil {
+ return fmt.Errorf("error reading exec user: %v", err)
+ }
+
+ l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir)
+ }
+
// Create the root container init task. It will begin running
// when the kernel is started.
if _, _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
@@ -620,7 +663,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
goferFDs = append(goferFDs, fd)
}
- mntr := newContainerMounter(spec, cid, goferFDs, l.k)
+ mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints)
if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil {
return fmt.Errorf("configuring container FS: %v", err)
}