diff options
author | Googler <noreply@google.com> | 2018-04-27 10:37:02 -0700 |
---|---|---|
committer | Adin Scannell <ascannell@google.com> | 2018-04-28 01:44:26 -0400 |
commit | d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch) | |
tree | 54f95eef73aee6bacbfc736fffc631be2605ed53 /runsc/boot/loader.go | |
parent | f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff) |
Check in gVisor.
PiperOrigin-RevId: 194583126
Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'runsc/boot/loader.go')
-rw-r--r-- | runsc/boot/loader.go | 354 |
1 files changed, 354 insertions, 0 deletions
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go new file mode 100644 index 000000000..a470cb054 --- /dev/null +++ b/runsc/boot/loader.go @@ -0,0 +1,354 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package boot loads the kernel and runs the application. +package boot + +import ( + "fmt" + "math/rand" + "sync/atomic" + "syscall" + gtime "time" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/cpuid" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/inet" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/loader" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" + "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling" + slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/time" + "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" + "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" + "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" + "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" + "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" + "gvisor.googlesource.com/gvisor/runsc/boot/filter" + "gvisor.googlesource.com/gvisor/runsc/specutils" + + // Include supported socket providers. + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet" + _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink" + _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route" + _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" +) + +// Loader keeps state needed to start the kernel and run the application. +type Loader struct { + // k is the kernel. + k *kernel.Kernel + + // ctrl is the control server. + ctrl *controller + + conf *Config + + // console is set to true if terminal is enabled. + console bool + + watchdog *watchdog.Watchdog + + // stopSignalForwarding disables forwarding of signals to the sandboxed + // app. It should be called when a sandbox is destroyed. + stopSignalForwarding func() + + // procArgs refers to the initial application task. + procArgs kernel.CreateProcessArgs +} + +func init() { + // Initialize the random number generator. + rand.Seed(gtime.Now().UnixNano()) + + // Register the global syscall table. + kernel.RegisterSyscallTable(slinux.AMD64) +} + +// New initializes a new kernel loader configured by spec. +func New(spec *specs.Spec, conf *Config, controllerFD int, ioFDs []int, console bool) (*Loader, error) { + // Create kernel and platform. + p, err := createPlatform(conf) + if err != nil { + return nil, fmt.Errorf("error creating platform: %v", err) + } + k := &kernel.Kernel{ + Platform: p, + } + + // Create VDSO. + vdso, err := loader.PrepareVDSO(p) + if err != nil { + return nil, fmt.Errorf("error creating vdso: %v", err) + } + + // Create timekeeper. + tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) + if err != nil { + return nil, fmt.Errorf("error creating timekeeper: %v", err) + } + tk.SetClocks(time.NewCalibratedClocks()) + + // Create initial limits. + ls, err := createLimitSet(spec) + if err != nil { + return nil, fmt.Errorf("error creating limits: %v", err) + } + + // Create capabilities. + caps, err := specutils.Capabilities(spec.Process.Capabilities) + if err != nil { + return nil, fmt.Errorf("error creating capabilities: %v", err) + } + + // Convert the spec's additional GIDs to KGIDs. + extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids)) + for _, GID := range spec.Process.User.AdditionalGids { + extraKGIDs = append(extraKGIDs, auth.KGID(GID)) + } + + // Create credentials. + creds := auth.NewUserCredentials( + auth.KUID(spec.Process.User.UID), + auth.KGID(spec.Process.User.GID), + extraKGIDs, + caps, + auth.NewRootUserNamespace()) + if err != nil { + return nil, fmt.Errorf("error creating credentials: %v", err) + } + + // Create user namespace. + // TODO: Not clear what domain name should be here. It is + // not configurable from runtime spec. + utsns := kernel.NewUTSNamespace(spec.Hostname, "", creds.UserNamespace) + + ipcns := kernel.NewIPCNamespace() + + if err := enableStrace(conf); err != nil { + return nil, fmt.Errorf("failed to enable strace: %v", err) + } + + // Get the executable path, which is a bit tricky because we have to + // inspect the environment PATH which is relative to the root path. + exec, err := specutils.GetExecutablePath(spec.Process.Args[0], spec.Root.Path, spec.Process.Env) + if err != nil { + return nil, fmt.Errorf("error getting executable path: %v", err) + } + + // Create the process arguments. + procArgs := kernel.CreateProcessArgs{ + Filename: exec, + Argv: spec.Process.Args, + Envv: spec.Process.Env, + WorkingDirectory: spec.Process.Cwd, + Credentials: creds, + // Creating the FDMap requires that we have kernel.Kernel.fdMapUids, so + // it must wait until we have a Kernel. + Umask: uint(syscall.Umask(0)), + Limits: ls, + MaxSymlinkTraversals: linux.MaxSymlinkTraversals, + UTSNamespace: utsns, + IPCNamespace: ipcns, + } + + // Create an empty network stack because the network namespace may be empty at + // this point. Netns is configured before Run() is called. Netstack is + // configured using a control uRPC message. Host network is configured inside + // Run(). + networkStack := newEmptyNetworkStack(conf) + + // Initiate the Kernel object, which is required by the Context passed + // to createVFS in order to mount (among other things) procfs. + if err = k.Init(kernel.InitKernelArgs{ + FeatureSet: cpuid.HostFeatureSet(), + Timekeeper: tk, + RootUserNamespace: creds.UserNamespace, + NetworkStack: networkStack, + ApplicationCores: 8, + Vdso: vdso, + RootUTSNamespace: utsns, + RootIPCNamespace: ipcns, + }); err != nil { + return nil, fmt.Errorf("error initializing kernel: %v", err) + } + + // Turn on packet logging if enabled. + if conf.LogPackets { + log.Infof("Packet logging enabled") + atomic.StoreUint32(&sniffer.LogPackets, 1) + } else { + log.Infof("Packet logging disabled") + atomic.StoreUint32(&sniffer.LogPackets, 0) + } + + // Create the control server using the provided FD. + // + // This must be done *after* we have initialized the kernel since the + // controller is used to configure the kernel's network stack. + // + // This should also be *before* we create the process, since a + // misconfigured process will cause an error, and we want the control + // server up before that so that we don't time out trying to connect to + // it. + ctrl, err := newController(controllerFD, k) + if err != nil { + return nil, fmt.Errorf("error creating control server: %v", err) + } + + ctx := procArgs.NewContext(k) + + // Create the virtual filesystem. + mm, err := createMountNamespace(ctx, spec, conf, ioFDs) + if err != nil { + return nil, fmt.Errorf("error creating mounts: %v", err) + } + k.SetRootMountNamespace(mm) + + // Create the FD map, which will set stdin, stdout, and stderr. If console + // is true, then ioctl calls will be passed through to the host fd. + fdm, err := createFDMap(ctx, k, ls, console) + if err != nil { + return nil, fmt.Errorf("error importing fds: %v", err) + } + + // CreateProcess takes a reference on FDMap if successful. We + // won't need ours either way. + procArgs.FDMap = fdm + + // We don't care about child signals; some platforms can generate a + // tremendous number of useless ones (I'm looking at you, ptrace). + if err := sighandling.IgnoreChildStop(); err != nil { + return nil, fmt.Errorf("failed to ignore child stop signals: %v", err) + } + // Ensure that most signals received in sentry context are forwarded to + // the emulated kernel. + stopSignalForwarding := sighandling.StartForwarding(k) + + watchdog := watchdog.New(k, watchdog.DefaultTimeout, watchdog.LogWarning) + return &Loader{ + k: k, + ctrl: ctrl, + conf: conf, + console: console, + watchdog: watchdog, + stopSignalForwarding: stopSignalForwarding, + procArgs: procArgs, + }, nil +} + +// Destroy cleans up all resources used by the loader. +func (l *Loader) Destroy() { + if l.ctrl != nil { + // Shut down control server. + l.ctrl.srv.Stop() + } + l.stopSignalForwarding() + l.watchdog.Stop() +} + +func createPlatform(conf *Config) (platform.Platform, error) { + switch conf.Platform { + case PlatformPtrace: + log.Infof("Platform: ptrace") + return ptrace.New() + case PlatformKVM: + log.Infof("Platform: kvm") + return kvm.New() + default: + return nil, fmt.Errorf("invalid platform %v", conf.Platform) + } +} + +// Run runs the application. +func (l *Loader) Run() error { + err := l.run() + l.ctrl.app.startResultChan <- err + return err +} + +func (l *Loader) run() error { + if l.conf.Network == NetworkHost { + // Delay host network configuration to this point because network namespace + // is configured after the loader is created and before Run() is called. + log.Debugf("Configuring host network") + stack := l.k.NetworkStack().(*hostinet.Stack) + if err := stack.Configure(); err != nil { + return err + } + } + + // Finally done with all configuration. Setup filters before user code + // is loaded. + if l.conf.DisableSeccomp { + filter.Report("syscall filter is DISABLED. Running in less secure mode.") + } else { + whitelistFS := l.conf.FileAccess == FileAccessDirect + hostNet := l.conf.Network == NetworkHost + if err := filter.Install(l.k.Platform, whitelistFS, l.console, hostNet); err != nil { + return fmt.Errorf("Failed to install seccomp filters: %v", err) + } + } + + // Create the initial application task. + if _, err := l.k.CreateProcess(l.procArgs); err != nil { + return fmt.Errorf("failed to create init process: %v", err) + } + + // CreateProcess takes a reference on FDMap if successful. + l.procArgs.FDMap.DecRef() + + l.watchdog.Start() + return l.k.Start() +} + +// WaitForStartSignal waits for a start signal from the control server. +func (l *Loader) WaitForStartSignal() { + <-l.ctrl.app.startChan +} + +// WaitExit waits for the application to exit, and returns the application's +// exit status. +func (l *Loader) WaitExit() kernel.ExitStatus { + // Wait for application. + l.k.WaitExited() + + return l.k.GlobalInit().ExitStatus() +} + +func newEmptyNetworkStack(conf *Config) inet.Stack { + switch conf.Network { + case NetworkHost: + return hostinet.NewStack() + + case NetworkNone, NetworkSandbox: + // NetworkNone sets up loopback using netstack. + netProtos := []string{ipv4.ProtocolName, ipv6.ProtocolName, arp.ProtocolName} + protoNames := []string{tcp.ProtocolName, udp.ProtocolName} + return &epsocket.Stack{stack.New(netProtos, protoNames)} + + default: + panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + } +} |