From f1c01ed88666ea81d8f5cef7931153a9951a6e64 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Mon, 1 Oct 2018 22:05:41 -0700 Subject: runsc: Support job control signals in "exec -it". Terminal support in runsc relies on host tty file descriptors that are imported into the sandbox. Application tty ioctls are sent directly to the host fd. However, those host tty ioctls are associated in the host kernel with a host process (in this case runsc), and the host kernel intercepts job control characters like ^C and sends signals to the host process. Thus, typing ^C into a "runsc exec" shell will send a SIGINT to the runsc process. This change makes "runsc exec" handle all signals, and forward them into the sandbox via the "ContainerSignal" urpc method. Since the "runsc exec" is associated with a particular container process in the sandbox, the signal must be associated with the same container process. One big difficulty is that the signal should not necessarily be sent to the sandbox process started by "exec", but instead must be sent to the foreground process group for the tty. For example, we may exec "bash", and from bash call "sleep 100". A ^C at this point should SIGINT sleep, not bash. To handle this, tty files inside the sandbox must keep track of their foreground process group, which is set/get via ioctls. When a ContainerSignal urpc comes in, we look up the foreground process group via the tty file. Unfortunately, this means we have to expose and cache the tty file in the Loader. Note that "runsc exec" now handles signals properly, but "runsc run" does not. That will come in a later CL, as this one is complex enough already. 
Example: root@:/usr/local/apache2# sleep 100 ^C root@:/usr/local/apache2# sleep 100 ^Z [1]+ Stopped sleep 100 root@:/usr/local/apache2# fg sleep 100 ^C root@:/usr/local/apache2# PiperOrigin-RevId: 215334554 Change-Id: I53cdce39653027908510a5ba8d08c49f9cf24f39 --- runsc/boot/controller.go | 46 ++++++++++++++++---- runsc/boot/fds.go | 8 ++-- runsc/boot/loader.go | 108 ++++++++++++++++++++++++++++++++++++----------- 3 files changed, 126 insertions(+), 36 deletions(-) (limited to 'runsc/boot') diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 98356e8b7..eaeb9e2d8 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -65,6 +65,10 @@ const ( // ContainerSignal is used to send a signal to a container. ContainerSignal = "containerManager.Signal" + // ContainerSignalProcess is used to send a signal to a particular + // process in a container. + ContainerSignalProcess = "containerManager.SignalProcess" + // ContainerStart is the URPC endpoint for running a non-root container // within a sandbox. ContainerStart = "containerManager.Start" @@ -92,7 +96,7 @@ const ( SandboxStacks = "debug.Stacks" ) -// ControlSocketAddr generates an abstract unix socket name for the given id. +// ControlSocketAddr generates an abstract unix socket name for the given ID. func ControlSocketAddr(id string) string { return fmt.Sprintf("\x00runsc-sandbox.%s", id) } @@ -248,7 +252,7 @@ func (cm *containerManager) Destroy(cid *string, _ *struct{}) error { } // ExecuteAsync starts running a command on a created or running sandbox. It -// returns the pid of the new process. +// returns the PID of the new process. func (cm *containerManager) ExecuteAsync(args *control.ExecArgs, pid *int32) error { log.Debugf("containerManager.ExecuteAsync: %+v", args) tgid, err := cm.l.executeAsync(args) @@ -373,8 +377,12 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // restore the state of multiple containers, nor exec processes. 
cm.l.sandboxID = o.SandboxID cm.l.mu.Lock() - key := execID{cid: o.SandboxID} - cm.l.processes = map[execID]*kernel.ThreadGroup{key: cm.l.k.GlobalInit()} + eid := execID{cid: o.SandboxID} + cm.l.processes = map[execID]*execProcess{ + eid: &execProcess{ + tg: cm.l.k.GlobalInit(), + }, + } cm.l.mu.Unlock() // Tell the root container to start and wait for the result. @@ -419,7 +427,7 @@ func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error // SignalArgs are arguments to the Signal method. type SignalArgs struct { - // CID is the container id. + // CID is the container ID. CID string // Signo is the signal to send to the process. @@ -430,9 +438,31 @@ type SignalArgs struct { All bool } -// Signal sends a signal to the init process of the container. -// TODO: Send signal to exec process. +// Signal sends a signal to the root process of the container. func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error { log.Debugf("containerManager.Signal %q %d, all: %t", args.CID, args.Signo, args.All) - return cm.l.signal(args.CID, args.Signo, args.All) + return cm.l.signalContainer(args.CID, args.Signo, args.All) +} + +// SignalProcessArgs are arguments to the Signal method. +type SignalProcessArgs struct { + // CID is the container ID. + CID string + + // PID is the process ID in the given container that will be signaled. + PID int32 + + // Signo is the signal to send to the process. + Signo int32 + + // SendToForegroundProcess indicates that the signal should be sent to + // the foreground process group in the session that PID belongs to. + // This is only valid if the process is attached to a host TTY. + SendToForegroundProcess bool +} + +// SignalProcess sends a signal to a particular process in the container. 
+func (cm *containerManager) SignalProcess(args *SignalProcessArgs, _ *struct{}) error { + log.Debugf("containerManager.Signal: %+v", args) + return cm.l.signalProcess(args.CID, args.PID, args.Signo, args.SendToForegroundProcess) } diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go index 92d641b68..a5a6ba8af 100644 --- a/runsc/boot/fds.go +++ b/runsc/boot/fds.go @@ -25,8 +25,8 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/limits" ) -// createFDMap creates an fd map that contains stdin, stdout, and stderr. If -// console is true, then ioctl calls will be passed through to the host fd. +// createFDMap creates an FD map that contains stdin, stdout, and stderr. If +// console is true, then ioctl calls will be passed through to the host FD. // Upon success, createFDMap dups then closes stdioFDs. func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) { if len(stdioFDs) != 3 { @@ -36,7 +36,7 @@ func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, cons fdm := k.NewFDMap() defer fdm.DecRef() - // Maps sandbox fd to host fd. + // Maps sandbox FD to host FD. 
fdMap := map[int]int{ 0: stdioFDs[0], 1: stdioFDs[1], @@ -45,7 +45,7 @@ func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, cons mounter := fs.FileOwnerFromContext(ctx) for sfd, hfd := range fdMap { - file, err := host.ImportFile(ctx, hfd, mounter, console /* allow ioctls */) + file, err := host.ImportFile(ctx, hfd, mounter, console /* isTTY */) if err != nil { return nil, fmt.Errorf("failed to import fd %d: %v", hfd, err) } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 9fa9b51a0..766a2e968 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -31,6 +31,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/control" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" "gvisor.googlesource.com/gvisor/pkg/sentry/inet" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" @@ -112,7 +113,7 @@ type Loader struct { // have the corresponding pid set. // // processes is guardded by mu. - processes map[execID]*kernel.ThreadGroup + processes map[execID]*execProcess } // execID uniquely identifies a sentry process. @@ -121,6 +122,14 @@ type execID struct { pid kernel.ThreadID } +// execProcess contains the thread group and host TTY of a sentry process. +type execProcess struct { + tg *kernel.ThreadGroup + + // tty will be nil if the process is not attached to a terminal. + tty *host.TTYFileOperations +} + func init() { // Initialize the random number generator. 
rand.Seed(gtime.Now().UnixNano()) @@ -276,7 +285,7 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int, startSignalForwarding: startSignalForwarding, rootProcArgs: procArgs, sandboxID: id, - processes: make(map[execID]*kernel.ThreadGroup), + processes: make(map[execID]*execProcess), } ctrl.manager.l = l return l, nil @@ -330,7 +339,7 @@ func createPlatform(conf *Config, deviceFD int) (platform.Platform, error) { case PlatformKVM: log.Infof("Platform: kvm") if deviceFD < 0 { - return nil, fmt.Errorf("kvm device fd must be provided") + return nil, fmt.Errorf("kvm device FD must be provided") } return kvm.New(os.NewFile(uintptr(deviceFD), "kvm device")) default: @@ -413,8 +422,8 @@ func (l *Loader) run() error { } l.mu.Lock() - key := execID{cid: l.sandboxID} - l.processes[key] = l.k.GlobalInit() + eid := execID{cid: l.sandboxID} + l.processes[eid] = &execProcess{tg: l.k.GlobalInit()} l.mu.Unlock() // Start signal forwarding only after an init process is created. @@ -510,8 +519,8 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config l.mu.Lock() defer l.mu.Unlock() - key := execID{cid: cid} - l.processes[key] = tg + eid := execID{cid: cid} + l.processes[eid] = &execProcess{tg: tg} return nil } @@ -520,7 +529,7 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config // filesystem. func (l *Loader) destroyContainer(cid string) error { // First kill and wait for all processes in the container. - if err := l.signal(cid, int32(linux.SIGKILL), true /*all*/); err != nil { + if err := l.signalContainer(cid, int32(linux.SIGKILL), true /*all*/); err != nil { return fmt.Errorf("failed to SIGKILL all container processes: %v", err) } @@ -549,12 +558,12 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { // process with the same Root. 
l.mu.Lock() rootKey := execID{cid: args.ContainerID} - tg, ok := l.processes[rootKey] + ep, ok := l.processes[rootKey] l.mu.Unlock() if !ok { return 0, fmt.Errorf("cannot exec in container %q: no such container", args.ContainerID) } - tg.Leader().WithMuLocked(func(t *kernel.Task) { + ep.tg.Leader().WithMuLocked(func(t *kernel.Task) { args.Root = t.FSContext().RootDirectory() }) if args.Root != nil { @@ -563,7 +572,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { // Start the process. proc := control.Proc{Kernel: l.k} - tg, tgid, err := control.ExecAsync(&proc, args) + tg, tgid, ttyFile, err := control.ExecAsync(&proc, args) if err != nil { return 0, fmt.Errorf("error executing: %+v: %v", args, err) } @@ -573,7 +582,10 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { l.mu.Lock() defer l.mu.Unlock() eid := execID{cid: args.ContainerID, pid: tgid} - l.processes[eid] = tg + l.processes[eid] = &execProcess{ + tg: tg, + tty: ttyFile, + } log.Debugf("updated processes: %v", l.processes) return tgid, nil @@ -584,8 +596,8 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // Don't defer unlock, as doing so would make it impossible for // multiple clients to wait on the same container. l.mu.Lock() - key := execID{cid: cid} - tg, ok := l.processes[key] + eid := execID{cid: cid} + ep, ok := l.processes[eid] l.mu.Unlock() if !ok { return fmt.Errorf("can't find process for container %q in %v", cid, l.processes) @@ -593,7 +605,7 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // If the thread either has already exited or exits during waiting, // consider the container exited. - ws := l.wait(tg) + ws := l.wait(ep.tg) *waitStatus = ws return nil } @@ -610,10 +622,10 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, wai // entry in l.processes. 
l.mu.Lock() eid := execID{cid: cid, pid: tgid} - tg, ok := l.processes[eid] + ep, ok := l.processes[eid] l.mu.Unlock() if ok { - ws := l.wait(tg) + ws := l.wait(ep.tg) *waitStatus = ws if clearStatus { // Remove tg from the cache. @@ -626,8 +638,8 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, wai } // This process wasn't created by runsc exec or start, so just find it - // by pid and hope it hasn't exited yet. - tg = l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid)) + // by PID and hope it hasn't exited yet. + tg := l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid)) if tg == nil { return fmt.Errorf("no thread group with ID %d", tgid) } @@ -682,18 +694,66 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) { } } -func (l *Loader) signal(cid string, signo int32, all bool) error { +// signalProcess sends a signal to the process with the given PID. If +// sendToFGProcess is true, then the signal will be sent to the foreground +// process group in the same session that PID belongs to. +func (l *Loader) signalProcess(cid string, pid, signo int32, sendToFGProcess bool) error { + si := arch.SignalInfo{Signo: signo} + + if pid <= 0 { + return fmt.Errorf("failed to signal container %q PID %d: PID must be positive", cid, pid) + } + + eid := execID{ + cid: cid, + pid: kernel.ThreadID(pid), + } l.mu.Lock() - key := execID{cid: cid} - tg, ok := l.processes[key] + ep, ok := l.processes[eid] l.mu.Unlock() + if !ok { - return fmt.Errorf("failed to signal container %q: no such container", cid) + return fmt.Errorf("failed to signal container %q PID %d: no such PID", cid, pid) + } + + if !sendToFGProcess { + // Send signal directly to exec process. + return ep.tg.SendSignal(&si) } + // Lookup foreground process group from the TTY for the given process, + // and send the signal to it. 
+ if ep.tty == nil { + return fmt.Errorf("failed to signal foreground process group in container %q PID %d: no TTY attached", cid, pid) + } + pg := ep.tty.ForegroundProcessGroup() + if pg == nil { + // No foreground process group has been set. Signal the + // original thread group. + log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, pid, pid) + return ep.tg.SendSignal(&si) + } + + // Send the signal. + return pg.Originator().SendSignal(&si) +} + +// signalContainer sends a signal to the root container process, or to all +// processes in the container if all is true. +func (l *Loader) signalContainer(cid string, signo int32, all bool) error { si := arch.SignalInfo{Signo: signo} + + l.mu.Lock() + defer l.mu.Unlock() + + eid := execID{cid: cid} + ep, ok := l.processes[eid] + if !ok { + return fmt.Errorf("failed to signal container %q: no such container", cid) + } + if !all { - return tg.Leader().SendSignal(&si) + return ep.tg.SendSignal(&si) } // Pause the kernel to prevent new processes from being created while -- cgit v1.2.3