author     Nicolas Lacasse <nlacasse@google.com>  2018-10-01 22:05:41 -0700
committer  Shentubot <shentubot@google.com>       2018-10-01 22:06:56 -0700
commit     f1c01ed88666ea81d8f5cef7931153a9951a6e64
tree       796b9812ddda2d7b9866225dabb4b94b058c420b
parent     0400e5459288592768af12ab71609c6df6afe3d7
runsc: Support job control signals in "exec -it".
Terminal support in runsc relies on host tty file descriptors that are imported
into the sandbox. Application tty ioctls are sent directly to the host fd.

However, those host tty ioctls are associated in the host kernel with a host
process (in this case runsc), and the host kernel intercepts job control
characters like ^C and sends signals to the host process. Thus, typing ^C into
a "runsc exec" shell will send a SIGINT to the runsc process.

This change makes "runsc exec" handle all signals, and forward them into the
sandbox via the "ContainerSignal" urpc method. Since "runsc exec" is associated
with a particular container process in the sandbox, the signal must be
associated with the same container process.

One big difficulty is that the signal should not necessarily be sent to the
sandbox process started by "exec", but instead must be sent to the foreground
process group for the tty. For example, we may exec "bash", and from bash call
"sleep 100". A ^C at this point should SIGINT sleep, not bash.

To handle this, tty files inside the sandbox must keep track of their
foreground process group, which is set/get via ioctls. When an incoming
ContainerSignal urpc comes in, we look up the foreground process group via the
tty file. Unfortunately, this means we have to expose and cache the tty file in
the Loader.

Note that "runsc exec" now handles signals properly, but "runsc run" does not.
That will come in a later CL, as this one is complex enough already.

Example:

    root@:/usr/local/apache2# sleep 100
    ^C
    root@:/usr/local/apache2# sleep 100
    ^Z
    [1]+  Stopped                 sleep 100
    root@:/usr/local/apache2# fg
    sleep 100
    ^C
    root@:/usr/local/apache2#

PiperOrigin-RevId: 215334554
Change-Id: I53cdce39653027908510a5ba8d08c49f9cf24f39
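As background for the mechanism described above, the sketch below shows the classic
host-side form of the same idea: ask the terminal which process group is currently in
the foreground (the TIOCGPGRP ioctl, i.e. tcgetpgrp(3)), then deliver the signal to
that whole group by passing a negative pid to kill(2). This is illustrative only and
is not code from this change; the helper name forwardToForeground is made up, and
runsc itself cannot do this on the host because the application's process groups exist
only inside the sentry, so the real change does the lookup in TTYFileOperations and
delivers the signal through the new SignalProcess urpc instead.

// Illustrative sketch (not part of this change): a terminal-owning host
// process relays every signal it receives to the terminal's current
// foreground process group.
package main

import (
    "log"
    "os"
    "os/signal"
    "syscall"

    "golang.org/x/sys/unix"
)

// forwardToForeground (hypothetical name) relays signals delivered to this
// process to the foreground process group of the tty open on ttyFD, and
// returns a function that stops the forwarding.
func forwardToForeground(ttyFD int) (stop func()) {
    sigCh := make(chan os.Signal, 1)
    signal.Notify(sigCh) // no arguments: relay all incoming signals

    go func() {
        for s := range sigCh {
            // TIOCGPGRP asks the kernel which process group is currently
            // in the foreground on this terminal (tcgetpgrp(3)).
            pgid, err := unix.IoctlGetInt(ttyFD, unix.TIOCGPGRP)
            if err != nil {
                log.Printf("TIOCGPGRP: %v", err)
                continue
            }
            // kill(2) with a negative pid signals the whole process group.
            if err := syscall.Kill(-pgid, s.(syscall.Signal)); err != nil {
                log.Printf("kill(-%d, %v): %v", pgid, s, err)
            }
        }
    }()

    return func() {
        signal.Stop(sigCh)
        close(sigCh)
    }
}

func main() {
    // Forward signals to whatever tty is on stdin (e.g. a pty created for
    // an interactive exec).
    stop := forwardToForeground(int(os.Stdin.Fd()))
    defer stop()
    select {} // keep forwarding until killed
}

In the actual change, these two roles are split across the sandbox boundary:
Container.ForwardSignals catches host signals in "runsc exec" and relays them over
urpc, while Loader.signalProcess resolves the foreground process group from the cached
TTY file inside the sentry and signals it, as shown in the diff below.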
-rw-r--r--  pkg/sentry/control/proc.go  78
-rw-r--r--  pkg/sentry/fs/host/BUILD  1
-rw-r--r--  pkg/sentry/fs/host/file.go  144
-rw-r--r--  pkg/sentry/fs/host/inode.go  18
-rw-r--r--  pkg/sentry/fs/host/tty.go  185
-rw-r--r--  pkg/sentry/kernel/sessions.go  5
-rw-r--r--  runsc/boot/controller.go  46
-rw-r--r--  runsc/boot/fds.go  8
-rw-r--r--  runsc/boot/loader.go  108
-rw-r--r--  runsc/cmd/exec.go  13
-rw-r--r--  runsc/container/BUILD  2
-rw-r--r--  runsc/container/container.go  51
-rw-r--r--  runsc/container/container_test.go  117
-rw-r--r--  runsc/container/multi_container_test.go  7
-rw-r--r--  runsc/sandbox/sandbox.go  49
-rw-r--r--  runsc/test/integration/exec_test.go  55
-rw-r--r--  runsc/test/testutil/BUILD  1
-rw-r--r--  runsc/test/testutil/docker.go  21
-rw-r--r--  runsc/test/testutil/testutil.go  36
19 files changed, 732 insertions(+), 213 deletions(-)
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index faf1168bb..0ba730c1e 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -78,7 +78,7 @@ type ExecArgs struct {
Capabilities *auth.TaskCapabilities
// StdioIsPty indicates that FDs 0, 1, and 2 are connected to a host
- // pty fd.
+ // pty FD.
StdioIsPty bool
// FilePayload determines the files to give to the new process.
@@ -90,7 +90,7 @@ type ExecArgs struct {
// Exec runs a new task.
func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error {
- newTG, _, err := proc.execAsync(args)
+ newTG, _, _, err := proc.execAsync(args)
if err != nil {
return err
}
@@ -103,18 +103,27 @@ func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error {
// ExecAsync runs a new task, but doesn't wait for it to finish. It is defined
// as a function rather than a method to avoid exposing execAsync as an RPC.
-func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, error) {
+func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, error) {
return proc.execAsync(args)
}
// execAsync runs a new task, but doesn't wait for it to finish. It returns the
-// newly created thread group and its PID.
-func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, error) {
+// newly created thread group and its PID. If the stdio FDs are TTYs, then a
+// TTYFileOperations that wraps the TTY is also returned.
+func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, error) {
// Import file descriptors.
l := limits.NewLimitSet()
fdm := proc.Kernel.NewFDMap()
defer fdm.DecRef()
+ // No matter what happens, we should close all files in the FilePayload
+ // before returning. Any files that are imported will be duped.
+ defer func() {
+ for _, f := range args.FilePayload.Files {
+ f.Close()
+ }
+ }()
+
creds := auth.NewUserCredentials(
args.KUID,
args.KGID,
@@ -150,31 +159,62 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
paths := fs.GetPath(initArgs.Envv)
f, err := proc.Kernel.RootMountNamespace().ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths)
if err != nil {
- return nil, 0, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err)
+ return nil, 0, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err)
}
initArgs.Filename = f
}
mounter := fs.FileOwnerFromContext(ctx)
- for appFD, f := range args.FilePayload.Files {
- enableIoctl := args.StdioIsPty && appFD <= 2
- // Import the given file FD. This dups the FD as well.
- file, err := host.ImportFile(ctx, int(f.Fd()), mounter, enableIoctl)
- if err != nil {
- return nil, 0, err
+ var ttyFile *fs.File
+ for appFD, hostFile := range args.FilePayload.Files {
+ var appFile *fs.File
+
+ if args.StdioIsPty && appFD < 3 {
+ // Import the file as a host TTY file.
+ if ttyFile == nil {
+ var err error
+ appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), mounter, true /* isTTY */)
+ if err != nil {
+ return nil, 0, nil, err
+ }
+ defer appFile.DecRef()
+
+ // Remember this in the TTY file, as we will
+ // use it for the other stdio FDs.
+ ttyFile = appFile
+ } else {
+ // Re-use the existing TTY file, as all three
+ // stdio FDs must point to the same fs.File in
+ // order to share TTY state, specifically the
+ // foreground process group id.
+ appFile = ttyFile
+ }
+ } else {
+ // Import the file as a regular host file.
+ var err error
+ appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), mounter, false /* isTTY */)
+ if err != nil {
+ return nil, 0, nil, err
+ }
+ defer appFile.DecRef()
}
- defer file.DecRef()
-
- // We're done with this file.
- f.Close()
- if err := fdm.NewFDAt(kdefs.FD(appFD), file, kernel.FDFlags{}, l); err != nil {
- return nil, 0, err
+ // Add the file to the FD map.
+ if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil {
+ return nil, 0, nil, err
}
}
- return proc.Kernel.CreateProcess(initArgs)
+ tg, tid, err := proc.Kernel.CreateProcess(initArgs)
+ if err != nil {
+ return nil, 0, nil, err
+ }
+
+ if ttyFile == nil {
+ return tg, tid, nil, nil
+ }
+ return tg, tid, ttyFile.FileOperations.(*host.TTYFileOperations), nil
}
// PsArgs is the set of arguments to ps.
diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD
index d1a6eaf6e..c34f1c26b 100644
--- a/pkg/sentry/fs/host/BUILD
+++ b/pkg/sentry/fs/host/BUILD
@@ -17,6 +17,7 @@ go_library(
"socket.go",
"socket_state.go",
"socket_unsafe.go",
+ "tty.go",
"util.go",
"util_unsafe.go",
],
diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go
index 6f469b5cc..22a5d9f12 100644
--- a/pkg/sentry/fs/host/file.go
+++ b/pkg/sentry/fs/host/file.go
@@ -18,15 +18,12 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/fd"
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/secio"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/context"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
@@ -39,6 +36,7 @@ import (
//
// +stateify savable
type fileOperations struct {
+ fsutil.NoIoctl `state:"nosave"`
fsutil.NoopRelease `state:"nosave"`
// iops are the Inode operations for this file.
@@ -49,49 +47,49 @@ type fileOperations struct {
// dirCursor is the directory cursor.
dirCursor string
-
- // allowIoctl determines whether ioctls should be passed through to the
- // host.
- allowIoctl bool
}
// fileOperations implements fs.FileOperations.
var _ fs.FileOperations = (*fileOperations)(nil)
// NewFile creates a new File backed by the provided host file descriptor. If
-// NewFile succeeds, ownership of the fd is transferred to the returned File.
+// NewFile succeeds, ownership of the FD is transferred to the returned File.
//
// The returned File cannot be saved, since there is no guarantee that the same
-// fd will exist or represent the same file at time of restore. If such a
+// FD will exist or represent the same file at time of restore. If such a
// guarantee does exist, use ImportFile instead.
func NewFile(ctx context.Context, fd int, mounter fs.FileOwner) (*fs.File, error) {
return newFileFromDonatedFD(ctx, fd, mounter, false, false)
}
// ImportFile creates a new File backed by the provided host file descriptor.
-// Unlike NewFile, the file descriptor used by the File is duped from fd to
-// ensure that later changes to fd are not reflected by the fs.File.
+// Unlike NewFile, the file descriptor used by the File is duped from FD to
+// ensure that later changes to FD are not reflected by the fs.File.
//
-// If the returned file is saved, it will be restored by re-importing the fd
+// If the returned file is saved, it will be restored by re-importing the FD
// originally passed to ImportFile. It is the restorer's responsibility to
-// ensure that the fd represents the same file.
-func ImportFile(ctx context.Context, fd int, mounter fs.FileOwner, allowIoctl bool) (*fs.File, error) {
- return newFileFromDonatedFD(ctx, fd, mounter, true, allowIoctl)
+// ensure that the FD represents the same file.
+func ImportFile(ctx context.Context, fd int, mounter fs.FileOwner, isTTY bool) (*fs.File, error) {
+ return newFileFromDonatedFD(ctx, fd, mounter, true, isTTY)
}
-// newFileFromDonatedFD returns an fs.File from a donated fd. If the fd is
+// newFileFromDonatedFD returns an fs.File from a donated FD. If the FD is
// saveable, then saveable is true.
-func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner, saveable, allowIoctl bool) (*fs.File, error) {
+func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner, saveable, isTTY bool) (*fs.File, error) {
var s syscall.Stat_t
if err := syscall.Fstat(donated, &s); err != nil {
return nil, err
}
+ flags, err := fileFlagsFromDonatedFD(donated)
+ if err != nil {
+ return nil, err
+ }
switch s.Mode & syscall.S_IFMT {
case syscall.S_IFSOCK:
- flags, err := fileFlagsFromDonatedFD(donated)
- if err != nil {
- return nil, err
+ if isTTY {
+ return nil, fmt.Errorf("cannot import host socket as TTY")
}
+
s, err := newSocket(ctx, donated, saveable)
if err != nil {
return nil, err
@@ -101,10 +99,6 @@ func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner
})
return s, nil
default:
- flags, err := fileFlagsFromDonatedFD(donated)
- if err != nil {
- return nil, err
- }
msrc := newMountSource(ctx, "/", mounter, &Filesystem{}, fs.MountSourceFlags{}, false /* dontTranslateOwnership */)
inode, err := newInode(ctx, msrc, donated, saveable, true /* donated */)
if err != nil {
@@ -116,14 +110,18 @@ func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner
dirent := fs.NewDirent(inode, name)
defer dirent.DecRef()
- return newFile(ctx, dirent, flags, iops, allowIoctl), nil
+ if isTTY {
+ return newTTYFile(ctx, dirent, flags, iops), nil
+ }
+
+ return newFile(ctx, dirent, flags, iops), nil
}
}
func fileFlagsFromDonatedFD(donated int) (fs.FileFlags, error) {
flags, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(donated), syscall.F_GETFL, 0)
if errno != 0 {
- log.Warningf("Failed to get file flags for donated fd %d (errno=%d)", donated, errno)
+ log.Warningf("Failed to get file flags for donated FD %d (errno=%d)", donated, errno)
return fs.FileFlags{}, syscall.EIO
}
accmode := flags & syscall.O_ACCMODE
@@ -138,17 +136,14 @@ func fileFlagsFromDonatedFD(donated int) (fs.FileFlags, error) {
}
// newFile returns a new fs.File.
-func newFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations, allowIoctl bool) *fs.File {
+func newFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations) *fs.File {
if !iops.ReturnsWouldBlock() {
// Allow reading/writing at an arbitrary offset for files
// that support it.
flags.Pread = true
flags.Pwrite = true
}
- return fs.NewFile(ctx, dirent, flags, &fileOperations{
- iops: iops,
- allowIoctl: allowIoctl,
- })
+ return fs.NewFile(ctx, dirent, flags, &fileOperations{iops: iops})
}
// EventRegister implements waiter.Waitable.EventRegister.
@@ -269,7 +264,7 @@ func (f *fileOperations) Fsync(ctx context.Context, file *fs.File, start int64,
func (f *fileOperations) Flush(context.Context, *fs.File) error {
// This is a no-op because flushing the resource backing this
// file would mean closing it. We can't do that because other
- // open files may depend on the backing host fd.
+ // open files may depend on the backing host FD.
return nil
}
@@ -285,88 +280,3 @@ func (f *fileOperations) ConfigureMMap(ctx context.Context, file *fs.File, opts
func (f *fileOperations) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
return fsutil.SeekWithDirCursor(ctx, file, whence, offset, &f.dirCursor)
}
-
-// Ioctl implements fs.FileOperations.Iocotl.
-func (f *fileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- if !f.allowIoctl {
- return 0, syserror.ENOTTY
- }
- // Ignore arg[0]. This is the real FD:
- fd := f.iops.fileState.FD()
- ioctl := args[1].Uint64()
- switch ioctl {
- case linux.TCGETS:
- termios, err := ioctlGetTermios(fd)
- if err != nil {
- return 0, err
- }
- _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- return 0, err
-
- case linux.TCSETS, linux.TCSETSW, linux.TCSETSF:
- var termios linux.Termios
- if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{
- AddressSpaceActive: true,
- }); err != nil {
- return 0, err
- }
- err := ioctlSetTermios(fd, ioctl, &termios)
- return 0, err
-
- case linux.TIOCGPGRP:
- // Args: pid_t *argp
- // When successful, equivalent to *argp = tcgetpgrp(fd).
- // Get the process group ID of the foreground process group on
- // this terminal.
-
- t := kernel.TaskFromContext(ctx)
- if t == nil {
- panic(fmt.Sprintf("cannot get thread group from context %v", ctx))
- }
- tid := t.ThreadID()
- _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &tid, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- return 0, err
-
- case linux.TIOCSPGRP:
- // Args: const pid_t *argp
- // Equivalent to tcsetpgrp(fd, *argp).
- // Set the foreground process group ID of this terminal.
-
- // Not much we can do with this one at the moment, so we just
- // lie and pretend everything is great. Bash and Sh seem fine
- // with this.
- log.Warningf("Ignoring application ioctl(TIOCSPGRP) call")
- return 0, nil
-
- case linux.TIOCGWINSZ:
- // Args: struct winsize *argp
- // Get window size.
- winsize, err := ioctlGetWinsize(fd)
- if err != nil {
- return 0, err
- }
- _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- return 0, err
-
- case linux.TIOCSWINSZ:
- // Args: const struct winsize *argp
- // Set window size.
- var winsize linux.Winsize
- if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{
- AddressSpaceActive: true,
- }); err != nil {
- return 0, err
- }
- err := ioctlSetWinsize(fd, &winsize)
- return 0, err
-
- default:
- return 0, syserror.ENOTTY
- }
-}
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index e7254fa7d..c2e8ba62f 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -73,7 +73,7 @@ type inodeFileState struct {
// Common file system state.
mops *superOperations `state:"wait"`
- // descriptor is the backing host fd.
+ // descriptor is the backing host FD.
descriptor *descriptor `state:"wait"`
// Event queue for blocking operations.
@@ -167,7 +167,7 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err
// inodeOperations implements fs.InodeOperations.
var _ fs.InodeOperations = (*inodeOperations)(nil)
-// newInode returns a new fs.Inode backed by the host fd.
+// newInode returns a new fs.Inode backed by the host FD.
func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool, donated bool) (*fs.Inode, error) {
// Retrieve metadata.
var s syscall.Stat_t
@@ -212,8 +212,8 @@ func (i *inodeOperations) Mappable(inode *fs.Inode) memmap.Mappable {
return i.cachingInodeOps
}
-// ReturnsWouldBlock returns true if this host fd can return EWOULDBLOCK
-// for operations that would block.
+// ReturnsWouldBlock returns true if this host FD can return EWOULDBLOCK for
+// operations that would block.
func (i *inodeOperations) ReturnsWouldBlock() bool {
return i.fileState.descriptor.wouldBlock
}
@@ -226,7 +226,7 @@ func (i *inodeOperations) Release(context.Context) {
// Lookup implements fs.InodeOperations.Lookup.
func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) {
- // Get a new fd relative to i at name.
+ // Get a new FD relative to i at name.
fd, err := open(i, name)
if err != nil {
if err == syserror.ENOENT {
@@ -321,7 +321,7 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) unix.Bound
// GetFile implements fs.InodeOperations.GetFile.
func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
- return newFile(ctx, d, flags, i, false), nil
+ return newFile(ctx, d, flags, i), nil
}
// canMap returns true if this fs.Inode can be memory mapped.
@@ -362,7 +362,7 @@ func (i *inodeOperations) SetOwner(context.Context, *fs.Inode, fs.FileOwner) err
func (i *inodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, f fs.FilePermissions) bool {
// Can we use host kernel metadata caches?
if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) {
- // Then just change the timestamps on the fd, the host
+ // Then just change the timestamps on the FD, the host
// will synchronize the metadata update with any host
// inode and page cache.
return syscall.Fchmod(i.fileState.FD(), uint32(f.LinuxMode())) == nil
@@ -375,7 +375,7 @@ func (i *inodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, f
func (i *inodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts fs.TimeSpec) error {
// Can we use host kernel metadata caches?
if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) {
- // Then just change the timestamps on the fd, the host
+ // Then just change the timestamps on the FD, the host
// will synchronize the metadata update with any host
// inode and page cache.
return setTimestamps(i.fileState.FD(), ts)
@@ -388,7 +388,7 @@ func (i *inodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts
func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, size int64) error {
// Is the file not memory-mappable?
if !canMap(inode) {
- // Then just change the file size on the fd, the host
+ // Then just change the file size on the FD, the host
// will synchronize the metadata update with any host
// inode and page cache.
return syscall.Ftruncate(i.fileState.FD(), size)
diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
new file mode 100644
index 000000000..ad1323610
--- /dev/null
+++ b/pkg/sentry/fs/host/tty.go
@@ -0,0 +1,185 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package host
+
+import (
+ "sync"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// TTYFileOperations implements fs.FileOperations for a host file descriptor
+// that wraps a TTY FD.
+//
+// +stateify savable
+type TTYFileOperations struct {
+ fileOperations
+
+ // mu protects the fields below.
+ mu sync.Mutex
+
+ // fgProcessGroup is the foreground process group for this TTY. Will be
+ // nil if not set or if this file has been released.
+ fgProcessGroup *kernel.ProcessGroup
+}
+
+// newTTYFile returns a new fs.File that wraps a TTY FD.
+func newTTYFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations) *fs.File {
+ return fs.NewFile(ctx, dirent, flags, &TTYFileOperations{
+ fileOperations: fileOperations{iops: iops},
+ })
+}
+
+// ForegroundProcessGroup returns the foreground process for the TTY. This will
+// be nil if the foreground process has not been set or if the file has been
+// released.
+func (t *TTYFileOperations) ForegroundProcessGroup() *kernel.ProcessGroup {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ return t.fgProcessGroup
+}
+
+// Release implements fs.FileOperations.Release.
+func (t *TTYFileOperations) Release() {
+ t.mu.Lock()
+ t.fgProcessGroup = nil
+ t.mu.Unlock()
+
+ t.fileOperations.Release()
+}
+
+// Ioctl implements fs.FileOperations.Ioctl.
+func (t *TTYFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ // Ignore arg[0]. This is the real FD:
+ fd := t.fileOperations.iops.fileState.FD()
+ ioctl := args[1].Uint64()
+ switch ioctl {
+ case linux.TCGETS:
+ termios, err := ioctlGetTermios(fd)
+ if err != nil {
+ return 0, err
+ }
+ _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{
+ AddressSpaceActive: true,
+ })
+ return 0, err
+
+ case linux.TCSETS, linux.TCSETSW, linux.TCSETSF:
+ var termios linux.Termios
+ if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{
+ AddressSpaceActive: true,
+ }); err != nil {
+ return 0, err
+ }
+ err := ioctlSetTermios(fd, ioctl, &termios)
+ return 0, err
+
+ case linux.TIOCGPGRP:
+ // Args: pid_t *argp
+ // When successful, equivalent to *argp = tcgetpgrp(fd).
+ // Get the process group ID of the foreground process group on
+ // this terminal.
+
+ t.mu.Lock()
+ defer t.mu.Unlock()
+
+ if t.fgProcessGroup == nil {
+ // No process group has been set yet. Let's just lie
+ // and tell it the process group from the current task.
+ // The app is probably going to set it to something
+ // else very soon anyways.
+ t.fgProcessGroup = kernel.TaskFromContext(ctx).ThreadGroup().ProcessGroup()
+ }
+
+ // Map the ProcessGroup into a ProcessGroupID in the task's PID
+ // namespace.
+ pgID := kernel.TaskFromContext(ctx).ThreadGroup().PIDNamespace().IDOfProcessGroup(t.fgProcessGroup)
+ _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{
+ AddressSpaceActive: true,
+ })
+ return 0, err
+
+ case linux.TIOCSPGRP:
+ // Args: const pid_t *argp
+ // Equivalent to tcsetpgrp(fd, *argp).
+ // Set the foreground process group ID of this terminal.
+
+ var pgID kernel.ProcessGroupID
+ if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{
+ AddressSpaceActive: true,
+ }); err != nil {
+ return 0, err
+ }
+
+ // pgID must be non-negative.
+ if pgID < 0 {
+ return 0, syserror.EINVAL
+ }
+
+ // Process group with pgID must exist in this PID namespace.
+ task := kernel.TaskFromContext(ctx)
+ pidns := task.PIDNamespace()
+ pg := pidns.ProcessGroupWithID(pgID)
+ if pg == nil {
+ return 0, syserror.ESRCH
+ }
+
+ // Process group must be in same session as calling task's
+ // process group.
+ curSession := task.ThreadGroup().ProcessGroup().Session()
+ curSessionID := pidns.IDOfSession(curSession)
+ if pidns.IDOfSession(pg.Session()) != curSessionID {
+ return 0, syserror.EPERM
+ }
+
+ t.mu.Lock()
+ t.fgProcessGroup = pg
+ t.mu.Unlock()
+ return 0, nil
+
+ case linux.TIOCGWINSZ:
+ // Args: struct winsize *argp
+ // Get window size.
+ winsize, err := ioctlGetWinsize(fd)
+ if err != nil {
+ return 0, err
+ }
+ _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{
+ AddressSpaceActive: true,
+ })
+ return 0, err
+
+ case linux.TIOCSWINSZ:
+ // Args: const struct winsize *argp
+ // Set window size.
+ var winsize linux.Winsize
+ if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{
+ AddressSpaceActive: true,
+ }); err != nil {
+ return 0, err
+ }
+ err := ioctlSetWinsize(fd, &winsize)
+ return 0, err
+
+ default:
+ return 0, syserror.ENOTTY
+ }
+}
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index cf4e18805..b44d218d9 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -219,6 +219,11 @@ func (pg *ProcessGroup) handleOrphan() {
return
}
+// Session returns the process group's session without taking a reference.
+func (pg *ProcessGroup) Session() *Session {
+ return pg.session
+}
+
// CreateSession creates a new Session, with the ThreadGroup as the leader.
//
// EPERM may be returned if either the given ThreadGroup is already a Session
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 98356e8b7..eaeb9e2d8 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -65,6 +65,10 @@ const (
// ContainerSignal is used to send a signal to a container.
ContainerSignal = "containerManager.Signal"
+ // ContainerSignalProcess is used to send a signal to a particular
+ // process in a container.
+ ContainerSignalProcess = "containerManager.SignalProcess"
+
// ContainerStart is the URPC endpoint for running a non-root container
// within a sandbox.
ContainerStart = "containerManager.Start"
@@ -92,7 +96,7 @@ const (
SandboxStacks = "debug.Stacks"
)
-// ControlSocketAddr generates an abstract unix socket name for the given id.
+// ControlSocketAddr generates an abstract unix socket name for the given ID.
func ControlSocketAddr(id string) string {
return fmt.Sprintf("\x00runsc-sandbox.%s", id)
}
@@ -248,7 +252,7 @@ func (cm *containerManager) Destroy(cid *string, _ *struct{}) error {
}
// ExecuteAsync starts running a command on a created or running sandbox. It
-// returns the pid of the new process.
+// returns the PID of the new process.
func (cm *containerManager) ExecuteAsync(args *control.ExecArgs, pid *int32) error {
log.Debugf("containerManager.ExecuteAsync: %+v", args)
tgid, err := cm.l.executeAsync(args)
@@ -373,8 +377,12 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
// restore the state of multiple containers, nor exec processes.
cm.l.sandboxID = o.SandboxID
cm.l.mu.Lock()
- key := execID{cid: o.SandboxID}
- cm.l.processes = map[execID]*kernel.ThreadGroup{key: cm.l.k.GlobalInit()}
+ eid := execID{cid: o.SandboxID}
+ cm.l.processes = map[execID]*execProcess{
+ eid: &execProcess{
+ tg: cm.l.k.GlobalInit(),
+ },
+ }
cm.l.mu.Unlock()
// Tell the root container to start and wait for the result.
@@ -419,7 +427,7 @@ func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error
// SignalArgs are arguments to the Signal method.
type SignalArgs struct {
- // CID is the container id.
+ // CID is the container ID.
CID string
// Signo is the signal to send to the process.
@@ -430,9 +438,31 @@ type SignalArgs struct {
All bool
}
-// Signal sends a signal to the init process of the container.
-// TODO: Send signal to exec process.
+// Signal sends a signal to the root process of the container.
func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error {
log.Debugf("containerManager.Signal %q %d, all: %t", args.CID, args.Signo, args.All)
- return cm.l.signal(args.CID, args.Signo, args.All)
+ return cm.l.signalContainer(args.CID, args.Signo, args.All)
+}
+
+// SignalProcessArgs are arguments to the Signal method.
+type SignalProcessArgs struct {
+ // CID is the container ID.
+ CID string
+
+ // PID is the process ID in the given container that will be signaled.
+ PID int32
+
+ // Signo is the signal to send to the process.
+ Signo int32
+
+ // SendToForegroundProcess indicates that the signal should be sent to
+ // the foreground process group in the session that PID belongs to.
+ // This is only valid if the process is attached to a host TTY.
+ SendToForegroundProcess bool
+}
+
+// SignalProcess sends a signal to a particular process in the container.
+func (cm *containerManager) SignalProcess(args *SignalProcessArgs, _ *struct{}) error {
+ log.Debugf("containerManager.Signal: %+v", args)
+ return cm.l.signalProcess(args.CID, args.PID, args.Signo, args.SendToForegroundProcess)
}
diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go
index 92d641b68..a5a6ba8af 100644
--- a/runsc/boot/fds.go
+++ b/runsc/boot/fds.go
@@ -25,8 +25,8 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
)
-// createFDMap creates an fd map that contains stdin, stdout, and stderr. If
-// console is true, then ioctl calls will be passed through to the host fd.
+// createFDMap creates an FD map that contains stdin, stdout, and stderr. If
+// console is true, then ioctl calls will be passed through to the host FD.
// Upon success, createFDMap dups then closes stdioFDs.
func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) {
if len(stdioFDs) != 3 {
@@ -36,7 +36,7 @@ func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, cons
fdm := k.NewFDMap()
defer fdm.DecRef()
- // Maps sandbox fd to host fd.
+ // Maps sandbox FD to host FD.
fdMap := map[int]int{
0: stdioFDs[0],
1: stdioFDs[1],
@@ -45,7 +45,7 @@ func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, cons
mounter := fs.FileOwnerFromContext(ctx)
for sfd, hfd := range fdMap {
- file, err := host.ImportFile(ctx, hfd, mounter, console /* allow ioctls */)
+ file, err := host.ImportFile(ctx, hfd, mounter, console /* isTTY */)
if err != nil {
return nil, fmt.Errorf("failed to import fd %d: %v", hfd, err)
}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 9fa9b51a0..766a2e968 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -31,6 +31,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/control"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
"gvisor.googlesource.com/gvisor/pkg/sentry/inet"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
@@ -112,7 +113,7 @@ type Loader struct {
// have the corresponding pid set.
//
// processes is guardded by mu.
- processes map[execID]*kernel.ThreadGroup
+ processes map[execID]*execProcess
}
// execID uniquely identifies a sentry process.
@@ -121,6 +122,14 @@ type execID struct {
pid kernel.ThreadID
}
+// execProcess contains the thread group and host TTY of a sentry process.
+type execProcess struct {
+ tg *kernel.ThreadGroup
+
+ // tty will be nil if the process is not attached to a terminal.
+ tty *host.TTYFileOperations
+}
+
func init() {
// Initialize the random number generator.
rand.Seed(gtime.Now().UnixNano())
@@ -276,7 +285,7 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int,
startSignalForwarding: startSignalForwarding,
rootProcArgs: procArgs,
sandboxID: id,
- processes: make(map[execID]*kernel.ThreadGroup),
+ processes: make(map[execID]*execProcess),
}
ctrl.manager.l = l
return l, nil
@@ -330,7 +339,7 @@ func createPlatform(conf *Config, deviceFD int) (platform.Platform, error) {
case PlatformKVM:
log.Infof("Platform: kvm")
if deviceFD < 0 {
- return nil, fmt.Errorf("kvm device fd must be provided")
+ return nil, fmt.Errorf("kvm device FD must be provided")
}
return kvm.New(os.NewFile(uintptr(deviceFD), "kvm device"))
default:
@@ -413,8 +422,8 @@ func (l *Loader) run() error {
}
l.mu.Lock()
- key := execID{cid: l.sandboxID}
- l.processes[key] = l.k.GlobalInit()
+ eid := execID{cid: l.sandboxID}
+ l.processes[eid] = &execProcess{tg: l.k.GlobalInit()}
l.mu.Unlock()
// Start signal forwarding only after an init process is created.
@@ -510,8 +519,8 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
l.mu.Lock()
defer l.mu.Unlock()
- key := execID{cid: cid}
- l.processes[key] = tg
+ eid := execID{cid: cid}
+ l.processes[eid] = &execProcess{tg: tg}
return nil
}
@@ -520,7 +529,7 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
// filesystem.
func (l *Loader) destroyContainer(cid string) error {
// First kill and wait for all processes in the container.
- if err := l.signal(cid, int32(linux.SIGKILL), true /*all*/); err != nil {
+ if err := l.signalContainer(cid, int32(linux.SIGKILL), true /*all*/); err != nil {
return fmt.Errorf("failed to SIGKILL all container processes: %v", err)
}
@@ -549,12 +558,12 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
// process with the same Root.
l.mu.Lock()
rootKey := execID{cid: args.ContainerID}
- tg, ok := l.processes[rootKey]
+ ep, ok := l.processes[rootKey]
l.mu.Unlock()
if !ok {
return 0, fmt.Errorf("cannot exec in container %q: no such container", args.ContainerID)
}
- tg.Leader().WithMuLocked(func(t *kernel.Task) {
+ ep.tg.Leader().WithMuLocked(func(t *kernel.Task) {
args.Root = t.FSContext().RootDirectory()
})
if args.Root != nil {
@@ -563,7 +572,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
// Start the process.
proc := control.Proc{Kernel: l.k}
- tg, tgid, err := control.ExecAsync(&proc, args)
+ tg, tgid, ttyFile, err := control.ExecAsync(&proc, args)
if err != nil {
return 0, fmt.Errorf("error executing: %+v: %v", args, err)
}
@@ -573,7 +582,10 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
l.mu.Lock()
defer l.mu.Unlock()
eid := execID{cid: args.ContainerID, pid: tgid}
- l.processes[eid] = tg
+ l.processes[eid] = &execProcess{
+ tg: tg,
+ tty: ttyFile,
+ }
log.Debugf("updated processes: %v", l.processes)
return tgid, nil
@@ -584,8 +596,8 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
// Don't defer unlock, as doing so would make it impossible for
// multiple clients to wait on the same container.
l.mu.Lock()
- key := execID{cid: cid}
- tg, ok := l.processes[key]
+ eid := execID{cid: cid}
+ ep, ok := l.processes[eid]
l.mu.Unlock()
if !ok {
return fmt.Errorf("can't find process for container %q in %v", cid, l.processes)
@@ -593,7 +605,7 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
// If the thread either has already exited or exits during waiting,
// consider the container exited.
- ws := l.wait(tg)
+ ws := l.wait(ep.tg)
*waitStatus = ws
return nil
}
@@ -610,10 +622,10 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, wai
// entry in l.processes.
l.mu.Lock()
eid := execID{cid: cid, pid: tgid}
- tg, ok := l.processes[eid]
+ ep, ok := l.processes[eid]
l.mu.Unlock()
if ok {
- ws := l.wait(tg)
+ ws := l.wait(ep.tg)
*waitStatus = ws
if clearStatus {
// Remove tg from the cache.
@@ -626,8 +638,8 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, wai
}
// This process wasn't created by runsc exec or start, so just find it
- // by pid and hope it hasn't exited yet.
- tg = l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid))
+ // by PID and hope it hasn't exited yet.
+ tg := l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid))
if tg == nil {
return fmt.Errorf("no thread group with ID %d", tgid)
}
@@ -682,18 +694,66 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
}
}
-func (l *Loader) signal(cid string, signo int32, all bool) error {
+// signalProcess sends a signal to the process with the given PID. If
+// sendToFGProcess is true, then the signal will be sent to the foreground
+// process group in the same session that PID belongs to.
+func (l *Loader) signalProcess(cid string, pid, signo int32, sendToFGProcess bool) error {
+ si := arch.SignalInfo{Signo: signo}
+
+ if pid <= 0 {
+ return fmt.Errorf("failed to signal container %q PID %d: PID must be positive", cid, pid)
+ }
+
+ eid := execID{
+ cid: cid,
+ pid: kernel.ThreadID(pid),
+ }
l.mu.Lock()
- key := execID{cid: cid}
- tg, ok := l.processes[key]
+ ep, ok := l.processes[eid]
l.mu.Unlock()
+
if !ok {
- return fmt.Errorf("failed to signal container %q: no such container", cid)
+ return fmt.Errorf("failed to signal container %q PID %d: no such PID", cid, pid)
+ }
+
+ if !sendToFGProcess {
+ // Send signal directly to exec process.
+ return ep.tg.SendSignal(&si)
}
+ // Lookup foreground process group from the TTY for the given process,
+ // and send the signal to it.
+ if ep.tty == nil {
+ return fmt.Errorf("failed to signal foreground process group in container %q PID %d: no TTY attached", cid, pid)
+ }
+ pg := ep.tty.ForegroundProcessGroup()
+ if pg == nil {
+ // No foreground process group has been set. Signal the
+ // original thread group.
+ log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, pid, pid)
+ return ep.tg.SendSignal(&si)
+ }
+
+ // Send the signal.
+ return pg.Originator().SendSignal(&si)
+}
+
+// signalContainer sends a signal to the root container process, or to all
+// processes in the container if all is true.
+func (l *Loader) signalContainer(cid string, signo int32, all bool) error {
si := arch.SignalInfo{Signo: signo}
+
+ l.mu.Lock()
+ defer l.mu.Unlock()
+
+ eid := execID{cid: cid}
+ ep, ok := l.processes[eid]
+ if !ok {
+ return fmt.Errorf("failed to signal container %q: no such container", cid)
+ }
+
if !all {
- return tg.Leader().SendSignal(&si)
+ return ep.tg.SendSignal(&si)
}
// Pause the kernel to prevent new processes from being created while
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index 28229dbcf..336edf3f6 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -158,6 +158,13 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("error getting processes for container: %v", err)
}
+ if e.StdioIsPty {
+ // Forward signals sent to this process to the foreground
+ // process in the sandbox.
+ stopForwarding := c.ForwardSignals(pid, true /* fgProcess */)
+ defer stopForwarding()
+ }
+
// Write the sandbox-internal pid if required.
if ex.internalPidFile != "" {
pidStr := []byte(strconv.Itoa(int(pid)))
@@ -216,9 +223,9 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat
cmd.Stderr = os.Stderr
// If the console control socket file is provided, then create a new
- // pty master/slave pair and set the tty on the sandbox process.
+ // pty master/slave pair and set the TTY on the sandbox process.
if ex.consoleSocket != "" {
- // Create a new tty pair and send the master on the provided
+ // Create a new TTY pair and send the master on the provided
// socket.
tty, err := console.NewWithSocket(ex.consoleSocket)
if err != nil {
@@ -226,7 +233,7 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat
}
defer tty.Close()
- // Set stdio to the new tty slave.
+ // Set stdio to the new TTY slave.
cmd.Stdin = tty
cmd.Stdout = tty
cmd.Stderr = tty
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index e68fb1e8e..bf8b9a2ab 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -50,10 +50,12 @@ go_test(
"//pkg/sentry/control",
"//pkg/sentry/kernel/auth",
"//pkg/unet",
+ "//pkg/urpc",
"//runsc/boot",
"//runsc/specutils",
"//runsc/test/testutil",
"@com_github_cenkalti_backoff//:go_default_library",
+ "@com_github_kr_pty//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
],
diff --git a/runsc/container/container.go b/runsc/container/container.go
index be833c03d..4b0037b4e 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -22,6 +22,7 @@ import (
"io/ioutil"
"os"
"os/exec"
+ "os/signal"
"path/filepath"
"regexp"
"strconv"
@@ -107,14 +108,13 @@ type Container struct {
Owner string `json:"owner"`
// ConsoleSocket is the path to a unix domain socket that will receive
- // the console FD. It is only used during create, so we don't need to
- // store it in the metadata.
- ConsoleSocket string `json:"-"`
+ // the console FD.
+ ConsoleSocket string `json:"consoleSocket"`
// Status is the current container Status.
Status Status `json:"status"`
- // GoferPid is the pid of the gofer running along side the sandbox. May
+ // GoferPid is the PID of the gofer running alongside the sandbox. May
// be 0 if the gofer has been killed.
GoferPid int `json:"goferPid"`
@@ -313,12 +313,12 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
return nil, err
}
- // Write the pid file. Containerd considers the create complete after
+ // Write the PID file. Containerd considers the create complete after
// this file is created, so it must be the last thing we do.
if pidFile != "" {
if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.Pid())), 0644); err != nil {
c.Destroy()
- return nil, fmt.Errorf("error writing pid file: %v", err)
+ return nil, fmt.Errorf("error writing PID file: %v", err)
}
}
@@ -406,7 +406,7 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
return c.Wait()
}
-// Execute runs the specified command in the container. It returns the pid of
+// Execute runs the specified command in the container. It returns the PID of
// the newly created process.
func (c *Container) Execute(args *control.ExecArgs) (int32, error) {
log.Debugf("Execute in container %q, args: %+v", c.ID, args)
@@ -429,7 +429,7 @@ func (c *Container) Event() (*boot.Event, error) {
// Pid returns the Pid of the sandbox the container is running in, or -1 if the
// container is not running.
func (c *Container) Pid() int {
- if err := c.requireStatus("pid", Created, Running, Paused); err != nil {
+ if err := c.requireStatus("get PID", Created, Running, Paused); err != nil {
return -1
}
return c.Sandbox.Pid
@@ -449,7 +449,7 @@ func (c *Container) Wait() (syscall.WaitStatus, error) {
// WaitRootPID waits for process 'pid' in the sandbox's PID namespace and
// returns its WaitStatus.
func (c *Container) WaitRootPID(pid int32, clearStatus bool) (syscall.WaitStatus, error) {
- log.Debugf("Wait on pid %d in sandbox %q", pid, c.Sandbox.ID)
+ log.Debugf("Wait on PID %d in sandbox %q", pid, c.Sandbox.ID)
if !c.isSandboxRunning() {
return 0, fmt.Errorf("container is not running")
}
@@ -459,7 +459,7 @@ func (c *Container) WaitRootPID(pid int32, clearStatus bool) (syscall.WaitStatus
// WaitPID waits for process 'pid' in the container's PID namespace and returns
// its WaitStatus.
func (c *Container) WaitPID(pid int32, clearStatus bool) (syscall.WaitStatus, error) {
- log.Debugf("Wait on pid %d in container %q", pid, c.ID)
+ log.Debugf("Wait on PID %d in container %q", pid, c.ID)
if !c.isSandboxRunning() {
return 0, fmt.Errorf("container is not running")
}
@@ -483,7 +483,30 @@ func (c *Container) Signal(sig syscall.Signal, all bool) error {
if !c.isSandboxRunning() {
return fmt.Errorf("container is not running")
}
- return c.Sandbox.Signal(c.ID, sig, all)
+ return c.Sandbox.SignalContainer(c.ID, sig, all)
+}
+
+// ForwardSignals forwards all signals received by the current process to the
+// container process inside the sandbox. It returns a function that will stop
+// forwarding signals.
+func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() {
+ log.Debugf("Forwarding all signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
+ sigCh := make(chan os.Signal, 1)
+ signal.Notify(sigCh)
+ go func() {
+ for s := range sigCh {
+ log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", s, c.ID, pid, fgProcess)
+ if err := c.Sandbox.SignalProcess(c.ID, pid, s.(syscall.Signal), fgProcess); err != nil {
+ log.Warningf("error forwarding signal %d to container %q: %v", s, c.ID, err)
+ }
+ }
+ log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
+ }()
+
+ return func() {
+ signal.Stop(sigCh)
+ close(sigCh)
+ }
}
// Checkpoint sends the checkpoint call to the container.
@@ -683,9 +706,9 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
if err != nil {
return nil, err
}
- sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox io fd"))
+ sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox IO FD"))
- goferEnd := os.NewFile(uintptr(fds[1]), "gofer io fd")
+ goferEnd := os.NewFile(uintptr(fds[1]), "gofer IO FD")
defer goferEnd.Close()
goferEnds = append(goferEnds, goferEnd)
@@ -710,7 +733,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
if err := specutils.StartInNS(cmd, nss); err != nil {
return nil, err
}
- log.Infof("Gofer started, pid: %d", cmd.Process.Pid)
+ log.Infof("Gofer started, PID: %d", cmd.Process.Pid)
c.GoferPid = cmd.Process.Pid
return sandEnds, nil
}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index aebfb2878..84b59ffd8 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -31,6 +31,7 @@ import (
"time"
"github.com/cenkalti/backoff"
+ "github.com/kr/pty"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
@@ -38,6 +39,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/control"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
"gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.googlesource.com/gvisor/pkg/urpc"
"gvisor.googlesource.com/gvisor/runsc/boot"
"gvisor.googlesource.com/gvisor/runsc/test/testutil"
)
@@ -1577,6 +1579,121 @@ func TestRootNotMount(t *testing.T) {
}
}
+func TestJobControlSignalExec(t *testing.T) {
+ spec := testutil.NewSpecWithArgs("/bin/sleep", "10000")
+ conf := testutil.TestConfig()
+
+ rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // Create and start the container.
+ c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "")
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer c.Destroy()
+ if err := c.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
+
+ // Create a pty master/slave. The slave will be passed to the exec
+ // process.
+ ptyMaster, ptySlave, err := pty.Open()
+ if err != nil {
+ t.Fatalf("error opening pty: %v", err)
+ }
+ defer ptyMaster.Close()
+ defer ptySlave.Close()
+
+ // Exec bash and attach a terminal.
+ args := &control.ExecArgs{
+ Filename: "/bin/bash",
+ // Don't let bash execute from profile or rc files, otherwise
+ // our PID counts get messed up.
+ Argv: []string{"/bin/bash", "--noprofile", "--norc"},
+ // Pass the pty slave as FD 0, 1, and 2.
+ FilePayload: urpc.FilePayload{
+ Files: []*os.File{ptySlave, ptySlave, ptySlave},
+ },
+ StdioIsPty: true,
+ }
+
+ pid, err := c.Execute(args)
+ if err != nil {
+ t.Fatalf("error executing: %v", err)
+ }
+ if pid != 2 {
+ t.Fatalf("exec got pid %d, wanted %d", pid, 2)
+ }
+
+ // Make sure all the processes are running.
+ expectedPL := []*control.Process{
+ // Root container process.
+ {PID: 1, Cmd: "sleep"},
+ // Bash from exec process.
+ {PID: 2, Cmd: "bash"},
+ }
+ if err := waitForProcessList(c, expectedPL); err != nil {
+ t.Error(err)
+ }
+
+ // Execute sleep.
+ ptyMaster.Write([]byte("sleep 100\n"))
+
+ // Wait for it to start. Sleep's PPID is bash's PID.
+ expectedPL = append(expectedPL, &control.Process{PID: 3, PPID: 2, Cmd: "sleep"})
+ if err := waitForProcessList(c, expectedPL); err != nil {
+ t.Error(err)
+ }
+
+ // Send a SIGTERM to the foreground process for the exec PID. Note that
+ // although we pass in the PID of "bash", it should actually terminate
+ // "sleep", since that is the foreground process.
+ if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.SIGTERM, true /* fgProcess */); err != nil {
+ t.Fatalf("error signaling container: %v", err)
+ }
+
+ // Sleep process should be gone.
+ expectedPL = expectedPL[:len(expectedPL)-1]
+ if err := waitForProcessList(c, expectedPL); err != nil {
+ t.Error(err)
+ }
+
+ // Sleep is dead, but it may take more time for bash to notice and
+ // change the foreground process back to itself. We know it is done
+ // when bash writes "Terminated" to the pty.
+ if err := testutil.WaitUntilRead(ptyMaster, "Terminated", nil, 5*time.Second); err != nil {
+ t.Fatalf("bash did not take over pty: %v", err)
+ }
+
+ // Send a SIGKILL to the foreground process again. This time "bash"
+ // should be killed. We use SIGKILL instead of SIGTERM or SIGINT
+ // because bash ignores those.
+ if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.SIGKILL, true /* fgProcess */); err != nil {
+ t.Fatalf("error signaling container: %v", err)
+ }
+ expectedPL = expectedPL[:1]
+ if err := waitForProcessList(c, expectedPL); err != nil {
+ t.Error(err)
+ }
+
+ // Make sure the process indicates it was killed by a SIGKILL.
+ ws, err := c.WaitPID(pid, true)
+ if err != nil {
+ t.Errorf("waiting on container failed: %v", err)
+ }
+ if !ws.Signaled() {
+ t.Error("ws.Signaled() got false, want true")
+ }
+ if got, want := ws.Signal(), syscall.SIGKILL; got != want {
+ t.Errorf("ws.Signal() got %v, want %v", got, want)
+ }
+}
+
// executeSync synchronously executes a new process.
func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) {
pid, err := cont.Execute(args)
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index e5f7daf60..ab200b75c 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -477,11 +477,12 @@ func TestMultiContainerDestroy(t *testing.T) {
}
func TestMultiContainerProcesses(t *testing.T) {
- // Note: use 'while true' to keep 'sh' process around. Otherwise, shell will
- // just execve into 'sleep' and both containers will look the same.
+ // Note: use curly braces to keep 'sh' process around. Otherwise, shell
+ // will just execve into 'sleep' and both containers will look the
+ // same.
specs, ids := createSpecs(
[]string{"sleep", "100"},
- []string{"sh", "-c", "while true; do sleep 100; done"})
+ []string{"sh", "-c", "{ sleep 100; }"})
conf := testutil.TestConfig()
containers, cleanup, err := startContainers(conf, specs, ids)
if err != nil {
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 4111b1a60..e4853af69 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -80,7 +80,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// StartRoot starts running the root container process inside the sandbox.
func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error {
- log.Debugf("Start root sandbox %q, pid: %d", s.ID, s.Pid)
+ log.Debugf("Start root sandbox %q, PID: %d", s.ID, s.Pid)
conn, err := s.sandboxConnect()
if err != nil {
return err
@@ -107,7 +107,7 @@ func (s *Sandbox) Start(spec *specs.Spec, conf *boot.Config, cid string, goferFi
defer f.Close()
}
- log.Debugf("Start non-root container sandbox %q, pid: %d", s.ID, s.Pid)
+ log.Debugf("Start non-root container sandbox %q, PID: %d", s.ID, s.Pid)
sandboxConn, err := s.sandboxConnect()
if err != nil {
return fmt.Errorf("couldn't connect to sandbox: %v", err)
@@ -147,7 +147,7 @@ func (s *Sandbox) Restore(cid string, spec *specs.Spec, conf *boot.Config, f str
SandboxID: s.ID,
}
- // If the platform needs a device fd we must pass it in.
+ // If the platform needs a device FD we must pass it in.
if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil {
return err
} else if deviceFile != nil {
@@ -192,7 +192,7 @@ func (s *Sandbox) Processes(cid string) ([]*control.Process, error) {
return pl, nil
}
-// Execute runs the specified command in the container. It returns the pid of
+// Execute runs the specified command in the container. It returns the PID of
// the newly created process.
func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) {
log.Debugf("Executing new process in container %q in sandbox %q", args.ContainerID, s.ID)
@@ -239,7 +239,7 @@ func (s *Sandbox) sandboxConnect() (*urpc.Client, error) {
}
func (s *Sandbox) connError(err error) error {
- return fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err)
+ return fmt.Errorf("error connecting to control server at PID %d: %v", s.Pid, err)
}
// createSandboxProcess starts the sandbox as a subprocess by running the "boot"
@@ -322,7 +322,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
nextFD++
}
- // If the platform needs a device fd we must pass it in.
+ // If the platform needs a device FD we must pass it in.
if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil {
return err
} else if deviceFile != nil {
@@ -338,7 +338,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
cmd.Stderr = os.Stderr
// If the console control socket file is provided, then create a new
- // pty master/slave pair and set the tty on the sandbox process.
+ // pty master/slave pair and set the TTY on the sandbox process.
if consoleEnabled {
// console.NewWithSocket will send the master on the socket,
// and return the slave.
@@ -461,7 +461,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// Add container as the last argument.
cmd.Args = append(cmd.Args, s.ID)
- // Log the fds we are donating to the sandbox process.
+ // Log the FDs we are donating to the sandbox process.
for i, f := range cmd.ExtraFiles {
log.Debugf("Donating FD %d: %q", i+3, f.Name())
}
@@ -472,7 +472,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
return err
}
s.Pid = cmd.Process.Pid
- log.Infof("Sandbox started, pid: %d", s.Pid)
+ log.Infof("Sandbox started, PID: %d", s.Pid)
return nil
}
@@ -572,9 +572,10 @@ func (s *Sandbox) destroy() error {
return nil
}
-// Signal sends the signal to a container in the sandbox. If all is true and
-// signal is SIGKILL, then waits for all processes to exit before returning.
-func (s *Sandbox) Signal(cid string, sig syscall.Signal, all bool) error {
+// SignalContainer sends the signal to a container in the sandbox. If all is
+// true and signal is SIGKILL, then waits for all processes to exit before
+// returning.
+func (s *Sandbox) SignalContainer(cid string, sig syscall.Signal, all bool) error {
log.Debugf("Signal sandbox %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
@@ -593,6 +594,30 @@ func (s *Sandbox) Signal(cid string, sig syscall.Signal, all bool) error {
return nil
}
+// SignalProcess sends the signal to a particular process in the container. If
+// fgProcess is true, then the signal is sent to the foreground process group
+// in the same session that PID belongs to. This is only valid if the process
+// is attached to a host TTY.
+func (s *Sandbox) SignalProcess(cid string, pid int32, sig syscall.Signal, fgProcess bool) error {
+ log.Debugf("Signal sandbox %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ args := boot.SignalProcessArgs{
+ CID: cid,
+ Signo: int32(sig),
+ PID: pid,
+ SendToForegroundProcess: fgProcess,
+ }
+ if err := conn.Call(boot.ContainerSignalProcess, &args, nil); err != nil {
+ return fmt.Errorf("err signaling container %q PID %d: %v", cid, pid, err)
+ }
+ return nil
+}
+
// Checkpoint sends the checkpoint call for a container in the sandbox.
// The statefile will be written to f.
func (s *Sandbox) Checkpoint(cid string, f *os.File) error {
diff --git a/runsc/test/integration/exec_test.go b/runsc/test/integration/exec_test.go
index 910c36597..ddd088223 100644
--- a/runsc/test/integration/exec_test.go
+++ b/runsc/test/integration/exec_test.go
@@ -27,6 +27,7 @@
package integration
import (
+ "syscall"
"testing"
"time"
@@ -60,3 +61,57 @@ func TestExecCapabilities(t *testing.T) {
t.Errorf("wrong capabilities, got: %q, want: %q", got, want)
}
}
+
+func TestExecJobControl(t *testing.T) {
+ if err := testutil.Pull("alpine"); err != nil {
+ t.Fatalf("docker pull failed: %v", err)
+ }
+ d := testutil.MakeDocker("exec-test")
+
+ // Start the container.
+ if _, err := d.Run("alpine", "sleep", "1000"); err != nil {
+ t.Fatalf("docker run failed: %v", err)
+ }
+ defer d.CleanUp()
+
+ // Exec 'sh' with an attached pty.
+ cmd, ptmx, err := d.ExecWithTerminal("sh")
+ if err != nil {
+ t.Fatalf("docker exec failed: %v", err)
+ }
+ defer ptmx.Close()
+
+ // Call "sleep 100" in the shell.
+ if _, err := ptmx.Write([]byte("sleep 100\n")); err != nil {
+ t.Fatalf("error writing to pty: %v", err)
+ }
+
+ // Give shell a few seconds to start executing the sleep.
+ time.Sleep(2 * time.Second)
+
+ // Send a ^C to the pty, which should kill sleep, but not the shell.
+ // \x03 is ASCII "end of text", which is the same as ^C.
+ if _, err := ptmx.Write([]byte{'\x03'}); err != nil {
+ t.Fatalf("error writing to pty: %v", err)
+ }
+
+ // The shell should still be alive at this point. Sleep should have
+ // exited with code 2+128=130. We'll exit with 10 plus that number, so
+ // that we can be sure that the shell did not get signalled.
+ if _, err := ptmx.Write([]byte("exit $(expr $? + 10)\n")); err != nil {
+ t.Fatalf("error writing to pty: %v", err)
+ }
+
+ // Exec process should exit with code 10+130=140.
+ ps, err := cmd.Process.Wait()
+ if err != nil {
+ t.Fatalf("error waiting for exec process: %v", err)
+ }
+ ws := ps.Sys().(syscall.WaitStatus)
+ if !ws.Exited() {
+ t.Errorf("ws.Exited got false, want true")
+ }
+ if got, want := ws.ExitStatus(), 140; got != want {
+ t.Errorf("ws.ExitedStatus got %d, want %d", got, want)
+ }
+}
diff --git a/runsc/test/testutil/BUILD b/runsc/test/testutil/BUILD
index ca91e07ff..da2535bfa 100644
--- a/runsc/test/testutil/BUILD
+++ b/runsc/test/testutil/BUILD
@@ -17,6 +17,7 @@ go_library(
"//runsc/boot",
"//runsc/specutils",
"@com_github_cenkalti_backoff//:go_default_library",
+ "@com_github_kr_pty//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
"@com_github_syndtr_gocapability//capability:go_default_library",
],
diff --git a/runsc/test/testutil/docker.go b/runsc/test/testutil/docker.go
index 7f5909987..55ca353b8 100644
--- a/runsc/test/testutil/docker.go
+++ b/runsc/test/testutil/docker.go
@@ -26,6 +26,8 @@ import (
"strconv"
"strings"
"time"
+
+ "github.com/kr/pty"
)
func init() {
@@ -131,6 +133,17 @@ func do(args ...string) (string, error) {
return string(out), nil
}
+// doWithPty executes docker command with stdio attached to a pty.
+func doWithPty(args ...string) (*exec.Cmd, *os.File, error) {
+ fmt.Printf("Running with pty: docker %s\n", args)
+ cmd := exec.Command("docker", args...)
+ ptmx, err := pty.Start(cmd)
+ if err != nil {
+ return nil, nil, fmt.Errorf("error executing docker %s with a pty: %v", args, err)
+ }
+ return cmd, ptmx, nil
+}
+
// Pull pulls a docker image. This is used in tests to isolate the
// time to pull the image off the network from the time to actually
// start the container, to avoid timeouts over slow networks.
@@ -197,6 +210,14 @@ func (d *Docker) Exec(args ...string) (string, error) {
return do(a...)
}
+// ExecWithTerminal calls 'docker exec -it' with the arguments provided and
+// attaches a pty to stdio.
+func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) {
+ a := []string{"exec", "-it", d.Name}
+ a = append(a, args...)
+ return doWithPty(a...)
+}
+
// Pause calls 'docker pause'.
func (d *Docker) Pause() error {
if _, err := do("pause", d.Name); err != nil {
diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go
index 07d66e469..cdc7f78c3 100644
--- a/runsc/test/testutil/testutil.go
+++ b/runsc/test/testutil/testutil.go
@@ -16,6 +16,7 @@
package testutil
import (
+ "bufio"
"context"
"encoding/json"
"fmt"
@@ -27,6 +28,8 @@ import (
"os/signal"
"path/filepath"
"runtime"
+ "strings"
+ "sync/atomic"
"syscall"
"time"
@@ -315,3 +318,36 @@ func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
}
}
}
+
+// WaitUntilRead reads from the given reader until the wanted string is found
+// or until timeout.
+func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error {
+ sc := bufio.NewScanner(r)
+ if split != nil {
+ sc.Split(split)
+ }
+ // done must be accessed atomically. A value greater than 0 indicates
+ // that the read loop can exit.
+ var done uint32
+ doneCh := make(chan struct{})
+ go func() {
+ for sc.Scan() {
+ t := sc.Text()
+ if strings.Contains(t, want) {
+ atomic.StoreUint32(&done, 1)
+ close(doneCh)
+ break
+ }
+ if atomic.LoadUint32(&done) > 0 {
+ break
+ }
+ }
+ }()
+ select {
+ case <-time.After(timeout):
+ atomic.StoreUint32(&done, 1)
+ return fmt.Errorf("timeout waiting to read %q", want)
+ case <-doneCh:
+ return nil
+ }
+}