diff options
-rw-r--r-- | pkg/sentry/control/proc.go | 78 | ||||
-rw-r--r-- | pkg/sentry/fs/host/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fs/host/file.go | 144 | ||||
-rw-r--r-- | pkg/sentry/fs/host/inode.go | 18 | ||||
-rw-r--r-- | pkg/sentry/fs/host/tty.go | 185 | ||||
-rw-r--r-- | pkg/sentry/kernel/sessions.go | 5 | ||||
-rw-r--r-- | runsc/boot/controller.go | 46 | ||||
-rw-r--r-- | runsc/boot/fds.go | 8 | ||||
-rw-r--r-- | runsc/boot/loader.go | 108 | ||||
-rw-r--r-- | runsc/cmd/exec.go | 13 | ||||
-rw-r--r-- | runsc/container/BUILD | 2 | ||||
-rw-r--r-- | runsc/container/container.go | 51 | ||||
-rw-r--r-- | runsc/container/container_test.go | 117 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 7 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 49 | ||||
-rw-r--r-- | runsc/test/integration/exec_test.go | 55 | ||||
-rw-r--r-- | runsc/test/testutil/BUILD | 1 | ||||
-rw-r--r-- | runsc/test/testutil/docker.go | 21 | ||||
-rw-r--r-- | runsc/test/testutil/testutil.go | 36 |
19 files changed, 732 insertions, 213 deletions
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index faf1168bb..0ba730c1e 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -78,7 +78,7 @@ type ExecArgs struct { Capabilities *auth.TaskCapabilities // StdioIsPty indicates that FDs 0, 1, and 2 are connected to a host - // pty fd. + // pty FD. StdioIsPty bool // FilePayload determines the files to give to the new process. @@ -90,7 +90,7 @@ type ExecArgs struct { // Exec runs a new task. func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error { - newTG, _, err := proc.execAsync(args) + newTG, _, _, err := proc.execAsync(args) if err != nil { return err } @@ -103,18 +103,27 @@ func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error { // ExecAsync runs a new task, but doesn't wait for it to finish. It is defined // as a function rather than a method to avoid exposing execAsync as an RPC. -func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, error) { +func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, error) { return proc.execAsync(args) } // execAsync runs a new task, but doesn't wait for it to finish. It returns the -// newly created thread group and its PID. -func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, error) { +// newly created thread group and its PID. If the stdio FDs are TTYs, then a +// TTYFileOperations that wraps the TTY is also returned. +func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, error) { // Import file descriptors. l := limits.NewLimitSet() fdm := proc.Kernel.NewFDMap() defer fdm.DecRef() + // No matter what happens, we should close all files in the FilePayload + // before returning. Any files that are imported will be duped. 
+ defer func() { + for _, f := range args.FilePayload.Files { + f.Close() + } + }() + creds := auth.NewUserCredentials( args.KUID, args.KGID, @@ -150,31 +159,62 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI paths := fs.GetPath(initArgs.Envv) f, err := proc.Kernel.RootMountNamespace().ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths) if err != nil { - return nil, 0, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) + return nil, 0, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) } initArgs.Filename = f } mounter := fs.FileOwnerFromContext(ctx) - for appFD, f := range args.FilePayload.Files { - enableIoctl := args.StdioIsPty && appFD <= 2 - // Import the given file FD. This dups the FD as well. - file, err := host.ImportFile(ctx, int(f.Fd()), mounter, enableIoctl) - if err != nil { - return nil, 0, err + var ttyFile *fs.File + for appFD, hostFile := range args.FilePayload.Files { + var appFile *fs.File + + if args.StdioIsPty && appFD < 3 { + // Import the file as a host TTY file. + if ttyFile == nil { + var err error + appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), mounter, true /* isTTY */) + if err != nil { + return nil, 0, nil, err + } + defer appFile.DecRef() + + // Remember this in the TTY file, as we will + // use it for the other stdio FDs. + ttyFile = appFile + } else { + // Re-use the existing TTY file, as all three + // stdio FDs must point to the same fs.File in + // order to share TTY state, specifically the + // foreground process group id. + appFile = ttyFile + } + } else { + // Import the file as a regular host file. + var err error + appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), mounter, false /* isTTY */) + if err != nil { + return nil, 0, nil, err + } + defer appFile.DecRef() } - defer file.DecRef() - - // We're done with this file. 
- f.Close() - if err := fdm.NewFDAt(kdefs.FD(appFD), file, kernel.FDFlags{}, l); err != nil { - return nil, 0, err + // Add the file to the FD map. + if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil { + return nil, 0, nil, err } } - return proc.Kernel.CreateProcess(initArgs) + tg, tid, err := proc.Kernel.CreateProcess(initArgs) + if err != nil { + return nil, 0, nil, err + } + + if ttyFile == nil { + return tg, tid, nil, nil + } + return tg, tid, ttyFile.FileOperations.(*host.TTYFileOperations), nil } // PsArgs is the set of arguments to ps. diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index d1a6eaf6e..c34f1c26b 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -17,6 +17,7 @@ go_library( "socket.go", "socket_state.go", "socket_unsafe.go", + "tty.go", "util.go", "util_unsafe.go", ], diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go index 6f469b5cc..22a5d9f12 100644 --- a/pkg/sentry/fs/host/file.go +++ b/pkg/sentry/fs/host/file.go @@ -18,15 +18,12 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/fd" "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/secio" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" @@ -39,6 +36,7 @@ import ( // // +stateify savable type fileOperations struct { + fsutil.NoIoctl `state:"nosave"` fsutil.NoopRelease `state:"nosave"` // iops are the Inode operations for this file. @@ -49,49 +47,49 @@ type fileOperations struct { // dirCursor is the directory cursor. 
dirCursor string - - // allowIoctl determines whether ioctls should be passed through to the - // host. - allowIoctl bool } // fileOperations implements fs.FileOperations. var _ fs.FileOperations = (*fileOperations)(nil) // NewFile creates a new File backed by the provided host file descriptor. If -// NewFile succeeds, ownership of the fd is transferred to the returned File. +// NewFile succeeds, ownership of the FD is transferred to the returned File. // // The returned File cannot be saved, since there is no guarantee that the same -// fd will exist or represent the same file at time of restore. If such a +// FD will exist or represent the same file at time of restore. If such a // guarantee does exist, use ImportFile instead. func NewFile(ctx context.Context, fd int, mounter fs.FileOwner) (*fs.File, error) { return newFileFromDonatedFD(ctx, fd, mounter, false, false) } // ImportFile creates a new File backed by the provided host file descriptor. -// Unlike NewFile, the file descriptor used by the File is duped from fd to -// ensure that later changes to fd are not reflected by the fs.File. +// Unlike NewFile, the file descriptor used by the File is duped from FD to +// ensure that later changes to FD are not reflected by the fs.File. // -// If the returned file is saved, it will be restored by re-importing the fd +// If the returned file is saved, it will be restored by re-importing the FD // originally passed to ImportFile. It is the restorer's responsibility to -// ensure that the fd represents the same file. -func ImportFile(ctx context.Context, fd int, mounter fs.FileOwner, allowIoctl bool) (*fs.File, error) { - return newFileFromDonatedFD(ctx, fd, mounter, true, allowIoctl) +// ensure that the FD represents the same file. +func ImportFile(ctx context.Context, fd int, mounter fs.FileOwner, isTTY bool) (*fs.File, error) { + return newFileFromDonatedFD(ctx, fd, mounter, true, isTTY) } -// newFileFromDonatedFD returns an fs.File from a donated fd. 
If the fd is +// newFileFromDonatedFD returns an fs.File from a donated FD. If the FD is // saveable, then saveable is true. -func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner, saveable, allowIoctl bool) (*fs.File, error) { +func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner, saveable, isTTY bool) (*fs.File, error) { var s syscall.Stat_t if err := syscall.Fstat(donated, &s); err != nil { return nil, err } + flags, err := fileFlagsFromDonatedFD(donated) + if err != nil { + return nil, err + } switch s.Mode & syscall.S_IFMT { case syscall.S_IFSOCK: - flags, err := fileFlagsFromDonatedFD(donated) - if err != nil { - return nil, err + if isTTY { + return nil, fmt.Errorf("cannot import host socket as TTY") } + s, err := newSocket(ctx, donated, saveable) if err != nil { return nil, err @@ -101,10 +99,6 @@ func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner }) return s, nil default: - flags, err := fileFlagsFromDonatedFD(donated) - if err != nil { - return nil, err - } msrc := newMountSource(ctx, "/", mounter, &Filesystem{}, fs.MountSourceFlags{}, false /* dontTranslateOwnership */) inode, err := newInode(ctx, msrc, donated, saveable, true /* donated */) if err != nil { @@ -116,14 +110,18 @@ func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner dirent := fs.NewDirent(inode, name) defer dirent.DecRef() - return newFile(ctx, dirent, flags, iops, allowIoctl), nil + if isTTY { + return newTTYFile(ctx, dirent, flags, iops), nil + } + + return newFile(ctx, dirent, flags, iops), nil } } func fileFlagsFromDonatedFD(donated int) (fs.FileFlags, error) { flags, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(donated), syscall.F_GETFL, 0) if errno != 0 { - log.Warningf("Failed to get file flags for donated fd %d (errno=%d)", donated, errno) + log.Warningf("Failed to get file flags for donated FD %d (errno=%d)", donated, errno) return fs.FileFlags{}, syscall.EIO 
} accmode := flags & syscall.O_ACCMODE @@ -138,17 +136,14 @@ func fileFlagsFromDonatedFD(donated int) (fs.FileFlags, error) { } // newFile returns a new fs.File. -func newFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations, allowIoctl bool) *fs.File { +func newFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations) *fs.File { if !iops.ReturnsWouldBlock() { // Allow reading/writing at an arbitrary offset for files // that support it. flags.Pread = true flags.Pwrite = true } - return fs.NewFile(ctx, dirent, flags, &fileOperations{ - iops: iops, - allowIoctl: allowIoctl, - }) + return fs.NewFile(ctx, dirent, flags, &fileOperations{iops: iops}) } // EventRegister implements waiter.Waitable.EventRegister. @@ -269,7 +264,7 @@ func (f *fileOperations) Fsync(ctx context.Context, file *fs.File, start int64, func (f *fileOperations) Flush(context.Context, *fs.File) error { // This is a no-op because flushing the resource backing this // file would mean closing it. We can't do that because other - // open files may depend on the backing host fd. + // open files may depend on the backing host FD. return nil } @@ -285,88 +280,3 @@ func (f *fileOperations) ConfigureMMap(ctx context.Context, file *fs.File, opts func (f *fileOperations) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) { return fsutil.SeekWithDirCursor(ctx, file, whence, offset, &f.dirCursor) } - -// Ioctl implements fs.FileOperations.Iocotl. -func (f *fileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { - if !f.allowIoctl { - return 0, syserror.ENOTTY - } - // Ignore arg[0]. 
This is the real FD: - fd := f.iops.fileState.FD() - ioctl := args[1].Uint64() - switch ioctl { - case linux.TCGETS: - termios, err := ioctlGetTermios(fd) - if err != nil { - return 0, err - } - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{ - AddressSpaceActive: true, - }) - return 0, err - - case linux.TCSETS, linux.TCSETSW, linux.TCSETSF: - var termios linux.Termios - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return 0, err - } - err := ioctlSetTermios(fd, ioctl, &termios) - return 0, err - - case linux.TIOCGPGRP: - // Args: pid_t *argp - // When successful, equivalent to *argp = tcgetpgrp(fd). - // Get the process group ID of the foreground process group on - // this terminal. - - t := kernel.TaskFromContext(ctx) - if t == nil { - panic(fmt.Sprintf("cannot get thread group from context %v", ctx)) - } - tid := t.ThreadID() - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &tid, usermem.IOOpts{ - AddressSpaceActive: true, - }) - return 0, err - - case linux.TIOCSPGRP: - // Args: const pid_t *argp - // Equivalent to tcsetpgrp(fd, *argp). - // Set the foreground process group ID of this terminal. - - // Not much we can do with this one at the moment, so we just - // lie and pretend everything is great. Bash and Sh seem fine - // with this. - log.Warningf("Ignoring application ioctl(TIOCSPGRP) call") - return 0, nil - - case linux.TIOCGWINSZ: - // Args: struct winsize *argp - // Get window size. - winsize, err := ioctlGetWinsize(fd) - if err != nil { - return 0, err - } - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{ - AddressSpaceActive: true, - }) - return 0, err - - case linux.TIOCSWINSZ: - // Args: const struct winsize *argp - // Set window size. 
- var winsize linux.Winsize - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return 0, err - } - err := ioctlSetWinsize(fd, &winsize) - return 0, err - - default: - return 0, syserror.ENOTTY - } -} diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index e7254fa7d..c2e8ba62f 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -73,7 +73,7 @@ type inodeFileState struct { // Common file system state. mops *superOperations `state:"wait"` - // descriptor is the backing host fd. + // descriptor is the backing host FD. descriptor *descriptor `state:"wait"` // Event queue for blocking operations. @@ -167,7 +167,7 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err // inodeOperations implements fs.InodeOperations. var _ fs.InodeOperations = (*inodeOperations)(nil) -// newInode returns a new fs.Inode backed by the host fd. +// newInode returns a new fs.Inode backed by the host FD. func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool, donated bool) (*fs.Inode, error) { // Retrieve metadata. var s syscall.Stat_t @@ -212,8 +212,8 @@ func (i *inodeOperations) Mappable(inode *fs.Inode) memmap.Mappable { return i.cachingInodeOps } -// ReturnsWouldBlock returns true if this host fd can return EWOULDBLOCK -// for operations that would block. +// ReturnsWouldBlock returns true if this host FD can return EWOULDBLOCK for +// operations that would block. func (i *inodeOperations) ReturnsWouldBlock() bool { return i.fileState.descriptor.wouldBlock } @@ -226,7 +226,7 @@ func (i *inodeOperations) Release(context.Context) { // Lookup implements fs.InodeOperations.Lookup. func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { - // Get a new fd relative to i at name. + // Get a new FD relative to i at name. 
fd, err := open(i, name) if err != nil { if err == syserror.ENOENT { @@ -321,7 +321,7 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) unix.Bound // GetFile implements fs.InodeOperations.GetFile. func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - return newFile(ctx, d, flags, i, false), nil + return newFile(ctx, d, flags, i), nil } // canMap returns true if this fs.Inode can be memory mapped. @@ -362,7 +362,7 @@ func (i *inodeOperations) SetOwner(context.Context, *fs.Inode, fs.FileOwner) err func (i *inodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, f fs.FilePermissions) bool { // Can we use host kernel metadata caches? if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) { - // Then just change the timestamps on the fd, the host + // Then just change the timestamps on the FD, the host // will synchronize the metadata update with any host // inode and page cache. return syscall.Fchmod(i.fileState.FD(), uint32(f.LinuxMode())) == nil @@ -375,7 +375,7 @@ func (i *inodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, f func (i *inodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts fs.TimeSpec) error { // Can we use host kernel metadata caches? if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) { - // Then just change the timestamps on the fd, the host + // Then just change the timestamps on the FD, the host // will synchronize the metadata update with any host // inode and page cache. return setTimestamps(i.fileState.FD(), ts) @@ -388,7 +388,7 @@ func (i *inodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, size int64) error { // Is the file not memory-mappable? 
if !canMap(inode) { - // Then just change the file size on the fd, the host + // Then just change the file size on the FD, the host // will synchronize the metadata update with any host // inode and page cache. return syscall.Ftruncate(i.fileState.FD(), size) diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go new file mode 100644 index 000000000..ad1323610 --- /dev/null +++ b/pkg/sentry/fs/host/tty.go @@ -0,0 +1,185 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "sync" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// TTYFileOperations implements fs.FileOperations for a host file descriptor +// that wraps a TTY FD. +// +// +stateify savable +type TTYFileOperations struct { + fileOperations + + // mu protects the fields below. + mu sync.Mutex + + // fgProcessGroup is the foreground process group of this TTY. Will be + // nil if not set or if this file has been released. + fgProcessGroup *kernel.ProcessGroup +} + +// newTTYFile returns a new fs.File that wraps a TTY FD. 
+func newTTYFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations) *fs.File { + return fs.NewFile(ctx, dirent, flags, &TTYFileOperations{ + fileOperations: fileOperations{iops: iops}, + }) +} + +// ForegroundProcessGroup returns the foreground process group for the TTY. +// This will be nil if the foreground process group has not been set or if the +// file has been released. +func (t *TTYFileOperations) ForegroundProcessGroup() *kernel.ProcessGroup { + t.mu.Lock() + defer t.mu.Unlock() + return t.fgProcessGroup +} + +// Release implements fs.FileOperations.Release. +func (t *TTYFileOperations) Release() { + t.mu.Lock() + t.fgProcessGroup = nil + t.mu.Unlock() + + t.fileOperations.Release() +} + +// Ioctl implements fs.FileOperations.Ioctl. +func (t *TTYFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + // Ignore arg[0]. This is the real FD: + fd := t.fileOperations.iops.fileState.FD() + ioctl := args[1].Uint64() + switch ioctl { + case linux.TCGETS: + termios, err := ioctlGetTermios(fd) + if err != nil { + return 0, err + } + _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + + case linux.TCSETS, linux.TCSETSW, linux.TCSETSF: + var termios linux.Termios + if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + err := ioctlSetTermios(fd, ioctl, &termios) + return 0, err + + case linux.TIOCGPGRP: + // Args: pid_t *argp + // When successful, equivalent to *argp = tcgetpgrp(fd). + // Get the process group ID of the foreground process group on + // this terminal. + + t.mu.Lock() + defer t.mu.Unlock() + + if t.fgProcessGroup == nil { + // No process group has been set yet. Let's just lie + // and tell it the process group from the current task. 
+ // The app is probably going to set it to something + // else very soon anyways. + t.fgProcessGroup = kernel.TaskFromContext(ctx).ThreadGroup().ProcessGroup() + } + + // Map the ProcessGroup into a ProcessGroupID in the task's PID + // namespace. + pgID := kernel.TaskFromContext(ctx).ThreadGroup().PIDNamespace().IDOfProcessGroup(t.fgProcessGroup) + _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + + case linux.TIOCSPGRP: + // Args: const pid_t *argp + // Equivalent to tcsetpgrp(fd, *argp). + // Set the foreground process group ID of this terminal. + + var pgID kernel.ProcessGroupID + if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + + // pgID must be non-negative. + if pgID < 0 { + return 0, syserror.EINVAL + } + + // Process group with pgID must exist in this PID namespace. + task := kernel.TaskFromContext(ctx) + pidns := task.PIDNamespace() + pg := pidns.ProcessGroupWithID(pgID) + if pg == nil { + return 0, syserror.ESRCH + } + + // Process group must be in same session as calling task's + // process group. + curSession := task.ThreadGroup().ProcessGroup().Session() + curSessionID := pidns.IDOfSession(curSession) + if pidns.IDOfSession(pg.Session()) != curSessionID { + return 0, syserror.EPERM + } + + t.mu.Lock() + t.fgProcessGroup = pg + t.mu.Unlock() + return 0, nil + + case linux.TIOCGWINSZ: + // Args: struct winsize *argp + // Get window size. + winsize, err := ioctlGetWinsize(fd) + if err != nil { + return 0, err + } + _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + + case linux.TIOCSWINSZ: + // Args: const struct winsize *argp + // Set window size. 
+ var winsize linux.Winsize + if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + err := ioctlSetWinsize(fd, &winsize) + return 0, err + + default: + return 0, syserror.ENOTTY + } +} diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go index cf4e18805..b44d218d9 100644 --- a/pkg/sentry/kernel/sessions.go +++ b/pkg/sentry/kernel/sessions.go @@ -219,6 +219,11 @@ func (pg *ProcessGroup) handleOrphan() { return } +// Session returns the process group's session without taking a reference. +func (pg *ProcessGroup) Session() *Session { + return pg.session +} + // CreateSession creates a new Session, with the ThreadGroup as the leader. // // EPERM may be returned if either the given ThreadGroup is already a Session diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 98356e8b7..eaeb9e2d8 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -65,6 +65,10 @@ const ( // ContainerSignal is used to send a signal to a container. ContainerSignal = "containerManager.Signal" + // ContainerSignalProcess is used to send a signal to a particular + // process in a container. + ContainerSignalProcess = "containerManager.SignalProcess" + // ContainerStart is the URPC endpoint for running a non-root container // within a sandbox. ContainerStart = "containerManager.Start" @@ -92,7 +96,7 @@ const ( SandboxStacks = "debug.Stacks" ) -// ControlSocketAddr generates an abstract unix socket name for the given id. +// ControlSocketAddr generates an abstract unix socket name for the given ID. func ControlSocketAddr(id string) string { return fmt.Sprintf("\x00runsc-sandbox.%s", id) } @@ -248,7 +252,7 @@ func (cm *containerManager) Destroy(cid *string, _ *struct{}) error { } // ExecuteAsync starts running a command on a created or running sandbox. It -// returns the pid of the new process. +// returns the PID of the new process. 
func (cm *containerManager) ExecuteAsync(args *control.ExecArgs, pid *int32) error { log.Debugf("containerManager.ExecuteAsync: %+v", args) tgid, err := cm.l.executeAsync(args) @@ -373,8 +377,12 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // restore the state of multiple containers, nor exec processes. cm.l.sandboxID = o.SandboxID cm.l.mu.Lock() - key := execID{cid: o.SandboxID} - cm.l.processes = map[execID]*kernel.ThreadGroup{key: cm.l.k.GlobalInit()} + eid := execID{cid: o.SandboxID} + cm.l.processes = map[execID]*execProcess{ + eid: &execProcess{ + tg: cm.l.k.GlobalInit(), + }, + } cm.l.mu.Unlock() // Tell the root container to start and wait for the result. @@ -419,7 +427,7 @@ func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error // SignalArgs are arguments to the Signal method. type SignalArgs struct { - // CID is the container id. + // CID is the container ID. CID string // Signo is the signal to send to the process. @@ -430,9 +438,31 @@ type SignalArgs struct { All bool } -// Signal sends a signal to the init process of the container. -// TODO: Send signal to exec process. +// Signal sends a signal to the root process of the container. func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error { log.Debugf("containerManager.Signal %q %d, all: %t", args.CID, args.Signo, args.All) - return cm.l.signal(args.CID, args.Signo, args.All) + return cm.l.signalContainer(args.CID, args.Signo, args.All) +} + +// SignalProcessArgs are arguments to the Signal method. +type SignalProcessArgs struct { + // CID is the container ID. + CID string + + // PID is the process ID in the given container that will be signaled. + PID int32 + + // Signo is the signal to send to the process. + Signo int32 + + // SendToForegroundProcess indicates that the signal should be sent to + // the foreground process group in the session that PID belongs to. + // This is only valid if the process is attached to a host TTY. 
+ SendToForegroundProcess bool +} + +// SignalProcess sends a signal to a particular process in the container. +func (cm *containerManager) SignalProcess(args *SignalProcessArgs, _ *struct{}) error { + log.Debugf("containerManager.Signal: %+v", args) + return cm.l.signalProcess(args.CID, args.PID, args.Signo, args.SendToForegroundProcess) } diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go index 92d641b68..a5a6ba8af 100644 --- a/runsc/boot/fds.go +++ b/runsc/boot/fds.go @@ -25,8 +25,8 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/limits" ) -// createFDMap creates an fd map that contains stdin, stdout, and stderr. If -// console is true, then ioctl calls will be passed through to the host fd. +// createFDMap creates an FD map that contains stdin, stdout, and stderr. If +// console is true, then ioctl calls will be passed through to the host FD. // Upon success, createFDMap dups then closes stdioFDs. func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) { if len(stdioFDs) != 3 { @@ -36,7 +36,7 @@ func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, cons fdm := k.NewFDMap() defer fdm.DecRef() - // Maps sandbox fd to host fd. + // Maps sandbox FD to host FD. 
fdMap := map[int]int{ 0: stdioFDs[0], 1: stdioFDs[1], @@ -45,7 +45,7 @@ func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, cons mounter := fs.FileOwnerFromContext(ctx) for sfd, hfd := range fdMap { - file, err := host.ImportFile(ctx, hfd, mounter, console /* allow ioctls */) + file, err := host.ImportFile(ctx, hfd, mounter, console /* isTTY */) if err != nil { return nil, fmt.Errorf("failed to import fd %d: %v", hfd, err) } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 9fa9b51a0..766a2e968 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -31,6 +31,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/control" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" "gvisor.googlesource.com/gvisor/pkg/sentry/inet" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" @@ -112,7 +113,7 @@ type Loader struct { // have the corresponding pid set. // // processes is guardded by mu. - processes map[execID]*kernel.ThreadGroup + processes map[execID]*execProcess } // execID uniquely identifies a sentry process. @@ -121,6 +122,14 @@ type execID struct { pid kernel.ThreadID } +// execProcess contains the thread group and host TTY of a sentry process. +type execProcess struct { + tg *kernel.ThreadGroup + + // tty will be nil if the process is not attached to a terminal. + tty *host.TTYFileOperations +} + func init() { // Initialize the random number generator. 
rand.Seed(gtime.Now().UnixNano()) @@ -276,7 +285,7 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int, startSignalForwarding: startSignalForwarding, rootProcArgs: procArgs, sandboxID: id, - processes: make(map[execID]*kernel.ThreadGroup), + processes: make(map[execID]*execProcess), } ctrl.manager.l = l return l, nil @@ -330,7 +339,7 @@ func createPlatform(conf *Config, deviceFD int) (platform.Platform, error) { case PlatformKVM: log.Infof("Platform: kvm") if deviceFD < 0 { - return nil, fmt.Errorf("kvm device fd must be provided") + return nil, fmt.Errorf("kvm device FD must be provided") } return kvm.New(os.NewFile(uintptr(deviceFD), "kvm device")) default: @@ -413,8 +422,8 @@ func (l *Loader) run() error { } l.mu.Lock() - key := execID{cid: l.sandboxID} - l.processes[key] = l.k.GlobalInit() + eid := execID{cid: l.sandboxID} + l.processes[eid] = &execProcess{tg: l.k.GlobalInit()} l.mu.Unlock() // Start signal forwarding only after an init process is created. @@ -510,8 +519,8 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config l.mu.Lock() defer l.mu.Unlock() - key := execID{cid: cid} - l.processes[key] = tg + eid := execID{cid: cid} + l.processes[eid] = &execProcess{tg: tg} return nil } @@ -520,7 +529,7 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config // filesystem. func (l *Loader) destroyContainer(cid string) error { // First kill and wait for all processes in the container. - if err := l.signal(cid, int32(linux.SIGKILL), true /*all*/); err != nil { + if err := l.signalContainer(cid, int32(linux.SIGKILL), true /*all*/); err != nil { return fmt.Errorf("failed to SIGKILL all container processes: %v", err) } @@ -549,12 +558,12 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { // process with the same Root. 
l.mu.Lock() rootKey := execID{cid: args.ContainerID} - tg, ok := l.processes[rootKey] + ep, ok := l.processes[rootKey] l.mu.Unlock() if !ok { return 0, fmt.Errorf("cannot exec in container %q: no such container", args.ContainerID) } - tg.Leader().WithMuLocked(func(t *kernel.Task) { + ep.tg.Leader().WithMuLocked(func(t *kernel.Task) { args.Root = t.FSContext().RootDirectory() }) if args.Root != nil { @@ -563,7 +572,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { // Start the process. proc := control.Proc{Kernel: l.k} - tg, tgid, err := control.ExecAsync(&proc, args) + tg, tgid, ttyFile, err := control.ExecAsync(&proc, args) if err != nil { return 0, fmt.Errorf("error executing: %+v: %v", args, err) } @@ -573,7 +582,10 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { l.mu.Lock() defer l.mu.Unlock() eid := execID{cid: args.ContainerID, pid: tgid} - l.processes[eid] = tg + l.processes[eid] = &execProcess{ + tg: tg, + tty: ttyFile, + } log.Debugf("updated processes: %v", l.processes) return tgid, nil @@ -584,8 +596,8 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // Don't defer unlock, as doing so would make it impossible for // multiple clients to wait on the same container. l.mu.Lock() - key := execID{cid: cid} - tg, ok := l.processes[key] + eid := execID{cid: cid} + ep, ok := l.processes[eid] l.mu.Unlock() if !ok { return fmt.Errorf("can't find process for container %q in %v", cid, l.processes) @@ -593,7 +605,7 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // If the thread either has already exited or exits during waiting, // consider the container exited. - ws := l.wait(tg) + ws := l.wait(ep.tg) *waitStatus = ws return nil } @@ -610,10 +622,10 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, wai // entry in l.processes. 
l.mu.Lock() eid := execID{cid: cid, pid: tgid} - tg, ok := l.processes[eid] + ep, ok := l.processes[eid] l.mu.Unlock() if ok { - ws := l.wait(tg) + ws := l.wait(ep.tg) *waitStatus = ws if clearStatus { // Remove tg from the cache. @@ -626,8 +638,8 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, wai } // This process wasn't created by runsc exec or start, so just find it - // by pid and hope it hasn't exited yet. - tg = l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid)) + // by PID and hope it hasn't exited yet. + tg := l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid)) if tg == nil { return fmt.Errorf("no thread group with ID %d", tgid) } @@ -682,18 +694,66 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) { } } -func (l *Loader) signal(cid string, signo int32, all bool) error { +// signalProcess sends a signal to the process with the given PID. If +// sendToFGProcess is true, then the signal will be sent to the foreground +// process group in the same session that PID belongs to. +func (l *Loader) signalProcess(cid string, pid, signo int32, sendToFGProcess bool) error { + si := arch.SignalInfo{Signo: signo} + + if pid <= 0 { + return fmt.Errorf("failed to signal container %q PID %d: PID must be positive", cid, pid) + } + + eid := execID{ + cid: cid, + pid: kernel.ThreadID(pid), + } l.mu.Lock() - key := execID{cid: cid} - tg, ok := l.processes[key] + ep, ok := l.processes[eid] l.mu.Unlock() + if !ok { - return fmt.Errorf("failed to signal container %q: no such container", cid) + return fmt.Errorf("failed to signal container %q PID %d: no such PID", cid, pid) + } + + if !sendToFGProcess { + // Send signal directly to exec process. + return ep.tg.SendSignal(&si) } + // Lookup foreground process group from the TTY for the given process, + // and send the signal to it. 
+ if ep.tty == nil { + return fmt.Errorf("failed to signal foreground process group in container %q PID %d: no TTY attached", cid, pid) + } + pg := ep.tty.ForegroundProcessGroup() + if pg == nil { + // No foreground process group has been set. Signal the + // original thread group. + log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, pid, pid) + return ep.tg.SendSignal(&si) + } + + // Send the signal. + return pg.Originator().SendSignal(&si) +} + +// signalContainer sends a signal to the root container process, or to all +// processes in the container if all is true. +func (l *Loader) signalContainer(cid string, signo int32, all bool) error { si := arch.SignalInfo{Signo: signo} + + l.mu.Lock() + defer l.mu.Unlock() + + eid := execID{cid: cid} + ep, ok := l.processes[eid] + if !ok { + return fmt.Errorf("failed to signal container %q: no such container", cid) + } + if !all { - return tg.Leader().SendSignal(&si) + return ep.tg.SendSignal(&si) } // Pause the kernel to prevent new processes from being created while diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index 28229dbcf..336edf3f6 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -158,6 +158,13 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("error getting processes for container: %v", err) } + if e.StdioIsPty { + // Forward signals sent to this process to the foreground + // process in the sandbox. + stopForwarding := c.ForwardSignals(pid, true /* fgProcess */) + defer stopForwarding() + } + // Write the sandbox-internal pid if required. if ex.internalPidFile != "" { pidStr := []byte(strconv.Itoa(int(pid))) @@ -216,9 +223,9 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat cmd.Stderr = os.Stderr // If the console control socket file is provided, then create a new - // pty master/slave pair and set the tty on the sandbox process. 
+ // pty master/slave pair and set the TTY on the sandbox process. if ex.consoleSocket != "" { - // Create a new tty pair and send the master on the provided + // Create a new TTY pair and send the master on the provided // socket. tty, err := console.NewWithSocket(ex.consoleSocket) if err != nil { @@ -226,7 +233,7 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat } defer tty.Close() - // Set stdio to the new tty slave. + // Set stdio to the new TTY slave. cmd.Stdin = tty cmd.Stdout = tty cmd.Stderr = tty diff --git a/runsc/container/BUILD b/runsc/container/BUILD index e68fb1e8e..bf8b9a2ab 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -50,10 +50,12 @@ go_test( "//pkg/sentry/control", "//pkg/sentry/kernel/auth", "//pkg/unet", + "//pkg/urpc", "//runsc/boot", "//runsc/specutils", "//runsc/test/testutil", "@com_github_cenkalti_backoff//:go_default_library", + "@com_github_kr_pty//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/runsc/container/container.go b/runsc/container/container.go index be833c03d..4b0037b4e 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -22,6 +22,7 @@ import ( "io/ioutil" "os" "os/exec" + "os/signal" "path/filepath" "regexp" "strconv" @@ -107,14 +108,13 @@ type Container struct { Owner string `json:"owner"` // ConsoleSocket is the path to a unix domain socket that will receive - // the console FD. It is only used during create, so we don't need to - // store it in the metadata. - ConsoleSocket string `json:"-"` + // the console FD. + ConsoleSocket string `json:"consoleSocket"` // Status is the current container Status. Status Status `json:"status"` - // GoferPid is the pid of the gofer running along side the sandbox. May + // GoferPid is the PID of the gofer running along side the sandbox. May // be 0 if the gofer has been killed. 
GoferPid int `json:"goferPid"` @@ -313,12 +313,12 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo return nil, err } - // Write the pid file. Containerd considers the create complete after + // Write the PID file. Containerd considers the create complete after // this file is created, so it must be the last thing we do. if pidFile != "" { if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.Pid())), 0644); err != nil { c.Destroy() - return nil, fmt.Errorf("error writing pid file: %v", err) + return nil, fmt.Errorf("error writing PID file: %v", err) } } @@ -406,7 +406,7 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke return c.Wait() } -// Execute runs the specified command in the container. It returns the pid of +// Execute runs the specified command in the container. It returns the PID of // the newly created process. func (c *Container) Execute(args *control.ExecArgs) (int32, error) { log.Debugf("Execute in container %q, args: %+v", c.ID, args) @@ -429,7 +429,7 @@ func (c *Container) Event() (*boot.Event, error) { // Pid returns the Pid of the sandbox the container is running in, or -1 if the // container is not running. func (c *Container) Pid() int { - if err := c.requireStatus("pid", Created, Running, Paused); err != nil { + if err := c.requireStatus("get PID", Created, Running, Paused); err != nil { return -1 } return c.Sandbox.Pid @@ -449,7 +449,7 @@ func (c *Container) Wait() (syscall.WaitStatus, error) { // WaitRootPID waits for process 'pid' in the sandbox's PID namespace and // returns its WaitStatus. 
func (c *Container) WaitRootPID(pid int32, clearStatus bool) (syscall.WaitStatus, error) { - log.Debugf("Wait on pid %d in sandbox %q", pid, c.Sandbox.ID) + log.Debugf("Wait on PID %d in sandbox %q", pid, c.Sandbox.ID) if !c.isSandboxRunning() { return 0, fmt.Errorf("container is not running") } @@ -459,7 +459,7 @@ func (c *Container) WaitRootPID(pid int32, clearStatus bool) (syscall.WaitStatus // WaitPID waits for process 'pid' in the container's PID namespace and returns // its WaitStatus. func (c *Container) WaitPID(pid int32, clearStatus bool) (syscall.WaitStatus, error) { - log.Debugf("Wait on pid %d in container %q", pid, c.ID) + log.Debugf("Wait on PID %d in container %q", pid, c.ID) if !c.isSandboxRunning() { return 0, fmt.Errorf("container is not running") } @@ -483,7 +483,30 @@ func (c *Container) Signal(sig syscall.Signal, all bool) error { if !c.isSandboxRunning() { return fmt.Errorf("container is not running") } - return c.Sandbox.Signal(c.ID, sig, all) + return c.Sandbox.SignalContainer(c.ID, sig, all) +} + +// ForwardSignals forwards all signals received by the current process to the +// container process inside the sandbox. It returns a function that will stop +// forwarding signals. +func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() { + log.Debugf("Forwarding all signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess) + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh) + go func() { + for s := range sigCh { + log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", s, c.ID, pid, fgProcess) + if err := c.Sandbox.SignalProcess(c.ID, pid, s.(syscall.Signal), fgProcess); err != nil { + log.Warningf("error forwarding signal %d to container %q: %v", s, c.ID, err) + } + } + log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess) + }() + + return func() { + signal.Stop(sigCh) + close(sigCh) + } } // Checkpoint sends the checkpoint call to the container. 
@@ -683,9 +706,9 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund if err != nil { return nil, err } - sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox io fd")) + sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox IO FD")) - goferEnd := os.NewFile(uintptr(fds[1]), "gofer io fd") + goferEnd := os.NewFile(uintptr(fds[1]), "gofer IO FD") defer goferEnd.Close() goferEnds = append(goferEnds, goferEnd) @@ -710,7 +733,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund if err := specutils.StartInNS(cmd, nss); err != nil { return nil, err } - log.Infof("Gofer started, pid: %d", cmd.Process.Pid) + log.Infof("Gofer started, PID: %d", cmd.Process.Pid) c.GoferPid = cmd.Process.Pid return sandEnds, nil } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index aebfb2878..84b59ffd8 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -31,6 +31,7 @@ import ( "time" "github.com/cenkalti/backoff" + "github.com/kr/pty" specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gvisor.googlesource.com/gvisor/pkg/abi/linux" @@ -38,6 +39,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/control" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.googlesource.com/gvisor/pkg/urpc" "gvisor.googlesource.com/gvisor/runsc/boot" "gvisor.googlesource.com/gvisor/runsc/test/testutil" ) @@ -1577,6 +1579,121 @@ func TestRootNotMount(t *testing.T) { } } +func TestJobControlSignalExec(t *testing.T) { + spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") + conf := testutil.TestConfig() + + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + // Create and start the container. 
+ c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "") + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + + // Create a pty master/slave. The slave will be passed to the exec + // process. + ptyMaster, ptySlave, err := pty.Open() + if err != nil { + t.Fatalf("error opening pty: %v", err) + } + defer ptyMaster.Close() + defer ptySlave.Close() + + // Exec bash and attach a terminal. + args := &control.ExecArgs{ + Filename: "/bin/bash", + // Don't let bash execute from profile or rc files, otherwise + // our PID counts get messed up. + Argv: []string{"/bin/bash", "--noprofile", "--norc"}, + // Pass the pty slave as FD 0, 1, and 2. + FilePayload: urpc.FilePayload{ + Files: []*os.File{ptySlave, ptySlave, ptySlave}, + }, + StdioIsPty: true, + } + + pid, err := c.Execute(args) + if err != nil { + t.Fatalf("error executing: %v", err) + } + if pid != 2 { + t.Fatalf("exec got pid %d, wanted %d", pid, 2) + } + + // Make sure all the processes are running. + expectedPL := []*control.Process{ + // Root container process. + {PID: 1, Cmd: "sleep"}, + // Bash from exec process. + {PID: 2, Cmd: "bash"}, + } + if err := waitForProcessList(c, expectedPL); err != nil { + t.Error(err) + } + + // Execute sleep. + ptyMaster.Write([]byte("sleep 100\n")) + + // Wait for it to start. Sleep's PPID is bash's PID. + expectedPL = append(expectedPL, &control.Process{PID: 3, PPID: 2, Cmd: "sleep"}) + if err := waitForProcessList(c, expectedPL); err != nil { + t.Error(err) + } + + // Send a SIGTERM to the foreground process for the exec PID. Note that + // although we pass in the PID of "bash", it should actually terminate + // "sleep", since that is the foreground process. 
+ if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.SIGTERM, true /* fgProcess */); err != nil { + t.Fatalf("error signaling container: %v", err) + } + + // Sleep process should be gone. + expectedPL = expectedPL[:len(expectedPL)-1] + if err := waitForProcessList(c, expectedPL); err != nil { + t.Error(err) + } + + // Sleep is dead, but it may take more time for bash to notice and + // change the foreground process back to itself. We know it is done + // when bash writes "Terminated" to the pty. + if err := testutil.WaitUntilRead(ptyMaster, "Terminated", nil, 5*time.Second); err != nil { + t.Fatalf("bash did not take over pty: %v", err) + } + + // Send a SIGKILL to the foreground process again. This time "bash" + // should be killed. We use SIGKILL instead of SIGTERM or SIGINT + // because bash ignores those. + if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.SIGKILL, true /* fgProcess */); err != nil { + t.Fatalf("error signaling container: %v", err) + } + expectedPL = expectedPL[:1] + if err := waitForProcessList(c, expectedPL); err != nil { + t.Error(err) + } + + // Make sure the process indicates it was killed by a SIGKILL. + ws, err := c.WaitPID(pid, true) + if err != nil { + t.Errorf("waiting on container failed: %v", err) + } + if !ws.Signaled() { + t.Error("ws.Signaled() got false, want true") + } + if got, want := ws.Signal(), syscall.SIGKILL; got != want { + t.Errorf("ws.Signal() got %v, want %v", got, want) + } +} + // executeSync synchronously executes a new process. 
func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) { pid, err := cont.Execute(args) diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index e5f7daf60..ab200b75c 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -477,11 +477,12 @@ func TestMultiContainerDestroy(t *testing.T) { } func TestMultiContainerProcesses(t *testing.T) { - // Note: use 'while true' to keep 'sh' process around. Otherwise, shell will - // just execve into 'sleep' and both containers will look the same. + // Note: use curly braces to keep 'sh' process around. Otherwise, shell + // will just execve into 'sleep' and both containers will look the + // same. specs, ids := createSpecs( []string{"sleep", "100"}, - []string{"sh", "-c", "while true; do sleep 100; done"}) + []string{"sh", "-c", "{ sleep 100; }"}) conf := testutil.TestConfig() containers, cleanup, err := startContainers(conf, specs, ids) if err != nil { diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 4111b1a60..e4853af69 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -80,7 +80,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // StartRoot starts running the root container process inside the sandbox. 
func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error { - log.Debugf("Start root sandbox %q, pid: %d", s.ID, s.Pid) + log.Debugf("Start root sandbox %q, PID: %d", s.ID, s.Pid) conn, err := s.sandboxConnect() if err != nil { return err @@ -107,7 +107,7 @@ func (s *Sandbox) Start(spec *specs.Spec, conf *boot.Config, cid string, goferFi defer f.Close() } - log.Debugf("Start non-root container sandbox %q, pid: %d", s.ID, s.Pid) + log.Debugf("Start non-root container sandbox %q, PID: %d", s.ID, s.Pid) sandboxConn, err := s.sandboxConnect() if err != nil { return fmt.Errorf("couldn't connect to sandbox: %v", err) @@ -147,7 +147,7 @@ func (s *Sandbox) Restore(cid string, spec *specs.Spec, conf *boot.Config, f str SandboxID: s.ID, } - // If the platform needs a device fd we must pass it in. + // If the platform needs a device FD we must pass it in. if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil { return err } else if deviceFile != nil { @@ -192,7 +192,7 @@ func (s *Sandbox) Processes(cid string) ([]*control.Process, error) { return pl, nil } -// Execute runs the specified command in the container. It returns the pid of +// Execute runs the specified command in the container. It returns the PID of // the newly created process. func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) { log.Debugf("Executing new process in container %q in sandbox %q", args.ContainerID, s.ID) @@ -239,7 +239,7 @@ func (s *Sandbox) sandboxConnect() (*urpc.Client, error) { } func (s *Sandbox) connError(err error) error { - return fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err) + return fmt.Errorf("error connecting to control server at PID %d: %v", s.Pid, err) } // createSandboxProcess starts the sandbox as a subprocess by running the "boot" @@ -322,7 +322,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund nextFD++ } - // If the platform needs a device fd we must pass it in. 
+ // If the platform needs a device FD we must pass it in. if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil { return err } else if deviceFile != nil { @@ -338,7 +338,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund cmd.Stderr = os.Stderr // If the console control socket file is provided, then create a new - // pty master/slave pair and set the tty on the sandbox process. + // pty master/slave pair and set the TTY on the sandbox process. if consoleEnabled { // console.NewWithSocket will send the master on the socket, // and return the slave. @@ -461,7 +461,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // Add container as the last argument. cmd.Args = append(cmd.Args, s.ID) - // Log the fds we are donating to the sandbox process. + // Log the FDs we are donating to the sandbox process. for i, f := range cmd.ExtraFiles { log.Debugf("Donating FD %d: %q", i+3, f.Name()) } @@ -472,7 +472,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund return err } s.Pid = cmd.Process.Pid - log.Infof("Sandbox started, pid: %d", s.Pid) + log.Infof("Sandbox started, PID: %d", s.Pid) return nil } @@ -572,9 +572,10 @@ func (s *Sandbox) destroy() error { return nil } -// Signal sends the signal to a container in the sandbox. If all is true and -// signal is SIGKILL, then waits for all processes to exit before returning. -func (s *Sandbox) Signal(cid string, sig syscall.Signal, all bool) error { +// SignalContainer sends the signal to a container in the sandbox. If all is +// true and signal is SIGKILL, then waits for all processes to exit before +// returning. 
+func (s *Sandbox) SignalContainer(cid string, sig syscall.Signal, all bool) error { log.Debugf("Signal sandbox %q", s.ID) conn, err := s.sandboxConnect() if err != nil { @@ -593,6 +594,30 @@ func (s *Sandbox) Signal(cid string, sig syscall.Signal, all bool) error { return nil } +// SignalProcess sends the signal to a particular process in the container. If +// fgProcess is true, then the signal is sent to the foreground process group +// in the same session that PID belongs to. This is only valid if the process +// is attached to a host TTY. +func (s *Sandbox) SignalProcess(cid string, pid int32, sig syscall.Signal, fgProcess bool) error { + log.Debugf("Signal sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + args := boot.SignalProcessArgs{ + CID: cid, + Signo: int32(sig), + PID: pid, + SendToForegroundProcess: fgProcess, + } + if err := conn.Call(boot.ContainerSignalProcess, &args, nil); err != nil { + return fmt.Errorf("err signaling container %q PID %d: %v", cid, pid, err) + } + return nil +} + // Checkpoint sends the checkpoint call for a container in the sandbox. // The statefile will be written to f. func (s *Sandbox) Checkpoint(cid string, f *os.File) error { diff --git a/runsc/test/integration/exec_test.go b/runsc/test/integration/exec_test.go index 910c36597..ddd088223 100644 --- a/runsc/test/integration/exec_test.go +++ b/runsc/test/integration/exec_test.go @@ -27,6 +27,7 @@ package integration import ( + "syscall" "testing" "time" @@ -60,3 +61,57 @@ func TestExecCapabilities(t *testing.T) { t.Errorf("wrong capabilities, got: %q, want: %q", got, want) } } + +func TestExecJobControl(t *testing.T) { + if err := testutil.Pull("alpine"); err != nil { + t.Fatalf("docker pull failed: %v", err) + } + d := testutil.MakeDocker("exec-test") + + // Start the container. 
+ if _, err := d.Run("alpine", "sleep", "1000"); err != nil { + t.Fatalf("docker run failed: %v", err) + } + defer d.CleanUp() + + // Exec 'sh' with an attached pty. + cmd, ptmx, err := d.ExecWithTerminal("sh") + if err != nil { + t.Fatalf("docker exec failed: %v", err) + } + defer ptmx.Close() + + // Call "sleep 100" in the shell. + if _, err := ptmx.Write([]byte("sleep 100\n")); err != nil { + t.Fatalf("error writing to pty: %v", err) + } + + // Give shell a few seconds to start executing the sleep. + time.Sleep(2 * time.Second) + + // Send a ^C to the pty, which should kill sleep, but not the shell. + // \x03 is ASCII "end of text", which is the same as ^C. + if _, err := ptmx.Write([]byte{'\x03'}); err != nil { + t.Fatalf("error writing to pty: %v", err) + } + + // The shell should still be alive at this point. Sleep should have + // exited with code 2+128=130. We'll exit with 10 plus that number, so + // that we can be sure that the shell did not get signalled. + if _, err := ptmx.Write([]byte("exit $(expr $? + 10)\n")); err != nil { + t.Fatalf("error writing to pty: %v", err) + } + + // Exec process should exit with code 10+130=140. 
+ ps, err := cmd.Process.Wait() + if err != nil { + t.Fatalf("error waiting for exec process: %v", err) + } + ws := ps.Sys().(syscall.WaitStatus) + if !ws.Exited() { + t.Errorf("ws.Exited got false, want true") + } + if got, want := ws.ExitStatus(), 140; got != want { + t.Errorf("ws.ExitedStatus got %d, want %d", got, want) + } +} diff --git a/runsc/test/testutil/BUILD b/runsc/test/testutil/BUILD index ca91e07ff..da2535bfa 100644 --- a/runsc/test/testutil/BUILD +++ b/runsc/test/testutil/BUILD @@ -17,6 +17,7 @@ go_library( "//runsc/boot", "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", + "@com_github_kr_pty//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", "@com_github_syndtr_gocapability//capability:go_default_library", ], diff --git a/runsc/test/testutil/docker.go b/runsc/test/testutil/docker.go index 7f5909987..55ca353b8 100644 --- a/runsc/test/testutil/docker.go +++ b/runsc/test/testutil/docker.go @@ -26,6 +26,8 @@ import ( "strconv" "strings" "time" + + "github.com/kr/pty" ) func init() { @@ -131,6 +133,17 @@ func do(args ...string) (string, error) { return string(out), nil } +// doWithPty executes docker command with stdio attached to a pty. +func doWithPty(args ...string) (*exec.Cmd, *os.File, error) { + fmt.Printf("Running with pty: docker %s\n", args) + cmd := exec.Command("docker", args...) + ptmx, err := pty.Start(cmd) + if err != nil { + return nil, nil, fmt.Errorf("error executing docker %s with a pty: %v", args, err) + } + return cmd, ptmx, nil +} + // Pull pulls a docker image. This is used in tests to isolate the // time to pull the image off the network from the time to actually // start the container, to avoid timeouts over slow networks. @@ -197,6 +210,14 @@ func (d *Docker) Exec(args ...string) (string, error) { return do(a...) } +// ExecWithTerminal calls 'docker exec -it' with the arguments provided and +// attaches a pty to stdio. 
+func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) { + a := []string{"exec", "-it", d.Name} + a = append(a, args...) + return doWithPty(a...) +} + // Pause calls 'docker pause'. func (d *Docker) Pause() error { if _, err := do("pause", d.Name); err != nil { diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index 07d66e469..cdc7f78c3 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -16,6 +16,7 @@ package testutil import ( + "bufio" "context" "encoding/json" "fmt" @@ -27,6 +28,8 @@ import ( "os/signal" "path/filepath" "runtime" + "strings" + "sync/atomic" "syscall" "time" @@ -315,3 +318,36 @@ func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { } } } + +// WaitUntilRead reads from the given reader until the wanted string is found +// or until timeout. +func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error { + sc := bufio.NewScanner(r) + if split != nil { + sc.Split(split) + } + // done must be accessed atomically. A value greater than 0 indicates + // that the read loop can exit. + var done uint32 + doneCh := make(chan struct{}) + go func() { + for sc.Scan() { + t := sc.Text() + if strings.Contains(t, want) { + atomic.StoreUint32(&done, 1) + close(doneCh) + break + } + if atomic.LoadUint32(&done) > 0 { + break + } + } + }() + select { + case <-time.After(timeout): + atomic.StoreUint32(&done, 1) + return fmt.Errorf("timeout waiting to read %q", want) + case <-doneCh: + return nil + } +} |