diff options
author | Fabricio Voznika <fvoznika@google.com> | 2021-07-12 16:52:53 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-07-12 16:55:40 -0700 |
commit | f51e0486d4f3bd25371c9449de27a3d966b813e3 (patch) | |
tree | f80560cef9ddf213035fec07f710035fc6933fb0 | |
parent | 7132b9a07b55b1c2944f19bb938878d147785a72 (diff) |
Fix stdios ownership
Set stdio ownership based on the container's user to ensure the
user can open/read/write to/from stdios.
1. stdios in the host are changed to have the owner be the same
uid/gid as the process running the sandbox. This ensures that the
sandbox has full control over it.
2. stdios owner inside the sandbox is changed to match the
container's user to give access inside the container and make it
behave the same as runc.
Fixes #6180
PiperOrigin-RevId: 384347009
-rw-r--r-- | pkg/sentry/control/proc.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fdimport/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fdimport/fdimport.go | 22 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/host.go | 163 | ||||
-rw-r--r-- | runsc/boot/loader.go | 16 | ||||
-rw-r--r-- | runsc/cmd/exec.go | 6 | ||||
-rw-r--r-- | runsc/container/console_test.go | 2 | ||||
-rw-r--r-- | runsc/container/container.go | 9 | ||||
-rw-r--r-- | runsc/container/container_test.go | 42 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 42 | ||||
-rw-r--r-- | runsc/container/shared_volume_test.go | 28 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 135 | ||||
-rw-r--r-- | test/e2e/integration_test.go | 113 |
13 files changed, 414 insertions, 167 deletions
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index b7ee5425d..6352ea71a 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -223,7 +223,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI _ = fd.Close() } }() - ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, fds) + ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, args.KUID, args.KGID, fds) if err != nil { return nil, 0, nil, nil, err } diff --git a/pkg/sentry/fdimport/BUILD b/pkg/sentry/fdimport/BUILD index 6b4f8b0ed..563e96e0d 100644 --- a/pkg/sentry/fdimport/BUILD +++ b/pkg/sentry/fdimport/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/sentry/fs/host", "//pkg/sentry/fsimpl/host", "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", ], ) diff --git a/pkg/sentry/fdimport/fdimport.go b/pkg/sentry/fdimport/fdimport.go index badd5b073..f2b9630eb 100644 --- a/pkg/sentry/fdimport/fdimport.go +++ b/pkg/sentry/fdimport/fdimport.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/host" hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -31,9 +32,9 @@ import ( // sets up TTY for the first 3 FDs in the slice representing stdin, stdout, // stderr. Used FDs are either closed or released. It's safe for the caller to // close any remaining files upon return. 
-func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []*fd.FD) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { +func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, uid auth.KUID, gid auth.KGID, fds []*fd.FD) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { if kernel.VFS2Enabled { - ttyFile, err := importVFS2(ctx, fdTable, console, fds) + ttyFile, err := importVFS2(ctx, fdTable, console, uid, gid, fds) return nil, ttyFile, err } ttyFile, err := importFS(ctx, fdTable, console, fds) @@ -89,7 +90,7 @@ func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds [] return ttyFile.FileOperations.(*host.TTYFileOperations), nil } -func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdioFDs []*fd.FD) (*hostvfs2.TTYFileDescription, error) { +func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, uid auth.KUID, gid auth.KGID, stdioFDs []*fd.FD) (*hostvfs2.TTYFileDescription, error) { k := kernel.KernelFromContext(ctx) if k == nil { return nil, fmt.Errorf("cannot find kernel from context") @@ -103,7 +104,13 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi // Import the file as a host TTY file. 
if ttyFile == nil { var err error - appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), true /* isTTY */) + appFile, err = hostvfs2.NewFD(ctx, k.HostMount(), hostFD.FD(), &hostvfs2.NewFDOptions{ + Savable: true, + IsTTY: true, + VirtualOwner: true, + UID: uid, + GID: gid, + }) if err != nil { return nil, err } @@ -121,7 +128,12 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi } } else { var err error - appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), false /* isTTY */) + appFile, err = hostvfs2.NewFD(ctx, k.HostMount(), hostFD.FD(), &hostvfs2.NewFDOptions{ + Savable: true, + VirtualOwner: true, + UID: uid, + GID: gid, + }) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 9998bd0c3..89aa7b3d9 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -42,6 +42,36 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// These are the modes that are stored with virtualOwner. +const virtualOwnerModes = linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID + +// +stateify savable +type virtualOwner struct { + // This field is initialized at creation time and is immutable. + enabled bool + + // mu protects the fields below and they can be accessed using atomic memory + // operations. + mu sync.Mutex `state:"nosave"` + uid uint32 + gid uint32 + // mode is also stored, otherwise setting the host file to `0000` could remove + // access to the file. + mode uint32 +} + +func (v *virtualOwner) atomicUID() uint32 { + return atomic.LoadUint32(&v.uid) +} + +func (v *virtualOwner) atomicGID() uint32 { + return atomic.LoadUint32(&v.gid) +} + +func (v *virtualOwner) atomicMode() uint32 { + return atomic.LoadUint32(&v.mode) +} + // inode implements kernfs.Inode. // // +stateify savable @@ -98,6 +128,11 @@ type inode struct { // Event queue for blocking operations. 
queue waiter.Queue + // virtualOwner caches ownership and permission information to override the + // underlying file owner and permission. This is used to allow the unstrusted + // application to change these fields without affecting the host. + virtualOwner virtualOwner + // If haveBuf is non-zero, hostFD represents a pipe, and buf contains data // read from the pipe from previous calls to inode.beforeSave(). haveBuf // and buf are protected by bufMu. haveBuf is accessed using atomic memory @@ -147,7 +182,7 @@ func newInode(ctx context.Context, fs *filesystem, hostFD int, savable bool, fil type NewFDOptions struct { // If Savable is true, the host file descriptor may be saved/restored by // numeric value; the sandbox API requires a corresponding host FD with the - // same numeric value to be provieded at time of restore. + // same numeric value to be provided at time of restore. Savable bool // If IsTTY is true, the file descriptor is a TTY. @@ -157,6 +192,12 @@ type NewFDOptions struct { // the new file description will inherit flags from hostFD. HaveFlags bool Flags uint32 + + // VirtualOwner allow the host file to have owner and permissions different + // than the underlying host file. + VirtualOwner bool + UID auth.KUID + GID auth.KGID } // NewFD returns a vfs.FileDescription representing the given host file @@ -168,8 +209,8 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) } // Retrieve metadata. 
- var s unix.Stat_t - if err := unix.Fstat(hostFD, &s); err != nil { + var stat unix.Stat_t + if err := unix.Fstat(hostFD, &stat); err != nil { return nil, err } @@ -183,11 +224,19 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) flags = uint32(flagsInt) } - d := &kernfs.Dentry{} - i, err := newInode(ctx, fs, hostFD, opts.Savable, linux.FileMode(s.Mode).FileType(), opts.IsTTY) + fileType := linux.FileMode(stat.Mode).FileType() + i, err := newInode(ctx, fs, hostFD, opts.Savable, fileType, opts.IsTTY) if err != nil { return nil, err } + if opts.VirtualOwner { + i.virtualOwner.enabled = true + i.virtualOwner.uid = uint32(opts.UID) + i.virtualOwner.gid = uint32(opts.GID) + i.virtualOwner.mode = stat.Mode + } + + d := &kernfs.Dentry{} d.Init(&fs.Filesystem, i) // i.open will take a reference on d. @@ -196,15 +245,7 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) // For simplicity, fileDescription.offset is set to 0. Technically, we // should only set to 0 on files that are not seekable (sockets, pipes, // etc.), and use the offset from the host fd otherwise when importing. - return i.open(ctx, d, mnt, flags) -} - -// ImportFD sets up and returns a vfs.FileDescription from a donated fd. -func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) { - return NewFD(ctx, mnt, hostFD, &NewFDOptions{ - Savable: true, - IsTTY: isTTY, - }) + return i.open(ctx, d, mnt, fileType, flags) } // filesystemType implements vfs.FilesystemType. @@ -270,7 +311,7 @@ func (fs *filesystem) MountOptions() string { // CheckPermissions implements kernfs.Inode.CheckPermissions. 
func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { var s unix.Stat_t - if err := unix.Fstat(i.hostFD, &s); err != nil { + if err := i.stat(&s); err != nil { return err } return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid)) @@ -279,7 +320,7 @@ func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, a // Mode implements kernfs.Inode.Mode. func (i *inode) Mode() linux.FileMode { var s unix.Stat_t - if err := unix.Fstat(i.hostFD, &s); err != nil { + if err := i.stat(&s); err != nil { // Retrieving the mode from the host fd using fstat(2) should not fail. // If the syscall does not succeed, something is fundamentally wrong. panic(fmt.Sprintf("failed to retrieve mode from host fd %d: %v", i.hostFD, err)) @@ -306,7 +347,7 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp // Fallback to fstat(2), if statx(2) is not supported on the host. // // TODO(b/151263641): Remove fallback. - return i.fstat(fs) + return i.statxFromStat(fs) } if err != nil { return linux.Statx{}, err @@ -330,19 +371,35 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp // device numbers. 
ls.Mask |= s.Mask & linux.STATX_ALL if s.Mask&linux.STATX_TYPE != 0 { - ls.Mode |= s.Mode & linux.S_IFMT + if i.virtualOwner.enabled { + ls.Mode |= uint16(i.virtualOwner.atomicMode()) & linux.S_IFMT + } else { + ls.Mode |= s.Mode & linux.S_IFMT + } } if s.Mask&linux.STATX_MODE != 0 { - ls.Mode |= s.Mode &^ linux.S_IFMT + if i.virtualOwner.enabled { + ls.Mode |= uint16(i.virtualOwner.atomicMode()) &^ linux.S_IFMT + } else { + ls.Mode |= s.Mode &^ linux.S_IFMT + } } if s.Mask&linux.STATX_NLINK != 0 { ls.Nlink = s.Nlink } if s.Mask&linux.STATX_UID != 0 { - ls.UID = s.Uid + if i.virtualOwner.enabled { + ls.UID = i.virtualOwner.atomicUID() + } else { + ls.UID = s.Uid + } } if s.Mask&linux.STATX_GID != 0 { - ls.GID = s.Gid + if i.virtualOwner.enabled { + ls.GID = i.virtualOwner.atomicGID() + } else { + ls.GID = s.Gid + } } if s.Mask&linux.STATX_ATIME != 0 { ls.Atime = unixToLinuxStatxTimestamp(s.Atime) @@ -366,7 +423,7 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp return ls, nil } -// fstat is a best-effort fallback for inode.Stat() if the host does not +// statxFromStat is a best-effort fallback for inode.Stat() if the host does not // support statx(2). // // We ignore the mask and sync flags in opts and simply supply @@ -374,9 +431,9 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp // of a mask or sync flags. fstat(2) does not provide any metadata // equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so // those fields remain empty. 
-func (i *inode) fstat(fs *filesystem) (linux.Statx, error) { +func (i *inode) statxFromStat(fs *filesystem) (linux.Statx, error) { var s unix.Stat_t - if err := unix.Fstat(i.hostFD, &s); err != nil { + if err := i.stat(&s); err != nil { return linux.Statx{}, err } @@ -400,7 +457,21 @@ func (i *inode) fstat(fs *filesystem) (linux.Statx, error) { }, nil } +func (i *inode) stat(stat *unix.Stat_t) error { + if err := unix.Fstat(i.hostFD, stat); err != nil { + return err + } + if i.virtualOwner.enabled { + stat.Uid = i.virtualOwner.atomicUID() + stat.Gid = i.virtualOwner.atomicGID() + stat.Mode = i.virtualOwner.atomicMode() + } + return nil +} + // SetStat implements kernfs.Inode.SetStat. +// +// +checklocksignore func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { s := &opts.Stat @@ -408,11 +479,22 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre if m == 0 { return nil } - if m&^(linux.STATX_MODE|linux.STATX_SIZE|linux.STATX_ATIME|linux.STATX_MTIME) != 0 { + supportedModes := uint32(linux.STATX_MODE | linux.STATX_SIZE | linux.STATX_ATIME | linux.STATX_MTIME) + if i.virtualOwner.enabled { + if m&virtualOwnerModes != 0 { + // Take lock if any of the virtual owner fields will be updated. 
+ i.virtualOwner.mu.Lock() + defer i.virtualOwner.mu.Unlock() + } + + supportedModes |= virtualOwnerModes + } + if m&^supportedModes != 0 { return linuxerr.EPERM } + var hostStat unix.Stat_t - if err := unix.Fstat(i.hostFD, &hostStat); err != nil { + if err := i.stat(&hostStat); err != nil { return err } if err := vfs.CheckSetStat(ctx, creds, &opts, linux.FileMode(hostStat.Mode), auth.KUID(hostStat.Uid), auth.KGID(hostStat.Gid)); err != nil { @@ -420,8 +502,12 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre } if m&linux.STATX_MODE != 0 { - if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil { - return err + if i.virtualOwner.enabled { + i.virtualOwner.mode = uint32(opts.Stat.Mode) + } else { + if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil { + return err + } } } if m&linux.STATX_SIZE != 0 { @@ -449,6 +535,14 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre return err } } + if i.virtualOwner.enabled { + if m&linux.STATX_UID != 0 { + i.virtualOwner.uid = opts.Stat.UID + } + if m&linux.STATX_GID != 0 { + i.virtualOwner.gid = opts.Stat.GID + } + } return nil } @@ -473,16 +567,15 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr if i.Mode().FileType() == linux.S_IFSOCK { return nil, linuxerr.ENXIO } - return i.open(ctx, d, rp.Mount(), opts.Flags) -} - -func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flags uint32) (*vfs.FileDescription, error) { - var s unix.Stat_t - if err := unix.Fstat(i.hostFD, &s); err != nil { + var stat unix.Stat_t + if err := i.stat(&stat); err != nil { return nil, err } - fileType := s.Mode & linux.FileTypeMask + fileType := linux.FileMode(stat.Mode).FileType() + return i.open(ctx, d, rp.Mount(), fileType, opts.Flags) +} +func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, fileType linux.FileMode, flags uint32) (*vfs.FileDescription, error) { // Constrain flags to a 
subset we can handle. // // TODO(gvisor.dev/issue/2601): Support O_NONBLOCK by adding RWF_NOWAIT to pread/pwrite calls. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 165fb2ebb..5bfb15971 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -715,7 +715,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin return fmt.Errorf("using TTY, stdios not expected: %d", l) } if ep.hostTTY == nil { - return fmt.Errorf("terminal enabled but no TTY provided (--console-socket possibly passed)") + return fmt.Errorf("terminal enabled but no TTY provided. Did you set --console-socket on create?") } info.stdioFDs = []*fd.FD{ep.hostTTY, ep.hostTTY, ep.hostTTY} ep.hostTTY = nil @@ -734,7 +734,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin func (l *Loader) createContainerProcess(root bool, cid string, info *containerInfo) (*kernel.ThreadGroup, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { // Create the FD map, which will set stdin, stdout, and stderr. 
ctx := info.procArgs.NewContext(l.k) - fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs) + fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs, info.spec.Process.User) if err != nil { return nil, nil, nil, fmt.Errorf("importing fds: %w", err) } @@ -980,7 +980,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { tty: ttyFile, ttyVFS2: ttyFileVFS2, } - log.Debugf("updated processes: %s", l.processes) + log.Debugf("updated processes: %v", l.processes) return tgid, nil } @@ -1024,7 +1024,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e l.mu.Lock() delete(l.processes, eid) - log.Debugf("updated processes (removal): %s", l.processes) + log.Debugf("updated processes (removal): %v", l.processes) l.mu.Unlock() return nil } @@ -1092,7 +1092,7 @@ func newRootNetworkNamespace(conf *config.Config, clock tcpip.Clock, uniqueID st return inet.NewRootNamespace(s, creator), nil default: - panic(fmt.Sprintf("invalid network configuration: %d", conf.Network)) + panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) } } @@ -1212,7 +1212,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e return nil default: - panic(fmt.Sprintf("unknown signal delivery mode %s", mode)) + panic(fmt.Sprintf("unknown signal delivery mode %v", mode)) } } @@ -1337,14 +1337,14 @@ func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2 return ep.tty, ep.ttyVFS2, nil } -func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { +func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD, user specs.User) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { if len(stdioFDs) != 3 { return nil, nil, nil, fmt.Errorf("stdioFDs should contain 
exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) } k := kernel.KernelFromContext(ctx) fdTable := k.NewFDTable() - ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs) + ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, auth.KUID(user.UID), auth.KGID(user.GID), stdioFDs) if err != nil { fdTable.DecRef(ctx) return nil, nil, nil, err diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index 242d474b8..2139fdf53 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -146,12 +146,12 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) if ex.detach { return ex.execChildAndWait(waitStatus) } - return ex.exec(c, e, waitStatus) + return ex.exec(conf, c, e, waitStatus) } -func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus { +func (ex *Exec) exec(conf *config.Config, c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus { // Start the new process and get its pid. 
- pid, err := c.Execute(e) + pid, err := c.Execute(conf, e) if err != nil { return Errorf("executing processes for container: %v", err) } diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 8c65b9cd4..9d36086c3 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -288,7 +288,7 @@ func TestJobControlSignalExec(t *testing.T) { StdioIsPty: true, } - pid, err := c.Execute(execArgs) + pid, err := c.Execute(conf, execArgs) if err != nil { t.Fatalf("error executing: %v", err) } diff --git a/runsc/container/container.go b/runsc/container/container.go index b789bc7da..213fbc771 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -310,7 +310,7 @@ func New(conf *config.Config, args Args) (*Container, error) { defer tty.Close() } - if err := c.Sandbox.CreateContainer(c.ID, tty); err != nil { + if err := c.Sandbox.CreateContainer(conf, c.ID, tty); err != nil { return nil, err } } @@ -480,13 +480,13 @@ func Run(conf *config.Config, args Args) (unix.WaitStatus, error) { // Execute runs the specified command in the container. It returns the PID of // the newly created process. -func (c *Container) Execute(args *control.ExecArgs) (int32, error) { +func (c *Container) Execute(conf *config.Config, args *control.ExecArgs) (int32, error) { log.Debugf("Execute in container, cid: %s, args: %+v", c.ID, args) if err := c.requireStatus("execute in", Created, Running); err != nil { return 0, err } args.ContainerID = c.ID - return c.Sandbox.Execute(args) + return c.Sandbox.Execute(conf, args) } // Event returns events for the container. @@ -910,6 +910,9 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu binPath := specutils.ExePath cmd := exec.Command(binPath, args...) cmd.ExtraFiles = goferEnds + + // Set Args[0] to make easier to spot the gofer process. Otherwise it's + // shown as `exe`. 
cmd.Args[0] = "runsc-gofer" if attached { diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 7360eae35..5fb4a3672 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -60,15 +60,15 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } -func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { +func execute(conf *config.Config, cont *Container, name string, arg ...string) (unix.WaitStatus, error) { args := &control.ExecArgs{ Filename: name, Argv: append([]string{name}, arg...), } - return cont.executeSync(args) + return cont.executeSync(conf, args) } -func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) { +func executeCombinedOutput(conf *config.Config, cont *Container, name string, arg ...string) ([]byte, error) { r, w, err := os.Pipe() if err != nil { return nil, err @@ -80,7 +80,7 @@ func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, Argv: append([]string{name}, arg...), FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}}, } - ws, err := cont.executeSync(args) + ws, err := cont.executeSync(conf, args) w.Close() if err != nil { return nil, err @@ -94,8 +94,8 @@ func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, } // executeSync synchronously executes a new process. -func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) { - pid, err := c.Execute(args) +func (c *Container) executeSync(conf *config.Config, args *control.ExecArgs) (unix.WaitStatus, error) { + pid, err := c.Execute(conf, args) if err != nil { return 0, fmt.Errorf("error executing: %v", err) } @@ -172,8 +172,8 @@ func blockUntilWaitable(pid int) error { } // execPS executes `ps` inside the container and return the processes. 
-func execPS(c *Container) ([]*control.Process, error) { - out, err := executeCombinedOutput(c, "/bin/ps", "-e") +func execPS(conf *config.Config, c *Container) ([]*control.Process, error) { + out, err := executeCombinedOutput(conf, c, "/bin/ps", "-e") if err != nil { return nil, err } @@ -864,7 +864,7 @@ func TestExec(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { // t.Parallel() - if ws, err := cont.executeSync(&tc.args); err != nil { + if ws, err := cont.executeSync(conf, &tc.args); err != nil { t.Fatalf("executeAsync(%+v): %v", tc.args, err) } else if ws != 0 { t.Fatalf("executeAsync(%+v) failed with exit: %v", tc.args, ws) @@ -882,7 +882,7 @@ func TestExec(t *testing.T) { } defer unix.Close(fds[0]) - _, err = cont.executeSync(&control.ExecArgs{ + _, err = cont.executeSync(conf, &control.ExecArgs{ Argv: []string{"/nonexist"}, FilePayload: urpc.FilePayload{ Files: []*os.File{os.NewFile(uintptr(fds[1]), "sock")}, @@ -937,7 +937,7 @@ func TestExecProcList(t *testing.T) { // start running exec (which blocks). ch := make(chan error) go func() { - exitStatus, err := cont.executeSync(execArgs) + exitStatus, err := cont.executeSync(conf, execArgs) if err != nil { ch <- err } else if exitStatus != 0 { @@ -1544,7 +1544,7 @@ func TestCapabilities(t *testing.T) { } // "exe" should fail because we don't have the necessary permissions. - if _, err := cont.executeSync(execArgs); err == nil { + if _, err := cont.executeSync(conf, execArgs); err == nil { t.Fatalf("container executed without error, but an error was expected") } @@ -1553,7 +1553,7 @@ func TestCapabilities(t *testing.T) { EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), } // "exe" should not fail this time. - if _, err := cont.executeSync(execArgs); err != nil { + if _, err := cont.executeSync(conf, execArgs); err != nil { t.Fatalf("container failed to exec %v: %v", args, err) } }) @@ -1664,7 +1664,7 @@ func TestReadonlyRoot(t *testing.T) { } // Read mounts to check that root is readonly. 
- out, err := executeCombinedOutput(c, "/bin/sh", "-c", "mount | grep ' / ' | grep -o -e '(.*)'") + out, err := executeCombinedOutput(conf, c, "/bin/sh", "-c", "mount | grep ' / ' | grep -o -e '(.*)'") if err != nil { t.Fatalf("exec failed: %v", err) } @@ -1674,7 +1674,7 @@ func TestReadonlyRoot(t *testing.T) { } // Check that file cannot be created. - ws, err := execute(c, "/bin/touch", "/foo") + ws, err := execute(conf, c, "/bin/touch", "/foo") if err != nil { t.Fatalf("touch file in ro mount: %v", err) } @@ -1723,7 +1723,7 @@ func TestReadonlyMount(t *testing.T) { // Read mounts to check that volume is readonly. cmd := fmt.Sprintf("mount | grep ' %s ' | grep -o -e '(.*)'", dir) - out, err := executeCombinedOutput(c, "/bin/sh", "-c", cmd) + out, err := executeCombinedOutput(conf, c, "/bin/sh", "-c", cmd) if err != nil { t.Fatalf("exec failed, err: %v", err) } @@ -1733,7 +1733,7 @@ func TestReadonlyMount(t *testing.T) { } // Check that file cannot be created. - ws, err := execute(c, "/bin/touch", path.Join(dir, "file")) + ws, err := execute(conf, c, "/bin/touch", path.Join(dir, "file")) if err != nil { t.Fatalf("touch file in ro mount: %v", err) } @@ -2278,13 +2278,13 @@ func TestMountPropagation(t *testing.T) { // Check that mount didn't propagate to private mount. privFile := filepath.Join(priv, "mnt", "file") - if ws, err := execute(cont, "/usr/bin/test", "!", "-f", privFile); err != nil || ws != 0 { + if ws, err := execute(conf, cont, "/usr/bin/test", "!", "-f", privFile); err != nil || ws != 0 { t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err) } // Check that mount propagated to slave mount. 
slaveFile := filepath.Join(slave, "mnt", "file") - if ws, err := execute(cont, "/usr/bin/test", "-f", slaveFile); err != nil || ws != 0 { + if ws, err := execute(conf, cont, "/usr/bin/test", "-f", slaveFile); err != nil || ws != 0 { t.Fatalf("exec: test -f %q, ws: %v, err: %v", privFile, ws, err) } } @@ -2350,7 +2350,7 @@ func TestMountSymlink(t *testing.T) { // Check that symlink was resolved and mount was created where the symlink // is pointing to. file := path.Join(target, "file") - if ws, err := execute(cont, "/usr/bin/test", "-f", file); err != nil || ws != 0 { + if ws, err := execute(conf, cont, "/usr/bin/test", "-f", file); err != nil || ws != 0 { t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) } }) @@ -2589,7 +2589,7 @@ func TestRlimitsExec(t *testing.T) { t.Fatalf("error starting container: %v", err) } - got, err := executeCombinedOutput(cont, "/bin/sh", "-c", "ulimit -n") + got, err := executeCombinedOutput(conf, cont, "/bin/sh", "-c", "ulimit -n") if err != nil { t.Fatal(err) } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 58ae18232..9d8022e50 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -105,11 +105,11 @@ type execDesc struct { name string } -func execMany(t *testing.T, execs []execDesc) { +func execMany(t *testing.T, conf *config.Config, execs []execDesc) { for _, exec := range execs { t.Run(exec.name, func(t *testing.T) { args := &control.ExecArgs{Argv: exec.cmd} - if ws, err := exec.c.executeSync(args); err != nil { + if ws, err := exec.c.executeSync(conf, args); err != nil { t.Errorf("error executing %+v: %v", args, err) } else if ws.ExitStatus() != exec.want { t.Errorf("%q: exec %q got exit status: %d, want: %d", exec.name, exec.cmd, ws.ExitStatus(), exec.want) @@ -217,7 +217,7 @@ func TestMultiPIDNS(t *testing.T) { newProcessBuilder().PID(2).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err := 
execPS(containers[0]) + got, err := execPS(conf, containers[0]) if err != nil { t.Fatal(err) } @@ -229,7 +229,7 @@ func TestMultiPIDNS(t *testing.T) { newProcessBuilder().PID(1).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err = execPS(containers[1]) + got, err = execPS(conf, containers[1]) if err != nil { t.Fatal(err) } @@ -313,7 +313,7 @@ func TestMultiPIDNSPath(t *testing.T) { newProcessBuilder().PID(3).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err := execPS(containers[0]) + got, err := execPS(conf, containers[0]) if err != nil { t.Fatal(err) } @@ -328,7 +328,7 @@ func TestMultiPIDNSPath(t *testing.T) { newProcessBuilder().PID(3).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err = execPS(containers[1]) + got, err = execPS(conf, containers[1]) if err != nil { t.Fatal(err) } @@ -341,7 +341,7 @@ func TestMultiPIDNSPath(t *testing.T) { newProcessBuilder().PID(1).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err = execPS(containers[2]) + got, err = execPS(conf, containers[2]) if err != nil { t.Fatal(err) } @@ -541,7 +541,7 @@ func TestExecWait(t *testing.T) { WorkingDirectory: "/", KUID: 0, } - pid, err := containers[0].Execute(args) + pid, err := containers[0].Execute(conf, args) if err != nil { t.Fatalf("error executing: %v", err) } @@ -744,7 +744,7 @@ func TestMultiContainerDestroy(t *testing.T) { Filename: app, Argv: []string{app, "fork-bomb"}, } - if _, err := containers[1].Execute(args); err != nil { + if _, err := containers[1].Execute(conf, args); err != nil { t.Fatalf("error exec'ing: %v", err) } @@ -821,7 +821,7 @@ func TestMultiContainerProcesses(t *testing.T) { Filename: "/bin/sleep", Argv: []string{"/bin/sleep", "100"}, } - if _, err := containers[1].Execute(args); err != nil { + if _, err := containers[1].Execute(conf, args); err != nil { t.Fatalf("error exec'ing: %v", err) } expectedPL1 = append(expectedPL1, 
newProcessBuilder().PID(4).Cmd("sleep").Process()) @@ -882,7 +882,7 @@ func TestMultiContainerKillAll(t *testing.T) { Filename: app, Argv: []string{app, "task-tree", "--depth=2", "--width=2"}, } - if _, err := containers[1].Execute(args); err != nil { + if _, err := containers[1].Execute(conf, args); err != nil { t.Fatalf("error exec'ing: %v", err) } // Wait for these new processes to start. @@ -1317,7 +1317,7 @@ func TestMultiContainerSharedMount(t *testing.T) { name: "dir removed from container1", }, } - execMany(t, execs) + execMany(t, conf, execs) }) } } @@ -1382,7 +1382,7 @@ func TestMultiContainerSharedMountReadonly(t *testing.T) { name: "fails to write to container1", }, } - execMany(t, execs) + execMany(t, conf, execs) }) } } @@ -1440,7 +1440,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) { name: "file appears in container1", }, } - execMany(t, execs) + execMany(t, conf, execs) containers[1].Destroy() @@ -1490,7 +1490,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) { name: "file removed from container1", }, } - execMany(t, execs) + execMany(t, conf, execs) }) } } @@ -1543,7 +1543,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { name: "directory is mounted in container1", }, } - execMany(t, execs) + execMany(t, conf, execs) }) } } @@ -1654,7 +1654,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { } // Check that container isn't running anymore. 
- if _, err := execute(c, "/bin/true"); err == nil { + if _, err := execute(conf, c, "/bin/true"); err == nil { t.Fatalf("Container %q was not stopped after gofer death", c.ID) } @@ -1669,7 +1669,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { if err := waitForProcessList(c, pl); err != nil { t.Errorf("Container %q was affected by another container: %v", c.ID, err) } - if _, err := execute(c, "/bin/true"); err != nil { + if _, err := execute(conf, c, "/bin/true"); err != nil { t.Fatalf("Container %q was affected by another container: %v", c.ID, err) } } @@ -1691,7 +1691,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { // Check that entire sandbox isn't running anymore. for _, c := range containers { - if _, err := execute(c, "/bin/true"); err == nil { + if _, err := execute(conf, c, "/bin/true"); err == nil { t.Fatalf("Container %q was not stopped after gofer death", c.ID) } } @@ -1867,7 +1867,7 @@ func TestMultiContainerHomeEnvDir(t *testing.T) { defer cleanup() // Exec into the root container synchronously. 
- if _, err := execute(containers[0], "/bin/sh", "-c", execCmd); err != nil { + if _, err := execute(conf, containers[0], "/bin/sh", "-c", execCmd); err != nil { t.Errorf("error executing %+v: %v", execCmd, err) } @@ -2056,7 +2056,7 @@ func TestDuplicateEnvVariable(t *testing.T) { Argv: []string{"/bin/sh", "-c", cmdExec}, Envv: []string{"VAR=foo", "VAR=bar"}, } - if ws, err := containers[0].executeSync(execArgs); err != nil || ws.ExitStatus() != 0 { + if ws, err := containers[0].executeSync(conf, execArgs); err != nil || ws.ExitStatus() != 0 { t.Fatalf("exec failed, ws: %v, err: %v", ws, err) } diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go index 7d05ee16b..f16b2bd02 100644 --- a/runsc/container/shared_volume_test.go +++ b/runsc/container/shared_volume_test.go @@ -72,7 +72,7 @@ func TestSharedVolume(t *testing.T) { Filename: "/usr/bin/test", Argv: []string{"test", "-f", filename}, } - if ws, err := c.executeSync(argsTestFile); err != nil { + if ws, err := c.executeSync(conf, argsTestFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", filename, err) } else if ws.ExitStatus() == 0 { t.Errorf("test %q exited with code %v, wanted not zero", ws.ExitStatus(), err) @@ -84,7 +84,7 @@ func TestSharedVolume(t *testing.T) { } // Now we should be able to test the file from within the sandbox. - if ws, err := c.executeSync(argsTestFile); err != nil { + if ws, err := c.executeSync(conf, argsTestFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", filename, err) } else if ws.ExitStatus() != 0 { t.Errorf("test %q exited with code %v, wanted zero", filename, ws.ExitStatus()) @@ -97,7 +97,7 @@ func TestSharedVolume(t *testing.T) { } // File should no longer exist at the old path within the sandbox. 
- if ws, err := c.executeSync(argsTestFile); err != nil { + if ws, err := c.executeSync(conf, argsTestFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", filename, err) } else if ws.ExitStatus() == 0 { t.Errorf("test %q exited with code %v, wanted not zero", filename, ws.ExitStatus()) @@ -108,7 +108,7 @@ func TestSharedVolume(t *testing.T) { Filename: "/usr/bin/test", Argv: []string{"test", "-f", newFilename}, } - if ws, err := c.executeSync(argsTestNewFile); err != nil { + if ws, err := c.executeSync(conf, argsTestNewFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", newFilename, err) } else if ws.ExitStatus() != 0 { t.Errorf("test %q exited with code %v, wanted zero", newFilename, ws.ExitStatus()) @@ -120,7 +120,7 @@ func TestSharedVolume(t *testing.T) { } // Renamed file should no longer exist at the old path within the sandbox. - if ws, err := c.executeSync(argsTestNewFile); err != nil { + if ws, err := c.executeSync(conf, argsTestNewFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", newFilename, err) } else if ws.ExitStatus() == 0 { t.Errorf("test %q exited with code %v, wanted not zero", newFilename, ws.ExitStatus()) @@ -133,7 +133,7 @@ func TestSharedVolume(t *testing.T) { KUID: auth.KUID(os.Getuid()), KGID: auth.KGID(os.Getgid()), } - if ws, err := c.executeSync(argsTouch); err != nil { + if ws, err := c.executeSync(conf, argsTouch); err != nil { t.Fatalf("unexpected error touching file %q: %v", filename, err) } else if ws.ExitStatus() != 0 { t.Errorf("touch %q exited with code %v, wanted zero", filename, ws.ExitStatus()) @@ -154,7 +154,7 @@ func TestSharedVolume(t *testing.T) { Filename: "/bin/rm", Argv: []string{"rm", filename}, } - if ws, err := c.executeSync(argsRemove); err != nil { + if ws, err := c.executeSync(conf, argsRemove); err != nil { t.Fatalf("unexpected error removing file %q: %v", filename, err) } else if ws.ExitStatus() != 0 { t.Errorf("remove %q exited with code %v, wanted zero", 
filename, ws.ExitStatus()) @@ -166,9 +166,9 @@ func TestSharedVolume(t *testing.T) { } } -func checkFile(c *Container, filename string, want []byte) error { +func checkFile(conf *config.Config, c *Container, filename string, want []byte) error { cpy := filename + ".copy" - if _, err := execute(c, "/bin/cp", "-f", filename, cpy); err != nil { + if _, err := execute(conf, c, "/bin/cp", "-f", filename, cpy); err != nil { return fmt.Errorf("unexpected error copying file %q to %q: %v", filename, cpy, err) } got, err := ioutil.ReadFile(cpy) @@ -226,16 +226,16 @@ func TestSharedVolumeFile(t *testing.T) { if err := ioutil.WriteFile(filename, []byte(want), 0666); err != nil { t.Fatalf("Error writing to %q: %v", filename, err) } - if err := checkFile(c, filename, want); err != nil { + if err := checkFile(conf, c, filename, want); err != nil { t.Fatal(err.Error()) } // Append to file inside the container and check that content is not lost. - if _, err := execute(c, "/bin/bash", "-c", "echo -n sandbox- >> "+filename); err != nil { + if _, err := execute(conf, c, "/bin/bash", "-c", "echo -n sandbox- >> "+filename); err != nil { t.Fatalf("unexpected error appending file %q: %v", filename, err) } want = []byte("host-sandbox-") - if err := checkFile(c, filename, want); err != nil { + if err := checkFile(conf, c, filename, want); err != nil { t.Fatal(err.Error()) } @@ -250,7 +250,7 @@ func TestSharedVolumeFile(t *testing.T) { t.Fatalf("Error writing to file %q: %v", filename, err) } want = []byte("host-sandbox-host") - if err := checkFile(c, filename, want); err != nil { + if err := checkFile(conf, c, filename, want); err != nil { t.Fatal(err.Error()) } @@ -259,7 +259,7 @@ func TestSharedVolumeFile(t *testing.T) { t.Fatalf("Error truncating file %q: %v", filename, err) } want = want[:5] - if err := checkFile(c, filename, want); err != nil { + if err := checkFile(conf, c, filename, want); err != nil { t.Fatal(err.Error()) } } diff --git a/runsc/sandbox/sandbox.go 
b/runsc/sandbox/sandbox.go index 9dea7c4d2..95b5d9615 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -65,6 +65,11 @@ type Sandbox struct { // is not running. Pid int `json:"pid"` + // UID is the user ID in the parent namespace that the sandbox is running as. + UID int `json:"uid"` + // GID is the group ID in the parent namespace that the sandbox is running as. + GID int `json:"gid"` + // Cgroup has the cgroup configuration for the sandbox. Cgroup *cgroup.Cgroup `json:"cgroup"` @@ -176,18 +181,22 @@ func New(conf *config.Config, args *Args) (*Sandbox, error) { } // CreateContainer creates a non-root container inside the sandbox. -func (s *Sandbox) CreateContainer(cid string, tty *os.File) error { +func (s *Sandbox) CreateContainer(conf *config.Config, cid string, tty *os.File) error { log.Debugf("Create non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) - sandboxConn, err := s.sandboxConnect() - if err != nil { - return fmt.Errorf("couldn't connect to sandbox: %v", err) - } - defer sandboxConn.Close() var files []*os.File if tty != nil { files = []*os.File{tty} } + if err := s.configureStdios(conf, files); err != nil { + return err + } + + sandboxConn, err := s.sandboxConnect() + if err != nil { + return fmt.Errorf("couldn't connect to sandbox: %v", err) + } + defer sandboxConn.Close() args := boot.CreateArgs{ CID: cid, @@ -225,6 +234,11 @@ func (s *Sandbox) StartRoot(spec *specs.Spec, conf *config.Config) error { // StartContainer starts running a non-root container inside the sandbox. 
func (s *Sandbox) StartContainer(spec *specs.Spec, conf *config.Config, cid string, stdios, goferFiles []*os.File) error { log.Debugf("Start non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) + + if err := s.configureStdios(conf, stdios); err != nil { + return err + } + sandboxConn, err := s.sandboxConnect() if err != nil { return fmt.Errorf("couldn't connect to sandbox: %v", err) @@ -318,8 +332,13 @@ func (s *Sandbox) NewCGroup() (*cgroup.Cgroup, error) { // Execute runs the specified command in the container. It returns the PID of // the newly created process. -func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) { +func (s *Sandbox) Execute(conf *config.Config, args *control.ExecArgs) (int32, error) { log.Debugf("Executing new process in container %q in sandbox %q", args.ContainerID, s.ID) + + if err := s.configureStdios(conf, args.Files); err != nil { + return 0, err + } + conn, err := s.sandboxConnect() if err != nil { return 0, s.connError(err) @@ -505,6 +524,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn cmd.Stdin = nil cmd.Stdout = nil cmd.Stderr = nil + var stdios [3]*os.File // If the console control socket file is provided, then create a new // pty master/replica pair and set the TTY on the sandbox process. @@ -525,11 +545,9 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn cmd.SysProcAttr.Ctty = nextFD // Pass the tty as all stdio fds to sandbox. - for i := 0; i < 3; i++ { - cmd.ExtraFiles = append(cmd.ExtraFiles, tty) - cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD)) - nextFD++ - } + stdios[0] = tty + stdios[1] = tty + stdios[2] = tty if conf.Debug { // If debugging, send the boot process stdio to the @@ -541,11 +559,9 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn } else { // If not using a console, pass our current stdio as the // container stdio via flags. 
- for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { - cmd.ExtraFiles = append(cmd.ExtraFiles, f) - cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD)) - nextFD++ - } + stdios[0] = os.Stdin + stdios[1] = os.Stdout + stdios[2] = os.Stderr if conf.Debug { // If debugging, send the boot process stdio to the @@ -595,6 +611,10 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace}) } + // These are set to the uid/gid that the sandbox process will use. + s.UID = os.Getuid() + s.GID = os.Getgid() + // User namespace depends on the network type. Host network requires to run // inside the user namespace specified in the spec or the current namespace // if none is configured. @@ -636,51 +656,49 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn const nobody = 65534 if conf.Rootless { log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid()) - cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ - { - ContainerID: nobody, - HostID: os.Getuid(), - Size: 1, - }, - } - cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ - { - ContainerID: nobody, - HostID: os.Getgid(), - Size: 1, - }, - } - } else { // Map nobody in the new namespace to nobody in the parent namespace. - // - // A sandbox process will construct an empty - // root for itself, so it has to have - // CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities. - cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ - { - ContainerID: nobody, - HostID: nobody, - Size: 1, - }, - } - cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ - { - ContainerID: nobody, - HostID: nobody, - Size: 1, - }, - } + s.UID = nobody + s.GID = nobody } // Set credentials to run as user and group nobody. 
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody} + cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ + { + ContainerID: nobody, + HostID: s.UID, + Size: 1, + }, + } + cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ + { + ContainerID: nobody, + HostID: s.GID, + Size: 1, + }, + } + + // A sandbox process will construct an empty root for itself, so it has + // to have CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities. cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT)) + } else { return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID") } } + if err := s.configureStdios(conf, stdios[:]); err != nil { + return fmt.Errorf("configuring stdios: %w", err) + } + for _, file := range stdios { + cmd.ExtraFiles = append(cmd.ExtraFiles, file) + cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD)) + nextFD++ + } + + // Set Args[0] to make easier to spot the sandbox process. Otherwise it's + // shown as `exe`. cmd.Args[0] = "runsc-sandbox" if s.Cgroup != nil { @@ -1167,6 +1185,23 @@ func (s *Sandbox) waitForStopped() error { return backoff.Retry(op, b) } +// configureStdios change stdios ownership to give access to the sandbox +// process. This may be skipped depending on the configuration. +func (s *Sandbox) configureStdios(conf *config.Config, stdios []*os.File) error { + if conf.Rootless || conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + // Cannot change ownership without CAP_CHOWN. + return nil + } + + for _, file := range stdios { + log.Debugf("Changing %q ownership to %d/%d", file.Name(), s.UID, s.GID) + if err := file.Chown(s.UID, s.GID); err != nil { + return err + } + } + return nil +} + // deviceFileForPlatform opens the device file for the given platform. If the // platform does not need a device file, then nil is returned. 
func deviceFileForPlatform(name string) (*os.File, error) { diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 1accc3b3b..f53417cab 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -30,6 +30,7 @@ import ( "net/http" "os" "path/filepath" + "regexp" "strconv" "strings" "testing" @@ -426,10 +427,10 @@ func TestTmpMount(t *testing.T) { // Test that it is allowed to mount a file on top of /dev files, e.g. // /dev/random. func TestMountOverDev(t *testing.T) { - if usingVFS2, err := dockerutil.UsingVFS2(); !usingVFS2 { - t.Skip("VFS1 doesn't allow /dev/random to be mounted.") - } else if err != nil { + if vfs2, err := dockerutil.UsingVFS2(); err != nil { t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err) + } else if !vfs2 { + t.Skip("VFS1 doesn't allow /dev/random to be mounted.") } random, err := ioutil.TempFile(testutil.TmpDir(), "random") @@ -574,11 +575,12 @@ func runIntegrationTest(t *testing.T, capAdd []string, args ...string) { d := dockerutil.MakeContainer(ctx, t) defer d.CleanUp(ctx) - if got, err := d.Run(ctx, dockerutil.RunOpts{ + opts := dockerutil.RunOpts{ Image: "basic/integrationtest", WorkDir: "/root", CapAdd: capAdd, - }, args...); err != nil { + } + if got, err := d.Run(ctx, opts, args...); err != nil { t.Fatalf("docker run failed: %v", err) } else if got != "" { t.Errorf("test failed:\n%s", got) @@ -609,6 +611,107 @@ func TestBindOverlay(t *testing.T) { } } +func TestStdios(t *testing.T) { + if vfs2, err := dockerutil.UsingVFS2(); err != nil { + t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err) + } else if !vfs2 { + t.Skip("VFS1 doesn't adjust stdios user") + } + + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) + + testStdios(t, func(user string, args ...string) (string, error) { + defer d.CleanUp(ctx) + opts := dockerutil.RunOpts{ + Image: "basic/alpine", + User: user, + } + return d.Run(ctx, 
opts, args...) + }) +} + +func TestStdiosExec(t *testing.T) { + if vfs2, err := dockerutil.UsingVFS2(); err != nil { + t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err) + } else if !vfs2 { + t.Skip("VFS1 doesn't adjust stdios user") + } + + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) + + runOpts := dockerutil.RunOpts{Image: "basic/alpine"} + if err := d.Spawn(ctx, runOpts, "sleep", "100"); err != nil { + t.Fatalf("docker run failed: %v", err) + } + + testStdios(t, func(user string, args ...string) (string, error) { + opts := dockerutil.ExecOpts{User: user} + return d.Exec(ctx, opts, args...) + }) +} + +func testStdios(t *testing.T, run func(string, ...string) (string, error)) { + const cmd = "stat -L /proc/self/fd/0 /proc/self/fd/1 /proc/self/fd/2 | grep 'Uid:'" + got, err := run("123", "/bin/sh", "-c", cmd) + if err != nil { + t.Fatalf("docker exec failed: %v", err) + } + if len(got) == 0 { + t.Errorf("Unexpected empty output from %q", cmd) + } + re := regexp.MustCompile(`Uid: \(\s*(\w+)\/.*\)`) + for _, line := range strings.SplitN(got, "\n", 3) { + t.Logf("stat -L: %s", line) + matches := re.FindSubmatch([]byte(line)) + if len(matches) != 2 { + t.Fatalf("wrong output format: %q: matches: %v", line, matches) + } + if want, got := "123", string(matches[1]); want != got { + t.Errorf("wrong user, want: %q, got: %q", want, got) + } + } + + // Check that stdout and stderr can be open and written to. This checks + // that ownership and permissions are correct inside gVisor. + got, err = run("456", "/bin/sh", "-c", "echo foobar | tee /proc/self/fd/1 > /proc/self/fd/2") + if err != nil { + t.Fatalf("docker run failed: %v", err) + } + t.Logf("echo foobar: %q", got) + // Check it repeats twice, once for stdout and once for stderr. + if want := "foobar\nfoobar\n"; want != got { + t.Errorf("Wrong echo output, want: %q, got: %q", want, got) + } + + // Check that timestamps can be changed. 
Setting timestamps require an extra + // write check _after_ the file was opened, and may fail if the underlying + // host file is not setup correctly. + if _, err := run("789", "touch", "/proc/self/fd/0", "/proc/self/fd/1", "/proc/self/fd/2"); err != nil { + t.Fatalf("docker run failed: %v", err) + } +} + +func TestStdiosChown(t *testing.T) { + if vfs2, err := dockerutil.UsingVFS2(); err != nil { + t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err) + } else if !vfs2 { + t.Skip("VFS1 doesn't adjust stdios user") + } + + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) + + opts := dockerutil.RunOpts{Image: "basic/alpine"} + if _, err := d.Run(ctx, opts, "chown", "123", "/proc/self/fd/0", "/proc/self/fd/1", "/proc/self/fd/2"); err != nil { + t.Fatalf("docker run failed: %v", err) + } +} + func TestMain(m *testing.M) { dockerutil.EnsureSupportedDockerVersion() flag.Parse() |