summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/sentry/control/proc.go2
-rw-r--r--pkg/sentry/fdimport/fdimport.go22
-rw-r--r--pkg/sentry/fsimpl/host/host.go163
-rw-r--r--pkg/sentry/fsimpl/host/host_state_autogen.go46
-rw-r--r--runsc/boot/loader.go16
-rw-r--r--runsc/cmd/exec.go6
-rw-r--r--runsc/container/container.go9
-rw-r--r--runsc/sandbox/sandbox.go135
8 files changed, 290 insertions, 109 deletions
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index b7ee5425d..6352ea71a 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -223,7 +223,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
_ = fd.Close()
}
}()
- ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, fds)
+ ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, args.KUID, args.KGID, fds)
if err != nil {
return nil, 0, nil, nil, err
}
diff --git a/pkg/sentry/fdimport/fdimport.go b/pkg/sentry/fdimport/fdimport.go
index badd5b073..f2b9630eb 100644
--- a/pkg/sentry/fdimport/fdimport.go
+++ b/pkg/sentry/fdimport/fdimport.go
@@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/host"
hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
"gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
)
@@ -31,9 +32,9 @@ import (
// sets up TTY for the first 3 FDs in the slice representing stdin, stdout,
// stderr. Used FDs are either closed or released. It's safe for the caller to
// close any remaining files upon return.
-func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []*fd.FD) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, uid auth.KUID, gid auth.KGID, fds []*fd.FD) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
if kernel.VFS2Enabled {
- ttyFile, err := importVFS2(ctx, fdTable, console, fds)
+ ttyFile, err := importVFS2(ctx, fdTable, console, uid, gid, fds)
return nil, ttyFile, err
}
ttyFile, err := importFS(ctx, fdTable, console, fds)
@@ -89,7 +90,7 @@ func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []
return ttyFile.FileOperations.(*host.TTYFileOperations), nil
}
-func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdioFDs []*fd.FD) (*hostvfs2.TTYFileDescription, error) {
+func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, uid auth.KUID, gid auth.KGID, stdioFDs []*fd.FD) (*hostvfs2.TTYFileDescription, error) {
k := kernel.KernelFromContext(ctx)
if k == nil {
return nil, fmt.Errorf("cannot find kernel from context")
@@ -103,7 +104,13 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi
// Import the file as a host TTY file.
if ttyFile == nil {
var err error
- appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), true /* isTTY */)
+ appFile, err = hostvfs2.NewFD(ctx, k.HostMount(), hostFD.FD(), &hostvfs2.NewFDOptions{
+ Savable: true,
+ IsTTY: true,
+ VirtualOwner: true,
+ UID: uid,
+ GID: gid,
+ })
if err != nil {
return nil, err
}
@@ -121,7 +128,12 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi
}
} else {
var err error
- appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), false /* isTTY */)
+ appFile, err = hostvfs2.NewFD(ctx, k.HostMount(), hostFD.FD(), &hostvfs2.NewFDOptions{
+ Savable: true,
+ VirtualOwner: true,
+ UID: uid,
+ GID: gid,
+ })
if err != nil {
return nil, err
}
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 9998bd0c3..89aa7b3d9 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -42,6 +42,36 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
+// These are the modes that are stored with virtualOwner.
+const virtualOwnerModes = linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID
+
+// +stateify savable
+type virtualOwner struct {
+ // This field is initialized at creation time and is immutable.
+ enabled bool
+
+ // mu protects the fields below and they can be accessed using atomic memory
+ // operations.
+ mu sync.Mutex `state:"nosave"`
+ uid uint32
+ gid uint32
+ // mode is also stored, otherwise setting the host file to `0000` could remove
+ // access to the file.
+ mode uint32
+}
+
+func (v *virtualOwner) atomicUID() uint32 {
+ return atomic.LoadUint32(&v.uid)
+}
+
+func (v *virtualOwner) atomicGID() uint32 {
+ return atomic.LoadUint32(&v.gid)
+}
+
+func (v *virtualOwner) atomicMode() uint32 {
+ return atomic.LoadUint32(&v.mode)
+}
+
// inode implements kernfs.Inode.
//
// +stateify savable
@@ -98,6 +128,11 @@ type inode struct {
// Event queue for blocking operations.
queue waiter.Queue
+ // virtualOwner caches ownership and permission information to override the
+ // underlying file owner and permission. This is used to allow the unstrusted
+ // application to change these fields without affecting the host.
+ virtualOwner virtualOwner
+
// If haveBuf is non-zero, hostFD represents a pipe, and buf contains data
// read from the pipe from previous calls to inode.beforeSave(). haveBuf
// and buf are protected by bufMu. haveBuf is accessed using atomic memory
@@ -147,7 +182,7 @@ func newInode(ctx context.Context, fs *filesystem, hostFD int, savable bool, fil
type NewFDOptions struct {
// If Savable is true, the host file descriptor may be saved/restored by
// numeric value; the sandbox API requires a corresponding host FD with the
- // same numeric value to be provieded at time of restore.
+ // same numeric value to be provided at time of restore.
Savable bool
// If IsTTY is true, the file descriptor is a TTY.
@@ -157,6 +192,12 @@ type NewFDOptions struct {
// the new file description will inherit flags from hostFD.
HaveFlags bool
Flags uint32
+
+ // VirtualOwner allow the host file to have owner and permissions different
+ // than the underlying host file.
+ VirtualOwner bool
+ UID auth.KUID
+ GID auth.KGID
}
// NewFD returns a vfs.FileDescription representing the given host file
@@ -168,8 +209,8 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
}
// Retrieve metadata.
- var s unix.Stat_t
- if err := unix.Fstat(hostFD, &s); err != nil {
+ var stat unix.Stat_t
+ if err := unix.Fstat(hostFD, &stat); err != nil {
return nil, err
}
@@ -183,11 +224,19 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
flags = uint32(flagsInt)
}
- d := &kernfs.Dentry{}
- i, err := newInode(ctx, fs, hostFD, opts.Savable, linux.FileMode(s.Mode).FileType(), opts.IsTTY)
+ fileType := linux.FileMode(stat.Mode).FileType()
+ i, err := newInode(ctx, fs, hostFD, opts.Savable, fileType, opts.IsTTY)
if err != nil {
return nil, err
}
+ if opts.VirtualOwner {
+ i.virtualOwner.enabled = true
+ i.virtualOwner.uid = uint32(opts.UID)
+ i.virtualOwner.gid = uint32(opts.GID)
+ i.virtualOwner.mode = stat.Mode
+ }
+
+ d := &kernfs.Dentry{}
d.Init(&fs.Filesystem, i)
// i.open will take a reference on d.
@@ -196,15 +245,7 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
// For simplicity, fileDescription.offset is set to 0. Technically, we
// should only set to 0 on files that are not seekable (sockets, pipes,
// etc.), and use the offset from the host fd otherwise when importing.
- return i.open(ctx, d, mnt, flags)
-}
-
-// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
-func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
- return NewFD(ctx, mnt, hostFD, &NewFDOptions{
- Savable: true,
- IsTTY: isTTY,
- })
+ return i.open(ctx, d, mnt, fileType, flags)
}
// filesystemType implements vfs.FilesystemType.
@@ -270,7 +311,7 @@ func (fs *filesystem) MountOptions() string {
// CheckPermissions implements kernfs.Inode.CheckPermissions.
func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
var s unix.Stat_t
- if err := unix.Fstat(i.hostFD, &s); err != nil {
+ if err := i.stat(&s); err != nil {
return err
}
return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid))
@@ -279,7 +320,7 @@ func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, a
// Mode implements kernfs.Inode.Mode.
func (i *inode) Mode() linux.FileMode {
var s unix.Stat_t
- if err := unix.Fstat(i.hostFD, &s); err != nil {
+ if err := i.stat(&s); err != nil {
// Retrieving the mode from the host fd using fstat(2) should not fail.
// If the syscall does not succeed, something is fundamentally wrong.
panic(fmt.Sprintf("failed to retrieve mode from host fd %d: %v", i.hostFD, err))
@@ -306,7 +347,7 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
// Fallback to fstat(2), if statx(2) is not supported on the host.
//
// TODO(b/151263641): Remove fallback.
- return i.fstat(fs)
+ return i.statxFromStat(fs)
}
if err != nil {
return linux.Statx{}, err
@@ -330,19 +371,35 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
// device numbers.
ls.Mask |= s.Mask & linux.STATX_ALL
if s.Mask&linux.STATX_TYPE != 0 {
- ls.Mode |= s.Mode & linux.S_IFMT
+ if i.virtualOwner.enabled {
+ ls.Mode |= uint16(i.virtualOwner.atomicMode()) & linux.S_IFMT
+ } else {
+ ls.Mode |= s.Mode & linux.S_IFMT
+ }
}
if s.Mask&linux.STATX_MODE != 0 {
- ls.Mode |= s.Mode &^ linux.S_IFMT
+ if i.virtualOwner.enabled {
+ ls.Mode |= uint16(i.virtualOwner.atomicMode()) &^ linux.S_IFMT
+ } else {
+ ls.Mode |= s.Mode &^ linux.S_IFMT
+ }
}
if s.Mask&linux.STATX_NLINK != 0 {
ls.Nlink = s.Nlink
}
if s.Mask&linux.STATX_UID != 0 {
- ls.UID = s.Uid
+ if i.virtualOwner.enabled {
+ ls.UID = i.virtualOwner.atomicUID()
+ } else {
+ ls.UID = s.Uid
+ }
}
if s.Mask&linux.STATX_GID != 0 {
- ls.GID = s.Gid
+ if i.virtualOwner.enabled {
+ ls.GID = i.virtualOwner.atomicGID()
+ } else {
+ ls.GID = s.Gid
+ }
}
if s.Mask&linux.STATX_ATIME != 0 {
ls.Atime = unixToLinuxStatxTimestamp(s.Atime)
@@ -366,7 +423,7 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
return ls, nil
}
-// fstat is a best-effort fallback for inode.Stat() if the host does not
+// statxFromStat is a best-effort fallback for inode.Stat() if the host does not
// support statx(2).
//
// We ignore the mask and sync flags in opts and simply supply
@@ -374,9 +431,9 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
// of a mask or sync flags. fstat(2) does not provide any metadata
// equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so
// those fields remain empty.
-func (i *inode) fstat(fs *filesystem) (linux.Statx, error) {
+func (i *inode) statxFromStat(fs *filesystem) (linux.Statx, error) {
var s unix.Stat_t
- if err := unix.Fstat(i.hostFD, &s); err != nil {
+ if err := i.stat(&s); err != nil {
return linux.Statx{}, err
}
@@ -400,7 +457,21 @@ func (i *inode) fstat(fs *filesystem) (linux.Statx, error) {
}, nil
}
+func (i *inode) stat(stat *unix.Stat_t) error {
+ if err := unix.Fstat(i.hostFD, stat); err != nil {
+ return err
+ }
+ if i.virtualOwner.enabled {
+ stat.Uid = i.virtualOwner.atomicUID()
+ stat.Gid = i.virtualOwner.atomicGID()
+ stat.Mode = i.virtualOwner.atomicMode()
+ }
+ return nil
+}
+
// SetStat implements kernfs.Inode.SetStat.
+//
+// +checklocksignore
func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
s := &opts.Stat
@@ -408,11 +479,22 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
if m == 0 {
return nil
}
- if m&^(linux.STATX_MODE|linux.STATX_SIZE|linux.STATX_ATIME|linux.STATX_MTIME) != 0 {
+ supportedModes := uint32(linux.STATX_MODE | linux.STATX_SIZE | linux.STATX_ATIME | linux.STATX_MTIME)
+ if i.virtualOwner.enabled {
+ if m&virtualOwnerModes != 0 {
+ // Take lock if any of the virtual owner fields will be updated.
+ i.virtualOwner.mu.Lock()
+ defer i.virtualOwner.mu.Unlock()
+ }
+
+ supportedModes |= virtualOwnerModes
+ }
+ if m&^supportedModes != 0 {
return linuxerr.EPERM
}
+
var hostStat unix.Stat_t
- if err := unix.Fstat(i.hostFD, &hostStat); err != nil {
+ if err := i.stat(&hostStat); err != nil {
return err
}
if err := vfs.CheckSetStat(ctx, creds, &opts, linux.FileMode(hostStat.Mode), auth.KUID(hostStat.Uid), auth.KGID(hostStat.Gid)); err != nil {
@@ -420,8 +502,12 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
}
if m&linux.STATX_MODE != 0 {
- if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil {
- return err
+ if i.virtualOwner.enabled {
+ i.virtualOwner.mode = uint32(opts.Stat.Mode)
+ } else {
+ if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil {
+ return err
+ }
}
}
if m&linux.STATX_SIZE != 0 {
@@ -449,6 +535,14 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
return err
}
}
+ if i.virtualOwner.enabled {
+ if m&linux.STATX_UID != 0 {
+ i.virtualOwner.uid = opts.Stat.UID
+ }
+ if m&linux.STATX_GID != 0 {
+ i.virtualOwner.gid = opts.Stat.GID
+ }
+ }
return nil
}
@@ -473,16 +567,15 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr
if i.Mode().FileType() == linux.S_IFSOCK {
return nil, linuxerr.ENXIO
}
- return i.open(ctx, d, rp.Mount(), opts.Flags)
-}
-
-func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flags uint32) (*vfs.FileDescription, error) {
- var s unix.Stat_t
- if err := unix.Fstat(i.hostFD, &s); err != nil {
+ var stat unix.Stat_t
+ if err := i.stat(&stat); err != nil {
return nil, err
}
- fileType := s.Mode & linux.FileTypeMask
+ fileType := linux.FileMode(stat.Mode).FileType()
+ return i.open(ctx, d, rp.Mount(), fileType, opts.Flags)
+}
+func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, fileType linux.FileMode, flags uint32) (*vfs.FileDescription, error) {
// Constrain flags to a subset we can handle.
//
// TODO(gvisor.dev/issue/2601): Support O_NONBLOCK by adding RWF_NOWAIT to pread/pwrite calls.
diff --git a/pkg/sentry/fsimpl/host/host_state_autogen.go b/pkg/sentry/fsimpl/host/host_state_autogen.go
index 47253a87e..607474165 100644
--- a/pkg/sentry/fsimpl/host/host_state_autogen.go
+++ b/pkg/sentry/fsimpl/host/host_state_autogen.go
@@ -30,6 +30,40 @@ func (r *ConnectedEndpointRefs) StateLoad(stateSourceObject state.Source) {
stateSourceObject.AfterLoad(r.afterLoad)
}
+func (v *virtualOwner) StateTypeName() string {
+ return "pkg/sentry/fsimpl/host.virtualOwner"
+}
+
+func (v *virtualOwner) StateFields() []string {
+ return []string{
+ "enabled",
+ "uid",
+ "gid",
+ "mode",
+ }
+}
+
+func (v *virtualOwner) beforeSave() {}
+
+// +checklocksignore
+func (v *virtualOwner) StateSave(stateSinkObject state.Sink) {
+ v.beforeSave()
+ stateSinkObject.Save(0, &v.enabled)
+ stateSinkObject.Save(1, &v.uid)
+ stateSinkObject.Save(2, &v.gid)
+ stateSinkObject.Save(3, &v.mode)
+}
+
+func (v *virtualOwner) afterLoad() {}
+
+// +checklocksignore
+func (v *virtualOwner) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &v.enabled)
+ stateSourceObject.Load(1, &v.uid)
+ stateSourceObject.Load(2, &v.gid)
+ stateSourceObject.Load(3, &v.mode)
+}
+
func (i *inode) StateTypeName() string {
return "pkg/sentry/fsimpl/host.inode"
}
@@ -51,6 +85,7 @@ func (i *inode) StateFields() []string {
"isTTY",
"savable",
"queue",
+ "virtualOwner",
"haveBuf",
"buf",
}
@@ -74,8 +109,9 @@ func (i *inode) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(12, &i.isTTY)
stateSinkObject.Save(13, &i.savable)
stateSinkObject.Save(14, &i.queue)
- stateSinkObject.Save(15, &i.haveBuf)
- stateSinkObject.Save(16, &i.buf)
+ stateSinkObject.Save(15, &i.virtualOwner)
+ stateSinkObject.Save(16, &i.haveBuf)
+ stateSinkObject.Save(17, &i.buf)
}
// +checklocksignore
@@ -95,8 +131,9 @@ func (i *inode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(12, &i.isTTY)
stateSourceObject.Load(13, &i.savable)
stateSourceObject.Load(14, &i.queue)
- stateSourceObject.Load(15, &i.haveBuf)
- stateSourceObject.Load(16, &i.buf)
+ stateSourceObject.Load(15, &i.virtualOwner)
+ stateSourceObject.Load(16, &i.haveBuf)
+ stateSourceObject.Load(17, &i.buf)
stateSourceObject.AfterLoad(i.afterLoad)
}
@@ -279,6 +316,7 @@ func (t *TTYFileDescription) StateLoad(stateSourceObject state.Source) {
func init() {
state.Register((*ConnectedEndpointRefs)(nil))
+ state.Register((*virtualOwner)(nil))
state.Register((*inode)(nil))
state.Register((*filesystemType)(nil))
state.Register((*filesystem)(nil))
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 165fb2ebb..5bfb15971 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -715,7 +715,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin
return fmt.Errorf("using TTY, stdios not expected: %d", l)
}
if ep.hostTTY == nil {
- return fmt.Errorf("terminal enabled but no TTY provided (--console-socket possibly passed)")
+ return fmt.Errorf("terminal enabled but no TTY provided. Did you set --console-socket on create?")
}
info.stdioFDs = []*fd.FD{ep.hostTTY, ep.hostTTY, ep.hostTTY}
ep.hostTTY = nil
@@ -734,7 +734,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin
func (l *Loader) createContainerProcess(root bool, cid string, info *containerInfo) (*kernel.ThreadGroup, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
// Create the FD map, which will set stdin, stdout, and stderr.
ctx := info.procArgs.NewContext(l.k)
- fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs)
+ fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs, info.spec.Process.User)
if err != nil {
return nil, nil, nil, fmt.Errorf("importing fds: %w", err)
}
@@ -980,7 +980,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
tty: ttyFile,
ttyVFS2: ttyFileVFS2,
}
- log.Debugf("updated processes: %s", l.processes)
+ log.Debugf("updated processes: %v", l.processes)
return tgid, nil
}
@@ -1024,7 +1024,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
l.mu.Lock()
delete(l.processes, eid)
- log.Debugf("updated processes (removal): %s", l.processes)
+ log.Debugf("updated processes (removal): %v", l.processes)
l.mu.Unlock()
return nil
}
@@ -1092,7 +1092,7 @@ func newRootNetworkNamespace(conf *config.Config, clock tcpip.Clock, uniqueID st
return inet.NewRootNamespace(s, creator), nil
default:
- panic(fmt.Sprintf("invalid network configuration: %d", conf.Network))
+ panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
}
}
@@ -1212,7 +1212,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
return nil
default:
- panic(fmt.Sprintf("unknown signal delivery mode %s", mode))
+ panic(fmt.Sprintf("unknown signal delivery mode %v", mode))
}
}
@@ -1337,14 +1337,14 @@ func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2
return ep.tty, ep.ttyVFS2, nil
}
-func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD, user specs.User) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
if len(stdioFDs) != 3 {
return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
}
k := kernel.KernelFromContext(ctx)
fdTable := k.NewFDTable()
- ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs)
+ ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, auth.KUID(user.UID), auth.KGID(user.GID), stdioFDs)
if err != nil {
fdTable.DecRef(ctx)
return nil, nil, nil, err
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index 242d474b8..2139fdf53 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -146,12 +146,12 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
if ex.detach {
return ex.execChildAndWait(waitStatus)
}
- return ex.exec(c, e, waitStatus)
+ return ex.exec(conf, c, e, waitStatus)
}
-func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus {
+func (ex *Exec) exec(conf *config.Config, c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus {
// Start the new process and get its pid.
- pid, err := c.Execute(e)
+ pid, err := c.Execute(conf, e)
if err != nil {
return Errorf("executing processes for container: %v", err)
}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index b789bc7da..213fbc771 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -310,7 +310,7 @@ func New(conf *config.Config, args Args) (*Container, error) {
defer tty.Close()
}
- if err := c.Sandbox.CreateContainer(c.ID, tty); err != nil {
+ if err := c.Sandbox.CreateContainer(conf, c.ID, tty); err != nil {
return nil, err
}
}
@@ -480,13 +480,13 @@ func Run(conf *config.Config, args Args) (unix.WaitStatus, error) {
// Execute runs the specified command in the container. It returns the PID of
// the newly created process.
-func (c *Container) Execute(args *control.ExecArgs) (int32, error) {
+func (c *Container) Execute(conf *config.Config, args *control.ExecArgs) (int32, error) {
log.Debugf("Execute in container, cid: %s, args: %+v", c.ID, args)
if err := c.requireStatus("execute in", Created, Running); err != nil {
return 0, err
}
args.ContainerID = c.ID
- return c.Sandbox.Execute(args)
+ return c.Sandbox.Execute(conf, args)
}
// Event returns events for the container.
@@ -910,6 +910,9 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
binPath := specutils.ExePath
cmd := exec.Command(binPath, args...)
cmd.ExtraFiles = goferEnds
+
+ // Set Args[0] to make easier to spot the gofer process. Otherwise it's
+ // shown as `exe`.
cmd.Args[0] = "runsc-gofer"
if attached {
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 9dea7c4d2..95b5d9615 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -65,6 +65,11 @@ type Sandbox struct {
// is not running.
Pid int `json:"pid"`
+ // UID is the user ID in the parent namespace that the sandbox is running as.
+ UID int `json:"uid"`
+ // GID is the group ID in the parent namespace that the sandbox is running as.
+ GID int `json:"gid"`
+
// Cgroup has the cgroup configuration for the sandbox.
Cgroup *cgroup.Cgroup `json:"cgroup"`
@@ -176,18 +181,22 @@ func New(conf *config.Config, args *Args) (*Sandbox, error) {
}
// CreateContainer creates a non-root container inside the sandbox.
-func (s *Sandbox) CreateContainer(cid string, tty *os.File) error {
+func (s *Sandbox) CreateContainer(conf *config.Config, cid string, tty *os.File) error {
log.Debugf("Create non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid)
- sandboxConn, err := s.sandboxConnect()
- if err != nil {
- return fmt.Errorf("couldn't connect to sandbox: %v", err)
- }
- defer sandboxConn.Close()
var files []*os.File
if tty != nil {
files = []*os.File{tty}
}
+ if err := s.configureStdios(conf, files); err != nil {
+ return err
+ }
+
+ sandboxConn, err := s.sandboxConnect()
+ if err != nil {
+ return fmt.Errorf("couldn't connect to sandbox: %v", err)
+ }
+ defer sandboxConn.Close()
args := boot.CreateArgs{
CID: cid,
@@ -225,6 +234,11 @@ func (s *Sandbox) StartRoot(spec *specs.Spec, conf *config.Config) error {
// StartContainer starts running a non-root container inside the sandbox.
func (s *Sandbox) StartContainer(spec *specs.Spec, conf *config.Config, cid string, stdios, goferFiles []*os.File) error {
log.Debugf("Start non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid)
+
+ if err := s.configureStdios(conf, stdios); err != nil {
+ return err
+ }
+
sandboxConn, err := s.sandboxConnect()
if err != nil {
return fmt.Errorf("couldn't connect to sandbox: %v", err)
@@ -318,8 +332,13 @@ func (s *Sandbox) NewCGroup() (*cgroup.Cgroup, error) {
// Execute runs the specified command in the container. It returns the PID of
// the newly created process.
-func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) {
+func (s *Sandbox) Execute(conf *config.Config, args *control.ExecArgs) (int32, error) {
log.Debugf("Executing new process in container %q in sandbox %q", args.ContainerID, s.ID)
+
+ if err := s.configureStdios(conf, args.Files); err != nil {
+ return 0, err
+ }
+
conn, err := s.sandboxConnect()
if err != nil {
return 0, s.connError(err)
@@ -505,6 +524,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
cmd.Stdin = nil
cmd.Stdout = nil
cmd.Stderr = nil
+ var stdios [3]*os.File
// If the console control socket file is provided, then create a new
// pty master/replica pair and set the TTY on the sandbox process.
@@ -525,11 +545,9 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
cmd.SysProcAttr.Ctty = nextFD
// Pass the tty as all stdio fds to sandbox.
- for i := 0; i < 3; i++ {
- cmd.ExtraFiles = append(cmd.ExtraFiles, tty)
- cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD))
- nextFD++
- }
+ stdios[0] = tty
+ stdios[1] = tty
+ stdios[2] = tty
if conf.Debug {
// If debugging, send the boot process stdio to the
@@ -541,11 +559,9 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
} else {
// If not using a console, pass our current stdio as the
// container stdio via flags.
- for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
- cmd.ExtraFiles = append(cmd.ExtraFiles, f)
- cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD))
- nextFD++
- }
+ stdios[0] = os.Stdin
+ stdios[1] = os.Stdout
+ stdios[2] = os.Stderr
if conf.Debug {
// If debugging, send the boot process stdio to the
@@ -595,6 +611,10 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace})
}
+ // These are set to the uid/gid that the sandbox process will use.
+ s.UID = os.Getuid()
+ s.GID = os.Getgid()
+
// User namespace depends on the network type. Host network requires to run
// inside the user namespace specified in the spec or the current namespace
// if none is configured.
@@ -636,51 +656,49 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
const nobody = 65534
if conf.Rootless {
log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
- cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: nobody,
- HostID: os.Getuid(),
- Size: 1,
- },
- }
- cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: nobody,
- HostID: os.Getgid(),
- Size: 1,
- },
- }
-
} else {
// Map nobody in the new namespace to nobody in the parent namespace.
- //
- // A sandbox process will construct an empty
- // root for itself, so it has to have
- // CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
- cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: nobody,
- HostID: nobody,
- Size: 1,
- },
- }
- cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: nobody,
- HostID: nobody,
- Size: 1,
- },
- }
+ s.UID = nobody
+ s.GID = nobody
}
// Set credentials to run as user and group nobody.
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: nobody,
+ HostID: s.UID,
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: nobody,
+ HostID: s.GID,
+ Size: 1,
+ },
+ }
+
+ // A sandbox process will construct an empty root for itself, so it has
+ // to have CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
+
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
}
+ if err := s.configureStdios(conf, stdios[:]); err != nil {
+ return fmt.Errorf("configuring stdios: %w", err)
+ }
+ for _, file := range stdios {
+ cmd.ExtraFiles = append(cmd.ExtraFiles, file)
+ cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD))
+ nextFD++
+ }
+
+ // Set Args[0] to make easier to spot the sandbox process. Otherwise it's
+ // shown as `exe`.
cmd.Args[0] = "runsc-sandbox"
if s.Cgroup != nil {
@@ -1167,6 +1185,23 @@ func (s *Sandbox) waitForStopped() error {
return backoff.Retry(op, b)
}
+// configureStdios change stdios ownership to give access to the sandbox
+// process. This may be skipped depending on the configuration.
+func (s *Sandbox) configureStdios(conf *config.Config, stdios []*os.File) error {
+ if conf.Rootless || conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+ // Cannot change ownership without CAP_CHOWN.
+ return nil
+ }
+
+ for _, file := range stdios {
+ log.Debugf("Changing %q ownership to %d/%d", file.Name(), s.UID, s.GID)
+ if err := file.Chown(s.UID, s.GID); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
// deviceFileForPlatform opens the device file for the given platform. If the
// platform does not need a device file, then nil is returned.
func deviceFileForPlatform(name string) (*os.File, error) {