diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/amutex/BUILD | 1 | ||||
-rw-r--r-- | pkg/amutex/amutex.go | 17 | ||||
-rw-r--r-- | pkg/sentry/arch/signal_arm64.go | 22 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/filesystem.go | 62 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/gofer.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/special_file.go | 28 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/host.go | 41 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/netstack.go | 13 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/netstack_vfs2.go | 17 | ||||
-rw-r--r-- | pkg/tcpip/link/qdisc/fifo/endpoint.go | 2 |
11 files changed, 150 insertions, 58 deletions
diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD index 9612f072e..ffc918846 100644 --- a/pkg/amutex/BUILD +++ b/pkg/amutex/BUILD @@ -6,6 +6,7 @@ go_library( name = "amutex", srcs = ["amutex.go"], visibility = ["//:sandbox"], + deps = ["//pkg/syserror"], ) go_test( diff --git a/pkg/amutex/amutex.go b/pkg/amutex/amutex.go index 1c4fd1784..a078a31db 100644 --- a/pkg/amutex/amutex.go +++ b/pkg/amutex/amutex.go @@ -18,6 +18,8 @@ package amutex import ( "sync/atomic" + + "gvisor.dev/gvisor/pkg/syserror" ) // Sleeper must be implemented by users of the abortable mutex to allow for @@ -53,6 +55,21 @@ func (NoopSleeper) SleepFinish(success bool) {} // Interrupted implements Sleeper.Interrupted. func (NoopSleeper) Interrupted() bool { return false } +// Block blocks until either receiving from ch succeeds (in which case it +// returns nil) or sleeper is interrupted (in which case it returns +// syserror.ErrInterrupted). +func Block(sleeper Sleeper, ch <-chan struct{}) error { + cancel := sleeper.SleepStart() + select { + case <-ch: + sleeper.SleepFinish(true) + return nil + case <-cancel: + sleeper.SleepFinish(false) + return syserror.ErrInterrupted + } +} + // AbortableMutex is an abortable mutex. It allows Lock() to be aborted while it // waits to acquire the mutex. type AbortableMutex struct { diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go index 1cb1adf8c..642c79dda 100644 --- a/pkg/sentry/arch/signal_arm64.go +++ b/pkg/sentry/arch/signal_arm64.go @@ -19,6 +19,7 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/usermem" ) @@ -134,6 +135,11 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt c.Regs.Regs[1] = uint64(infoAddr) c.Regs.Regs[2] = uint64(ucAddr) c.Regs.Regs[30] = uint64(act.Restorer) + + // Save the thread's floating point state. + c.sigFPState = append(c.sigFPState, c.aarch64FPState) + // Signal handler gets a clean floating point state. + c.aarch64FPState = newAarch64FPState() return nil } @@ -155,5 +161,21 @@ func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalSt c.Regs.Sp = uc.MContext.Sp c.Regs.Pstate = uc.MContext.Pstate + // Restore floating point state. + l := len(c.sigFPState) + if l > 0 { + c.aarch64FPState = c.sigFPState[l-1] + // NOTE(cl/133042258): State save requires that any slice + // elements from '[len:cap]' to be zero value. + c.sigFPState[l-1] = nil + c.sigFPState = c.sigFPState[0 : l-1] + } else { + // This might happen if sigreturn(2) calls are unbalanced with + // respect to signal handler entries. This is not expected so + // don't bother to do anything fancy with the floating point + // state. + log.Warningf("sigreturn unable to restore application fpstate") + } + return uc.Sigset, uc.Stack, nil } diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 4a32821bd..7f2181216 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -21,6 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" @@ -835,6 +837,9 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf if d.isSynthetic() { return nil, syserror.ENXIO } + if d.fs.iopts.OpenSocketsByConnecting { + return d.connectSocketLocked(ctx, opts) + } case linux.S_IFIFO: if d.isSynthetic() { return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags) @@ -843,10 +848,28 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf return d.openSpecialFileLocked(ctx, mnt, opts) } +func (d *dentry) connectSocketLocked(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { + if opts.Flags&linux.O_DIRECT != 0 { + return nil, syserror.EINVAL + } + fdObj, err := d.file.connect(ctx, p9.AnonymousSocket) + if err != nil { + return nil, err + } + fd, err := host.NewFD(ctx, kernel.KernelFromContext(ctx).HostMount(), fdObj.FD(), &host.NewFDOptions{ + HaveFlags: true, + Flags: opts.Flags, + }) + if err != nil { + fdObj.Close() + return nil, err + } + fdObj.Release() + return fd, nil +} + func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { ats := vfs.AccessTypesForOpenFlags(opts) - // Treat as a special file. This is done for non-synthetic pipes as well as - // regular files when d.fs.opts.regularFilesUseSpecialFileFD is true. if opts.Flags&linux.O_DIRECT != 0 { return nil, syserror.EINVAL } @@ -854,10 +877,15 @@ func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts if err != nil { return nil, err } + seekable := d.fileType() == linux.S_IFREG fd := &specialFileFD{ - handle: h, + handle: h, + seekable: seekable, } - if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ + DenyPRead: !seekable, + DenyPWrite: !seekable, + }); err != nil { h.close(ctx) return nil, err } @@ -888,7 +916,11 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } creds := rp.Credentials() name := rp.Component() - fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, (p9.OpenFlags)(opts.Flags), (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + // Filter file creation flags and O_LARGEFILE out; the create RPC already + // has the semantics of O_CREAT|O_EXCL, while some servers will choke on + // O_LARGEFILE. + createFlags := p9.OpenFlags(opts.Flags &^ (linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC | linux.O_LARGEFILE)) + fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) if err != nil { dirfile.close(ctx) return nil, err @@ -896,7 +928,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving // Then we need to walk to the file we just created to get a non-open fid // representing it, and to get its metadata. This must use d.file since, as // explained above, dirfile was invalidated by dirfile.Create(). - walkQID, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name) + _, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name) if err != nil { openFile.close(ctx) if fdobj != nil { @@ -904,17 +936,6 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } return nil, err } - // Sanity-check that we walked to the file we created. - if createQID.Path != walkQID.Path { - // Probably due to concurrent remote filesystem mutation? - ctx.Warningf("gofer.dentry.createAndOpenChildLocked: created file has QID %v before walk, QID %v after (interop=%v)", createQID, walkQID, d.fs.opts.interop) - nonOpenFile.close(ctx) - openFile.close(ctx) - if fdobj != nil { - fdobj.Close() - } - return nil, syserror.EAGAIN - } // Construct the new dentry. child, err := d.fs.newDentry(ctx, nonOpenFile, createQID, attrMask, &attr) @@ -960,16 +981,21 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } childVFSFD = &fd.vfsfd } else { + seekable := child.fileType() == linux.S_IFREG fd := &specialFileFD{ handle: handle{ file: openFile, fd: -1, }, + seekable: seekable, } if fdobj != nil { fd.handle.fd = int32(fdobj.Release()) } - if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{ + DenyPRead: !seekable, + DenyPWrite: !seekable, + }); err != nil { fd.handle.close(ctx) return nil, err } diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index e68e37ebc..1da8d5d82 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -221,6 +221,10 @@ type InternalFilesystemOptions struct { // which servers can handle only a single client and report failure if that // client disconnects. LeakConnection bool + + // If OpenSocketsByConnecting is true, silently translate attempts to open + // files identifying as sockets to connect RPCs. + OpenSocketsByConnecting bool } // Name implements vfs.FilesystemType.Name. diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index 507e0e276..a464e6a94 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -33,13 +33,14 @@ import ( type specialFileFD struct { fileDescription - // handle is immutable. + // handle is used for file I/O. handle is immutable. handle handle - // off is the file offset. off is protected by mu. (POSIX 2.9.7 only - // requires operations using the file offset to be atomic for regular files - // and symlinks; however, since specialFileFD may be used for regular - // files, we apply this atomicity unconditionally.) + // seekable is true if this file description represents a file for which + // file offset is significant, i.e. a regular file. seekable is immutable. + seekable bool + + // If seekable is true, off is the file offset. off is protected by mu. mu sync.Mutex off int64 } @@ -63,7 +64,7 @@ func (fd *specialFileFD) OnClose(ctx context.Context) error { // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - if offset < 0 { + if fd.seekable && offset < 0 { return 0, syserror.EINVAL } if opts.Flags != 0 { @@ -91,6 +92,10 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs // Read implements vfs.FileDescriptionImpl.Read. func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + if !fd.seekable { + return fd.PRead(ctx, dst, -1, opts) + } + fd.mu.Lock() n, err := fd.PRead(ctx, dst, fd.off, opts) fd.off += n @@ -100,14 +105,14 @@ func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts // PWrite implements vfs.FileDescriptionImpl.PWrite. func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - if offset < 0 { + if fd.seekable && offset < 0 { return 0, syserror.EINVAL } if opts.Flags != 0 { return 0, syserror.EOPNOTSUPP } - if fd.dentry().fileType() == linux.S_IFREG { + if fd.seekable { limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes()) if err != nil { return 0, err @@ -130,6 +135,10 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off // Write implements vfs.FileDescriptionImpl.Write. func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + if !fd.seekable { + return fd.PWrite(ctx, src, -1, opts) + } + fd.mu.Lock() n, err := fd.PWrite(ctx, src, fd.off, opts) fd.off += n @@ -139,6 +148,9 @@ func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts // Seek implements vfs.FileDescriptionImpl.Seek. func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + if !fd.seekable { + return 0, syserror.ESPIPE + } fd.mu.Lock() defer fd.mu.Unlock() switch whence { diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 36bceeaa4..8caf55a1b 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -40,8 +40,20 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) -// ImportFD sets up and returns a vfs.FileDescription from a donated fd. -func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) { +// NewFDOptions contains options to NewFD. +type NewFDOptions struct { + // If IsTTY is true, the file descriptor is a TTY. + IsTTY bool + + // If HaveFlags is true, use Flags for the new file description. Otherwise, + // the new file description will inherit flags from hostFD. + HaveFlags bool + Flags uint32 +} + +// NewFD returns a vfs.FileDescription representing the given host file +// descriptor. mnt must be Kernel.HostMount(). +func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) (*vfs.FileDescription, error) { fs, ok := mnt.Filesystem().Impl().(*filesystem) if !ok { return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl()) @@ -53,10 +65,14 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs return nil, err } - // Get flags for the imported FD. - flags, err := unix.FcntlInt(uintptr(hostFD), syscall.F_GETFL, 0) - if err != nil { - return nil, err + flags := opts.Flags + if !opts.HaveFlags { + // Get flags for the imported FD. + flagsInt, err := unix.FcntlInt(uintptr(hostFD), syscall.F_GETFL, 0) + if err != nil { + return nil, err + } + flags = uint32(flagsInt) } fileMode := linux.FileMode(s.Mode) @@ -65,13 +81,13 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs // Determine if hostFD is seekable. If not, this syscall will return ESPIPE // (see fs/read_write.c:llseek), e.g. for pipes, sockets, and some character // devices. - _, err = unix.Seek(hostFD, 0, linux.SEEK_CUR) + _, err := unix.Seek(hostFD, 0, linux.SEEK_CUR) seekable := err != syserror.ESPIPE i := &inode{ hostFD: hostFD, seekable: seekable, - isTTY: isTTY, + isTTY: opts.IsTTY, canMap: canMap(uint32(fileType)), wouldBlock: wouldBlock(uint32(fileType)), ino: fs.NextIno(), @@ -101,7 +117,14 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs // i.open will take a reference on d. defer d.DecRef() - return i.open(ctx, d.VFSDentry(), mnt, uint32(flags)) + return i.open(ctx, d.VFSDentry(), mnt, flags) +} + +// ImportFD sets up and returns a vfs.FileDescription from a donated fd. +func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) { + return NewFD(ctx, mnt, hostFD, &NewFDOptions{ + IsTTY: isTTY, + }) } // filesystemType implements vfs.FilesystemType. diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 6129fb83d..333e0042e 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -18,6 +18,7 @@ go_library( ], deps = [ "//pkg/abi/linux", + "//pkg/amutex", "//pkg/binary", "//pkg/context", "//pkg/log", diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 81053d8ef..9d032f052 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -34,6 +34,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/amutex" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" @@ -553,11 +554,9 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO } if resCh != nil { - t := kernel.TaskFromContext(ctx) - if err := t.Block(resCh); err != nil { - return 0, syserr.FromError(err).ToError() + if err := amutex.Block(ctx, resCh); err != nil { + return 0, err } - n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{}) } @@ -626,11 +625,9 @@ func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader } if resCh != nil { - t := kernel.TaskFromContext(ctx) - if err := t.Block(resCh); err != nil { - return 0, syserr.FromError(err).ToError() + if err := amutex.Block(ctx, resCh); err != nil { + return 0, err } - n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{ Atomic: true, // See above. }) diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index 191970d41..fcd8013c0 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -16,6 +16,7 @@ package netstack import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/amutex" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" @@ -89,11 +90,6 @@ func (s *SocketVFS2) EventUnregister(e *waiter.Entry) { s.socketOpsCommon.EventUnregister(e) } -// PRead implements vfs.FileDescriptionImpl. -func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.ESPIPE -} - // Read implements vfs.FileDescriptionImpl. func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { // All flags other than RWF_NOWAIT should be ignored. @@ -115,11 +111,6 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs. return int64(n), nil } -// PWrite implements vfs.FileDescriptionImpl. -func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.ESPIPE -} - // Write implements vfs.FileDescriptionImpl. func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { // All flags other than RWF_NOWAIT should be ignored. @@ -135,11 +126,9 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs } if resCh != nil { - t := kernel.TaskFromContext(ctx) - if err := t.Block(resCh); err != nil { - return 0, syserr.FromError(err).ToError() + if err := amutex.Block(ctx, resCh); err != nil { + return 0, err } - n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{}) } diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go index be9fec3b3..54432194d 100644 --- a/pkg/tcpip/link/qdisc/fifo/endpoint.go +++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go @@ -163,7 +163,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne // WritePackets implements stack.LinkEndpoint.WritePackets. // -// Being a batch API each packet in pkts should have the following fields +// Being a batch API, each packet in pkts should have the following fields // populated: // - pkt.EgressRoute // - pkt.GSOOptions |