diff options
Diffstat (limited to 'pkg/sentry/fs')
-rw-r--r-- | pkg/sentry/fs/dev/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fs/dev/net_tun.go | 52 | ||||
-rw-r--r-- | pkg/sentry/fs/fsutil/file_range_set.go | 21 | ||||
-rw-r--r-- | pkg/sentry/fs/fsutil/inode_cached.go | 17 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/task.go | 44 | ||||
-rw-r--r-- | pkg/sentry/fs/tmpfs/inode_file.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/user/path.go | 1 | ||||
-rw-r--r-- | pkg/sentry/fs/user/user.go | 1 |
8 files changed, 103 insertions, 36 deletions
diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index 9379a4d7b..6b7b451b8 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -34,6 +34,7 @@ go_library( "//pkg/sentry/socket/netstack", "//pkg/syserror", "//pkg/tcpip/link/tun", + "//pkg/tcpip/network/arp", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go index 5f8c9b5a2..19ffdec47 100644 --- a/pkg/sentry/fs/dev/net_tun.go +++ b/pkg/sentry/fs/dev/net_tun.go @@ -15,6 +15,8 @@ package dev import ( + "fmt" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -25,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/link/tun" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -60,7 +63,7 @@ func newNetTunDevice(ctx context.Context, owner fs.FileOwner, mode linux.FileMod } // GetFile implements fs.InodeOperations.GetFile. -func (iops *netTunInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { +func (*netTunInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { return fs.NewFile(ctx, d, flags, &netTunFileOperations{}), nil } @@ -80,12 +83,12 @@ type netTunFileOperations struct { var _ fs.FileOperations = (*netTunFileOperations)(nil) // Release implements fs.FileOperations.Release. -func (fops *netTunFileOperations) Release(ctx context.Context) { - fops.device.Release(ctx) +func (n *netTunFileOperations) Release(ctx context.Context) { + n.device.Release(ctx) } // Ioctl implements fs.FileOperations.Ioctl. -func (fops *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (n *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { request := args[1].Uint() data := args[2].Pointer() @@ -109,16 +112,25 @@ func (fops *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io u return 0, err } flags := usermem.ByteOrder.Uint16(req.Data[:]) - return 0, fops.device.SetIff(stack.Stack, req.Name(), flags) + created, err := n.device.SetIff(stack.Stack, req.Name(), flags) + if err == nil && created { + // Always start with an ARP address for interfaces so they can handle ARP + // packets. + nicID := n.device.NICID() + if err := stack.Stack.AddAddress(nicID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil { + panic(fmt.Sprintf("failed to add ARP address after creating new TUN/TAP interface with ID = %d", nicID)) + } + } + return 0, err case linux.TUNGETIFF: var req linux.IFReq - copy(req.IFName[:], fops.device.Name()) + copy(req.IFName[:], n.device.Name()) // Linux adds IFF_NOFILTER (the same value as IFF_NO_PI unfortunately) when // there is no sk_filter. See __tun_chr_ioctl() in net/drivers/tun.c. - flags := fops.device.Flags() | linux.IFF_NOFILTER + flags := n.device.Flags() | linux.IFF_NOFILTER usermem.ByteOrder.PutUint16(req.Data[:], flags) _, err := req.CopyOut(t, data) @@ -130,41 +142,41 @@ func (fops *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io u } // Write implements fs.FileOperations.Write. -func (fops *netTunFileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { +func (n *netTunFileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { data := make([]byte, src.NumBytes()) if _, err := src.CopyIn(ctx, data); err != nil { return 0, err } - return fops.device.Write(data) + return n.device.Write(data) } // Read implements fs.FileOperations.Read. -func (fops *netTunFileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { - data, err := fops.device.Read() +func (n *netTunFileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + data, err := n.device.Read() if err != nil { return 0, err } - n, err := dst.CopyOut(ctx, data) - if n > 0 && n < len(data) { + bytesCopied, err := dst.CopyOut(ctx, data) + if bytesCopied > 0 && bytesCopied < len(data) { // Not an error for partial copying. Packet truncated. err = nil } - return int64(n), err + return int64(bytesCopied), err } // Readiness implements watier.Waitable.Readiness. -func (fops *netTunFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { - return fops.device.Readiness(mask) +func (n *netTunFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { + return n.device.Readiness(mask) } // EventRegister implements watier.Waitable.EventRegister. -func (fops *netTunFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { - fops.device.EventRegister(e, mask) +func (n *netTunFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + n.device.EventRegister(e, mask) } // EventUnregister implements watier.Waitable.EventUnregister. -func (fops *netTunFileOperations) EventUnregister(e *waiter.Entry) { - fops.device.EventUnregister(e) +func (n *netTunFileOperations) EventUnregister(e *waiter.Entry) { + n.device.EventUnregister(e) } // isNetTunSupported returns whether /dev/net/tun device is supported for s. diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go index 9197aeb88..1dc409d38 100644 --- a/pkg/sentry/fs/fsutil/file_range_set.go +++ b/pkg/sentry/fs/fsutil/file_range_set.go @@ -84,7 +84,8 @@ func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) memmap.FileRan // returns a successful partial read, Fill will call it repeatedly until all // bytes have been read.) EOF is handled consistently with the requirements of // mmap(2): bytes after EOF on the same page are zeroed; pages after EOF are -// invalid. +// invalid. fileSize is an upper bound on the file's size; bytes after fileSize +// will be zeroed without calling readAt. // // Fill may read offsets outside of required, but will never read offsets // outside of optional. It returns a non-nil error if any error occurs, even @@ -94,7 +95,7 @@ func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) memmap.FileRan // * required.Length() > 0. // * optional.IsSupersetOf(required). // * required and optional must be page-aligned. -func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.MappableRange, mf *pgalloc.MemoryFile, kind usage.MemoryKind, readAt func(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error)) error { +func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.MappableRange, fileSize uint64, mf *pgalloc.MemoryFile, kind usage.MemoryKind, readAt func(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error)) error { gap := frs.LowerBoundGap(required.Start) for gap.Ok() && gap.Start() < required.End { if gap.Range().Length() == 0 { @@ -107,7 +108,21 @@ func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.Map fr, err := mf.AllocateAndFill(gr.Length(), kind, safemem.ReaderFunc(func(dsts safemem.BlockSeq) (uint64, error) { var done uint64 for !dsts.IsEmpty() { - n, err := readAt(ctx, dsts, gr.Start+done) + n, err := func() (uint64, error) { + off := gr.Start + done + if off >= fileSize { + return 0, io.EOF + } + if off+dsts.NumBytes() > fileSize { + rd := fileSize - off + n, err := readAt(ctx, dsts.TakeFirst64(rd), off) + if n == rd && err == nil { + return n, io.EOF + } + return n, err + } + return readAt(ctx, dsts, off) + }() done += n dsts = dsts.DropFirst64(n) if err != nil { diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 9eb6f522e..82eda3e43 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/time" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -444,7 +443,7 @@ func (c *CachingInodeOperations) TouchAccessTime(ctx context.Context, inode *fs. // time. // // Preconditions: c.attrMu is locked for writing. -func (c *CachingInodeOperations) touchAccessTimeLocked(now time.Time) { +func (c *CachingInodeOperations) touchAccessTimeLocked(now ktime.Time) { c.attr.AccessTime = now c.dirtyAttr.AccessTime = true } @@ -461,7 +460,7 @@ func (c *CachingInodeOperations) TouchModificationAndStatusChangeTime(ctx contex // and status change times in-place to the current time. // // Preconditions: c.attrMu is locked for writing. -func (c *CachingInodeOperations) touchModificationAndStatusChangeTimeLocked(now time.Time) { +func (c *CachingInodeOperations) touchModificationAndStatusChangeTimeLocked(now ktime.Time) { c.attr.ModificationTime = now c.dirtyAttr.ModificationTime = true c.attr.StatusChangeTime = now @@ -480,7 +479,7 @@ func (c *CachingInodeOperations) TouchStatusChangeTime(ctx context.Context) { // in-place to the current time. // // Preconditions: c.attrMu is locked for writing. -func (c *CachingInodeOperations) touchStatusChangeTimeLocked(now time.Time) { +func (c *CachingInodeOperations) touchStatusChangeTimeLocked(now ktime.Time) { c.attr.StatusChangeTime = now c.dirtyAttr.StatusChangeTime = true } @@ -645,7 +644,7 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { End: fs.OffsetPageEnd(int64(gapMR.End)), } optMR := gap.Range() - err := rw.c.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mem, usage.PageCache, rw.c.backingFile.ReadToBlocksAt) + err := rw.c.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), uint64(rw.c.attr.Size), mem, usage.PageCache, rw.c.backingFile.ReadToBlocksAt) mem.MarkEvictable(rw.c, pgalloc.EvictableRange{optMR.Start, optMR.End}) seg, gap = rw.c.cache.Find(uint64(rw.offset)) if !seg.Ok() { @@ -672,9 +671,6 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { // Continue. seg, gap = gap.NextSegment(), FileRangeGapIterator{} } - - default: - break } } unlock() @@ -768,9 +764,6 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error // Continue. seg, gap = gap.NextSegment(), FileRangeGapIterator{} - - default: - break } } rw.maybeGrowFile() @@ -877,7 +870,7 @@ func (c *CachingInodeOperations) Translate(ctx context.Context, required, option } mf := c.mfp.MemoryFile() - cerr := c.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, c.backingFile.ReadToBlocksAt) + cerr := c.cache.Fill(ctx, required, maxFillRange(required, optional), uint64(c.attr.Size), mf, usage.PageCache, c.backingFile.ReadToBlocksAt) var ts []memmap.Translation var translatedEnd uint64 diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 103bfc600..22d658acf 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -84,6 +84,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bo "auxv": newAuxvec(t, msrc), "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), "comm": newComm(t, msrc), + "cwd": newCwd(t, msrc), "environ": newExecArgInode(t, msrc, environExecArg), "exe": newExe(t, msrc), "fd": newFdDir(t, msrc), @@ -300,6 +301,49 @@ func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { return exec.PathnameWithDeleted(ctx), nil } +// cwd is an fs.InodeOperations symlink for the /proc/PID/cwd file. +// +// +stateify savable +type cwd struct { + ramfs.Symlink + + t *kernel.Task +} + +func newCwd(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + cwdSymlink := &cwd{ + Symlink: *ramfs.NewSymlink(t, fs.RootOwner, ""), + t: t, + } + return newProcInode(t, cwdSymlink, msrc, fs.Symlink, t) +} + +// Readlink implements fs.InodeOperations. +func (e *cwd) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { + if !kernel.ContextCanTrace(ctx, e.t, false) { + return "", syserror.EACCES + } + if err := checkTaskState(e.t); err != nil { + return "", err + } + cwd := e.t.FSContext().WorkingDirectory() + if cwd == nil { + // It could have raced with process deletion. + return "", syserror.ESRCH + } + defer cwd.DecRef(ctx) + + root := fs.RootFromContext(ctx) + if root == nil { + // It could have raced with process deletion. + return "", syserror.ESRCH + } + defer root.DecRef(ctx) + + name, _ := cwd.FullName(root) + return name, nil +} + // namespaceSymlink represents a symlink in the namespacefs, such as the files // in /proc/<pid>/ns. // diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index 1dc75291d..fc0498f17 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -613,7 +613,7 @@ func (f *fileInodeOperations) Translate(ctx context.Context, required, optional } mf := f.kernel.MemoryFile() - cerr := f.data.Fill(ctx, required, optional, mf, f.memUsage, func(_ context.Context, dsts safemem.BlockSeq, _ uint64) (uint64, error) { + cerr := f.data.Fill(ctx, required, optional, uint64(f.attr.Size), mf, f.memUsage, func(_ context.Context, dsts safemem.BlockSeq, _ uint64) (uint64, error) { // Newly-allocated pages are zeroed, so we don't need to do anything. return dsts.NumBytes(), nil }) diff --git a/pkg/sentry/fs/user/path.go b/pkg/sentry/fs/user/path.go index 2f5a43b84..124bc95ed 100644 --- a/pkg/sentry/fs/user/path.go +++ b/pkg/sentry/fs/user/path.go @@ -121,6 +121,7 @@ func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name s func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, paths []string, name string) (string, error) { root := mns.Root() + root.IncRef() defer root.DecRef(ctx) for _, p := range paths { if !path.IsAbs(p) { diff --git a/pkg/sentry/fs/user/user.go b/pkg/sentry/fs/user/user.go index 936fd3932..1f8684dc6 100644 --- a/pkg/sentry/fs/user/user.go +++ b/pkg/sentry/fs/user/user.go @@ -105,6 +105,7 @@ func getExecUserHomeVFS2(ctx context.Context, mns *vfs.MountNamespace, uid auth. const defaultHome = "/" root := mns.Root() + root.IncRef() defer root.DecRef(ctx) creds := auth.CredentialsFromContext(ctx) |