diff options
Diffstat (limited to 'pkg/sentry/fs')
-rw-r--r-- | pkg/sentry/fs/file.go | 14 | ||||
-rw-r--r-- | pkg/sentry/fs/inode.go | 19 | ||||
-rw-r--r-- | pkg/sentry/fs/inode_overlay.go | 6 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/net.go | 169 | ||||
-rw-r--r-- | pkg/sentry/fs/splice.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/tmpfs/fs.go | 3 |
7 files changed, 190 insertions, 24 deletions
diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index 55ffe6c0c..8e1f5674d 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -310,9 +310,11 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error return 0, syserror.ErrInterrupted } + unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) // Handle append mode. if f.Flags().Append { if err := f.offsetForAppend(ctx, &f.offset); err != nil { + unlockAppendMu() f.mu.Unlock() return 0, err } @@ -322,6 +324,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error limit, ok := f.checkLimit(ctx, f.offset) switch { case ok && limit == 0: + unlockAppendMu() f.mu.Unlock() return 0, syserror.ErrExceedsFileSizeLimit case ok: @@ -333,6 +336,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error if n >= 0 && !f.flags.NonSeekable { atomic.StoreInt64(&f.offset, f.offset+n) } + unlockAppendMu() f.mu.Unlock() return n, err } @@ -348,13 +352,11 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64 // However, on Linux, if a file is opened with O_APPEND, pwrite() // appends data to the end of the file, regardless of the value of // offset." + unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) + defer unlockAppendMu() + if f.Flags().Append { - if !f.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted - } - defer f.mu.Unlock() if err := f.offsetForAppend(ctx, &offset); err != nil { - f.mu.Unlock() return 0, err } } @@ -373,7 +375,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64 // offsetForAppend sets the given offset to the end of the file. // -// Precondition: the underlying file mutex should be held. +// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing. func (f *File) offsetForAppend(ctx context.Context, offset *int64) error { uattr, err := f.Dirent.Inode.UnstableAttr(ctx) if err != nil { diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index a889586aa..e4aae1135 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -15,6 +15,8 @@ package fs import ( + "sync" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" @@ -55,6 +57,12 @@ type Inode struct { // overlay is the overlay entry for this Inode. overlay *overlayEntry + + // appendMu is used to synchronize write operations into files which + // have been opened with O_APPEND. Operations which change a file size + // have to take this lock for read. Write operations to files with + // O_APPEND have to take this lock for write. + appendMu sync.RWMutex `state:"nosave"` } // LockCtx is an Inode's lock context and contains different personalities of locks; both @@ -337,6 +345,8 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error { if i.overlay != nil { return overlayTruncate(ctx, i.overlay, d, size) } + i.appendMu.RLock() + defer i.appendMu.RUnlock() return i.InodeOperations.Truncate(ctx, i, size) } @@ -438,3 +448,12 @@ func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool { } return creds.HasCapability(cp) } + +func (i *Inode) lockAppendMu(appendMode bool) func() { + if appendMode { + i.appendMu.Lock() + return i.appendMu.Unlock + } + i.appendMu.RLock() + return i.appendMu.RUnlock +} diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 57b8b14e3..920d86042 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -537,12 +537,6 @@ func overlayCheck(ctx context.Context, o *overlayEntry, p PermMask) error { if o.upper != nil { err = o.upper.check(ctx, p) } else { - if p.Write { - // Since writes will be redirected to the upper filesystem, the lower - // filesystem need not be writable, but must be readable for copy-up. - p.Write = false - p.Read = true - } err = o.lower.check(ctx, p) } o.copyMu.RUnlock() diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index b70c583f3..da41a10ab 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -31,6 +31,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/binary", "//pkg/log", "//pkg/sentry/context", "//pkg/sentry/fs", diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index caa1a5c4d..37694620c 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -20,6 +20,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -55,9 +56,8 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo "psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))), "ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function")), "route": newStaticProcInode(ctx, msrc, []byte("Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT")), - "tcp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode")), - - "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")), + "tcp": seqfile.NewSeqFileInode(ctx, &netTCP{k: k}, msrc), + "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")), "unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc), } @@ -210,10 +210,6 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s } var buf bytes.Buffer - // Header - fmt.Fprintf(&buf, "Num RefCount Protocol Flags Type St Inode Path\n") - - // Entries for _, se := range n.k.ListSockets() { s := se.Sock.Get() if s == nil { @@ -222,6 +218,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s } sfile := s.(*fs.File) if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { + s.DecRef() // Not a unix socket. continue } @@ -281,12 +278,160 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s } fmt.Fprintf(&buf, "\n") - sfile.DecRef() + s.DecRef() + } + + data := []seqfile.SeqData{ + { + Buf: []byte("Num RefCount Protocol Flags Type St Inode Path\n"), + Handle: n, + }, + { + Buf: buf.Bytes(), + Handle: n, + }, + } + return data, 0 +} + +// netTCP implements seqfile.SeqSource for /proc/net/tcp. +// +// +stateify savable +type netTCP struct { + k *kernel.Kernel +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*netTCP) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + t := kernel.TaskFromContext(ctx) + + if h != nil { + return nil, 0 + } + + var buf bytes.Buffer + for _, se := range n.k.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(socket.Socket) + if !ok { + panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) + } + if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) { + s.DecRef() + // Not tcp4 sockets. + continue + } + + // Linux's documentation for the fields below can be found at + // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. + // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). + // Note that the header doesn't contain labels for all the fields. + + // Field: sl; entry number. + fmt.Fprintf(&buf, "%4d: ", se.ID) + + portBuf := make([]byte, 2) + + // Field: local_adddress. + var localAddr linux.SockAddrInet + if local, _, err := sops.GetSockName(t); err == nil { + localAddr = local.(linux.SockAddrInet) + } + binary.LittleEndian.PutUint16(portBuf, localAddr.Port) + fmt.Fprintf(&buf, "%08X:%04X ", + binary.LittleEndian.Uint32(localAddr.Addr[:]), + portBuf) + + // Field: rem_address. + var remoteAddr linux.SockAddrInet + if remote, _, err := sops.GetPeerName(t); err == nil { + remoteAddr = remote.(linux.SockAddrInet) + } + binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port) + fmt.Fprintf(&buf, "%08X:%04X ", + binary.LittleEndian.Uint32(remoteAddr.Addr[:]), + portBuf) + + // Field: state; socket state. + fmt.Fprintf(&buf, "%02X ", sops.State()) + + // Field: tx_queue, rx_queue; number of packets in the transmit and + // receive queue. Unimplemented. + fmt.Fprintf(&buf, "%08X:%08X ", 0, 0) + + // Field: tr, tm->when; timer active state and number of jiffies + // until timer expires. Unimplemented. + fmt.Fprintf(&buf, "%02X:%08X ", 0, 0) + + // Field: retrnsmt; number of unrecovered RTO timeouts. + // Unimplemented. + fmt.Fprintf(&buf, "%08X ", 0) + + // Field: uid. + uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) + if err != nil { + log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) + fmt.Fprintf(&buf, "%5d ", 0) + } else { + fmt.Fprintf(&buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow())) + } + + // Field: timeout; number of unanswered 0-window probes. + // Unimplemented. + fmt.Fprintf(&buf, "%8d ", 0) + + // Field: inode. + fmt.Fprintf(&buf, "%8d ", sfile.InodeID()) + + // Field: refcount. Don't count the ref we obtain while deferencing + // the weakref to this socket. + fmt.Fprintf(&buf, "%d ", sfile.ReadRefs()-1) + + // Field: Socket struct address. Redacted due to the same reason as + // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. + fmt.Fprintf(&buf, "%#016p ", (*socket.Socket)(nil)) + + // Field: retransmit timeout. Unimplemented. + fmt.Fprintf(&buf, "%d ", 0) + + // Field: predicted tick of soft clock (delayed ACK control data). + // Unimplemented. + fmt.Fprintf(&buf, "%d ", 0) + + // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. + fmt.Fprintf(&buf, "%d ", 0) + + // Field: sending congestion window, Unimplemented. + fmt.Fprintf(&buf, "%d ", 0) + + // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. + // Unimplemented, report as large threshold. + fmt.Fprintf(&buf, "%d", -1) + + fmt.Fprintf(&buf, "\n") + + s.DecRef() } - data := []seqfile.SeqData{{ - Buf: buf.Bytes(), - Handle: (*netUnix)(nil), - }} + data := []seqfile.SeqData{ + { + Buf: []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n"), + Handle: n, + }, + { + Buf: buf.Bytes(), + Handle: n, + }, + } return data, 0 } diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index 978dc679b..eed1c2854 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -88,6 +88,8 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, // Check append-only mode and the limit. if !dstPipe { + unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append) + defer unlock() if dst.Flags().Append { if opts.DstOffset { // We need to acquire the lock. diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go index a5fcdf969..881dd89b0 100644 --- a/pkg/sentry/fs/tmpfs/fs.go +++ b/pkg/sentry/fs/tmpfs/fs.go @@ -133,6 +133,9 @@ func (f *Filesystem) Mount(ctx context.Context, device string, flags fs.MountSou } // Construct a mount which will follow the cache options provided. + // + // TODO(gvisor.dev/issue/179): There should be no reason to disable + // caching once bind mounts are properly supported. var msrc *fs.MountSource switch options[cacheKey] { case "", cacheAll: |