summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fs
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fs')
-rw-r--r--pkg/sentry/fs/file.go14
-rw-r--r--pkg/sentry/fs/inode.go19
-rw-r--r--pkg/sentry/fs/inode_overlay.go6
-rw-r--r--pkg/sentry/fs/proc/BUILD1
-rw-r--r--pkg/sentry/fs/proc/net.go169
-rw-r--r--pkg/sentry/fs/splice.go2
-rw-r--r--pkg/sentry/fs/tmpfs/fs.go3
7 files changed, 190 insertions, 24 deletions
diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go
index 55ffe6c0c..8e1f5674d 100644
--- a/pkg/sentry/fs/file.go
+++ b/pkg/sentry/fs/file.go
@@ -310,9 +310,11 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
return 0, syserror.ErrInterrupted
}
+ unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
// Handle append mode.
if f.Flags().Append {
if err := f.offsetForAppend(ctx, &f.offset); err != nil {
+ unlockAppendMu()
f.mu.Unlock()
return 0, err
}
@@ -322,6 +324,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
limit, ok := f.checkLimit(ctx, f.offset)
switch {
case ok && limit == 0:
+ unlockAppendMu()
f.mu.Unlock()
return 0, syserror.ErrExceedsFileSizeLimit
case ok:
@@ -333,6 +336,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
if n >= 0 && !f.flags.NonSeekable {
atomic.StoreInt64(&f.offset, f.offset+n)
}
+ unlockAppendMu()
f.mu.Unlock()
return n, err
}
@@ -348,13 +352,11 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
// However, on Linux, if a file is opened with O_APPEND, pwrite()
// appends data to the end of the file, regardless of the value of
// offset."
+ unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
+ defer unlockAppendMu()
+
if f.Flags().Append {
- if !f.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
- }
- defer f.mu.Unlock()
if err := f.offsetForAppend(ctx, &offset); err != nil {
- f.mu.Unlock()
return 0, err
}
}
@@ -373,7 +375,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
// offsetForAppend sets the given offset to the end of the file.
//
-// Precondition: the underlying file mutex should be held.
+// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing.
func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
uattr, err := f.Dirent.Inode.UnstableAttr(ctx)
if err != nil {
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index a889586aa..e4aae1135 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -15,6 +15,8 @@
package fs
import (
+ "sync"
+
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/metric"
@@ -55,6 +57,12 @@ type Inode struct {
// overlay is the overlay entry for this Inode.
overlay *overlayEntry
+
+ // appendMu is used to synchronize write operations into files which
+ // have been opened with O_APPEND. Operations which change a file size
+ // have to take this lock for read. Write operations to files with
+ // O_APPEND have to take this lock for write.
+ appendMu sync.RWMutex `state:"nosave"`
}
// LockCtx is an Inode's lock context and contains different personalities of locks; both
@@ -337,6 +345,8 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error {
if i.overlay != nil {
return overlayTruncate(ctx, i.overlay, d, size)
}
+ i.appendMu.RLock()
+ defer i.appendMu.RUnlock()
return i.InodeOperations.Truncate(ctx, i, size)
}
@@ -438,3 +448,12 @@ func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool {
}
return creds.HasCapability(cp)
}
+
+func (i *Inode) lockAppendMu(appendMode bool) func() {
+ if appendMode {
+ i.appendMu.Lock()
+ return i.appendMu.Unlock
+ }
+ i.appendMu.RLock()
+ return i.appendMu.RUnlock
+}
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 57b8b14e3..920d86042 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -537,12 +537,6 @@ func overlayCheck(ctx context.Context, o *overlayEntry, p PermMask) error {
if o.upper != nil {
err = o.upper.check(ctx, p)
} else {
- if p.Write {
- // Since writes will be redirected to the upper filesystem, the lower
- // filesystem need not be writable, but must be readable for copy-up.
- p.Write = false
- p.Read = true
- }
err = o.lower.check(ctx, p)
}
o.copyMu.RUnlock()
diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD
index b70c583f3..da41a10ab 100644
--- a/pkg/sentry/fs/proc/BUILD
+++ b/pkg/sentry/fs/proc/BUILD
@@ -31,6 +31,7 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/binary",
"//pkg/log",
"//pkg/sentry/context",
"//pkg/sentry/fs",
diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go
index caa1a5c4d..37694620c 100644
--- a/pkg/sentry/fs/proc/net.go
+++ b/pkg/sentry/fs/proc/net.go
@@ -20,6 +20,7 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -55,9 +56,8 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo
"psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))),
"ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function")),
"route": newStaticProcInode(ctx, msrc, []byte("Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT")),
- "tcp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode")),
-
- "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")),
+ "tcp": seqfile.NewSeqFileInode(ctx, &netTCP{k: k}, msrc),
+ "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")),
"unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc),
}
@@ -210,10 +210,6 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
}
var buf bytes.Buffer
- // Header
- fmt.Fprintf(&buf, "Num RefCount Protocol Flags Type St Inode Path\n")
-
- // Entries
for _, se := range n.k.ListSockets() {
s := se.Sock.Get()
if s == nil {
@@ -222,6 +218,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
}
sfile := s.(*fs.File)
if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX {
+ s.DecRef()
// Not a unix socket.
continue
}
@@ -281,12 +278,160 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
}
fmt.Fprintf(&buf, "\n")
- sfile.DecRef()
+ s.DecRef()
+ }
+
+ data := []seqfile.SeqData{
+ {
+ Buf: []byte("Num RefCount Protocol Flags Type St Inode Path\n"),
+ Handle: n,
+ },
+ {
+ Buf: buf.Bytes(),
+ Handle: n,
+ },
+ }
+ return data, 0
+}
+
+// netTCP implements seqfile.SeqSource for /proc/net/tcp.
+//
+// +stateify savable
+type netTCP struct {
+ k *kernel.Kernel
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
+func (*netTCP) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ t := kernel.TaskFromContext(ctx)
+
+ if h != nil {
+ return nil, 0
+ }
+
+ var buf bytes.Buffer
+ for _, se := range n.k.ListSockets() {
+ s := se.Sock.Get()
+ if s == nil {
+ log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock)
+ continue
+ }
+ sfile := s.(*fs.File)
+ sops, ok := sfile.FileOperations.(socket.Socket)
+ if !ok {
+ panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
+ }
+ if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) {
+ s.DecRef()
+ // Not tcp4 sockets.
+ continue
+ }
+
+ // Linux's documentation for the fields below can be found at
+ // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
+ // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
+ // Note that the header doesn't contain labels for all the fields.
+
+ // Field: sl; entry number.
+ fmt.Fprintf(&buf, "%4d: ", se.ID)
+
+ portBuf := make([]byte, 2)
+
+ // Field: local_adddress.
+ var localAddr linux.SockAddrInet
+ if local, _, err := sops.GetSockName(t); err == nil {
+ localAddr = local.(linux.SockAddrInet)
+ }
+ binary.LittleEndian.PutUint16(portBuf, localAddr.Port)
+ fmt.Fprintf(&buf, "%08X:%04X ",
+ binary.LittleEndian.Uint32(localAddr.Addr[:]),
+ portBuf)
+
+ // Field: rem_address.
+ var remoteAddr linux.SockAddrInet
+ if remote, _, err := sops.GetPeerName(t); err == nil {
+ remoteAddr = remote.(linux.SockAddrInet)
+ }
+ binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port)
+ fmt.Fprintf(&buf, "%08X:%04X ",
+ binary.LittleEndian.Uint32(remoteAddr.Addr[:]),
+ portBuf)
+
+ // Field: state; socket state.
+ fmt.Fprintf(&buf, "%02X ", sops.State())
+
+ // Field: tx_queue, rx_queue; number of packets in the transmit and
+ // receive queue. Unimplemented.
+ fmt.Fprintf(&buf, "%08X:%08X ", 0, 0)
+
+ // Field: tr, tm->when; timer active state and number of jiffies
+ // until timer expires. Unimplemented.
+ fmt.Fprintf(&buf, "%02X:%08X ", 0, 0)
+
+ // Field: retrnsmt; number of unrecovered RTO timeouts.
+ // Unimplemented.
+ fmt.Fprintf(&buf, "%08X ", 0)
+
+ // Field: uid.
+ uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
+ if err != nil {
+ log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
+ fmt.Fprintf(&buf, "%5d ", 0)
+ } else {
+ fmt.Fprintf(&buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()))
+ }
+
+ // Field: timeout; number of unanswered 0-window probes.
+ // Unimplemented.
+ fmt.Fprintf(&buf, "%8d ", 0)
+
+ // Field: inode.
+ fmt.Fprintf(&buf, "%8d ", sfile.InodeID())
+
+ // Field: refcount. Don't count the ref we obtain while deferencing
+ // the weakref to this socket.
+ fmt.Fprintf(&buf, "%d ", sfile.ReadRefs()-1)
+
+ // Field: Socket struct address. Redacted due to the same reason as
+ // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
+ fmt.Fprintf(&buf, "%#016p ", (*socket.Socket)(nil))
+
+ // Field: retransmit timeout. Unimplemented.
+ fmt.Fprintf(&buf, "%d ", 0)
+
+ // Field: predicted tick of soft clock (delayed ACK control data).
+ // Unimplemented.
+ fmt.Fprintf(&buf, "%d ", 0)
+
+ // Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
+ fmt.Fprintf(&buf, "%d ", 0)
+
+ // Field: sending congestion window, Unimplemented.
+ fmt.Fprintf(&buf, "%d ", 0)
+
+ // Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
+ // Unimplemented, report as large threshold.
+ fmt.Fprintf(&buf, "%d", -1)
+
+ fmt.Fprintf(&buf, "\n")
+
+ s.DecRef()
}
- data := []seqfile.SeqData{{
- Buf: buf.Bytes(),
- Handle: (*netUnix)(nil),
- }}
+ data := []seqfile.SeqData{
+ {
+ Buf: []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n"),
+ Handle: n,
+ },
+ {
+ Buf: buf.Bytes(),
+ Handle: n,
+ },
+ }
return data, 0
}
diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go
index 978dc679b..eed1c2854 100644
--- a/pkg/sentry/fs/splice.go
+++ b/pkg/sentry/fs/splice.go
@@ -88,6 +88,8 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
// Check append-only mode and the limit.
if !dstPipe {
+ unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append)
+ defer unlock()
if dst.Flags().Append {
if opts.DstOffset {
// We need to acquire the lock.
diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go
index a5fcdf969..881dd89b0 100644
--- a/pkg/sentry/fs/tmpfs/fs.go
+++ b/pkg/sentry/fs/tmpfs/fs.go
@@ -133,6 +133,9 @@ func (f *Filesystem) Mount(ctx context.Context, device string, flags fs.MountSou
}
// Construct a mount which will follow the cache options provided.
+ //
+ // TODO(gvisor.dev/issue/179): There should be no reason to disable
+ // caching once bind mounts are properly supported.
var msrc *fs.MountSource
switch options[cacheKey] {
case "", cacheAll: