summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl')
-rw-r--r--pkg/sentry/fsimpl/devpts/line_discipline.go4
-rw-r--r--pkg/sentry/fsimpl/devpts/master.go4
-rw-r--r--pkg/sentry/fsimpl/devpts/queue.go4
-rw-r--r--pkg/sentry/fsimpl/devpts/slave.go4
-rw-r--r--pkg/sentry/fsimpl/devpts/terminal.go4
-rw-r--r--pkg/sentry/fsimpl/gofer/BUILD1
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go8
-rw-r--r--pkg/sentry/fsimpl/gofer/pagemath.go31
-rw-r--r--pkg/sentry/fsimpl/gofer/regular_file.go47
-rw-r--r--pkg/sentry/fsimpl/host/BUILD4
-rw-r--r--pkg/sentry/fsimpl/host/host.go132
-rw-r--r--pkg/sentry/fsimpl/host/mmap.go132
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD3
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go46
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go42
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file_test.go138
-rw-r--r--pkg/sentry/fsimpl/tmpfs/stat_test.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go38
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs_test.go156
19 files changed, 504 insertions, 298 deletions
diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go
index e201801d6..f7bc325d1 100644
--- a/pkg/sentry/fsimpl/devpts/line_discipline.go
+++ b/pkg/sentry/fsimpl/devpts/line_discipline.go
@@ -27,8 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
-// LINT.IfChange
-
const (
// canonMaxBytes is the number of bytes that fit into a single line of
// terminal input in canonical mode. This corresponds to N_TTY_BUF_SIZE
@@ -445,5 +443,3 @@ func (l *lineDiscipline) peek(b []byte) int {
}
return size
}
-
-// LINT.ThenChange(../../fs/tty/line_discipline.go)
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 04a292927..7a7ce5d81 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -27,8 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
-// LINT.IfChange
-
// masterInode is the inode for the master end of the Terminal.
type masterInode struct {
kernfs.InodeAttrs
@@ -222,5 +220,3 @@ func maybeEmitUnimplementedEvent(ctx context.Context, cmd uint32) {
unimpl.EmitUnimplementedEvent(ctx)
}
}
-
-// LINT.ThenChange(../../fs/tty/master.go)
diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go
index 29a6be858..dffb4232c 100644
--- a/pkg/sentry/fsimpl/devpts/queue.go
+++ b/pkg/sentry/fsimpl/devpts/queue.go
@@ -25,8 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
-// LINT.IfChange
-
// waitBufMaxBytes is the maximum size of a wait buffer. It is based on
// TTYB_DEFAULT_MEM_LIMIT.
const waitBufMaxBytes = 131072
@@ -236,5 +234,3 @@ func (q *queue) waitBufAppend(b []byte) {
q.waitBuf = append(q.waitBuf, b)
q.waitBufLen += uint64(len(b))
}
-
-// LINT.ThenChange(../../fs/tty/queue.go)
diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go
index 0a98dc896..526cd406c 100644
--- a/pkg/sentry/fsimpl/devpts/slave.go
+++ b/pkg/sentry/fsimpl/devpts/slave.go
@@ -26,8 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
-// LINT.IfChange
-
// slaveInode is the inode for the slave end of the Terminal.
type slaveInode struct {
kernfs.InodeAttrs
@@ -182,5 +180,3 @@ func (sfd *slaveFileDescription) Stat(ctx context.Context, opts vfs.StatOptions)
fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem()
return sfd.inode.Stat(fs, opts)
}
-
-// LINT.ThenChange(../../fs/tty/slave.go)
diff --git a/pkg/sentry/fsimpl/devpts/terminal.go b/pkg/sentry/fsimpl/devpts/terminal.go
index b44e673d8..7d2781c54 100644
--- a/pkg/sentry/fsimpl/devpts/terminal.go
+++ b/pkg/sentry/fsimpl/devpts/terminal.go
@@ -22,8 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-// LINT.IfChanges
-
// Terminal is a pseudoterminal.
//
// +stateify savable
@@ -120,5 +118,3 @@ func (tm *Terminal) tty(isMaster bool) *kernel.TTY {
}
return tm.slaveKTTY
}
-
-// LINT.ThenChange(../../fs/tty/terminal.go)
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
index 5ce82b793..67e916525 100644
--- a/pkg/sentry/fsimpl/gofer/BUILD
+++ b/pkg/sentry/fsimpl/gofer/BUILD
@@ -36,7 +36,6 @@ go_library(
"gofer.go",
"handle.go",
"p9file.go",
- "pagemath.go",
"regular_file.go",
"socket.go",
"special_file.go",
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 353e2cf5b..6295f6b54 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -869,8 +869,8 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
Size: stat.Mask&linux.STATX_SIZE != 0,
ATime: stat.Mask&linux.STATX_ATIME != 0,
MTime: stat.Mask&linux.STATX_MTIME != 0,
- ATimeNotSystemTime: stat.Atime.Nsec != linux.UTIME_NOW,
- MTimeNotSystemTime: stat.Mtime.Nsec != linux.UTIME_NOW,
+ ATimeNotSystemTime: stat.Mask&linux.STATX_ATIME != 0 && stat.Atime.Nsec != linux.UTIME_NOW,
+ MTimeNotSystemTime: stat.Mask&linux.STATX_MTIME != 0 && stat.Mtime.Nsec != linux.UTIME_NOW,
}, p9.SetAttr{
Permissions: p9.FileMode(stat.Mode),
UID: p9.UID(stat.UID),
@@ -928,8 +928,8 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
// so we can't race with Write or another truncate.)
d.dataMu.Unlock()
if d.size < oldSize {
- oldpgend := pageRoundUp(oldSize)
- newpgend := pageRoundUp(d.size)
+ oldpgend, _ := usermem.PageRoundUp(oldSize)
+ newpgend, _ := usermem.PageRoundUp(d.size)
if oldpgend != newpgend {
d.mapsMu.Lock()
d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
diff --git a/pkg/sentry/fsimpl/gofer/pagemath.go b/pkg/sentry/fsimpl/gofer/pagemath.go
deleted file mode 100644
index 847cb0784..000000000
--- a/pkg/sentry/fsimpl/gofer/pagemath.go
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// This are equivalent to usermem.Addr.RoundDown/Up, but without the
-// potentially truncating conversion to usermem.Addr. This is necessary because
-// there is no way to define generic "PageRoundDown/Up" functions in Go.
-
-func pageRoundDown(x uint64) uint64 {
- return x &^ (usermem.PageSize - 1)
-}
-
-func pageRoundUp(x uint64) uint64 {
- return pageRoundDown(x + usermem.PageSize - 1)
-}
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 857f7c74e..0d10cf7ac 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -148,9 +148,9 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
return 0, err
}
// Remove touched pages from the cache.
- pgstart := pageRoundDown(uint64(offset))
- pgend := pageRoundUp(uint64(offset + src.NumBytes()))
- if pgend < pgstart {
+ pgstart := usermem.PageRoundDown(uint64(offset))
+ pgend, ok := usermem.PageRoundUp(uint64(offset + src.NumBytes()))
+ if !ok {
return 0, syserror.EINVAL
}
mr := memmap.MappableRange{pgstart, pgend}
@@ -306,9 +306,10 @@ func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error)
if fillCache {
// Read into the cache, then re-enter the loop to read from the
// cache.
+ gapEnd, _ := usermem.PageRoundUp(gapMR.End)
reqMR := memmap.MappableRange{
- Start: pageRoundDown(gapMR.Start),
- End: pageRoundUp(gapMR.End),
+ Start: usermem.PageRoundDown(gapMR.Start),
+ End: gapEnd,
}
optMR := gap.Range()
err := rw.d.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mf, usage.PageCache, rw.d.handle.readToBlocksAt)
@@ -671,7 +672,7 @@ func (d *dentry) Translate(ctx context.Context, required, optional memmap.Mappab
// Constrain translations to d.size (rounded up) to prevent translation to
// pages that may be concurrently truncated.
- pgend := pageRoundUp(d.size)
+ pgend, _ := usermem.PageRoundUp(d.size)
var beyondEOF bool
if required.End > pgend {
if required.Start >= pgend {
@@ -818,43 +819,15 @@ type dentryPlatformFile struct {
// IncRef implements platform.File.IncRef.
func (d *dentryPlatformFile) IncRef(fr platform.FileRange) {
d.dataMu.Lock()
- seg, gap := d.fdRefs.Find(fr.Start)
- for {
- switch {
- case seg.Ok() && seg.Start() < fr.End:
- seg = d.fdRefs.Isolate(seg, fr)
- seg.SetValue(seg.Value() + 1)
- seg, gap = seg.NextNonEmpty()
- case gap.Ok() && gap.Start() < fr.End:
- newRange := gap.Range().Intersect(fr)
- usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped)
- seg, gap = d.fdRefs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty()
- default:
- d.fdRefs.MergeAdjacent(fr)
- d.dataMu.Unlock()
- return
- }
- }
+ d.fdRefs.IncRefAndAccount(fr)
+ d.dataMu.Unlock()
}
// DecRef implements platform.File.DecRef.
func (d *dentryPlatformFile) DecRef(fr platform.FileRange) {
d.dataMu.Lock()
- seg := d.fdRefs.FindSegment(fr.Start)
-
- for seg.Ok() && seg.Start() < fr.End {
- seg = d.fdRefs.Isolate(seg, fr)
- if old := seg.Value(); old == 1 {
- usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped)
- seg = d.fdRefs.Remove(seg).NextSegment()
- } else {
- seg.SetValue(old - 1)
- seg = seg.NextSegment()
- }
- }
- d.fdRefs.MergeAdjacent(fr)
+ d.fdRefs.DecRefAndAccount(fr)
d.dataMu.Unlock()
-
}
// MapInternal implements platform.File.MapInternal.
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 39509f703..ca0fe6d2b 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -8,6 +8,7 @@ go_library(
"control.go",
"host.go",
"ioctl_unsafe.go",
+ "mmap.go",
"socket.go",
"socket_iovec.go",
"socket_unsafe.go",
@@ -23,12 +24,15 @@ go_library(
"//pkg/fspath",
"//pkg/log",
"//pkg/refs",
+ "//pkg/safemem",
"//pkg/sentry/arch",
+ "//pkg/sentry/fs/fsutil",
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/hostfd",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
+ "//pkg/sentry/platform",
"//pkg/sentry/socket/control",
"//pkg/sentry/socket/unix",
"//pkg/sentry/socket/unix/transport",
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 8caf55a1b..18b127521 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -86,15 +86,13 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
i := &inode{
hostFD: hostFD,
- seekable: seekable,
+ ino: fs.NextIno(),
isTTY: opts.IsTTY,
- canMap: canMap(uint32(fileType)),
wouldBlock: wouldBlock(uint32(fileType)),
- ino: fs.NextIno(),
- // For simplicity, set offset to 0. Technically, we should use the existing
- // offset on the host if the file is seekable.
- offset: 0,
+ seekable: seekable,
+ canMap: canMap(uint32(fileType)),
}
+ i.pf.inode = i
// Non-seekable files can't be memory mapped, assert this.
if !i.seekable && i.canMap {
@@ -117,6 +115,10 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
// i.open will take a reference on d.
defer d.DecRef()
+
+ // For simplicity, fileDescription.offset is set to 0. Technically, we
+ // should only set to 0 on files that are not seekable (sockets, pipes,
+ // etc.), and use the offset from the host fd otherwise when importing.
return i.open(ctx, d.VFSDentry(), mnt, flags)
}
@@ -189,11 +191,15 @@ type inode struct {
// This field is initialized at creation time and is immutable.
hostFD int
- // wouldBlock is true if the host FD would return EWOULDBLOCK for
- // operations that would block.
+ // ino is an inode number unique within this filesystem.
//
// This field is initialized at creation time and is immutable.
- wouldBlock bool
+ ino uint64
+
+ // isTTY is true if this file represents a TTY.
+ //
+ // This field is initialized at creation time and is immutable.
+ isTTY bool
// seekable is false if the host fd points to a file representing a stream,
// e.g. a socket or a pipe. Such files are not seekable and can return
@@ -202,29 +208,29 @@ type inode struct {
// This field is initialized at creation time and is immutable.
seekable bool
- // isTTY is true if this file represents a TTY.
+ // wouldBlock is true if the host FD would return EWOULDBLOCK for
+ // operations that would block.
//
// This field is initialized at creation time and is immutable.
- isTTY bool
+ wouldBlock bool
+
+ // Event queue for blocking operations.
+ queue waiter.Queue
// canMap specifies whether we allow the file to be memory mapped.
//
// This field is initialized at creation time and is immutable.
canMap bool
- // ino is an inode number unique within this filesystem.
- //
- // This field is initialized at creation time and is immutable.
- ino uint64
+ // mapsMu protects mappings.
+ mapsMu sync.Mutex
- // offsetMu protects offset.
- offsetMu sync.Mutex
-
- // offset specifies the current file offset.
- offset int64
+ // If canMap is true, mappings tracks mappings of hostFD into
+ // memmap.MappingSpaces.
+ mappings memmap.MappingSet
- // Event queue for blocking operations.
- queue waiter.Queue
+ // pf implements platform.File for mappings of hostFD.
+ pf inodePlatformFile
}
// CheckPermissions implements kernfs.Inode.
@@ -388,6 +394,21 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil {
return err
}
+ oldSize := uint64(hostStat.Size)
+ if s.Size < oldSize {
+ oldpgend, _ := usermem.PageRoundUp(oldSize)
+ newpgend, _ := usermem.PageRoundUp(s.Size)
+ if oldpgend != newpgend {
+ i.mapsMu.Lock()
+ i.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
+ // Compare Linux's mm/truncate.c:truncate_setsize() =>
+ // truncate_pagecache() =>
+ // mm/memory.c:unmap_mapping_range(evencows=1).
+ InvalidatePrivate: true,
+ })
+ i.mapsMu.Unlock()
+ }
+ }
}
if m&(linux.STATX_ATIME|linux.STATX_MTIME) != 0 {
ts := [2]syscall.Timespec{
@@ -464,9 +485,6 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u
return vfsfd, nil
}
- // For simplicity, set offset to 0. Technically, we should
- // only set to 0 on files that are not seekable (sockets, pipes, etc.),
- // and use the offset from the host fd otherwise.
fd := &fileDescription{inode: i}
vfsfd := &fd.vfsfd
if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
@@ -487,6 +505,13 @@ type fileDescription struct {
//
// inode is immutable after fileDescription creation.
inode *inode
+
+ // offsetMu protects offset.
+ offsetMu sync.Mutex
+
+ // offset specifies the current file offset. It is only meaningful when
+ // inode.seekable is true.
+ offset int64
}
// SetStat implements vfs.FileDescriptionImpl.
@@ -532,10 +557,10 @@ func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts
return n, err
}
// TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so.
- i.offsetMu.Lock()
- n, err := readFromHostFD(ctx, i.hostFD, dst, i.offset, opts.Flags)
- i.offset += n
- i.offsetMu.Unlock()
+ f.offsetMu.Lock()
+ n, err := readFromHostFD(ctx, i.hostFD, dst, f.offset, opts.Flags)
+ f.offset += n
+ f.offsetMu.Unlock()
return n, err
}
@@ -572,10 +597,10 @@ func (f *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opt
}
// TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so.
// TODO(gvisor.dev/issue/1672): Write to end of file and update offset if O_APPEND is set on this file.
- i.offsetMu.Lock()
- n, err := writeToHostFD(ctx, i.hostFD, src, i.offset, opts.Flags)
- i.offset += n
- i.offsetMu.Unlock()
+ f.offsetMu.Lock()
+ n, err := writeToHostFD(ctx, i.hostFD, src, f.offset, opts.Flags)
+ f.offset += n
+ f.offsetMu.Unlock()
return n, err
}
@@ -600,41 +625,41 @@ func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (i
return 0, syserror.ESPIPE
}
- i.offsetMu.Lock()
- defer i.offsetMu.Unlock()
+ f.offsetMu.Lock()
+ defer f.offsetMu.Unlock()
switch whence {
case linux.SEEK_SET:
if offset < 0 {
- return i.offset, syserror.EINVAL
+ return f.offset, syserror.EINVAL
}
- i.offset = offset
+ f.offset = offset
case linux.SEEK_CUR:
- // Check for overflow. Note that underflow cannot occur, since i.offset >= 0.
- if offset > math.MaxInt64-i.offset {
- return i.offset, syserror.EOVERFLOW
+ // Check for overflow. Note that underflow cannot occur, since f.offset >= 0.
+ if offset > math.MaxInt64-f.offset {
+ return f.offset, syserror.EOVERFLOW
}
- if i.offset+offset < 0 {
- return i.offset, syserror.EINVAL
+ if f.offset+offset < 0 {
+ return f.offset, syserror.EINVAL
}
- i.offset += offset
+ f.offset += offset
case linux.SEEK_END:
var s syscall.Stat_t
if err := syscall.Fstat(i.hostFD, &s); err != nil {
- return i.offset, err
+ return f.offset, err
}
size := s.Size
// Check for overflow. Note that underflow cannot occur, since size >= 0.
if offset > math.MaxInt64-size {
- return i.offset, syserror.EOVERFLOW
+ return f.offset, syserror.EOVERFLOW
}
if size+offset < 0 {
- return i.offset, syserror.EINVAL
+ return f.offset, syserror.EINVAL
}
- i.offset = size + offset
+ f.offset = size + offset
case linux.SEEK_DATA, linux.SEEK_HOLE:
// Modifying the offset in the host file table should not matter, since
@@ -643,16 +668,16 @@ func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (i
// For reading and writing, we always rely on our internal offset.
n, err := unix.Seek(i.hostFD, offset, int(whence))
if err != nil {
- return i.offset, err
+ return f.offset, err
}
- i.offset = n
+ f.offset = n
default:
// Invalid whence.
- return i.offset, syserror.EINVAL
+ return f.offset, syserror.EINVAL
}
- return i.offset, nil
+ return f.offset, nil
}
// Sync implements FileDescriptionImpl.
@@ -666,8 +691,9 @@ func (f *fileDescription) ConfigureMMap(_ context.Context, opts *memmap.MMapOpts
if !f.inode.canMap {
return syserror.ENODEV
}
- // TODO(gvisor.dev/issue/1672): Implement ConfigureMMap and Mappable interface.
- return syserror.ENODEV
+ i := f.inode
+ i.pf.fileMapperInitOnce.Do(i.pf.fileMapper.Init)
+ return vfs.GenericConfigureMMap(&f.vfsfd, i, opts)
}
// EventRegister implements waiter.Waitable.EventRegister.
diff --git a/pkg/sentry/fsimpl/host/mmap.go b/pkg/sentry/fsimpl/host/mmap.go
new file mode 100644
index 000000000..8545a82f0
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/mmap.go
@@ -0,0 +1,132 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package host
+
+import (
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// inodePlatformFile implements platform.File. It exists solely because inode
+// cannot implement both kernfs.Inode.IncRef and platform.File.IncRef.
+//
+// inodePlatformFile should only be used if inode.canMap is true.
+type inodePlatformFile struct {
+ *inode
+
+ // fdRefsMu protects fdRefs.
+ fdRefsMu sync.Mutex
+
+ // fdRefs counts references on platform.File offsets. It is used solely for
+ // memory accounting.
+ fdRefs fsutil.FrameRefSet
+
+ // fileMapper caches mappings of the host file represented by this inode.
+ fileMapper fsutil.HostFileMapper
+
+ // fileMapperInitOnce is used to lazily initialize fileMapper.
+ fileMapperInitOnce sync.Once
+}
+
+// IncRef implements platform.File.IncRef.
+//
+// Precondition: i.inode.canMap must be true.
+func (i *inodePlatformFile) IncRef(fr platform.FileRange) {
+ i.fdRefsMu.Lock()
+ i.fdRefs.IncRefAndAccount(fr)
+ i.fdRefsMu.Unlock()
+}
+
+// DecRef implements platform.File.DecRef.
+//
+// Precondition: i.inode.canMap must be true.
+func (i *inodePlatformFile) DecRef(fr platform.FileRange) {
+ i.fdRefsMu.Lock()
+ i.fdRefs.DecRefAndAccount(fr)
+ i.fdRefsMu.Unlock()
+}
+
+// MapInternal implements platform.File.MapInternal.
+//
+// Precondition: i.inode.canMap must be true.
+func (i *inodePlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+ return i.fileMapper.MapInternal(fr, i.hostFD, at.Write)
+}
+
+// FD implements platform.File.FD.
+func (i *inodePlatformFile) FD() int {
+ return i.hostFD
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+//
+// Precondition: i.inode.canMap must be true.
+func (i *inode) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+ i.mapsMu.Lock()
+ mapped := i.mappings.AddMapping(ms, ar, offset, writable)
+ for _, r := range mapped {
+ i.pf.fileMapper.IncRefOn(r)
+ }
+ i.mapsMu.Unlock()
+ return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+//
+// Precondition: i.inode.canMap must be true.
+func (i *inode) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+ i.mapsMu.Lock()
+ unmapped := i.mappings.RemoveMapping(ms, ar, offset, writable)
+ for _, r := range unmapped {
+ i.pf.fileMapper.DecRefOn(r)
+ }
+ i.mapsMu.Unlock()
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+//
+// Precondition: i.inode.canMap must be true.
+func (i *inode) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+ return i.AddMapping(ctx, ms, dstAR, offset, writable)
+}
+
+// Translate implements memmap.Mappable.Translate.
+//
+// Precondition: i.inode.canMap must be true.
+func (i *inode) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+ mr := optional
+ return []memmap.Translation{
+ {
+ Source: mr,
+ File: &i.pf,
+ Offset: mr.Start,
+ Perms: usermem.AnyAccess,
+ },
+ }, nil
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+//
+// Precondition: i.inode.canMap must be true.
+func (i *inode) InvalidateUnsavable(ctx context.Context) error {
+ // We expect the same host fd across save/restore, so all translations
+ // should be valid.
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index a2d9649e7..007be1572 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -52,7 +52,6 @@ go_library(
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/fs/lock",
- "//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/pipe",
"//pkg/sentry/kernel/time",
@@ -96,6 +95,7 @@ go_test(
"pipe_test.go",
"regular_file_test.go",
"stat_test.go",
+ "tmpfs_test.go",
],
library = ":tmpfs",
deps = [
@@ -105,7 +105,6 @@ go_test(
"//pkg/sentry/contexttest",
"//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/contexttest",
"//pkg/sentry/vfs",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 36ffcb592..80fa7b29d 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -16,6 +16,7 @@ package tmpfs
import (
"fmt"
+ "sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
@@ -24,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// Sync implements vfs.FilesystemImpl.Sync.
@@ -76,8 +78,8 @@ afterSymlink:
return nil, err
}
if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
- // TODO(gvisor.dev/issue/1197): Symlink traversals updates
- // access time.
+ // Symlink traversal updates access time.
+ atomic.StoreInt64(&d.inode.atime, d.inode.fs.clock.Now().Nanoseconds())
if err := rp.HandleSymlink(symlink.target); err != nil {
return nil, err
}
@@ -361,8 +363,8 @@ afterTrailingSymlink:
}
// Do we need to resolve a trailing symlink?
if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
- // TODO(gvisor.dev/issue/1197): Symlink traversals updates
- // access time.
+ // Symlink traversal updates access time.
+ atomic.StoreInt64(&child.inode.atime, child.inode.fs.clock.Now().Nanoseconds())
if err := rp.HandleSymlink(symlink.target); err != nil {
return nil, err
}
@@ -636,12 +638,19 @@ func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
fs.mu.RLock()
defer fs.mu.RUnlock()
- _, err := resolveLocked(rp)
- if err != nil {
+ if _, err := resolveLocked(rp); err != nil {
return linux.Statfs{}, err
}
- // TODO(gvisor.dev/issue/1197): Actually implement statfs.
- return linux.Statfs{}, syserror.ENOSYS
+ statfs := linux.Statfs{
+ Type: linux.TMPFS_MAGIC,
+ BlockSize: usermem.PageSize,
+ FragmentSize: usermem.PageSize,
+ NameLength: linux.NAME_MAX,
+ // TODO(b/29637826): Allow configuring a tmpfs size and enforce it.
+ Blocks: 0,
+ BlocksFree: 0,
+ }
+ return statfs, nil
}
// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
@@ -763,5 +772,24 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath,
func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
fs.mu.RLock()
defer fs.mu.RUnlock()
- return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
+ mnt := vd.Mount()
+ d := vd.Dentry().Impl().(*dentry)
+ for {
+ if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() {
+ return vfs.PrependPathAtVFSRootError{}
+ }
+ if &d.vfsd == mnt.Root() {
+ return nil
+ }
+ if d.parent == nil {
+ if d.name != "" {
+ // This must be an anonymous memfd file.
+ b.PrependComponent("/" + d.name)
+ return vfs.PrependPathSyntheticError{}
+ }
+ return vfs.PrependPathAtNonMountRootError{}
+ }
+ b.PrependComponent(d.name)
+ d = d.parent
+ }
}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index 57e5e28ec..3f433d666 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -88,6 +88,7 @@ type regularFile struct {
func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode {
file := &regularFile{
memFile: fs.memFile,
+ seals: linux.F_SEAL_SEAL,
}
file.inode.init(file, fs, creds, linux.S_IFREG|mode)
file.inode.nlink = 1 // from parent directory
@@ -577,3 +578,44 @@ exitLoop:
return done, retErr
}
+
+// GetSeals returns the current set of seals on a memfd inode.
+func GetSeals(fd *vfs.FileDescription) (uint32, error) {
+ f, ok := fd.Impl().(*regularFileFD)
+ if !ok {
+ return 0, syserror.EINVAL
+ }
+ rf := f.inode().impl.(*regularFile)
+ rf.dataMu.RLock()
+ defer rf.dataMu.RUnlock()
+ return rf.seals, nil
+}
+
+// AddSeals adds new file seals to a memfd inode.
+func AddSeals(fd *vfs.FileDescription, val uint32) error {
+ f, ok := fd.Impl().(*regularFileFD)
+ if !ok {
+ return syserror.EINVAL
+ }
+ rf := f.inode().impl.(*regularFile)
+ rf.mapsMu.Lock()
+ defer rf.mapsMu.Unlock()
+ rf.dataMu.RLock()
+ defer rf.dataMu.RUnlock()
+
+ if rf.seals&linux.F_SEAL_SEAL != 0 {
+ // Seal applied which prevents addition of any new seals.
+ return syserror.EPERM
+ }
+
+ // F_SEAL_WRITE can only be added if there are no active writable maps.
+ if rf.seals&linux.F_SEAL_WRITE == 0 && val&linux.F_SEAL_WRITE != 0 {
+ if rf.writableMappingPages > 0 {
+ return syserror.EBUSY
+ }
+ }
+
+ // Seals can only be added, never removed.
+ rf.seals |= val
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
index 0399725cf..64e1c40ad 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
@@ -18,152 +18,16 @@ import (
"bytes"
"fmt"
"io"
- "sync/atomic"
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs/lock"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
-// nextFileID is used to generate unique file names.
-var nextFileID int64
-
-// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error
-// is not nil, then cleanup should be called when the root is no longer needed.
-func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
-
- vfsObj := &vfs.VirtualFilesystem{}
- if err := vfsObj.Init(); err != nil {
- return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err)
- }
-
- vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- })
- mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
- if err != nil {
- return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err)
- }
- root := mntns.Root()
- return vfsObj, root, func() {
- root.DecRef()
- mntns.DecRef()
- }, nil
-}
-
-// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If
-// the returned err is not nil, then cleanup should be called when the FD is no
-// longer needed.
-func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
- vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
- if err != nil {
- return nil, nil, err
- }
-
- filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1))
-
- // Create the file that will be write/read.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(filename),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
- Mode: linux.ModeRegular | mode,
- })
- if err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err)
- }
-
- return fd, cleanup, nil
-}
-
-// newDirFD is like newFileFD, but for directories.
-func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
- vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
- if err != nil {
- return nil, nil, err
- }
-
- dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1))
-
- // Create the dir.
- if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(dirname),
- }, &vfs.MkdirOptions{
- Mode: linux.ModeDirectory | mode,
- }); err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err)
- }
-
- // Open the dir and return it.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(dirname),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDONLY | linux.O_DIRECTORY,
- })
- if err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err)
- }
-
- return fd, cleanup, nil
-}
-
-// newPipeFD is like newFileFD, but for pipes.
-func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
- vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
- if err != nil {
- return nil, nil, err
- }
-
- pipename := fmt.Sprintf("tmpfs-test-pipe-%d", atomic.AddInt64(&nextFileID, 1))
-
- // Create the pipe.
- if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(pipename),
- }, &vfs.MknodOptions{
- Mode: linux.ModeNamedPipe | mode,
- }); err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to create pipe %q: %v", pipename, err)
- }
-
- // Open the pipe and return it.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(pipename),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR,
- })
- if err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to open pipe %q: %v", pipename, err)
- }
-
- return fd, cleanup, nil
-}
-
// Test that we can write some data to a file and read it back.`
func TestSimpleWriteRead(t *testing.T) {
ctx := contexttest.Context(t)
diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go
index 60c2c980e..f7ee4aab2 100644
--- a/pkg/sentry/fsimpl/tmpfs/stat_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go
@@ -19,8 +19,8 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
"gvisor.dev/gvisor/pkg/sentry/vfs"
)
@@ -29,7 +29,6 @@ func TestStatAfterCreate(t *testing.T) {
mode := linux.FileMode(0644)
// Run with different file types.
- // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets.
for _, typ := range []string{"file", "dir", "pipe"} {
t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) {
var (
@@ -175,7 +174,6 @@ func TestSetStat(t *testing.T) {
mode := linux.FileMode(0644)
// Run with different file types.
- // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets.
for _, typ := range []string{"file", "dir", "pipe"} {
t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) {
var (
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 405928bd0..1e781aecd 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -94,7 +94,7 @@ type FilesystemOpts struct {
}
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
-func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, _ string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
memFileProvider := pgalloc.MemoryFileProviderFromContext(ctx)
if memFileProvider == nil {
panic("MemoryFileProviderFromContext returned nil")
@@ -139,6 +139,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return &fs.vfsfs, &root.vfsd, nil
}
+// NewFilesystem returns a new tmpfs filesystem.
+func NewFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) (*vfs.Filesystem, *vfs.Dentry, error) {
+ return FilesystemType{}.GetFilesystem(ctx, vfsObj, creds, "", vfs.GetFilesystemOptions{})
+}
+
// Release implements vfs.FilesystemImpl.Release.
func (fs *filesystem) Release() {
fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
@@ -658,3 +663,34 @@ func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOption
func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
return fd.inode().removexattr(auth.CredentialsFromContext(ctx), name)
}
+
+// NewMemfd creates a new tmpfs regular file and file description that can back
+// an anonymous fd created by memfd_create.
+func NewMemfd(mount *vfs.Mount, creds *auth.Credentials, allowSeals bool, name string) (*vfs.FileDescription, error) {
+ fs, ok := mount.Filesystem().Impl().(*filesystem)
+ if !ok {
+ panic("NewMemfd() called with non-tmpfs mount")
+ }
+
+ // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd inodes are set up with
+ // S_IRWXUGO.
+ mode := linux.FileMode(0777)
+ inode := fs.newRegularFile(creds, mode)
+ rf := inode.impl.(*regularFile)
+ if allowSeals {
+ rf.seals = 0
+ }
+
+ d := fs.newDentry(inode)
+ defer d.DecRef()
+ d.name = name
+
+ // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with
+ // FMODE_READ | FMODE_WRITE.
+ var fd regularFileFD
+ flags := uint32(linux.O_RDWR)
+ if err := fd.vfsfd.Init(&fd, flags, mount, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
+ return &fd.vfsfd, nil
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go
new file mode 100644
index 000000000..a240fb276
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go
@@ -0,0 +1,156 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// nextFileID is used to generate unique file names.
+var nextFileID int64
+
+// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error
+// is not nil, then cleanup should be called when the root is no longer needed.
+func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) {
+ creds := auth.CredentialsFromContext(ctx)
+
+ vfsObj := &vfs.VirtualFilesystem{}
+ if err := vfsObj.Init(); err != nil {
+ return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err)
+ }
+
+ vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ })
+ mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+ if err != nil {
+ return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err)
+ }
+ root := mntns.Root()
+ return vfsObj, root, func() {
+ root.DecRef()
+ mntns.DecRef()
+ }, nil
+}
+
+// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If
+// the returned err is not nil, then cleanup should be called when the FD is no
+// longer needed.
+func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
+ creds := auth.CredentialsFromContext(ctx)
+ vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1))
+
+ // Create the file that will be write/read.
+ fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(filename),
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
+ Mode: linux.ModeRegular | mode,
+ })
+ if err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err)
+ }
+
+ return fd, cleanup, nil
+}
+
+// newDirFD is like newFileFD, but for directories.
+func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
+ creds := auth.CredentialsFromContext(ctx)
+ vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1))
+
+ // Create the dir.
+ if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(dirname),
+ }, &vfs.MkdirOptions{
+ Mode: linux.ModeDirectory | mode,
+ }); err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err)
+ }
+
+ // Open the dir and return it.
+ fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(dirname),
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDONLY | linux.O_DIRECTORY,
+ })
+ if err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err)
+ }
+
+ return fd, cleanup, nil
+}
+
+// newPipeFD is like newFileFD, but for pipes.
+func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
+ creds := auth.CredentialsFromContext(ctx)
+ vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ name := fmt.Sprintf("tmpfs-test-%d", atomic.AddInt64(&nextFileID, 1))
+
+ if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(name),
+ }, &vfs.MknodOptions{
+ Mode: linux.ModeNamedPipe | mode,
+ }); err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to create pipe %q: %v", name, err)
+ }
+
+ fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(name),
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDWR,
+ })
+ if err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to open pipe %q: %v", name, err)
+ }
+
+ return fd, cleanup, nil
+}