summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl
diff options
context:
space:
mode:
authorDean Deng <deandeng@google.com>2020-03-14 07:13:15 -0700
committergVisor bot <gvisor-bot@google.com>2020-03-14 07:14:33 -0700
commit5e413cad10d2358a21dd08216953faee70e62a0b (patch)
tree672fc1c7a2ca334ea24a16f54caa83283da64f4b /pkg/sentry/fsimpl
parent45a8ae240dd180f1b8b4c56e77ac67e4cd3af96f (diff)
Plumb VFS2 imported fds into virtual filesystem.
- When setting up the virtual filesystem, mount a host.filesystem to contain all files that need to be imported. - Make read/preadv syscalls to the host in cases where preadv2 may not be supported yet (likewise for writing). - Make save/restore functions in kernel/kernel.go return early if vfs2 is enabled. PiperOrigin-RevId: 300922353
Diffstat (limited to 'pkg/sentry/fsimpl')
-rw-r--r--pkg/sentry/fsimpl/host/BUILD2
-rw-r--r--pkg/sentry/fsimpl/host/default_file.go45
-rw-r--r--pkg/sentry/fsimpl/host/host.go124
-rw-r--r--pkg/sentry/fsimpl/host/util.go28
4 files changed, 147 insertions, 52 deletions
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 731f192b3..5d67f88e3 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -9,9 +9,11 @@ go_library(
"host.go",
"util.go",
],
+ visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/fd",
"//pkg/log",
"//pkg/refs",
"//pkg/safemem",
diff --git a/pkg/sentry/fsimpl/host/default_file.go b/pkg/sentry/fsimpl/host/default_file.go
index 172cdb161..98682ba5e 100644
--- a/pkg/sentry/fsimpl/host/default_file.go
+++ b/pkg/sentry/fsimpl/host/default_file.go
@@ -21,6 +21,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -64,9 +65,7 @@ func (f *defaultFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts v
panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped")
}
- f.mu.Lock()
n, err := readFromHostFD(ctx, f.inode.hostFD, dst, -1, int(opts.Flags))
- f.mu.Unlock()
if isBlockError(err) {
// If we got any data at all, return it as a "completed" partial read
// rather than retrying until complete.
@@ -86,16 +85,22 @@ func (f *defaultFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts v
return n, err
}
-func readFromHostFD(ctx context.Context, fd int, dst usermem.IOSequence, offset int64, flags int) (int64, error) {
- if flags&^(linux.RWF_VALID) != 0 {
+func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags int) (int64, error) {
+ // TODO(gvisor.dev/issue/1672): Support select preadv2 flags.
+ if flags != 0 {
return 0, syserror.EOPNOTSUPP
}
- reader := safemem.FromVecReaderFunc{
- func(srcs [][]byte) (int64, error) {
- n, err := unix.Preadv2(fd, srcs, offset, flags)
- return int64(n), err
- },
+ var reader safemem.Reader
+ if offset == -1 {
+ reader = safemem.FromIOReader{fd.NewReadWriter(hostFD)}
+ } else {
+ reader = safemem.FromVecReaderFunc{
+ func(srcs [][]byte) (int64, error) {
+ n, err := unix.Preadv(hostFD, srcs, offset)
+ return int64(n), err
+ },
+ }
}
n, err := dst.CopyOutFrom(ctx, reader)
return int64(n), err
@@ -120,9 +125,7 @@ func (f *defaultFileFD) Write(ctx context.Context, src usermem.IOSequence, opts
panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped")
}
- f.mu.Lock()
n, err := writeToHostFD(ctx, f.inode.hostFD, src, -1, int(opts.Flags))
- f.mu.Unlock()
if isBlockError(err) {
err = syserror.ErrWouldBlock
}
@@ -137,16 +140,22 @@ func (f *defaultFileFD) Write(ctx context.Context, src usermem.IOSequence, opts
return n, err
}
-func writeToHostFD(ctx context.Context, fd int, src usermem.IOSequence, offset int64, flags int) (int64, error) {
- if flags&^(linux.RWF_VALID) != 0 {
+func writeToHostFD(ctx context.Context, hostFD int, src usermem.IOSequence, offset int64, flags int) (int64, error) {
+ // TODO(gvisor.dev/issue/1672): Support select pwritev2 flags.
+ if flags != 0 {
return 0, syserror.EOPNOTSUPP
}
- writer := safemem.FromVecWriterFunc{
- func(srcs [][]byte) (int64, error) {
- n, err := unix.Pwritev2(fd, srcs, offset, flags)
- return int64(n), err
- },
+ var writer safemem.Writer
+ if offset == -1 {
+ writer = safemem.FromIOWriter{fd.NewReadWriter(hostFD)}
+ } else {
+ writer = safemem.FromVecWriterFunc{
+ func(srcs [][]byte) (int64, error) {
+ n, err := unix.Pwritev(hostFD, srcs, offset)
+ return int64(n), err
+ },
+ }
}
n, err := src.CopyInTo(ctx, writer)
return int64(n), err
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index c205e6a0b..0be812d13 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -38,10 +38,19 @@ type filesystem struct {
kernfs.Filesystem
}
+// NewMount returns a new disconnected mount in vfsObj that may be passed to ImportFD.
+func NewMount(vfsObj *vfs.VirtualFilesystem) (*vfs.Mount, error) {
+ fs := &filesystem{}
+ fs.Init(vfsObj)
+ vfsfs := fs.VFSFilesystem()
+ // NewDisconnectedMount will take an additional reference on vfsfs.
+ defer vfsfs.DecRef()
+ return vfsObj.NewDisconnectedMount(vfsfs, nil, &vfs.MountOptions{})
+}
+
// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID, isTTY bool) (*vfs.FileDescription, error) {
- // Must be importing to a mount of host.filesystem.
- fs, ok := mnt.Filesystem().Impl().(*filesystem)
+ fs, ok := mnt.Filesystem().Impl().(*kernfs.Filesystem)
if !ok {
return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl())
}
@@ -54,8 +63,7 @@ func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID
fileMode := linux.FileMode(s.Mode)
fileType := fileMode.FileType()
- // Pipes, character devices, and sockets can return EWOULDBLOCK for
- // operations that would block.
+ // Pipes, character devices, and sockets.
isStream := fileType == syscall.S_IFIFO || fileType == syscall.S_IFCHR || fileType == syscall.S_IFSOCK
i := &inode{
@@ -143,11 +151,109 @@ func (i *inode) Mode() linux.FileMode {
// Stat implements kernfs.Inode.
func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ if opts.Mask&linux.STATX__RESERVED != 0 {
+ return linux.Statx{}, syserror.EINVAL
+ }
+ if opts.Sync&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE {
+ return linux.Statx{}, syserror.EINVAL
+ }
+
+ // Limit our host call only to known flags.
+ mask := opts.Mask & linux.STATX_ALL
var s unix.Statx_t
- if err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(opts.Mask), &s); err != nil {
+ err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(mask), &s)
+ // Fallback to fstat(2), if statx(2) is not supported on the host.
+ //
+ // TODO(b/151263641): Remove fallback.
+ if err == syserror.ENOSYS {
+ return i.fstat(opts)
+ } else if err != nil {
+ return linux.Statx{}, err
+ }
+
+ ls := linux.Statx{Mask: mask}
+ // Unconditionally fill blksize, attributes, and device numbers, as indicated
+ // by /include/uapi/linux/stat.h.
+ //
+ // RdevMajor/RdevMinor are left as zero, so as not to expose host device
+ // numbers.
+ //
+ // TODO(gvisor.dev/issue/1672): Use kernfs-specific, internally defined
+ // device numbers. If we use the device number from the host, it may collide
+ // with another sentry-internal device number. We handle device/inode
+ // numbers without relying on the host to prevent collisions.
+ ls.Blksize = s.Blksize
+ ls.Attributes = s.Attributes
+ ls.AttributesMask = s.Attributes_mask
+
+ if mask|linux.STATX_TYPE != 0 {
+ ls.Mode |= s.Mode & linux.S_IFMT
+ }
+ if mask|linux.STATX_MODE != 0 {
+ ls.Mode |= s.Mode &^ linux.S_IFMT
+ }
+ if mask|linux.STATX_NLINK != 0 {
+ ls.Nlink = s.Nlink
+ }
+ if mask|linux.STATX_ATIME != 0 {
+ ls.Atime = unixToLinuxStatxTimestamp(s.Atime)
+ }
+ if mask|linux.STATX_BTIME != 0 {
+ ls.Btime = unixToLinuxStatxTimestamp(s.Btime)
+ }
+ if mask|linux.STATX_CTIME != 0 {
+ ls.Ctime = unixToLinuxStatxTimestamp(s.Ctime)
+ }
+ if mask|linux.STATX_MTIME != 0 {
+ ls.Mtime = unixToLinuxStatxTimestamp(s.Mtime)
+ }
+ if mask|linux.STATX_SIZE != 0 {
+ ls.Size = s.Size
+ }
+ if mask|linux.STATX_BLOCKS != 0 {
+ ls.Blocks = s.Blocks
+ }
+
+ // Use our own internal inode number and file owner.
+ if mask|linux.STATX_INO != 0 {
+ ls.Ino = i.ino
+ }
+ if mask|linux.STATX_UID != 0 {
+ ls.UID = uint32(i.uid)
+ }
+ if mask|linux.STATX_GID != 0 {
+ ls.GID = uint32(i.gid)
+ }
+
+ return ls, nil
+}
+
+// fstat is a best-effort fallback for inode.Stat() if the host does not
+// support statx(2).
+//
+// We ignore the mask and sync flags in opts and simply supply
+// STATX_BASIC_STATS, as fstat(2) itself does not allow the specification
+// of a mask or sync flags. fstat(2) does not provide any metadata
+// equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so
+// those fields remain empty.
+func (i *inode) fstat(opts vfs.StatOptions) (linux.Statx, error) {
+ var s unix.Stat_t
+ if err := unix.Fstat(i.hostFD, &s); err != nil {
return linux.Statx{}, err
}
- ls := unixToLinuxStatx(s)
+
+ // Note that rdev numbers are left as 0; do not expose host device numbers.
+ ls := linux.Statx{
+ Mask: linux.STATX_BASIC_STATS,
+ Blksize: uint32(s.Blksize),
+ Nlink: uint32(s.Nlink),
+ Mode: uint16(s.Mode),
+ Size: uint64(s.Size),
+ Blocks: uint64(s.Blocks),
+ Atime: timespecToStatxTimestamp(s.Atim),
+ Ctime: timespecToStatxTimestamp(s.Ctim),
+ Mtime: timespecToStatxTimestamp(s.Mtim),
+ }
// Use our own internal inode number and file owner.
//
@@ -159,9 +265,6 @@ func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, erro
ls.UID = uint32(i.uid)
ls.GID = uint32(i.gid)
- // Update file mode from the host.
- i.mode = linux.FileMode(ls.Mode)
-
return ls, nil
}
@@ -217,7 +320,6 @@ func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptio
}
func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) {
-
fileType := i.mode.FileType()
if fileType == syscall.S_IFSOCK {
if i.isTTY {
@@ -227,6 +329,8 @@ func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error
return nil, errors.New("importing host sockets not supported")
}
+ // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that
+ // we don't allow importing arbitrary file types without proper support.
if i.isTTY {
// TODO(gvisor.dev/issue/1672): support importing host fd as TTY.
return nil, errors.New("importing host fd as TTY not supported")
diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go
index e1ccacb4d..d519feef5 100644
--- a/pkg/sentry/fsimpl/host/util.go
+++ b/pkg/sentry/fsimpl/host/util.go
@@ -35,34 +35,14 @@ func toTimespec(ts linux.StatxTimestamp, omit bool) unix.Timespec {
}
}
-func unixToLinuxStatx(s unix.Statx_t) linux.Statx {
- return linux.Statx{
- Mask: s.Mask,
- Blksize: s.Blksize,
- Attributes: s.Attributes,
- Nlink: s.Nlink,
- UID: s.Uid,
- GID: s.Gid,
- Mode: s.Mode,
- Ino: s.Ino,
- Size: s.Size,
- Blocks: s.Blocks,
- AttributesMask: s.Attributes_mask,
- Atime: unixToLinuxStatxTimestamp(s.Atime),
- Btime: unixToLinuxStatxTimestamp(s.Btime),
- Ctime: unixToLinuxStatxTimestamp(s.Ctime),
- Mtime: unixToLinuxStatxTimestamp(s.Mtime),
- RdevMajor: s.Rdev_major,
- RdevMinor: s.Rdev_minor,
- DevMajor: s.Dev_major,
- DevMinor: s.Dev_minor,
- }
-}
-
func unixToLinuxStatxTimestamp(ts unix.StatxTimestamp) linux.StatxTimestamp {
return linux.StatxTimestamp{Sec: ts.Sec, Nsec: ts.Nsec}
}
+func timespecToStatxTimestamp(ts unix.Timespec) linux.StatxTimestamp {
+ return linux.StatxTimestamp{Sec: int64(ts.Sec), Nsec: uint32(ts.Nsec)}
+}
+
// wouldBlock returns true for file types that can return EWOULDBLOCK
// for blocking operations, e.g. pipes, character devices, and sockets.
func wouldBlock(fileType uint32) bool {