summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl/host
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/host')
-rw-r--r--pkg/sentry/fsimpl/host/BUILD34
-rw-r--r--pkg/sentry/fsimpl/host/host.go667
-rw-r--r--pkg/sentry/fsimpl/host/ioctl_unsafe.go56
-rw-r--r--pkg/sentry/fsimpl/host/tty.go379
-rw-r--r--pkg/sentry/fsimpl/host/util.go66
-rw-r--r--pkg/sentry/fsimpl/host/util_unsafe.go34
6 files changed, 0 insertions, 1236 deletions
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
deleted file mode 100644
index 82e1fb74b..000000000
--- a/pkg/sentry/fsimpl/host/BUILD
+++ /dev/null
@@ -1,34 +0,0 @@
-load("//tools:defs.bzl", "go_library")
-
-licenses(["notice"])
-
-go_library(
- name = "host",
- srcs = [
- "host.go",
- "ioctl_unsafe.go",
- "tty.go",
- "util.go",
- "util_unsafe.go",
- ],
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/context",
- "//pkg/fd",
- "//pkg/log",
- "//pkg/refs",
- "//pkg/safemem",
- "//pkg/sentry/arch",
- "//pkg/sentry/fsimpl/kernfs",
- "//pkg/sentry/kernel",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/memmap",
- "//pkg/sentry/unimpl",
- "//pkg/sentry/vfs",
- "//pkg/sync",
- "//pkg/syserror",
- "//pkg/usermem",
- "@org_golang_x_sys//unix:go_default_library",
- ],
-)
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
deleted file mode 100644
index 97fa7f7ab..000000000
--- a/pkg/sentry/fsimpl/host/host.go
+++ /dev/null
@@ -1,667 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package host provides a filesystem implementation for host files imported as
-// file descriptors.
-package host
-
-import (
- "errors"
- "fmt"
- "math"
- "syscall"
-
- "golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fd"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/refs"
- "gvisor.dev/gvisor/pkg/safemem"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// filesystemType implements vfs.FilesystemType.
-type filesystemType struct{}
-
-// GetFilesystem implements FilesystemType.GetFilesystem.
-func (filesystemType) GetFilesystem(context.Context, *vfs.VirtualFilesystem, *auth.Credentials, string, vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
- panic("cannot instaniate a host filesystem")
-}
-
-// Name implements FilesystemType.Name.
-func (filesystemType) Name() string {
- return "none"
-}
-
-// filesystem implements vfs.FilesystemImpl.
-type filesystem struct {
- kernfs.Filesystem
-}
-
-// NewMount returns a new disconnected mount in vfsObj that may be passed to ImportFD.
-func NewMount(vfsObj *vfs.VirtualFilesystem) (*vfs.Mount, error) {
- fs := &filesystem{}
- fs.Init(vfsObj, &filesystemType{})
- vfsfs := fs.VFSFilesystem()
- // NewDisconnectedMount will take an additional reference on vfsfs.
- defer vfsfs.DecRef()
- return vfsObj.NewDisconnectedMount(vfsfs, nil, &vfs.MountOptions{})
-}
-
-// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
-func ImportFD(mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
- fs, ok := mnt.Filesystem().Impl().(*kernfs.Filesystem)
- if !ok {
- return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl())
- }
-
- // Retrieve metadata.
- var s unix.Stat_t
- if err := unix.Fstat(hostFD, &s); err != nil {
- return nil, err
- }
-
- fileMode := linux.FileMode(s.Mode)
- fileType := fileMode.FileType()
-
- // Determine if hostFD is seekable. If not, this syscall will return ESPIPE
- // (see fs/read_write.c:llseek), e.g. for pipes, sockets, and some character
- // devices.
- _, err := unix.Seek(hostFD, 0, linux.SEEK_CUR)
- seekable := err != syserror.ESPIPE
-
- i := &inode{
- hostFD: hostFD,
- seekable: seekable,
- isTTY: isTTY,
- canMap: canMap(uint32(fileType)),
- ino: fs.NextIno(),
- mode: fileMode,
- // For simplicity, set offset to 0. Technically, we should use the existing
- // offset on the host if the file is seekable.
- offset: 0,
- }
-
- // Non-seekable files can't be memory mapped, assert this.
- if !i.seekable && i.canMap {
- panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped")
- }
-
- d := &kernfs.Dentry{}
- d.Init(i)
- // i.open will take a reference on d.
- defer d.DecRef()
-
- return i.open(d.VFSDentry(), mnt)
-}
-
-// inode implements kernfs.Inode.
-type inode struct {
- kernfs.InodeNotDirectory
- kernfs.InodeNotSymlink
-
- // When the reference count reaches zero, the host fd is closed.
- refs.AtomicRefCount
-
- // hostFD contains the host fd that this file was originally created from,
- // which must be available at time of restore.
- //
- // This field is initialized at creation time and is immutable.
- hostFD int
-
- // seekable is false if the host fd points to a file representing a stream,
- // e.g. a socket or a pipe. Such files are not seekable and can return
- // EWOULDBLOCK for I/O operations.
- //
- // This field is initialized at creation time and is immutable.
- seekable bool
-
- // isTTY is true if this file represents a TTY.
- //
- // This field is initialized at creation time and is immutable.
- isTTY bool
-
- // canMap specifies whether we allow the file to be memory mapped.
- //
- // This field is initialized at creation time and is immutable.
- canMap bool
-
- // ino is an inode number unique within this filesystem.
- //
- // This field is initialized at creation time and is immutable.
- ino uint64
-
- // modeMu protects mode.
- modeMu sync.Mutex
-
- // mode is a cached version of the file mode on the host. Note that it may
- // become out of date if the mode is changed on the host, e.g. with chmod.
- //
- // Generally, it is better to retrieve the mode from the host through an
- // fstat syscall. We only use this value in inode.Mode(), which cannot
- // return an error, if the syscall to host fails.
- //
- // FIXME(b/152294168): Plumb error into Inode.Mode() return value so we
- // can get rid of this.
- mode linux.FileMode
-
- // offsetMu protects offset.
- offsetMu sync.Mutex
-
- // offset specifies the current file offset.
- offset int64
-}
-
-// Note that these flags may become out of date, since they can be modified
-// on the host, e.g. with fcntl.
-func fileFlagsFromHostFD(fd int) (int, error) {
- flags, err := unix.FcntlInt(uintptr(fd), syscall.F_GETFL, 0)
- if err != nil {
- log.Warningf("Failed to get file flags for donated FD %d: %v", fd, err)
- return 0, err
- }
- // TODO(gvisor.dev/issue/1672): implement behavior corresponding to these allowed flags.
- flags &= syscall.O_ACCMODE | syscall.O_DIRECT | syscall.O_NONBLOCK | syscall.O_DSYNC | syscall.O_SYNC | syscall.O_APPEND
- return flags, nil
-}
-
-// CheckPermissions implements kernfs.Inode.
-func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
- mode, uid, gid, err := i.getPermissions()
- if err != nil {
- return err
- }
- return vfs.GenericCheckPermissions(creds, ats, mode, uid, gid)
-}
-
-// Mode implements kernfs.Inode.
-func (i *inode) Mode() linux.FileMode {
- mode, _, _, err := i.getPermissions()
- if err != nil {
- return i.mode
- }
-
- return linux.FileMode(mode)
-}
-
-func (i *inode) getPermissions() (linux.FileMode, auth.KUID, auth.KGID, error) {
- // Retrieve metadata.
- var s syscall.Stat_t
- if err := syscall.Fstat(i.hostFD, &s); err != nil {
- return 0, 0, 0, err
- }
-
- // Update cached mode.
- i.modeMu.Lock()
- i.mode = linux.FileMode(s.Mode)
- i.modeMu.Unlock()
- return linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid), nil
-}
-
-// Stat implements kernfs.Inode.
-func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
- if opts.Mask&linux.STATX__RESERVED != 0 {
- return linux.Statx{}, syserror.EINVAL
- }
- if opts.Sync&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE {
- return linux.Statx{}, syserror.EINVAL
- }
-
- // Limit our host call only to known flags.
- mask := opts.Mask & linux.STATX_ALL
- var s unix.Statx_t
- err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(mask), &s)
- // Fallback to fstat(2), if statx(2) is not supported on the host.
- //
- // TODO(b/151263641): Remove fallback.
- if err == syserror.ENOSYS {
- return i.fstat(opts)
- } else if err != nil {
- return linux.Statx{}, err
- }
-
- ls := linux.Statx{Mask: mask}
- // Unconditionally fill blksize, attributes, and device numbers, as indicated
- // by /include/uapi/linux/stat.h.
- //
- // RdevMajor/RdevMinor are left as zero, so as not to expose host device
- // numbers.
- //
- // TODO(gvisor.dev/issue/1672): Use kernfs-specific, internally defined
- // device numbers. If we use the device number from the host, it may collide
- // with another sentry-internal device number. We handle device/inode
- // numbers without relying on the host to prevent collisions.
- ls.Blksize = s.Blksize
- ls.Attributes = s.Attributes
- ls.AttributesMask = s.Attributes_mask
-
- if mask&linux.STATX_TYPE != 0 {
- ls.Mode |= s.Mode & linux.S_IFMT
- }
- if mask&linux.STATX_MODE != 0 {
- ls.Mode |= s.Mode &^ linux.S_IFMT
- }
- if mask&linux.STATX_NLINK != 0 {
- ls.Nlink = s.Nlink
- }
- if mask&linux.STATX_UID != 0 {
- ls.UID = s.Uid
- }
- if mask&linux.STATX_GID != 0 {
- ls.GID = s.Gid
- }
- if mask&linux.STATX_ATIME != 0 {
- ls.Atime = unixToLinuxStatxTimestamp(s.Atime)
- }
- if mask&linux.STATX_BTIME != 0 {
- ls.Btime = unixToLinuxStatxTimestamp(s.Btime)
- }
- if mask&linux.STATX_CTIME != 0 {
- ls.Ctime = unixToLinuxStatxTimestamp(s.Ctime)
- }
- if mask&linux.STATX_MTIME != 0 {
- ls.Mtime = unixToLinuxStatxTimestamp(s.Mtime)
- }
- if mask&linux.STATX_SIZE != 0 {
- ls.Size = s.Size
- }
- if mask&linux.STATX_BLOCKS != 0 {
- ls.Blocks = s.Blocks
- }
-
- // Use our own internal inode number.
- if mask&linux.STATX_INO != 0 {
- ls.Ino = i.ino
- }
-
- // Update cached mode.
- if (mask&linux.STATX_TYPE != 0) && (mask&linux.STATX_MODE != 0) {
- i.modeMu.Lock()
- i.mode = linux.FileMode(s.Mode)
- i.modeMu.Unlock()
- }
- return ls, nil
-}
-
-// fstat is a best-effort fallback for inode.Stat() if the host does not
-// support statx(2).
-//
-// We ignore the mask and sync flags in opts and simply supply
-// STATX_BASIC_STATS, as fstat(2) itself does not allow the specification
-// of a mask or sync flags. fstat(2) does not provide any metadata
-// equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so
-// those fields remain empty.
-func (i *inode) fstat(opts vfs.StatOptions) (linux.Statx, error) {
- var s unix.Stat_t
- if err := unix.Fstat(i.hostFD, &s); err != nil {
- return linux.Statx{}, err
- }
-
- // Note that rdev numbers are left as 0; do not expose host device numbers.
- ls := linux.Statx{
- Mask: linux.STATX_BASIC_STATS,
- Blksize: uint32(s.Blksize),
- Nlink: uint32(s.Nlink),
- UID: s.Uid,
- GID: s.Gid,
- Mode: uint16(s.Mode),
- Size: uint64(s.Size),
- Blocks: uint64(s.Blocks),
- Atime: timespecToStatxTimestamp(s.Atim),
- Ctime: timespecToStatxTimestamp(s.Ctim),
- Mtime: timespecToStatxTimestamp(s.Mtim),
- }
-
- // Use our own internal inode number.
- //
- // TODO(gvisor.dev/issue/1672): Use a kernfs-specific device number as well.
- // If we use the device number from the host, it may collide with another
- // sentry-internal device number. We handle device/inode numbers without
- // relying on the host to prevent collisions.
- ls.Ino = i.ino
-
- return ls, nil
-}
-
-// SetStat implements kernfs.Inode.
-func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
- s := opts.Stat
-
- m := s.Mask
- if m == 0 {
- return nil
- }
- if m&^(linux.STATX_MODE|linux.STATX_SIZE|linux.STATX_ATIME|linux.STATX_MTIME) != 0 {
- return syserror.EPERM
- }
- mode, uid, gid, err := i.getPermissions()
- if err != nil {
- return err
- }
- if err := vfs.CheckSetStat(ctx, creds, &s, mode.Permissions(), uid, gid); err != nil {
- return err
- }
-
- if m&linux.STATX_MODE != 0 {
- if err := syscall.Fchmod(i.hostFD, uint32(s.Mode)); err != nil {
- return err
- }
- i.modeMu.Lock()
- i.mode = linux.FileMode(s.Mode)
- i.modeMu.Unlock()
- }
- if m&linux.STATX_SIZE != 0 {
- if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil {
- return err
- }
- }
- if m&(linux.STATX_ATIME|linux.STATX_MTIME) != 0 {
- ts := [2]syscall.Timespec{
- toTimespec(s.Atime, m&linux.STATX_ATIME == 0),
- toTimespec(s.Mtime, m&linux.STATX_MTIME == 0),
- }
- if err := setTimestamps(i.hostFD, &ts); err != nil {
- return err
- }
- }
- return nil
-}
-
-// DecRef implements kernfs.Inode.
-func (i *inode) DecRef() {
- i.AtomicRefCount.DecRefWithDestructor(i.Destroy)
-}
-
-// Destroy implements kernfs.Inode.
-func (i *inode) Destroy() {
- if err := unix.Close(i.hostFD); err != nil {
- log.Warningf("failed to close host fd %d: %v", i.hostFD, err)
- }
-}
-
-// Open implements kernfs.Inode.
-func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- return i.open(vfsd, rp.Mount())
-}
-
-func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) {
- mode, _, _, err := i.getPermissions()
- if err != nil {
- return nil, err
- }
- fileType := mode.FileType()
- if fileType == syscall.S_IFSOCK {
- if i.isTTY {
- return nil, errors.New("cannot use host socket as TTY")
- }
- // TODO(gvisor.dev/issue/1672): support importing sockets.
- return nil, errors.New("importing host sockets not supported")
- }
-
- // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that
- // we don't allow importing arbitrary file types without proper support.
- var (
- vfsfd *vfs.FileDescription
- fdImpl vfs.FileDescriptionImpl
- )
- if i.isTTY {
- fd := &ttyFD{
- fileDescription: fileDescription{inode: i},
- termios: linux.DefaultSlaveTermios,
- }
- vfsfd = &fd.vfsfd
- fdImpl = fd
- } else {
- // For simplicity, set offset to 0. Technically, we should
- // only set to 0 on files that are not seekable (sockets, pipes, etc.),
- // and use the offset from the host fd otherwise.
- fd := &fileDescription{inode: i}
- vfsfd = &fd.vfsfd
- fdImpl = fd
- }
-
- flags, err := fileFlagsFromHostFD(i.hostFD)
- if err != nil {
- return nil, err
- }
-
- if err := vfsfd.Init(fdImpl, uint32(flags), mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
- return nil, err
- }
- return vfsfd, nil
-}
-
-// fileDescription is embedded by host fd implementations of FileDescriptionImpl.
-//
-// TODO(gvisor.dev/issue/1672): Implement Waitable interface.
-type fileDescription struct {
- vfsfd vfs.FileDescription
- vfs.FileDescriptionDefaultImpl
-
- // inode is vfsfd.Dentry().Impl().(*kernfs.Dentry).Inode().(*inode), but
- // cached to reduce indirections and casting. fileDescription does not hold
- // a reference on the inode through the inode field (since one is already
- // held via the Dentry).
- //
- // inode is immutable after fileDescription creation.
- inode *inode
-}
-
-// SetStat implements vfs.FileDescriptionImpl.
-func (f *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
- creds := auth.CredentialsFromContext(ctx)
- return f.inode.SetStat(ctx, nil, creds, opts)
-}
-
-// Stat implements vfs.FileDescriptionImpl.
-func (f *fileDescription) Stat(_ context.Context, opts vfs.StatOptions) (linux.Statx, error) {
- return f.inode.Stat(nil, opts)
-}
-
-// Release implements vfs.FileDescriptionImpl.
-func (f *fileDescription) Release() {
- // noop
-}
-
-// PRead implements FileDescriptionImpl.
-func (f *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- i := f.inode
- if !i.seekable {
- return 0, syserror.ESPIPE
- }
-
- return readFromHostFD(ctx, i.hostFD, dst, offset, opts.Flags)
-}
-
-// Read implements FileDescriptionImpl.
-func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- i := f.inode
- if !i.seekable {
- n, err := readFromHostFD(ctx, i.hostFD, dst, -1, opts.Flags)
- if isBlockError(err) {
- // If we got any data at all, return it as a "completed" partial read
- // rather than retrying until complete.
- if n != 0 {
- err = nil
- } else {
- err = syserror.ErrWouldBlock
- }
- }
- return n, err
- }
- // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so.
- i.offsetMu.Lock()
- n, err := readFromHostFD(ctx, i.hostFD, dst, i.offset, opts.Flags)
- i.offset += n
- i.offsetMu.Unlock()
- return n, err
-}
-
-func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags uint32) (int64, error) {
- // TODO(gvisor.dev/issue/1672): Support select preadv2 flags.
- if flags != 0 {
- return 0, syserror.EOPNOTSUPP
- }
-
- var reader safemem.Reader
- if offset == -1 {
- reader = safemem.FromIOReader{fd.NewReadWriter(hostFD)}
- } else {
- reader = safemem.FromVecReaderFunc{
- func(srcs [][]byte) (int64, error) {
- n, err := unix.Preadv(hostFD, srcs, offset)
- return int64(n), err
- },
- }
- }
- n, err := dst.CopyOutFrom(ctx, reader)
- return int64(n), err
-}
-
-// PWrite implements FileDescriptionImpl.
-func (f *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- i := f.inode
- if !i.seekable {
- return 0, syserror.ESPIPE
- }
-
- return writeToHostFD(ctx, i.hostFD, src, offset, opts.Flags)
-}
-
-// Write implements FileDescriptionImpl.
-func (f *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- i := f.inode
- if !i.seekable {
- n, err := writeToHostFD(ctx, i.hostFD, src, -1, opts.Flags)
- if isBlockError(err) {
- err = syserror.ErrWouldBlock
- }
- return n, err
- }
- // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so.
- // TODO(gvisor.dev/issue/1672): Write to end of file and update offset if O_APPEND is set on this file.
- i.offsetMu.Lock()
- n, err := writeToHostFD(ctx, i.hostFD, src, i.offset, opts.Flags)
- i.offset += n
- i.offsetMu.Unlock()
- return n, err
-}
-
-func writeToHostFD(ctx context.Context, hostFD int, src usermem.IOSequence, offset int64, flags uint32) (int64, error) {
- // TODO(gvisor.dev/issue/1672): Support select pwritev2 flags.
- if flags != 0 {
- return 0, syserror.EOPNOTSUPP
- }
-
- var writer safemem.Writer
- if offset == -1 {
- writer = safemem.FromIOWriter{fd.NewReadWriter(hostFD)}
- } else {
- writer = safemem.FromVecWriterFunc{
- func(srcs [][]byte) (int64, error) {
- n, err := unix.Pwritev(hostFD, srcs, offset)
- return int64(n), err
- },
- }
- }
- n, err := src.CopyInTo(ctx, writer)
- return int64(n), err
-}
-
-// Seek implements FileDescriptionImpl.
-//
-// Note that we do not support seeking on directories, since we do not even
-// allow directory fds to be imported at all.
-func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (int64, error) {
- i := f.inode
- if !i.seekable {
- return 0, syserror.ESPIPE
- }
-
- i.offsetMu.Lock()
- defer i.offsetMu.Unlock()
-
- switch whence {
- case linux.SEEK_SET:
- if offset < 0 {
- return i.offset, syserror.EINVAL
- }
- i.offset = offset
-
- case linux.SEEK_CUR:
- // Check for overflow. Note that underflow cannot occur, since i.offset >= 0.
- if offset > math.MaxInt64-i.offset {
- return i.offset, syserror.EOVERFLOW
- }
- if i.offset+offset < 0 {
- return i.offset, syserror.EINVAL
- }
- i.offset += offset
-
- case linux.SEEK_END:
- var s syscall.Stat_t
- if err := syscall.Fstat(i.hostFD, &s); err != nil {
- return i.offset, err
- }
- size := s.Size
-
- // Check for overflow. Note that underflow cannot occur, since size >= 0.
- if offset > math.MaxInt64-size {
- return i.offset, syserror.EOVERFLOW
- }
- if size+offset < 0 {
- return i.offset, syserror.EINVAL
- }
- i.offset = size + offset
-
- case linux.SEEK_DATA, linux.SEEK_HOLE:
- // Modifying the offset in the host file table should not matter, since
- // this is the only place where we use it.
- //
- // For reading and writing, we always rely on our internal offset.
- n, err := unix.Seek(i.hostFD, offset, int(whence))
- if err != nil {
- return i.offset, err
- }
- i.offset = n
-
- default:
- // Invalid whence.
- return i.offset, syserror.EINVAL
- }
-
- return i.offset, nil
-}
-
-// Sync implements FileDescriptionImpl.
-func (f *fileDescription) Sync(context.Context) error {
- // TODO(gvisor.dev/issue/1672): Currently we do not support the SyncData optimization, so we always sync everything.
- return unix.Fsync(f.inode.hostFD)
-}
-
-// ConfigureMMap implements FileDescriptionImpl.
-func (f *fileDescription) ConfigureMMap(_ context.Context, opts *memmap.MMapOpts) error {
- if !f.inode.canMap {
- return syserror.ENODEV
- }
- // TODO(gvisor.dev/issue/1672): Implement ConfigureMMap and Mappable interface.
- return syserror.ENODEV
-}
diff --git a/pkg/sentry/fsimpl/host/ioctl_unsafe.go b/pkg/sentry/fsimpl/host/ioctl_unsafe.go
deleted file mode 100644
index 0983bf7d8..000000000
--- a/pkg/sentry/fsimpl/host/ioctl_unsafe.go
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package host
-
-import (
- "syscall"
- "unsafe"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
-)
-
-func ioctlGetTermios(fd int) (*linux.Termios, error) {
- var t linux.Termios
- _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TCGETS, uintptr(unsafe.Pointer(&t)))
- if errno != 0 {
- return nil, errno
- }
- return &t, nil
-}
-
-func ioctlSetTermios(fd int, req uint64, t *linux.Termios) error {
- _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(unsafe.Pointer(t)))
- if errno != 0 {
- return errno
- }
- return nil
-}
-
-func ioctlGetWinsize(fd int) (*linux.Winsize, error) {
- var w linux.Winsize
- _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TIOCGWINSZ, uintptr(unsafe.Pointer(&w)))
- if errno != 0 {
- return nil, errno
- }
- return &w, nil
-}
-
-func ioctlSetWinsize(fd int, w *linux.Winsize) error {
- _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TIOCSWINSZ, uintptr(unsafe.Pointer(w)))
- if errno != 0 {
- return errno
- }
- return nil
-}
diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go
deleted file mode 100644
index 8936afb06..000000000
--- a/pkg/sentry/fsimpl/host/tty.go
+++ /dev/null
@@ -1,379 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package host
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/unimpl"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// ttyFD implements vfs.FileDescriptionImpl for a host file descriptor
-// that wraps a TTY FD.
-type ttyFD struct {
- fileDescription
-
- // mu protects the fields below.
- mu sync.Mutex `state:"nosave"`
-
- // session is the session attached to this ttyFD.
- session *kernel.Session
-
- // fgProcessGroup is the foreground process group that is currently
- // connected to this TTY.
- fgProcessGroup *kernel.ProcessGroup
-
- // termios contains the terminal attributes for this TTY.
- termios linux.KernelTermios
-}
-
-// InitForegroundProcessGroup sets the foreground process group and session for
-// the TTY. This should only be called once, after the foreground process group
-// has been created, but before it has started running.
-func (t *ttyFD) InitForegroundProcessGroup(pg *kernel.ProcessGroup) {
- t.mu.Lock()
- defer t.mu.Unlock()
- if t.fgProcessGroup != nil {
- panic("foreground process group is already set")
- }
- t.fgProcessGroup = pg
- t.session = pg.Session()
-}
-
-// ForegroundProcessGroup returns the foreground process for the TTY.
-func (t *ttyFD) ForegroundProcessGroup() *kernel.ProcessGroup {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.fgProcessGroup
-}
-
-// Release implements fs.FileOperations.Release.
-func (t *ttyFD) Release() {
- t.mu.Lock()
- t.fgProcessGroup = nil
- t.mu.Unlock()
-
- t.fileDescription.Release()
-}
-
-// PRead implements vfs.FileDescriptionImpl.
-//
-// Reading from a TTY is only allowed for foreground process groups. Background
-// process groups will either get EIO or a SIGTTIN.
-func (t *ttyFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- t.mu.Lock()
- defer t.mu.Unlock()
-
- // Are we allowed to do the read?
- // drivers/tty/n_tty.c:n_tty_read()=>job_control()=>tty_check_change().
- if err := t.checkChange(ctx, linux.SIGTTIN); err != nil {
- return 0, err
- }
-
- // Do the read.
- return t.fileDescription.PRead(ctx, dst, offset, opts)
-}
-
-// Read implements vfs.FileDescriptionImpl.
-//
-// Reading from a TTY is only allowed for foreground process groups. Background
-// process groups will either get EIO or a SIGTTIN.
-func (t *ttyFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- t.mu.Lock()
- defer t.mu.Unlock()
-
- // Are we allowed to do the read?
- // drivers/tty/n_tty.c:n_tty_read()=>job_control()=>tty_check_change().
- if err := t.checkChange(ctx, linux.SIGTTIN); err != nil {
- return 0, err
- }
-
- // Do the read.
- return t.fileDescription.Read(ctx, dst, opts)
-}
-
-// PWrite implements vfs.FileDescriptionImpl.
-func (t *ttyFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- t.mu.Lock()
- defer t.mu.Unlock()
-
- // Check whether TOSTOP is enabled. This corresponds to the check in
- // drivers/tty/n_tty.c:n_tty_write().
- if t.termios.LEnabled(linux.TOSTOP) {
- if err := t.checkChange(ctx, linux.SIGTTOU); err != nil {
- return 0, err
- }
- }
- return t.fileDescription.PWrite(ctx, src, offset, opts)
-}
-
-// Write implements vfs.FileDescriptionImpl.
-func (t *ttyFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- t.mu.Lock()
- defer t.mu.Unlock()
-
- // Check whether TOSTOP is enabled. This corresponds to the check in
- // drivers/tty/n_tty.c:n_tty_write().
- if t.termios.LEnabled(linux.TOSTOP) {
- if err := t.checkChange(ctx, linux.SIGTTOU); err != nil {
- return 0, err
- }
- }
- return t.fileDescription.Write(ctx, src, opts)
-}
-
-// Ioctl implements vfs.FileDescriptionImpl.
-func (t *ttyFD) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- // Ignore arg[0]. This is the real FD:
- fd := t.inode.hostFD
- ioctl := args[1].Uint64()
- switch ioctl {
- case linux.TCGETS:
- termios, err := ioctlGetTermios(fd)
- if err != nil {
- return 0, err
- }
- _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- return 0, err
-
- case linux.TCSETS, linux.TCSETSW, linux.TCSETSF:
- t.mu.Lock()
- defer t.mu.Unlock()
-
- if err := t.checkChange(ctx, linux.SIGTTOU); err != nil {
- return 0, err
- }
-
- var termios linux.Termios
- if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{
- AddressSpaceActive: true,
- }); err != nil {
- return 0, err
- }
- err := ioctlSetTermios(fd, ioctl, &termios)
- if err == nil {
- t.termios.FromTermios(termios)
- }
- return 0, err
-
- case linux.TIOCGPGRP:
- // Args: pid_t *argp
- // When successful, equivalent to *argp = tcgetpgrp(fd).
- // Get the process group ID of the foreground process group on this
- // terminal.
-
- pidns := kernel.PIDNamespaceFromContext(ctx)
- if pidns == nil {
- return 0, syserror.ENOTTY
- }
-
- t.mu.Lock()
- defer t.mu.Unlock()
-
- // Map the ProcessGroup into a ProcessGroupID in the task's PID namespace.
- pgID := pidns.IDOfProcessGroup(t.fgProcessGroup)
- _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- return 0, err
-
- case linux.TIOCSPGRP:
- // Args: const pid_t *argp
- // Equivalent to tcsetpgrp(fd, *argp).
- // Set the foreground process group ID of this terminal.
-
- task := kernel.TaskFromContext(ctx)
- if task == nil {
- return 0, syserror.ENOTTY
- }
-
- t.mu.Lock()
- defer t.mu.Unlock()
-
- // Check that we are allowed to set the process group.
- if err := t.checkChange(ctx, linux.SIGTTOU); err != nil {
- // drivers/tty/tty_io.c:tiocspgrp() converts -EIO from tty_check_change()
- // to -ENOTTY.
- if err == syserror.EIO {
- return 0, syserror.ENOTTY
- }
- return 0, err
- }
-
- // Check that calling task's process group is in the TTY session.
- if task.ThreadGroup().Session() != t.session {
- return 0, syserror.ENOTTY
- }
-
- var pgID kernel.ProcessGroupID
- if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{
- AddressSpaceActive: true,
- }); err != nil {
- return 0, err
- }
-
- // pgID must be non-negative.
- if pgID < 0 {
- return 0, syserror.EINVAL
- }
-
- // Process group with pgID must exist in this PID namespace.
- pidns := task.PIDNamespace()
- pg := pidns.ProcessGroupWithID(pgID)
- if pg == nil {
- return 0, syserror.ESRCH
- }
-
- // Check that new process group is in the TTY session.
- if pg.Session() != t.session {
- return 0, syserror.EPERM
- }
-
- t.fgProcessGroup = pg
- return 0, nil
-
- case linux.TIOCGWINSZ:
- // Args: struct winsize *argp
- // Get window size.
- winsize, err := ioctlGetWinsize(fd)
- if err != nil {
- return 0, err
- }
- _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- return 0, err
-
- case linux.TIOCSWINSZ:
- // Args: const struct winsize *argp
- // Set window size.
-
- // Unlike setting the termios, any process group (even background ones) can
- // set the winsize.
-
- var winsize linux.Winsize
- if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{
- AddressSpaceActive: true,
- }); err != nil {
- return 0, err
- }
- err := ioctlSetWinsize(fd, &winsize)
- return 0, err
-
- // Unimplemented commands.
- case linux.TIOCSETD,
- linux.TIOCSBRK,
- linux.TIOCCBRK,
- linux.TCSBRK,
- linux.TCSBRKP,
- linux.TIOCSTI,
- linux.TIOCCONS,
- linux.FIONBIO,
- linux.TIOCEXCL,
- linux.TIOCNXCL,
- linux.TIOCGEXCL,
- linux.TIOCNOTTY,
- linux.TIOCSCTTY,
- linux.TIOCGSID,
- linux.TIOCGETD,
- linux.TIOCVHANGUP,
- linux.TIOCGDEV,
- linux.TIOCMGET,
- linux.TIOCMSET,
- linux.TIOCMBIC,
- linux.TIOCMBIS,
- linux.TIOCGICOUNT,
- linux.TCFLSH,
- linux.TIOCSSERIAL,
- linux.TIOCGPTPEER:
-
- unimpl.EmitUnimplementedEvent(ctx)
- fallthrough
- default:
- return 0, syserror.ENOTTY
- }
-}
-
-// checkChange checks that the process group is allowed to read, write, or
-// change the state of the TTY.
-//
-// This corresponds to Linux drivers/tty/tty_io.c:tty_check_change(). The logic
-// is a bit convoluted, but documented inline.
-//
-// Preconditions: t.mu must be held.
-func (t *ttyFD) checkChange(ctx context.Context, sig linux.Signal) error {
- task := kernel.TaskFromContext(ctx)
- if task == nil {
- // No task? Linux does not have an analog for this case, but
- // tty_check_change is more of a blacklist of cases than a
- // whitelist, and is surprisingly permissive. Allowing the
- // change seems most appropriate.
- return nil
- }
-
- tg := task.ThreadGroup()
- pg := tg.ProcessGroup()
-
- // If the session for the task is different than the session for the
- // controlling TTY, then the change is allowed. Seems like a bad idea,
- // but that's exactly what linux does.
- if tg.Session() != t.fgProcessGroup.Session() {
- return nil
- }
-
- // If we are the foreground process group, then the change is allowed.
- if pg == t.fgProcessGroup {
- return nil
- }
-
- // We are not the foreground process group.
-
- // Is the provided signal blocked or ignored?
- if (task.SignalMask()&linux.SignalSetOf(sig) != 0) || tg.SignalHandlers().IsIgnored(sig) {
- // If the signal is SIGTTIN, then we are attempting to read
- // from the TTY. Don't send the signal and return EIO.
- if sig == linux.SIGTTIN {
- return syserror.EIO
- }
-
- // Otherwise, we are writing or changing terminal state. This is allowed.
- return nil
- }
-
- // If the process group is an orphan, return EIO.
- if pg.IsOrphan() {
- return syserror.EIO
- }
-
- // Otherwise, send the signal to the process group and return ERESTARTSYS.
- //
- // Note that Linux also unconditionally sets TIF_SIGPENDING on current,
- // but this isn't necessary in gVisor because the rationale given in
- // 040b6362d58f "tty: fix leakage of -ERESTARTSYS to userland" doesn't
- // apply: the sentry will handle -ERESTARTSYS in
- // kernel.runApp.execute() even if the kernel.Task isn't interrupted.
- //
- // Linux ignores the result of kill_pgrp().
- _ = pg.SendSignal(kernel.SignalInfoPriv(sig))
- return kernel.ERESTARTSYS
-}
diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go
deleted file mode 100644
index 2bc757b1a..000000000
--- a/pkg/sentry/fsimpl/host/util.go
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package host
-
-import (
- "syscall"
-
- "golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-func toTimespec(ts linux.StatxTimestamp, omit bool) syscall.Timespec {
- if omit {
- return syscall.Timespec{
- Sec: 0,
- Nsec: unix.UTIME_OMIT,
- }
- }
- return syscall.Timespec{
- Sec: ts.Sec,
- Nsec: int64(ts.Nsec),
- }
-}
-
-func unixToLinuxStatxTimestamp(ts unix.StatxTimestamp) linux.StatxTimestamp {
- return linux.StatxTimestamp{Sec: ts.Sec, Nsec: ts.Nsec}
-}
-
-func timespecToStatxTimestamp(ts unix.Timespec) linux.StatxTimestamp {
- return linux.StatxTimestamp{Sec: int64(ts.Sec), Nsec: uint32(ts.Nsec)}
-}
-
-// wouldBlock returns true for file types that can return EWOULDBLOCK
-// for blocking operations, e.g. pipes, character devices, and sockets.
-func wouldBlock(fileType uint32) bool {
- return fileType == syscall.S_IFIFO || fileType == syscall.S_IFCHR || fileType == syscall.S_IFSOCK
-}
-
-// canMap returns true if a file with fileType is allowed to be memory mapped.
-// This is ported over from VFS1, but it's probably not the best way for us
-// to check if a file can be memory mapped.
-func canMap(fileType uint32) bool {
- // TODO(gvisor.dev/issue/1672): Also allow "special files" to be mapped (see fs/host:canMap()).
- //
- // TODO(b/38213152): Some obscure character devices can be mapped.
- return fileType == syscall.S_IFREG
-}
-
-// isBlockError checks if an error is EAGAIN or EWOULDBLOCK.
-// If so, they can be transformed into syserror.ErrWouldBlock.
-func isBlockError(err error) bool {
- return err == syserror.EAGAIN || err == syserror.EWOULDBLOCK
-}
diff --git a/pkg/sentry/fsimpl/host/util_unsafe.go b/pkg/sentry/fsimpl/host/util_unsafe.go
deleted file mode 100644
index 5136ac844..000000000
--- a/pkg/sentry/fsimpl/host/util_unsafe.go
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package host
-
-import (
- "syscall"
- "unsafe"
-)
-
-func setTimestamps(fd int, ts *[2]syscall.Timespec) error {
- _, _, errno := syscall.Syscall6(
- syscall.SYS_UTIMENSAT,
- uintptr(fd),
- 0, /* path */
- uintptr(unsafe.Pointer(ts)),
- 0, /* flags */
- 0, 0)
- if errno != 0 {
- return errno
- }
- return nil
-}