diff options
Diffstat (limited to 'pkg/sentry/fsimpl/host')
-rw-r--r-- | pkg/sentry/fsimpl/host/BUILD | 34 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/host.go | 667 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/ioctl_unsafe.go | 56 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/tty.go | 379 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/util.go | 66 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/util_unsafe.go | 34 |
6 files changed, 0 insertions, 1236 deletions
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD deleted file mode 100644 index 82e1fb74b..000000000 --- a/pkg/sentry/fsimpl/host/BUILD +++ /dev/null @@ -1,34 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "host", - srcs = [ - "host.go", - "ioctl_unsafe.go", - "tty.go", - "util.go", - "util_unsafe.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fd", - "//pkg/log", - "//pkg/refs", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/unimpl", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go deleted file mode 100644 index 97fa7f7ab..000000000 --- a/pkg/sentry/fsimpl/host/host.go +++ /dev/null @@ -1,667 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package host provides a filesystem implementation for host files imported as -// file descriptors. -package host - -import ( - "errors" - "fmt" - "math" - "syscall" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// filesystemType implements vfs.FilesystemType. -type filesystemType struct{} - -// GetFilesystem implements FilesystemType.GetFilesystem. -func (filesystemType) GetFilesystem(context.Context, *vfs.VirtualFilesystem, *auth.Credentials, string, vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - panic("cannot instaniate a host filesystem") -} - -// Name implements FilesystemType.Name. -func (filesystemType) Name() string { - return "none" -} - -// filesystem implements vfs.FilesystemImpl. -type filesystem struct { - kernfs.Filesystem -} - -// NewMount returns a new disconnected mount in vfsObj that may be passed to ImportFD. -func NewMount(vfsObj *vfs.VirtualFilesystem) (*vfs.Mount, error) { - fs := &filesystem{} - fs.Init(vfsObj, &filesystemType{}) - vfsfs := fs.VFSFilesystem() - // NewDisconnectedMount will take an additional reference on vfsfs. - defer vfsfs.DecRef() - return vfsObj.NewDisconnectedMount(vfsfs, nil, &vfs.MountOptions{}) -} - -// ImportFD sets up and returns a vfs.FileDescription from a donated fd. -func ImportFD(mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) { - fs, ok := mnt.Filesystem().Impl().(*kernfs.Filesystem) - if !ok { - return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl()) - } - - // Retrieve metadata. - var s unix.Stat_t - if err := unix.Fstat(hostFD, &s); err != nil { - return nil, err - } - - fileMode := linux.FileMode(s.Mode) - fileType := fileMode.FileType() - - // Determine if hostFD is seekable. If not, this syscall will return ESPIPE - // (see fs/read_write.c:llseek), e.g. for pipes, sockets, and some character - // devices. - _, err := unix.Seek(hostFD, 0, linux.SEEK_CUR) - seekable := err != syserror.ESPIPE - - i := &inode{ - hostFD: hostFD, - seekable: seekable, - isTTY: isTTY, - canMap: canMap(uint32(fileType)), - ino: fs.NextIno(), - mode: fileMode, - // For simplicity, set offset to 0. Technically, we should use the existing - // offset on the host if the file is seekable. - offset: 0, - } - - // Non-seekable files can't be memory mapped, assert this. - if !i.seekable && i.canMap { - panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") - } - - d := &kernfs.Dentry{} - d.Init(i) - // i.open will take a reference on d. - defer d.DecRef() - - return i.open(d.VFSDentry(), mnt) -} - -// inode implements kernfs.Inode. -type inode struct { - kernfs.InodeNotDirectory - kernfs.InodeNotSymlink - - // When the reference count reaches zero, the host fd is closed. - refs.AtomicRefCount - - // hostFD contains the host fd that this file was originally created from, - // which must be available at time of restore. - // - // This field is initialized at creation time and is immutable. - hostFD int - - // seekable is false if the host fd points to a file representing a stream, - // e.g. a socket or a pipe. Such files are not seekable and can return - // EWOULDBLOCK for I/O operations. - // - // This field is initialized at creation time and is immutable. - seekable bool - - // isTTY is true if this file represents a TTY. - // - // This field is initialized at creation time and is immutable. - isTTY bool - - // canMap specifies whether we allow the file to be memory mapped. - // - // This field is initialized at creation time and is immutable. - canMap bool - - // ino is an inode number unique within this filesystem. - // - // This field is initialized at creation time and is immutable. - ino uint64 - - // modeMu protects mode. - modeMu sync.Mutex - - // mode is a cached version of the file mode on the host. Note that it may - // become out of date if the mode is changed on the host, e.g. with chmod. - // - // Generally, it is better to retrieve the mode from the host through an - // fstat syscall. We only use this value in inode.Mode(), which cannot - // return an error, if the syscall to host fails. - // - // FIXME(b/152294168): Plumb error into Inode.Mode() return value so we - // can get rid of this. - mode linux.FileMode - - // offsetMu protects offset. - offsetMu sync.Mutex - - // offset specifies the current file offset. - offset int64 -} - -// Note that these flags may become out of date, since they can be modified -// on the host, e.g. with fcntl. -func fileFlagsFromHostFD(fd int) (int, error) { - flags, err := unix.FcntlInt(uintptr(fd), syscall.F_GETFL, 0) - if err != nil { - log.Warningf("Failed to get file flags for donated FD %d: %v", fd, err) - return 0, err - } - // TODO(gvisor.dev/issue/1672): implement behavior corresponding to these allowed flags. - flags &= syscall.O_ACCMODE | syscall.O_DIRECT | syscall.O_NONBLOCK | syscall.O_DSYNC | syscall.O_SYNC | syscall.O_APPEND - return flags, nil -} - -// CheckPermissions implements kernfs.Inode. -func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { - mode, uid, gid, err := i.getPermissions() - if err != nil { - return err - } - return vfs.GenericCheckPermissions(creds, ats, mode, uid, gid) -} - -// Mode implements kernfs.Inode. -func (i *inode) Mode() linux.FileMode { - mode, _, _, err := i.getPermissions() - if err != nil { - return i.mode - } - - return linux.FileMode(mode) -} - -func (i *inode) getPermissions() (linux.FileMode, auth.KUID, auth.KGID, error) { - // Retrieve metadata. - var s syscall.Stat_t - if err := syscall.Fstat(i.hostFD, &s); err != nil { - return 0, 0, 0, err - } - - // Update cached mode. - i.modeMu.Lock() - i.mode = linux.FileMode(s.Mode) - i.modeMu.Unlock() - return linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid), nil -} - -// Stat implements kernfs.Inode. -func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { - if opts.Mask&linux.STATX__RESERVED != 0 { - return linux.Statx{}, syserror.EINVAL - } - if opts.Sync&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { - return linux.Statx{}, syserror.EINVAL - } - - // Limit our host call only to known flags. - mask := opts.Mask & linux.STATX_ALL - var s unix.Statx_t - err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(mask), &s) - // Fallback to fstat(2), if statx(2) is not supported on the host. - // - // TODO(b/151263641): Remove fallback. - if err == syserror.ENOSYS { - return i.fstat(opts) - } else if err != nil { - return linux.Statx{}, err - } - - ls := linux.Statx{Mask: mask} - // Unconditionally fill blksize, attributes, and device numbers, as indicated - // by /include/uapi/linux/stat.h. - // - // RdevMajor/RdevMinor are left as zero, so as not to expose host device - // numbers. - // - // TODO(gvisor.dev/issue/1672): Use kernfs-specific, internally defined - // device numbers. If we use the device number from the host, it may collide - // with another sentry-internal device number. We handle device/inode - // numbers without relying on the host to prevent collisions. - ls.Blksize = s.Blksize - ls.Attributes = s.Attributes - ls.AttributesMask = s.Attributes_mask - - if mask&linux.STATX_TYPE != 0 { - ls.Mode |= s.Mode & linux.S_IFMT - } - if mask&linux.STATX_MODE != 0 { - ls.Mode |= s.Mode &^ linux.S_IFMT - } - if mask&linux.STATX_NLINK != 0 { - ls.Nlink = s.Nlink - } - if mask&linux.STATX_UID != 0 { - ls.UID = s.Uid - } - if mask&linux.STATX_GID != 0 { - ls.GID = s.Gid - } - if mask&linux.STATX_ATIME != 0 { - ls.Atime = unixToLinuxStatxTimestamp(s.Atime) - } - if mask&linux.STATX_BTIME != 0 { - ls.Btime = unixToLinuxStatxTimestamp(s.Btime) - } - if mask&linux.STATX_CTIME != 0 { - ls.Ctime = unixToLinuxStatxTimestamp(s.Ctime) - } - if mask&linux.STATX_MTIME != 0 { - ls.Mtime = unixToLinuxStatxTimestamp(s.Mtime) - } - if mask&linux.STATX_SIZE != 0 { - ls.Size = s.Size - } - if mask&linux.STATX_BLOCKS != 0 { - ls.Blocks = s.Blocks - } - - // Use our own internal inode number. - if mask&linux.STATX_INO != 0 { - ls.Ino = i.ino - } - - // Update cached mode. - if (mask&linux.STATX_TYPE != 0) && (mask&linux.STATX_MODE != 0) { - i.modeMu.Lock() - i.mode = linux.FileMode(s.Mode) - i.modeMu.Unlock() - } - return ls, nil -} - -// fstat is a best-effort fallback for inode.Stat() if the host does not -// support statx(2). -// -// We ignore the mask and sync flags in opts and simply supply -// STATX_BASIC_STATS, as fstat(2) itself does not allow the specification -// of a mask or sync flags. fstat(2) does not provide any metadata -// equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so -// those fields remain empty. -func (i *inode) fstat(opts vfs.StatOptions) (linux.Statx, error) { - var s unix.Stat_t - if err := unix.Fstat(i.hostFD, &s); err != nil { - return linux.Statx{}, err - } - - // Note that rdev numbers are left as 0; do not expose host device numbers. - ls := linux.Statx{ - Mask: linux.STATX_BASIC_STATS, - Blksize: uint32(s.Blksize), - Nlink: uint32(s.Nlink), - UID: s.Uid, - GID: s.Gid, - Mode: uint16(s.Mode), - Size: uint64(s.Size), - Blocks: uint64(s.Blocks), - Atime: timespecToStatxTimestamp(s.Atim), - Ctime: timespecToStatxTimestamp(s.Ctim), - Mtime: timespecToStatxTimestamp(s.Mtim), - } - - // Use our own internal inode number. - // - // TODO(gvisor.dev/issue/1672): Use a kernfs-specific device number as well. - // If we use the device number from the host, it may collide with another - // sentry-internal device number. We handle device/inode numbers without - // relying on the host to prevent collisions. - ls.Ino = i.ino - - return ls, nil -} - -// SetStat implements kernfs.Inode. -func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { - s := opts.Stat - - m := s.Mask - if m == 0 { - return nil - } - if m&^(linux.STATX_MODE|linux.STATX_SIZE|linux.STATX_ATIME|linux.STATX_MTIME) != 0 { - return syserror.EPERM - } - mode, uid, gid, err := i.getPermissions() - if err != nil { - return err - } - if err := vfs.CheckSetStat(ctx, creds, &s, mode.Permissions(), uid, gid); err != nil { - return err - } - - if m&linux.STATX_MODE != 0 { - if err := syscall.Fchmod(i.hostFD, uint32(s.Mode)); err != nil { - return err - } - i.modeMu.Lock() - i.mode = linux.FileMode(s.Mode) - i.modeMu.Unlock() - } - if m&linux.STATX_SIZE != 0 { - if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil { - return err - } - } - if m&(linux.STATX_ATIME|linux.STATX_MTIME) != 0 { - ts := [2]syscall.Timespec{ - toTimespec(s.Atime, m&linux.STATX_ATIME == 0), - toTimespec(s.Mtime, m&linux.STATX_MTIME == 0), - } - if err := setTimestamps(i.hostFD, &ts); err != nil { - return err - } - } - return nil -} - -// DecRef implements kernfs.Inode. -func (i *inode) DecRef() { - i.AtomicRefCount.DecRefWithDestructor(i.Destroy) -} - -// Destroy implements kernfs.Inode. -func (i *inode) Destroy() { - if err := unix.Close(i.hostFD); err != nil { - log.Warningf("failed to close host fd %d: %v", i.hostFD, err) - } -} - -// Open implements kernfs.Inode. -func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - return i.open(vfsd, rp.Mount()) -} - -func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) { - mode, _, _, err := i.getPermissions() - if err != nil { - return nil, err - } - fileType := mode.FileType() - if fileType == syscall.S_IFSOCK { - if i.isTTY { - return nil, errors.New("cannot use host socket as TTY") - } - // TODO(gvisor.dev/issue/1672): support importing sockets. - return nil, errors.New("importing host sockets not supported") - } - - // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that - // we don't allow importing arbitrary file types without proper support. - var ( - vfsfd *vfs.FileDescription - fdImpl vfs.FileDescriptionImpl - ) - if i.isTTY { - fd := &ttyFD{ - fileDescription: fileDescription{inode: i}, - termios: linux.DefaultSlaveTermios, - } - vfsfd = &fd.vfsfd - fdImpl = fd - } else { - // For simplicity, set offset to 0. Technically, we should - // only set to 0 on files that are not seekable (sockets, pipes, etc.), - // and use the offset from the host fd otherwise. - fd := &fileDescription{inode: i} - vfsfd = &fd.vfsfd - fdImpl = fd - } - - flags, err := fileFlagsFromHostFD(i.hostFD) - if err != nil { - return nil, err - } - - if err := vfsfd.Init(fdImpl, uint32(flags), mnt, d, &vfs.FileDescriptionOptions{}); err != nil { - return nil, err - } - return vfsfd, nil -} - -// fileDescription is embedded by host fd implementations of FileDescriptionImpl. -// -// TODO(gvisor.dev/issue/1672): Implement Waitable interface. -type fileDescription struct { - vfsfd vfs.FileDescription - vfs.FileDescriptionDefaultImpl - - // inode is vfsfd.Dentry().Impl().(*kernfs.Dentry).Inode().(*inode), but - // cached to reduce indirections and casting. fileDescription does not hold - // a reference on the inode through the inode field (since one is already - // held via the Dentry). - // - // inode is immutable after fileDescription creation. - inode *inode -} - -// SetStat implements vfs.FileDescriptionImpl. -func (f *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - creds := auth.CredentialsFromContext(ctx) - return f.inode.SetStat(ctx, nil, creds, opts) -} - -// Stat implements vfs.FileDescriptionImpl. -func (f *fileDescription) Stat(_ context.Context, opts vfs.StatOptions) (linux.Statx, error) { - return f.inode.Stat(nil, opts) -} - -// Release implements vfs.FileDescriptionImpl. -func (f *fileDescription) Release() { - // noop -} - -// PRead implements FileDescriptionImpl. -func (f *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - i := f.inode - if !i.seekable { - return 0, syserror.ESPIPE - } - - return readFromHostFD(ctx, i.hostFD, dst, offset, opts.Flags) -} - -// Read implements FileDescriptionImpl. -func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - i := f.inode - if !i.seekable { - n, err := readFromHostFD(ctx, i.hostFD, dst, -1, opts.Flags) - if isBlockError(err) { - // If we got any data at all, return it as a "completed" partial read - // rather than retrying until complete. - if n != 0 { - err = nil - } else { - err = syserror.ErrWouldBlock - } - } - return n, err - } - // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so. - i.offsetMu.Lock() - n, err := readFromHostFD(ctx, i.hostFD, dst, i.offset, opts.Flags) - i.offset += n - i.offsetMu.Unlock() - return n, err -} - -func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags uint32) (int64, error) { - // TODO(gvisor.dev/issue/1672): Support select preadv2 flags. - if flags != 0 { - return 0, syserror.EOPNOTSUPP - } - - var reader safemem.Reader - if offset == -1 { - reader = safemem.FromIOReader{fd.NewReadWriter(hostFD)} - } else { - reader = safemem.FromVecReaderFunc{ - func(srcs [][]byte) (int64, error) { - n, err := unix.Preadv(hostFD, srcs, offset) - return int64(n), err - }, - } - } - n, err := dst.CopyOutFrom(ctx, reader) - return int64(n), err -} - -// PWrite implements FileDescriptionImpl. -func (f *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - i := f.inode - if !i.seekable { - return 0, syserror.ESPIPE - } - - return writeToHostFD(ctx, i.hostFD, src, offset, opts.Flags) -} - -// Write implements FileDescriptionImpl. -func (f *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - i := f.inode - if !i.seekable { - n, err := writeToHostFD(ctx, i.hostFD, src, -1, opts.Flags) - if isBlockError(err) { - err = syserror.ErrWouldBlock - } - return n, err - } - // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so. - // TODO(gvisor.dev/issue/1672): Write to end of file and update offset if O_APPEND is set on this file. - i.offsetMu.Lock() - n, err := writeToHostFD(ctx, i.hostFD, src, i.offset, opts.Flags) - i.offset += n - i.offsetMu.Unlock() - return n, err -} - -func writeToHostFD(ctx context.Context, hostFD int, src usermem.IOSequence, offset int64, flags uint32) (int64, error) { - // TODO(gvisor.dev/issue/1672): Support select pwritev2 flags. - if flags != 0 { - return 0, syserror.EOPNOTSUPP - } - - var writer safemem.Writer - if offset == -1 { - writer = safemem.FromIOWriter{fd.NewReadWriter(hostFD)} - } else { - writer = safemem.FromVecWriterFunc{ - func(srcs [][]byte) (int64, error) { - n, err := unix.Pwritev(hostFD, srcs, offset) - return int64(n), err - }, - } - } - n, err := src.CopyInTo(ctx, writer) - return int64(n), err -} - -// Seek implements FileDescriptionImpl. -// -// Note that we do not support seeking on directories, since we do not even -// allow directory fds to be imported at all. -func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (int64, error) { - i := f.inode - if !i.seekable { - return 0, syserror.ESPIPE - } - - i.offsetMu.Lock() - defer i.offsetMu.Unlock() - - switch whence { - case linux.SEEK_SET: - if offset < 0 { - return i.offset, syserror.EINVAL - } - i.offset = offset - - case linux.SEEK_CUR: - // Check for overflow. Note that underflow cannot occur, since i.offset >= 0. - if offset > math.MaxInt64-i.offset { - return i.offset, syserror.EOVERFLOW - } - if i.offset+offset < 0 { - return i.offset, syserror.EINVAL - } - i.offset += offset - - case linux.SEEK_END: - var s syscall.Stat_t - if err := syscall.Fstat(i.hostFD, &s); err != nil { - return i.offset, err - } - size := s.Size - - // Check for overflow. Note that underflow cannot occur, since size >= 0. - if offset > math.MaxInt64-size { - return i.offset, syserror.EOVERFLOW - } - if size+offset < 0 { - return i.offset, syserror.EINVAL - } - i.offset = size + offset - - case linux.SEEK_DATA, linux.SEEK_HOLE: - // Modifying the offset in the host file table should not matter, since - // this is the only place where we use it. - // - // For reading and writing, we always rely on our internal offset. - n, err := unix.Seek(i.hostFD, offset, int(whence)) - if err != nil { - return i.offset, err - } - i.offset = n - - default: - // Invalid whence. - return i.offset, syserror.EINVAL - } - - return i.offset, nil -} - -// Sync implements FileDescriptionImpl. -func (f *fileDescription) Sync(context.Context) error { - // TODO(gvisor.dev/issue/1672): Currently we do not support the SyncData optimization, so we always sync everything. - return unix.Fsync(f.inode.hostFD) -} - -// ConfigureMMap implements FileDescriptionImpl. -func (f *fileDescription) ConfigureMMap(_ context.Context, opts *memmap.MMapOpts) error { - if !f.inode.canMap { - return syserror.ENODEV - } - // TODO(gvisor.dev/issue/1672): Implement ConfigureMMap and Mappable interface. - return syserror.ENODEV -} diff --git a/pkg/sentry/fsimpl/host/ioctl_unsafe.go b/pkg/sentry/fsimpl/host/ioctl_unsafe.go deleted file mode 100644 index 0983bf7d8..000000000 --- a/pkg/sentry/fsimpl/host/ioctl_unsafe.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "syscall" - "unsafe" - - "gvisor.dev/gvisor/pkg/abi/linux" -) - -func ioctlGetTermios(fd int) (*linux.Termios, error) { - var t linux.Termios - _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TCGETS, uintptr(unsafe.Pointer(&t))) - if errno != 0 { - return nil, errno - } - return &t, nil -} - -func ioctlSetTermios(fd int, req uint64, t *linux.Termios) error { - _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(unsafe.Pointer(t))) - if errno != 0 { - return errno - } - return nil -} - -func ioctlGetWinsize(fd int) (*linux.Winsize, error) { - var w linux.Winsize - _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TIOCGWINSZ, uintptr(unsafe.Pointer(&w))) - if errno != 0 { - return nil, errno - } - return &w, nil -} - -func ioctlSetWinsize(fd int, w *linux.Winsize) error { - _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TIOCSWINSZ, uintptr(unsafe.Pointer(w))) - if errno != 0 { - return errno - } - return nil -} diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go deleted file mode 100644 index 8936afb06..000000000 --- a/pkg/sentry/fsimpl/host/tty.go +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/unimpl" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// ttyFD implements vfs.FileDescriptionImpl for a host file descriptor -// that wraps a TTY FD. -type ttyFD struct { - fileDescription - - // mu protects the fields below. - mu sync.Mutex `state:"nosave"` - - // session is the session attached to this ttyFD. - session *kernel.Session - - // fgProcessGroup is the foreground process group that is currently - // connected to this TTY. - fgProcessGroup *kernel.ProcessGroup - - // termios contains the terminal attributes for this TTY. - termios linux.KernelTermios -} - -// InitForegroundProcessGroup sets the foreground process group and session for -// the TTY. This should only be called once, after the foreground process group -// has been created, but before it has started running. -func (t *ttyFD) InitForegroundProcessGroup(pg *kernel.ProcessGroup) { - t.mu.Lock() - defer t.mu.Unlock() - if t.fgProcessGroup != nil { - panic("foreground process group is already set") - } - t.fgProcessGroup = pg - t.session = pg.Session() -} - -// ForegroundProcessGroup returns the foreground process for the TTY. -func (t *ttyFD) ForegroundProcessGroup() *kernel.ProcessGroup { - t.mu.Lock() - defer t.mu.Unlock() - return t.fgProcessGroup -} - -// Release implements fs.FileOperations.Release. -func (t *ttyFD) Release() { - t.mu.Lock() - t.fgProcessGroup = nil - t.mu.Unlock() - - t.fileDescription.Release() -} - -// PRead implements vfs.FileDescriptionImpl. -// -// Reading from a TTY is only allowed for foreground process groups. Background -// process groups will either get EIO or a SIGTTIN. -func (t *ttyFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - t.mu.Lock() - defer t.mu.Unlock() - - // Are we allowed to do the read? - // drivers/tty/n_tty.c:n_tty_read()=>job_control()=>tty_check_change(). - if err := t.checkChange(ctx, linux.SIGTTIN); err != nil { - return 0, err - } - - // Do the read. - return t.fileDescription.PRead(ctx, dst, offset, opts) -} - -// Read implements vfs.FileDescriptionImpl. -// -// Reading from a TTY is only allowed for foreground process groups. Background -// process groups will either get EIO or a SIGTTIN. -func (t *ttyFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - t.mu.Lock() - defer t.mu.Unlock() - - // Are we allowed to do the read? - // drivers/tty/n_tty.c:n_tty_read()=>job_control()=>tty_check_change(). - if err := t.checkChange(ctx, linux.SIGTTIN); err != nil { - return 0, err - } - - // Do the read. - return t.fileDescription.Read(ctx, dst, opts) -} - -// PWrite implements vfs.FileDescriptionImpl. -func (t *ttyFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - t.mu.Lock() - defer t.mu.Unlock() - - // Check whether TOSTOP is enabled. This corresponds to the check in - // drivers/tty/n_tty.c:n_tty_write(). - if t.termios.LEnabled(linux.TOSTOP) { - if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { - return 0, err - } - } - return t.fileDescription.PWrite(ctx, src, offset, opts) -} - -// Write implements vfs.FileDescriptionImpl. -func (t *ttyFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - t.mu.Lock() - defer t.mu.Unlock() - - // Check whether TOSTOP is enabled. This corresponds to the check in - // drivers/tty/n_tty.c:n_tty_write(). - if t.termios.LEnabled(linux.TOSTOP) { - if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { - return 0, err - } - } - return t.fileDescription.Write(ctx, src, opts) -} - -// Ioctl implements vfs.FileDescriptionImpl. -func (t *ttyFD) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { - // Ignore arg[0]. This is the real FD: - fd := t.inode.hostFD - ioctl := args[1].Uint64() - switch ioctl { - case linux.TCGETS: - termios, err := ioctlGetTermios(fd) - if err != nil { - return 0, err - } - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{ - AddressSpaceActive: true, - }) - return 0, err - - case linux.TCSETS, linux.TCSETSW, linux.TCSETSF: - t.mu.Lock() - defer t.mu.Unlock() - - if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { - return 0, err - } - - var termios linux.Termios - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return 0, err - } - err := ioctlSetTermios(fd, ioctl, &termios) - if err == nil { - t.termios.FromTermios(termios) - } - return 0, err - - case linux.TIOCGPGRP: - // Args: pid_t *argp - // When successful, equivalent to *argp = tcgetpgrp(fd). - // Get the process group ID of the foreground process group on this - // terminal. - - pidns := kernel.PIDNamespaceFromContext(ctx) - if pidns == nil { - return 0, syserror.ENOTTY - } - - t.mu.Lock() - defer t.mu.Unlock() - - // Map the ProcessGroup into a ProcessGroupID in the task's PID namespace. - pgID := pidns.IDOfProcessGroup(t.fgProcessGroup) - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ - AddressSpaceActive: true, - }) - return 0, err - - case linux.TIOCSPGRP: - // Args: const pid_t *argp - // Equivalent to tcsetpgrp(fd, *argp). - // Set the foreground process group ID of this terminal. - - task := kernel.TaskFromContext(ctx) - if task == nil { - return 0, syserror.ENOTTY - } - - t.mu.Lock() - defer t.mu.Unlock() - - // Check that we are allowed to set the process group. - if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { - // drivers/tty/tty_io.c:tiocspgrp() converts -EIO from tty_check_change() - // to -ENOTTY. - if err == syserror.EIO { - return 0, syserror.ENOTTY - } - return 0, err - } - - // Check that calling task's process group is in the TTY session. - if task.ThreadGroup().Session() != t.session { - return 0, syserror.ENOTTY - } - - var pgID kernel.ProcessGroupID - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return 0, err - } - - // pgID must be non-negative. - if pgID < 0 { - return 0, syserror.EINVAL - } - - // Process group with pgID must exist in this PID namespace. - pidns := task.PIDNamespace() - pg := pidns.ProcessGroupWithID(pgID) - if pg == nil { - return 0, syserror.ESRCH - } - - // Check that new process group is in the TTY session. - if pg.Session() != t.session { - return 0, syserror.EPERM - } - - t.fgProcessGroup = pg - return 0, nil - - case linux.TIOCGWINSZ: - // Args: struct winsize *argp - // Get window size. - winsize, err := ioctlGetWinsize(fd) - if err != nil { - return 0, err - } - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{ - AddressSpaceActive: true, - }) - return 0, err - - case linux.TIOCSWINSZ: - // Args: const struct winsize *argp - // Set window size. - - // Unlike setting the termios, any process group (even background ones) can - // set the winsize. - - var winsize linux.Winsize - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return 0, err - } - err := ioctlSetWinsize(fd, &winsize) - return 0, err - - // Unimplemented commands. - case linux.TIOCSETD, - linux.TIOCSBRK, - linux.TIOCCBRK, - linux.TCSBRK, - linux.TCSBRKP, - linux.TIOCSTI, - linux.TIOCCONS, - linux.FIONBIO, - linux.TIOCEXCL, - linux.TIOCNXCL, - linux.TIOCGEXCL, - linux.TIOCNOTTY, - linux.TIOCSCTTY, - linux.TIOCGSID, - linux.TIOCGETD, - linux.TIOCVHANGUP, - linux.TIOCGDEV, - linux.TIOCMGET, - linux.TIOCMSET, - linux.TIOCMBIC, - linux.TIOCMBIS, - linux.TIOCGICOUNT, - linux.TCFLSH, - linux.TIOCSSERIAL, - linux.TIOCGPTPEER: - - unimpl.EmitUnimplementedEvent(ctx) - fallthrough - default: - return 0, syserror.ENOTTY - } -} - -// checkChange checks that the process group is allowed to read, write, or -// change the state of the TTY. -// -// This corresponds to Linux drivers/tty/tty_io.c:tty_check_change(). The logic -// is a bit convoluted, but documented inline. -// -// Preconditions: t.mu must be held. -func (t *ttyFD) checkChange(ctx context.Context, sig linux.Signal) error { - task := kernel.TaskFromContext(ctx) - if task == nil { - // No task? Linux does not have an analog for this case, but - // tty_check_change is more of a blacklist of cases than a - // whitelist, and is surprisingly permissive. Allowing the - // change seems most appropriate. - return nil - } - - tg := task.ThreadGroup() - pg := tg.ProcessGroup() - - // If the session for the task is different than the session for the - // controlling TTY, then the change is allowed. Seems like a bad idea, - // but that's exactly what linux does. - if tg.Session() != t.fgProcessGroup.Session() { - return nil - } - - // If we are the foreground process group, then the change is allowed. - if pg == t.fgProcessGroup { - return nil - } - - // We are not the foreground process group. - - // Is the provided signal blocked or ignored? - if (task.SignalMask()&linux.SignalSetOf(sig) != 0) || tg.SignalHandlers().IsIgnored(sig) { - // If the signal is SIGTTIN, then we are attempting to read - // from the TTY. Don't send the signal and return EIO. - if sig == linux.SIGTTIN { - return syserror.EIO - } - - // Otherwise, we are writing or changing terminal state. This is allowed. - return nil - } - - // If the process group is an orphan, return EIO. - if pg.IsOrphan() { - return syserror.EIO - } - - // Otherwise, send the signal to the process group and return ERESTARTSYS. - // - // Note that Linux also unconditionally sets TIF_SIGPENDING on current, - // but this isn't necessary in gVisor because the rationale given in - // 040b6362d58f "tty: fix leakage of -ERESTARTSYS to userland" doesn't - // apply: the sentry will handle -ERESTARTSYS in - // kernel.runApp.execute() even if the kernel.Task isn't interrupted. - // - // Linux ignores the result of kill_pgrp(). - _ = pg.SendSignal(kernel.SignalInfoPriv(sig)) - return kernel.ERESTARTSYS -} diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go deleted file mode 100644 index 2bc757b1a..000000000 --- a/pkg/sentry/fsimpl/host/util.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "syscall" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/syserror" -) - -func toTimespec(ts linux.StatxTimestamp, omit bool) syscall.Timespec { - if omit { - return syscall.Timespec{ - Sec: 0, - Nsec: unix.UTIME_OMIT, - } - } - return syscall.Timespec{ - Sec: ts.Sec, - Nsec: int64(ts.Nsec), - } -} - -func unixToLinuxStatxTimestamp(ts unix.StatxTimestamp) linux.StatxTimestamp { - return linux.StatxTimestamp{Sec: ts.Sec, Nsec: ts.Nsec} -} - -func timespecToStatxTimestamp(ts unix.Timespec) linux.StatxTimestamp { - return linux.StatxTimestamp{Sec: int64(ts.Sec), Nsec: uint32(ts.Nsec)} -} - -// wouldBlock returns true for file types that can return EWOULDBLOCK -// for blocking operations, e.g. pipes, character devices, and sockets. -func wouldBlock(fileType uint32) bool { - return fileType == syscall.S_IFIFO || fileType == syscall.S_IFCHR || fileType == syscall.S_IFSOCK -} - -// canMap returns true if a file with fileType is allowed to be memory mapped. -// This is ported over from VFS1, but it's probably not the best way for us -// to check if a file can be memory mapped. -func canMap(fileType uint32) bool { - // TODO(gvisor.dev/issue/1672): Also allow "special files" to be mapped (see fs/host:canMap()). - // - // TODO(b/38213152): Some obscure character devices can be mapped. - return fileType == syscall.S_IFREG -} - -// isBlockError checks if an error is EAGAIN or EWOULDBLOCK. -// If so, they can be transformed into syserror.ErrWouldBlock. -func isBlockError(err error) bool { - return err == syserror.EAGAIN || err == syserror.EWOULDBLOCK -} diff --git a/pkg/sentry/fsimpl/host/util_unsafe.go b/pkg/sentry/fsimpl/host/util_unsafe.go deleted file mode 100644 index 5136ac844..000000000 --- a/pkg/sentry/fsimpl/host/util_unsafe.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "syscall" - "unsafe" -) - -func setTimestamps(fd int, ts *[2]syscall.Timespec) error { - _, _, errno := syscall.Syscall6( - syscall.SYS_UTIMENSAT, - uintptr(fd), - 0, /* path */ - uintptr(unsafe.Pointer(ts)), - 0, /* flags */ - 0, 0) - if errno != 0 { - return errno - } - return nil -} |