From 390bb9c241c2b05c311579562d95cc39d899157b Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Fri, 24 Jan 2020 11:58:13 -0800 Subject: Ignore external SIGURG Go 1.14+ sends SIGURG to Ms to attempt asynchronous preemption of a G. Since it can't guarantee that a SIGURG is only related to preemption, it continues to forward them to signal.Notify (see runtime.sighandler). We should ignore these signals, as applications shouldn't receive them. Note that this means that truly external SIGURG can no longer be sent to the application (as with SIGCHLD). PiperOrigin-RevId: 291415357 --- pkg/sentry/kernel/signal.go | 3 +++ 1 file changed, 3 insertions(+) (limited to 'pkg/sentry') diff --git a/pkg/sentry/kernel/signal.go b/pkg/sentry/kernel/signal.go index 02eede93d..e8cce37d0 100644 --- a/pkg/sentry/kernel/signal.go +++ b/pkg/sentry/kernel/signal.go @@ -38,6 +38,9 @@ const SignalPanic = linux.SIGUSR2 // Preconditions: Kernel must have an init process. func (k *Kernel) sendExternalSignal(info *arch.SignalInfo, context string) { switch linux.Signal(info.Signo) { + case linux.SIGURG: + // Sent by the Go 1.14+ runtime for asynchronous goroutine preemption. + case platform.SignalInterrupt: // Assume that a call to platform.Context.Interrupt() misfired. -- cgit v1.2.3 From d135b5abf6eafa92d2745dc98d48ef39d2f90e75 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Fri, 24 Jan 2020 12:53:29 -0800 Subject: Add anonymous device number allocation to VFS2. Note that in VFS2, filesystem device numbers are per-vfs.FilesystemImpl rather than global, avoiding the need for a "registry" type to handle save/restore. (This is more consistent with Linux anyway: compare e.g. mm/shmem.c:shmem_mount() => fs/super.c:mount_nodev() => (indirectly) set_anon_super().) PiperOrigin-RevId: 291425193 --- pkg/sentry/vfs/device.go | 29 +++++++++++++++++++++++++++++ pkg/sentry/vfs/vfs.go | 18 ++++++++++++++---- 2 files changed, 43 insertions(+), 4 deletions(-) (limited to 'pkg/sentry') diff --git a/pkg/sentry/vfs/device.go b/pkg/sentry/vfs/device.go index cb672e36f..9f9d6e783 100644 --- a/pkg/sentry/vfs/device.go +++ b/pkg/sentry/vfs/device.go @@ -98,3 +98,32 @@ func (vfs *VirtualFilesystem) OpenDeviceSpecialFile(ctx context.Context, mnt *Mo } return rd.dev.Open(ctx, mnt, d, *opts) } + +// GetAnonBlockDevMinor allocates and returns an unused minor device number for +// an "anonymous" block device with major number 0. +func (vfs *VirtualFilesystem) GetAnonBlockDevMinor() (uint32, error) { + vfs.anonBlockDevMinorMu.Lock() + defer vfs.anonBlockDevMinorMu.Unlock() + minor := vfs.anonBlockDevMinorNext + const maxDevMinor = (1 << 20) - 1 + for minor < maxDevMinor { + if _, ok := vfs.anonBlockDevMinor[minor]; !ok { + vfs.anonBlockDevMinor[minor] = struct{}{} + vfs.anonBlockDevMinorNext = minor + 1 + return minor, nil + } + minor++ + } + return 0, syserror.EMFILE +} + +// PutAnonBlockDevMinor deallocates a minor device number returned by a +// previous call to GetAnonBlockDevMinor. +func (vfs *VirtualFilesystem) PutAnonBlockDevMinor(minor uint32) { + vfs.anonBlockDevMinorMu.Lock() + defer vfs.anonBlockDevMinorMu.Unlock() + delete(vfs.anonBlockDevMinor, minor) + if minor < vfs.anonBlockDevMinorNext { + vfs.anonBlockDevMinorNext = minor + } +} diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 1f21b0b31..1f6f56293 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -80,6 +80,14 @@ type VirtualFilesystem struct { devicesMu sync.RWMutex devices map[devTuple]*registeredDevice + // anonBlockDevMinor contains all allocated anonymous block device minor + // numbers. anonBlockDevMinorNext is a lower bound for the smallest + // unallocated anonymous block device number. anonBlockDevMinorNext and + // anonBlockDevMinor are protected by anonBlockDevMinorMu. + anonBlockDevMinorMu sync.Mutex + anonBlockDevMinorNext uint32 + anonBlockDevMinor map[uint32]struct{} + // fsTypes contains all registered FilesystemTypes. fsTypes is protected by // fsTypesMu. fsTypesMu sync.RWMutex @@ -94,10 +102,12 @@ type VirtualFilesystem struct { // New returns a new VirtualFilesystem with no mounts or FilesystemTypes. func New() *VirtualFilesystem { vfs := &VirtualFilesystem{ - mountpoints: make(map[*Dentry]map[*Mount]struct{}), - devices: make(map[devTuple]*registeredDevice), - fsTypes: make(map[string]*registeredFilesystemType), - filesystems: make(map[*Filesystem]struct{}), + mountpoints: make(map[*Dentry]map[*Mount]struct{}), + devices: make(map[devTuple]*registeredDevice), + anonBlockDevMinorNext: 1, + anonBlockDevMinor: make(map[uint32]struct{}), + fsTypes: make(map[string]*registeredFilesystemType), + filesystems: make(map[*Filesystem]struct{}), } vfs.mounts.Init() return vfs -- cgit v1.2.3 From 18a7e1309decb9bc09879e337adbc00f81d420c5 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Fri, 24 Jan 2020 17:06:30 -0800 Subject: Add support for device special files to VFS2 tmpfs. PiperOrigin-RevId: 291471892 --- pkg/sentry/fsimpl/tmpfs/BUILD | 1 + pkg/sentry/fsimpl/tmpfs/device_file.go | 39 ++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/tmpfs/filesystem.go | 43 +++++++++++++++++++--------------- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 30 +++++++++++++++++++----- 4 files changed, 88 insertions(+), 25 deletions(-) create mode 100644 pkg/sentry/fsimpl/tmpfs/device_file.go (limited to 'pkg/sentry') diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index 7601c7c04..691476b4f 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -20,6 +20,7 @@ go_library( name = "tmpfs", srcs = [ "dentry_list.go", + "device_file.go", "directory.go", "filesystem.go", "named_pipe.go", diff --git a/pkg/sentry/fsimpl/tmpfs/device_file.go b/pkg/sentry/fsimpl/tmpfs/device_file.go new file mode 100644 index 000000000..84b181b90 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/device_file.go @@ -0,0 +1,39 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +type deviceFile struct { + inode inode + kind vfs.DeviceKind + major uint32 + minor uint32 +} + +func (fs *filesystem) newDeviceFile(creds *auth.Credentials, mode linux.FileMode, kind vfs.DeviceKind, major, minor uint32) *inode { + file := &deviceFile{ + kind: kind, + major: major, + minor: minor, + } + file.inode.init(file, fs, creds, mode) + file.inode.nlink = 1 // from parent directory + return &file.inode +} diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index a9f66a42a..d726f03c5 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -228,23 +228,26 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v // MknodAt implements vfs.FilesystemImpl.MknodAt. func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + var childInode *inode switch opts.Mode.FileType() { case 0, linux.S_IFREG: - child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil + childInode = fs.newRegularFile(rp.Credentials(), opts.Mode) case linux.S_IFIFO: - child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK: + childInode = fs.newNamedPipe(rp.Credentials(), opts.Mode) + case linux.S_IFBLK: + childInode = fs.newDeviceFile(rp.Credentials(), opts.Mode, vfs.BlockDevice, opts.DevMajor, opts.DevMinor) + case linux.S_IFCHR: + childInode = fs.newDeviceFile(rp.Credentials(), opts.Mode, vfs.CharDevice, opts.DevMajor, opts.DevMinor) + case linux.S_IFSOCK: // Not yet supported. return syserror.EPERM default: return syserror.EINVAL } + child := fs.newDentry(childInode) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil }) } @@ -264,7 +267,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if err != nil { return nil, err } - return d.open(ctx, rp, opts.Flags, false /* afterCreate */) + return d.open(ctx, rp, &opts, false /* afterCreate */) } mustCreate := opts.Flags&linux.O_EXCL != 0 @@ -279,7 +282,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if mustCreate { return nil, syserror.EEXIST } - return start.open(ctx, rp, opts.Flags, false /* afterCreate */) + return start.open(ctx, rp, &opts, false /* afterCreate */) } afterTrailingSymlink: parent, err := walkParentDirLocked(rp, start) @@ -313,7 +316,7 @@ afterTrailingSymlink: child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) parent.vfsd.InsertChild(&child.vfsd, name) parent.inode.impl.(*directory).childList.PushBack(child) - return child.open(ctx, rp, opts.Flags, true) + return child.open(ctx, rp, &opts, true) } if err != nil { return nil, err @@ -327,11 +330,11 @@ afterTrailingSymlink: if mustCreate { return nil, syserror.EEXIST } - return child.open(ctx, rp, opts.Flags, false) + return child.open(ctx, rp, &opts, false) } -func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { - ats := vfs.AccessTypesForOpenFlags(flags) +func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, afterCreate bool) (*vfs.FileDescription, error) { + ats := vfs.AccessTypesForOpenFlags(opts.Flags) if !afterCreate { if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil { return nil, err @@ -340,10 +343,10 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, switch impl := d.inode.impl.(type) { case *regularFile: var fd regularFileFD - if err := fd.vfsfd.Init(&fd, flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } - if flags&linux.O_TRUNC != 0 { + if opts.Flags&linux.O_TRUNC != 0 { impl.mu.Lock() impl.data.Truncate(0, impl.memFile) atomic.StoreUint64(&impl.size, 0) @@ -356,7 +359,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, return nil, syserror.EISDIR } var fd directoryFD - if err := fd.vfsfd.Init(&fd, flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } return &fd.vfsfd, nil @@ -364,7 +367,9 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, // Can't open symlinks without O_PATH (which is unimplemented). return nil, syserror.ELOOP case *namedPipe: - return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags) + return newNamedPipeFD(ctx, impl, rp, &d.vfsd, opts.Flags) + case *deviceFile: + return rp.VirtualFilesystem().OpenDeviceSpecialFile(ctx, rp.Mount(), &d.vfsd, impl.kind, impl.major, impl.minor, opts) default: panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl)) } diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 1d4889c89..515f033f2 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -149,6 +149,10 @@ type inode struct { ctime int64 // nanoseconds mtime int64 // nanoseconds + // Only meaningful for device special files. + rdevMajor uint32 + rdevMinor uint32 + impl interface{} // immutable } @@ -269,6 +273,15 @@ func (i *inode) statTo(stat *linux.Statx) { stat.Blocks = allocatedBlocksForSize(stat.Size) case *namedPipe: stat.Mode |= linux.S_IFIFO + case *deviceFile: + switch impl.kind { + case vfs.BlockDevice: + stat.Mode |= linux.S_IFBLK + case vfs.CharDevice: + stat.Mode |= linux.S_IFCHR + } + stat.RdevMajor = impl.major + stat.RdevMinor = impl.minor default: panic(fmt.Sprintf("unknown inode type: %T", i.impl)) } @@ -309,12 +322,8 @@ func (i *inode) setStat(stat linux.Statx) error { } case *directory: return syserror.EISDIR - case *symlink: - return syserror.EINVAL - case *namedPipe: - // Nothing. default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) + return syserror.EINVAL } } if mask&linux.STATX_ATIME != 0 { @@ -353,13 +362,22 @@ func allocatedBlocksForSize(size uint64) uint64 { } func (i *inode) direntType() uint8 { - switch i.impl.(type) { + switch impl := i.impl.(type) { case *regularFile: return linux.DT_REG case *directory: return linux.DT_DIR case *symlink: return linux.DT_LNK + case *deviceFile: + switch impl.kind { + case vfs.BlockDevice: + return linux.DT_BLK + case vfs.CharDevice: + return linux.DT_CHR + default: + panic(fmt.Sprintf("unknown vfs.DeviceKind: %v", impl.kind)) + } default: panic(fmt.Sprintf("unknown inode type: %T", i.impl)) } -- cgit v1.2.3