diff options
Diffstat (limited to 'pkg/sentry/vfs')
-rw-r--r-- | pkg/sentry/vfs/BUILD | 37 | ||||
-rw-r--r-- | pkg/sentry/vfs/anonfs.go | 259 | ||||
-rw-r--r-- | pkg/sentry/vfs/context.go | 2 | ||||
-rw-r--r-- | pkg/sentry/vfs/device.go | 2 | ||||
-rw-r--r-- | pkg/sentry/vfs/epoll.go | 377 | ||||
-rw-r--r-- | pkg/sentry/vfs/file_description.go | 63 | ||||
-rw-r--r-- | pkg/sentry/vfs/file_description_impl_util.go | 101 | ||||
-rw-r--r-- | pkg/sentry/vfs/file_description_impl_util_test.go | 156 | ||||
-rw-r--r-- | pkg/sentry/vfs/filesystem.go | 2 | ||||
-rw-r--r-- | pkg/sentry/vfs/filesystem_type.go | 2 | ||||
-rw-r--r-- | pkg/sentry/vfs/lock/BUILD | 13 | ||||
-rw-r--r-- | pkg/sentry/vfs/lock/lock.go | 72 | ||||
-rw-r--r-- | pkg/sentry/vfs/mount.go | 5 | ||||
-rw-r--r-- | pkg/sentry/vfs/pathname.go | 2 | ||||
-rw-r--r-- | pkg/sentry/vfs/testutil.go | 173 | ||||
-rw-r--r-- | pkg/sentry/vfs/vfs.go | 42 |
16 files changed, 1049 insertions, 259 deletions
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 35c7be259..14b39eb9d 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -1,15 +1,30 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") -package(licenses = ["notice"]) +licenses(["notice"]) + +go_template_instance( + name = "epoll_interest_list", + out = "epoll_interest_list.go", + package = "vfs", + prefix = "epollInterest", + template = "//pkg/ilist:generic_list", + types = { + "Element": "*epollInterest", + "Linker": "*epollInterest", + }, +) go_library( name = "vfs", srcs = [ + "anonfs.go", "context.go", "debug.go", "dentry.go", "device.go", + "epoll.go", + "epoll_interest_list.go", "file_description.go", "file_description_impl_util.go", "filesystem.go", @@ -21,21 +36,20 @@ go_library( "pathname.go", "permissions.go", "resolving_path.go", - "testutil.go", "vfs.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/vfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/fspath", "//pkg/sentry/arch", - "//pkg/sentry/context", + "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -47,14 +61,13 @@ go_test( "file_description_impl_util_test.go", "mount_test.go", ], - embed = [":vfs"], + library = ":vfs", deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/usermem", + "//pkg/context", + "//pkg/sentry/contexttest", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go new file mode 100644 index 000000000..2db25be49 --- /dev/null +++ b/pkg/sentry/vfs/anonfs.go @@ -0,0 +1,259 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs + +import ( + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// NewAnonVirtualDentry returns a VirtualDentry with the given synthetic name, +// consistent with Linux's fs/anon_inodes.c:anon_inode_getfile(). References +// are taken on the returned VirtualDentry. +func (vfs *VirtualFilesystem) NewAnonVirtualDentry(name string) VirtualDentry { + d := anonDentry{ + name: name, + } + d.vfsd.Init(&d) + vfs.anonMount.IncRef() + // anonDentry no-ops refcounting. + return VirtualDentry{ + mount: vfs.anonMount, + dentry: &d.vfsd, + } +} + +const anonfsBlockSize = usermem.PageSize // via fs/libfs.c:pseudo_fs_fill_super() + +// anonFilesystem is the implementation of FilesystemImpl that backs +// VirtualDentries returned by VirtualFilesystem.NewAnonVirtualDentry(). +// +// Since all Dentries in anonFilesystem are non-directories, all FilesystemImpl +// methods that would require an anonDentry to be a directory return ENOTDIR. +type anonFilesystem struct { + vfsfs Filesystem + + devMinor uint32 +} + +type anonDentry struct { + vfsd Dentry + + name string +} + +// Release implements FilesystemImpl.Release. +func (fs *anonFilesystem) Release() { +} + +// Sync implements FilesystemImpl.Sync. +func (fs *anonFilesystem) Sync(ctx context.Context) error { + return nil +} + +// GetDentryAt implements FilesystemImpl.GetDentryAt. +func (fs *anonFilesystem) GetDentryAt(ctx context.Context, rp *ResolvingPath, opts GetDentryOptions) (*Dentry, error) { + if !rp.Done() { + return nil, syserror.ENOTDIR + } + if opts.CheckSearchable { + return nil, syserror.ENOTDIR + } + // anonDentry no-ops refcounting. + return rp.Start(), nil +} + +// GetParentDentryAt implements FilesystemImpl.GetParentDentryAt. +func (fs *anonFilesystem) GetParentDentryAt(ctx context.Context, rp *ResolvingPath) (*Dentry, error) { + if !rp.Final() { + return nil, syserror.ENOTDIR + } + // anonDentry no-ops refcounting. + return rp.Start(), nil +} + +// LinkAt implements FilesystemImpl.LinkAt. +func (fs *anonFilesystem) LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error { + if !rp.Final() { + return syserror.ENOTDIR + } + return syserror.EPERM +} + +// MkdirAt implements FilesystemImpl.MkdirAt. +func (fs *anonFilesystem) MkdirAt(ctx context.Context, rp *ResolvingPath, opts MkdirOptions) error { + if !rp.Final() { + return syserror.ENOTDIR + } + return syserror.EPERM +} + +// MknodAt implements FilesystemImpl.MknodAt. +func (fs *anonFilesystem) MknodAt(ctx context.Context, rp *ResolvingPath, opts MknodOptions) error { + if !rp.Final() { + return syserror.ENOTDIR + } + return syserror.EPERM +} + +// OpenAt implements FilesystemImpl.OpenAt. +func (fs *anonFilesystem) OpenAt(ctx context.Context, rp *ResolvingPath, opts OpenOptions) (*FileDescription, error) { + if !rp.Done() { + return nil, syserror.ENOTDIR + } + return nil, syserror.ENODEV +} + +// ReadlinkAt implements FilesystemImpl.ReadlinkAt. +func (fs *anonFilesystem) ReadlinkAt(ctx context.Context, rp *ResolvingPath) (string, error) { + if !rp.Done() { + return "", syserror.ENOTDIR + } + return "", syserror.EINVAL +} + +// RenameAt implements FilesystemImpl.RenameAt. +func (fs *anonFilesystem) RenameAt(ctx context.Context, rp *ResolvingPath, oldParentVD VirtualDentry, oldName string, opts RenameOptions) error { + if !rp.Final() { + return syserror.ENOTDIR + } + return syserror.EPERM +} + +// RmdirAt implements FilesystemImpl.RmdirAt. +func (fs *anonFilesystem) RmdirAt(ctx context.Context, rp *ResolvingPath) error { + if !rp.Final() { + return syserror.ENOTDIR + } + return syserror.EPERM +} + +// SetStatAt implements FilesystemImpl.SetStatAt. +func (fs *anonFilesystem) SetStatAt(ctx context.Context, rp *ResolvingPath, opts SetStatOptions) error { + if !rp.Done() { + return syserror.ENOTDIR + } + // Linux actually permits anon_inode_inode's metadata to be set, which is + // visible to all users of anon_inode_inode. We just silently ignore + // metadata changes. + return nil +} + +// StatAt implements FilesystemImpl.StatAt. +func (fs *anonFilesystem) StatAt(ctx context.Context, rp *ResolvingPath, opts StatOptions) (linux.Statx, error) { + if !rp.Done() { + return linux.Statx{}, syserror.ENOTDIR + } + // See fs/anon_inodes.c:anon_inode_init() => fs/libfs.c:alloc_anon_inode(). + return linux.Statx{ + Mask: linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS, + Blksize: anonfsBlockSize, + Nlink: 1, + UID: uint32(auth.RootKUID), + GID: uint32(auth.RootKGID), + Mode: 0600, // no type is correct + Ino: 1, + Size: 0, + Blocks: 0, + DevMajor: 0, + DevMinor: fs.devMinor, + }, nil +} + +// StatFSAt implements FilesystemImpl.StatFSAt. +func (fs *anonFilesystem) StatFSAt(ctx context.Context, rp *ResolvingPath) (linux.Statfs, error) { + if !rp.Done() { + return linux.Statfs{}, syserror.ENOTDIR + } + return linux.Statfs{ + Type: linux.ANON_INODE_FS_MAGIC, + BlockSize: anonfsBlockSize, + }, nil +} + +// SymlinkAt implements FilesystemImpl.SymlinkAt. +func (fs *anonFilesystem) SymlinkAt(ctx context.Context, rp *ResolvingPath, target string) error { + if !rp.Final() { + return syserror.ENOTDIR + } + return syserror.EPERM +} + +// UnlinkAt implements FilesystemImpl.UnlinkAt. +func (fs *anonFilesystem) UnlinkAt(ctx context.Context, rp *ResolvingPath) error { + if !rp.Final() { + return syserror.ENOTDIR + } + return syserror.EPERM +} + +// ListxattrAt implements FilesystemImpl.ListxattrAt. +func (fs *anonFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath) ([]string, error) { + if !rp.Done() { + return nil, syserror.ENOTDIR + } + return nil, nil +} + +// GetxattrAt implements FilesystemImpl.GetxattrAt. +func (fs *anonFilesystem) GetxattrAt(ctx context.Context, rp *ResolvingPath, name string) (string, error) { + if !rp.Done() { + return "", syserror.ENOTDIR + } + return "", syserror.ENOTSUP +} + +// SetxattrAt implements FilesystemImpl.SetxattrAt. +func (fs *anonFilesystem) SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error { + if !rp.Done() { + return syserror.ENOTDIR + } + return syserror.EPERM +} + +// RemovexattrAt implements FilesystemImpl.RemovexattrAt. +func (fs *anonFilesystem) RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error { + if !rp.Done() { + return syserror.ENOTDIR + } + return syserror.EPERM +} + +// PrependPath implements FilesystemImpl.PrependPath. +func (fs *anonFilesystem) PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error { + b.PrependComponent(fmt.Sprintf("anon_inode:%s", vd.dentry.impl.(*anonDentry).name)) + return PrependPathSyntheticError{} +} + +// IncRef implements DentryImpl.IncRef. +func (d *anonDentry) IncRef() { + // no-op +} + +// TryIncRef implements DentryImpl.TryIncRef. +func (d *anonDentry) TryIncRef() bool { + return true +} + +// DecRef implements DentryImpl.DecRef. +func (d *anonDentry) DecRef() { + // no-op +} diff --git a/pkg/sentry/vfs/context.go b/pkg/sentry/vfs/context.go index 705194ebc..d97362b9a 100644 --- a/pkg/sentry/vfs/context.go +++ b/pkg/sentry/vfs/context.go @@ -15,7 +15,7 @@ package vfs import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // contextID is this package's type for context.Context.Value keys. diff --git a/pkg/sentry/vfs/device.go b/pkg/sentry/vfs/device.go index 9f9d6e783..3af2aa58d 100644 --- a/pkg/sentry/vfs/device.go +++ b/pkg/sentry/vfs/device.go @@ -17,7 +17,7 @@ package vfs import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go new file mode 100644 index 000000000..7c83f9a5a --- /dev/null +++ b/pkg/sentry/vfs/epoll.go @@ -0,0 +1,377 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" +) + +// epollCycleMu serializes attempts to register EpollInstances with other +// EpollInstances in order to check for cycles. +var epollCycleMu sync.Mutex + +// EpollInstance represents an epoll instance, as described by epoll(7). +type EpollInstance struct { + vfsfd FileDescription + FileDescriptionDefaultImpl + DentryMetadataFileDescriptionImpl + + // q holds waiters on this EpollInstance. + q waiter.Queue + + // interest is the set of file descriptors that are registered with the + // EpollInstance for monitoring. interest is protected by interestMu. + interestMu sync.Mutex + interest map[epollInterestKey]*epollInterest + + // mu protects fields in registered epollInterests. + mu sync.Mutex + + // ready is the set of file descriptors that may be "ready" for I/O. Note + // that this must be an ordered list, not a map: "If more than maxevents + // file descriptors are ready when epoll_wait() is called, then successive + // epoll_wait() calls will round robin through the set of ready file + // descriptors. This behavior helps avoid starvation scenarios, where a + // process fails to notice that additional file descriptors are ready + // because it focuses on a set of file descriptors that are already known + // to be ready." - epoll_wait(2) + ready epollInterestList +} + +type epollInterestKey struct { + // file is the registered FileDescription. No reference is held on file; + // instead, when the last reference is dropped, FileDescription.DecRef() + // removes the FileDescription from all EpollInstances. file is immutable. + file *FileDescription + + // num is the file descriptor number with which this entry was registered. + // num is immutable. + num int32 +} + +// epollInterest represents an EpollInstance's interest in a file descriptor. +type epollInterest struct { + // epoll is the owning EpollInstance. epoll is immutable. + epoll *EpollInstance + + // key is the file to which this epollInterest applies. key is immutable. + key epollInterestKey + + // waiter is registered with key.file. entry is protected by epoll.mu. + waiter waiter.Entry + + // mask is the event mask associated with this registration, including + // flags EPOLLET and EPOLLONESHOT. mask is protected by epoll.mu. + mask uint32 + + // ready is true if epollInterestEntry is linked into epoll.ready. ready + // and epollInterestEntry are protected by epoll.mu. + ready bool + epollInterestEntry + + // userData is the epoll_data_t associated with this epollInterest. + // userData is protected by epoll.mu. + userData [2]int32 +} + +// NewEpollInstanceFD returns a FileDescription representing a new epoll +// instance. A reference is taken on the returned FileDescription. +func (vfs *VirtualFilesystem) NewEpollInstanceFD() (*FileDescription, error) { + vd := vfs.NewAnonVirtualDentry("[eventpoll]") + defer vd.DecRef() + ep := &EpollInstance{ + interest: make(map[epollInterestKey]*epollInterest), + } + if err := ep.vfsfd.Init(ep, linux.O_RDWR, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{ + UseDentryMetadata: true, + }); err != nil { + return nil, err + } + return &ep.vfsfd, nil +} + +// Release implements FileDescriptionImpl.Release. +func (ep *EpollInstance) Release() { + // Unregister all polled fds. + ep.interestMu.Lock() + defer ep.interestMu.Unlock() + for key, epi := range ep.interest { + file := key.file + file.epollMu.Lock() + delete(file.epolls, epi) + file.epollMu.Unlock() + file.EventUnregister(&epi.waiter) + } + ep.interest = nil +} + +// Readiness implements waiter.Waitable.Readiness. +func (ep *EpollInstance) Readiness(mask waiter.EventMask) waiter.EventMask { + if mask&waiter.EventIn == 0 { + return 0 + } + ep.mu.Lock() + for epi := ep.ready.Front(); epi != nil; epi = epi.Next() { + wmask := waiter.EventMaskFromLinux(epi.mask) + if epi.key.file.Readiness(wmask)&wmask != 0 { + ep.mu.Unlock() + return waiter.EventIn + } + } + ep.mu.Unlock() + return 0 +} + +// EventRegister implements waiter.Waitable.EventRegister. +func (ep *EpollInstance) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + ep.q.EventRegister(e, mask) +} + +// EventUnregister implements waiter.Waitable.EventUnregister. +func (ep *EpollInstance) EventUnregister(e *waiter.Entry) { + ep.q.EventUnregister(e) +} + +// Seek implements FileDescriptionImpl.Seek. +func (ep *EpollInstance) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + // Linux: fs/eventpoll.c:eventpoll_fops.llseek == noop_llseek + return 0, nil +} + +// AddInterest implements the semantics of EPOLL_CTL_ADD. +// +// Preconditions: A reference must be held on file. +func (ep *EpollInstance) AddInterest(file *FileDescription, num int32, mask uint32, userData [2]int32) error { + // Check for cyclic polling if necessary. + subep, _ := file.impl.(*EpollInstance) + if subep != nil { + epollCycleMu.Lock() + // epollCycleMu must be locked for the rest of AddInterest to ensure + // that cyclic polling is not introduced after the check. + defer epollCycleMu.Unlock() + if subep.mightPoll(ep) { + return syserror.ELOOP + } + } + + ep.interestMu.Lock() + defer ep.interestMu.Unlock() + + // Fail if the key is already registered. + key := epollInterestKey{ + file: file, + num: num, + } + if _, ok := ep.interest[key]; ok { + return syserror.EEXIST + } + + // Register interest in file. + mask |= linux.EPOLLERR | linux.EPOLLRDHUP + epi := &epollInterest{ + epoll: ep, + key: key, + mask: mask, + userData: userData, + } + ep.interest[key] = epi + wmask := waiter.EventMaskFromLinux(mask) + file.EventRegister(&epi.waiter, wmask) + + // Check if the file is already ready. + if file.Readiness(wmask)&wmask != 0 { + epi.Callback(nil) + } + + // Add epi to file.epolls so that it is removed when the last + // FileDescription reference is dropped. + file.epollMu.Lock() + file.epolls[epi] = struct{}{} + file.epollMu.Unlock() + + return nil +} + +func (ep *EpollInstance) mightPoll(ep2 *EpollInstance) bool { + return ep.mightPollRecursive(ep2, 4) // Linux: fs/eventpoll.c:EP_MAX_NESTS +} + +func (ep *EpollInstance) mightPollRecursive(ep2 *EpollInstance, remainingRecursion int) bool { + ep.interestMu.Lock() + defer ep.interestMu.Unlock() + for key := range ep.interest { + nextep, ok := key.file.impl.(*EpollInstance) + if !ok { + continue + } + if nextep == ep2 { + return true + } + if remainingRecursion == 0 { + return true + } + if nextep.mightPollRecursive(ep2, remainingRecursion-1) { + return true + } + } + return false +} + +// ModifyInterest implements the semantics of EPOLL_CTL_MOD. +// +// Preconditions: A reference must be held on file. +func (ep *EpollInstance) ModifyInterest(file *FileDescription, num int32, mask uint32, userData [2]int32) error { + ep.interestMu.Lock() + defer ep.interestMu.Unlock() + + // Fail if the key is not already registered. + epi, ok := ep.interest[epollInterestKey{ + file: file, + num: num, + }] + if !ok { + return syserror.ENOENT + } + + // Update epi for the next call to ep.ReadEvents(). + ep.mu.Lock() + epi.mask = mask + epi.userData = userData + ep.mu.Unlock() + + // Re-register with the new mask. + mask |= linux.EPOLLERR | linux.EPOLLRDHUP + file.EventUnregister(&epi.waiter) + wmask := waiter.EventMaskFromLinux(mask) + file.EventRegister(&epi.waiter, wmask) + + // Check if the file is already ready with the new mask. + if file.Readiness(wmask)&wmask != 0 { + epi.Callback(nil) + } + + return nil +} + +// DeleteInterest implements the semantics of EPOLL_CTL_DEL. +// +// Preconditions: A reference must be held on file. +func (ep *EpollInstance) DeleteInterest(file *FileDescription, num int32) error { + ep.interestMu.Lock() + defer ep.interestMu.Unlock() + + // Fail if the key is not already registered. + epi, ok := ep.interest[epollInterestKey{ + file: file, + num: num, + }] + if !ok { + return syserror.ENOENT + } + + // Unregister from the file so that epi will no longer be readied. + file.EventUnregister(&epi.waiter) + + // Forget about epi. + ep.removeLocked(epi) + + file.epollMu.Lock() + delete(file.epolls, epi) + file.epollMu.Unlock() + + return nil +} + +// Callback implements waiter.EntryCallback.Callback. +func (epi *epollInterest) Callback(*waiter.Entry) { + newReady := false + epi.epoll.mu.Lock() + if !epi.ready { + newReady = true + epi.ready = true + epi.epoll.ready.PushBack(epi) + } + epi.epoll.mu.Unlock() + if newReady { + epi.epoll.q.Notify(waiter.EventIn) + } +} + +// Preconditions: ep.interestMu must be locked. +func (ep *EpollInstance) removeLocked(epi *epollInterest) { + delete(ep.interest, epi.key) + ep.mu.Lock() + if epi.ready { + epi.ready = false + ep.ready.Remove(epi) + } + ep.mu.Unlock() +} + +// ReadEvents reads up to len(events) ready events into events and returns the +// number of events read. +// +// Preconditions: len(events) != 0. +func (ep *EpollInstance) ReadEvents(events []linux.EpollEvent) int { + i := 0 + // Hot path: avoid defer. + ep.mu.Lock() + var next *epollInterest + var requeue epollInterestList + for epi := ep.ready.Front(); epi != nil; epi = next { + next = epi.Next() + // Regardless of what else happens, epi is initially removed from the + // ready list. + ep.ready.Remove(epi) + wmask := waiter.EventMaskFromLinux(epi.mask) + ievents := epi.key.file.Readiness(wmask) & wmask + if ievents == 0 { + // Leave epi off the ready list. + epi.ready = false + continue + } + // Determine what we should do with epi. + switch { + case epi.mask&linux.EPOLLONESHOT != 0: + // Clear all events from the mask; they must be re-added by + // EPOLL_CTL_MOD. + epi.mask &= linux.EP_PRIVATE_BITS + fallthrough + case epi.mask&linux.EPOLLET != 0: + // Leave epi off the ready list. + epi.ready = false + default: + // Queue epi to be moved to the end of the ready list. + requeue.PushBack(epi) + } + // Report ievents. + events[i] = linux.EpollEvent{ + Events: ievents.ToLinux(), + Fd: epi.userData[0], + Data: epi.userData[1], + } + i++ + if i == len(events) { + break + } + } + ep.ready.PushBackList(&requeue) + ep.mu.Unlock() + return i +} diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 51c95c2d9..5bac660c7 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -18,12 +18,14 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -45,6 +47,11 @@ type FileDescription struct { // memory operations. statusFlags uint32 + // epolls is the set of epollInterests registered for this FileDescription. + // epolls is protected by epollMu. + epollMu sync.Mutex + epolls map[*epollInterest]struct{} + // vd is the filesystem location at which this FileDescription was opened. // A reference is held on vd. vd is immutable. vd VirtualDentry @@ -141,6 +148,23 @@ func (fd *FileDescription) TryIncRef() bool { // DecRef decrements fd's reference count. func (fd *FileDescription) DecRef() { if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 { + // Unregister fd from all epoll instances. + fd.epollMu.Lock() + epolls := fd.epolls + fd.epolls = nil + fd.epollMu.Unlock() + for epi := range epolls { + ep := epi.epoll + ep.interestMu.Lock() + // Check that epi has not been concurrently unregistered by + // EpollInstance.DeleteInterest() or EpollInstance.Release(). + if _, ok := ep.interest[epi.key]; ok { + fd.EventUnregister(&epi.waiter) + ep.removeLocked(epi) + } + ep.interestMu.Unlock() + } + // Release implementation resources. fd.impl.Release() if fd.writable { fd.vd.mount.EndWrite() @@ -370,7 +394,25 @@ type FileDescriptionImpl interface { // Removexattr removes the given extended attribute from the file. Removexattr(ctx context.Context, name string) error - // TODO: file locking + // LockBSD tries to acquire a BSD-style advisory file lock. + // + // TODO(gvisor.dev/issue/1480): BSD-style file locking + LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error + + // LockBSD releases a BSD-style advisory file lock. + // + // TODO(gvisor.dev/issue/1480): BSD-style file locking + UnlockBSD(ctx context.Context, uid lock.UniqueID) error + + // LockPOSIX tries to acquire a POSIX-style advisory file lock. + // + // TODO(gvisor.dev/issue/1480): POSIX-style file locking + LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error + + // UnlockPOSIX releases a POSIX-style advisory file lock. + // + // TODO(gvisor.dev/issue/1480): POSIX-style file locking + UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error } // Dirent holds the information contained in struct linux_dirent64. @@ -453,6 +495,21 @@ func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { return fd.impl.StatFS(ctx) } +// Readiness returns fd's I/O readiness. +func (fd *FileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { + return fd.impl.Readiness(mask) +} + +// EventRegister registers e for I/O readiness events in mask. +func (fd *FileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + fd.impl.EventRegister(e, mask) +} + +// EventUnregister unregisters e for I/O readiness events. +func (fd *FileDescription) EventUnregister(e *waiter.Entry) { + fd.impl.EventUnregister(e) +} + // PRead reads from the file represented by fd into dst, starting at the given // offset, and returns the number of bytes read. PRead is permitted to return // partial reads with a nil error. diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index c00b3c84b..c2a52ec1b 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -19,12 +19,13 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -152,6 +153,26 @@ func (FileDescriptionDefaultImpl) Removexattr(ctx context.Context, name string) return syserror.ENOTSUP } +// LockBSD implements FileDescriptionImpl.LockBSD. +func (FileDescriptionDefaultImpl) LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error { + return syserror.EBADF +} + +// UnlockBSD implements FileDescriptionImpl.UnlockBSD. +func (FileDescriptionDefaultImpl) UnlockBSD(ctx context.Context, uid lock.UniqueID) error { + return syserror.EBADF +} + +// LockPOSIX implements FileDescriptionImpl.LockPOSIX. +func (FileDescriptionDefaultImpl) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error { + return syserror.EBADF +} + +// UnlockPOSIX implements FileDescriptionImpl.UnlockPOSIX. +func (FileDescriptionDefaultImpl) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error { + return syserror.EBADF +} + // DirectoryFileDescriptionDefaultImpl may be embedded by implementations of // FileDescriptionImpl that always represent directories to obtain // implementations of non-directory I/O methods that return EISDIR. @@ -192,21 +213,6 @@ func (DentryMetadataFileDescriptionImpl) SetStat(ctx context.Context, opts SetSt panic("illegal call to DentryMetadataFileDescriptionImpl.SetStat") } -// DynamicBytesFileDescriptionImpl may be embedded by implementations of -// FileDescriptionImpl that represent read-only regular files whose contents -// are backed by a bytes.Buffer that is regenerated when necessary, consistent -// with Linux's fs/seq_file.c:single_open(). -// -// DynamicBytesFileDescriptionImpl.SetDataSource() must be called before first -// use. -type DynamicBytesFileDescriptionImpl struct { - data DynamicBytesSource // immutable - mu sync.Mutex // protects the following fields - buf bytes.Buffer - off int64 - lastRead int64 // offset at which the last Read, PRead, or Seek ended -} - // DynamicBytesSource represents a data source for a // DynamicBytesFileDescriptionImpl. type DynamicBytesSource interface { @@ -225,6 +231,30 @@ func (s *StaticData) Generate(ctx context.Context, buf *bytes.Buffer) error { return nil } +// WritableDynamicBytesSource extends DynamicBytesSource to allow writes to the +// underlying source. +type WritableDynamicBytesSource interface { + DynamicBytesSource + + // Write sends writes to the source. + Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) +} + +// DynamicBytesFileDescriptionImpl may be embedded by implementations of +// FileDescriptionImpl that represent read-only regular files whose contents +// are backed by a bytes.Buffer that is regenerated when necessary, consistent +// with Linux's fs/seq_file.c:single_open(). +// +// DynamicBytesFileDescriptionImpl.SetDataSource() must be called before first +// use. +type DynamicBytesFileDescriptionImpl struct { + data DynamicBytesSource // immutable + mu sync.Mutex // protects the following fields + buf bytes.Buffer + off int64 + lastRead int64 // offset at which the last Read, PRead, or Seek ended +} + // SetDataSource must be called exactly once on fd before first use. func (fd *DynamicBytesFileDescriptionImpl) SetDataSource(data DynamicBytesSource) { fd.data = data @@ -304,6 +334,43 @@ func (fd *DynamicBytesFileDescriptionImpl) Seek(ctx context.Context, offset int6 return offset, nil } +// Preconditions: fd.mu must be locked. +func (fd *DynamicBytesFileDescriptionImpl) pwriteLocked(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { + if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 { + return 0, syserror.EOPNOTSUPP + } + + writable, ok := fd.data.(WritableDynamicBytesSource) + if !ok { + return 0, syserror.EINVAL + } + n, err := writable.Write(ctx, src, offset) + if err != nil { + return 0, err + } + + // Invalidate cached data that might exist prior to this call. + fd.buf.Reset() + return n, nil +} + +// PWrite implements FileDescriptionImpl.PWrite. +func (fd *DynamicBytesFileDescriptionImpl) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.pwriteLocked(ctx, src, offset, opts) + fd.mu.Unlock() + return n, err +} + +// Write implements FileDescriptionImpl.Write. +func (fd *DynamicBytesFileDescriptionImpl) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.pwriteLocked(ctx, src, fd.off, opts) + fd.off += n + fd.mu.Unlock() + return n, err +} + // GenericConfigureMMap may be used by most implementations of // FileDescriptionImpl.ConfigureMMap. func GenericConfigureMMap(fd *FileDescription, m memmap.Mappable, opts *memmap.MMapOpts) error { diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go index 9ed58512f..8fa26418e 100644 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ b/pkg/sentry/vfs/file_description_impl_util_test.go @@ -22,11 +22,10 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // fileDescription is the common fd struct which a filesystem implementation @@ -36,68 +35,80 @@ type fileDescription struct { FileDescriptionDefaultImpl } -// genCountFD is a read-only FileDescriptionImpl representing a regular file -// that contains the number of times its DynamicBytesSource.Generate() +// genCount contains the number of times its DynamicBytesSource.Generate() // implementation has been called. -type genCountFD struct { - fileDescription - DynamicBytesFileDescriptionImpl - +type genCount struct { count uint64 // accessed using atomic memory ops } -func newGenCountFD(mnt *Mount, vfsd *Dentry) *FileDescription { - var fd genCountFD - fd.vfsfd.Init(&fd, 0 /* statusFlags */, mnt, vfsd, &FileDescriptionOptions{}) - fd.DynamicBytesFileDescriptionImpl.SetDataSource(&fd) - return &fd.vfsfd +// Generate implements DynamicBytesSource.Generate. +func (g *genCount) Generate(ctx context.Context, buf *bytes.Buffer) error { + fmt.Fprintf(buf, "%d", atomic.AddUint64(&g.count, 1)) + return nil } -// Release implements FileDescriptionImpl.Release. -func (fd *genCountFD) Release() { +type storeData struct { + data string +} + +var _ WritableDynamicBytesSource = (*storeData)(nil) + +// Generate implements DynamicBytesSource. +func (d *storeData) Generate(ctx context.Context, buf *bytes.Buffer) error { + buf.WriteString(d.data) + return nil } -// StatusFlags implements FileDescriptionImpl.StatusFlags. -func (fd *genCountFD) StatusFlags(ctx context.Context) (uint32, error) { +// Generate implements WritableDynamicBytesSource. +func (d *storeData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + buf := make([]byte, src.NumBytes()) + n, err := src.CopyIn(ctx, buf) + if err != nil { + return 0, err + } + + d.data = string(buf[:n]) return 0, nil } -// SetStatusFlags implements FileDescriptionImpl.SetStatusFlags. -func (fd *genCountFD) SetStatusFlags(ctx context.Context, flags uint32) error { - return syserror.EPERM +// testFD is a read-only FileDescriptionImpl representing a regular file. +type testFD struct { + fileDescription + DynamicBytesFileDescriptionImpl + + data DynamicBytesSource +} + +func newTestFD(vfsObj *VirtualFilesystem, statusFlags uint32, data DynamicBytesSource) *FileDescription { + vd := vfsObj.NewAnonVirtualDentry("genCountFD") + defer vd.DecRef() + var fd testFD + fd.vfsfd.Init(&fd, statusFlags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{}) + fd.DynamicBytesFileDescriptionImpl.SetDataSource(data) + return &fd.vfsfd } +// Release implements FileDescriptionImpl.Release. +func (fd *testFD) Release() { +} + +// SetStatusFlags implements FileDescriptionImpl.SetStatusFlags. // Stat implements FileDescriptionImpl.Stat. -func (fd *genCountFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) { +func (fd *testFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) { // Note that Statx.Mask == 0 in the return value. return linux.Statx{}, nil } // SetStat implements FileDescriptionImpl.SetStat. -func (fd *genCountFD) SetStat(ctx context.Context, opts SetStatOptions) error { +func (fd *testFD) SetStat(ctx context.Context, opts SetStatOptions) error { return syserror.EPERM } -// Generate implements DynamicBytesSource.Generate. -func (fd *genCountFD) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%d", atomic.AddUint64(&fd.count, 1)) - return nil -} - func TestGenCountFD(t *testing.T) { ctx := contexttest.Context(t) - creds := auth.CredentialsFromContext(ctx) vfsObj := New() // vfs.New() - vfsObj.MustRegisterFilesystemType("testfs", FDTestFilesystemType{}, &RegisterFilesystemTypeOptions{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "testfs", &GetFilesystemOptions{}) - if err != nil { - t.Fatalf("failed to create testfs root mount: %v", err) - } - vd := mntns.Root() - defer vd.DecRef() - - fd := newGenCountFD(vd.Mount(), vd.Dentry()) + fd := newTestFD(vfsObj, linux.O_RDWR, &genCount{}) defer fd.DecRef() // The first read causes Generate to be called to fill the FD's buffer. @@ -138,4 +149,69 @@ func TestGenCountFD(t *testing.T) { if want := byte('3'); buf[0] != want { t.Errorf("PRead: got byte %c, wanted %c", buf[0], want) } + + // Write and PWrite fails. + if _, err := fd.Write(ctx, ioseq, WriteOptions{}); err != syserror.EINVAL { + t.Errorf("Write: got err %v, wanted %v", err, syserror.EINVAL) + } + if _, err := fd.PWrite(ctx, ioseq, 0, WriteOptions{}); err != syserror.EINVAL { + t.Errorf("Write: got err %v, wanted %v", err, syserror.EINVAL) + } +} + +func TestWritable(t *testing.T) { + ctx := contexttest.Context(t) + + vfsObj := New() // vfs.New() + fd := newTestFD(vfsObj, linux.O_RDWR, &storeData{data: "init"}) + defer fd.DecRef() + + buf := make([]byte, 10) + ioseq := usermem.BytesIOSequence(buf) + if n, err := fd.Read(ctx, ioseq, ReadOptions{}); n != 4 && err != io.EOF { + t.Fatalf("Read: got (%v, %v), wanted (4, EOF)", n, err) + } + if want := "init"; want == string(buf) { + t.Fatalf("Read: got %v, wanted %v", string(buf), want) + } + + // Test PWrite. + want := "write" + writeIOSeq := usermem.BytesIOSequence([]byte(want)) + if n, err := fd.PWrite(ctx, writeIOSeq, 0, WriteOptions{}); int(n) != len(want) && err != nil { + t.Errorf("PWrite: got err (%v, %v), wanted (%v, nil)", n, err, len(want)) + } + if n, err := fd.PRead(ctx, ioseq, 0, ReadOptions{}); int(n) != len(want) && err != io.EOF { + t.Fatalf("PRead: got (%v, %v), wanted (%v, EOF)", n, err, len(want)) + } + if want == string(buf) { + t.Fatalf("PRead: got %v, wanted %v", string(buf), want) + } + + // Test Seek to 0 followed by Write. + want = "write2" + writeIOSeq = usermem.BytesIOSequence([]byte(want)) + if n, err := fd.Seek(ctx, 0, linux.SEEK_SET); n != 0 && err != nil { + t.Errorf("Seek: got err (%v, %v), wanted (0, nil)", n, err) + } + if n, err := fd.Write(ctx, writeIOSeq, WriteOptions{}); int(n) != len(want) && err != nil { + t.Errorf("Write: got err (%v, %v), wanted (%v, nil)", n, err, len(want)) + } + if n, err := fd.PRead(ctx, ioseq, 0, ReadOptions{}); int(n) != len(want) && err != io.EOF { + t.Fatalf("PRead: got (%v, %v), wanted (%v, EOF)", n, err, len(want)) + } + if want == string(buf) { + t.Fatalf("PRead: got %v, wanted %v", string(buf), want) + } + + // Test failure if offset != 0. + if n, err := fd.Seek(ctx, 1, linux.SEEK_SET); n != 0 && err != nil { + t.Errorf("Seek: got err (%v, %v), wanted (0, nil)", n, err) + } + if n, err := fd.Write(ctx, writeIOSeq, WriteOptions{}); n != 0 && err != syserror.EINVAL { + t.Errorf("Write: got err (%v, %v), wanted (0, EINVAL)", n, err) + } + if n, err := fd.PWrite(ctx, writeIOSeq, 2, WriteOptions{}); n != 0 && err != syserror.EINVAL { + t.Errorf("PWrite: got err (%v, %v), wanted (0, EINVAL)", n, err) + } } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index ea78f555b..a06a6caf3 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -18,8 +18,8 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" ) // A Filesystem is a tree of nodes represented by Dentries, which forms part of diff --git a/pkg/sentry/vfs/filesystem_type.go b/pkg/sentry/vfs/filesystem_type.go index 023301780..c58b70728 100644 --- a/pkg/sentry/vfs/filesystem_type.go +++ b/pkg/sentry/vfs/filesystem_type.go @@ -18,7 +18,7 @@ import ( "bytes" "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) diff --git a/pkg/sentry/vfs/lock/BUILD b/pkg/sentry/vfs/lock/BUILD new file mode 100644 index 000000000..d9ab063b7 --- /dev/null +++ b/pkg/sentry/vfs/lock/BUILD @@ -0,0 +1,13 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "lock", + srcs = ["lock.go"], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/sentry/fs/lock", + "//pkg/syserror", + ], +) diff --git a/pkg/sentry/vfs/lock/lock.go b/pkg/sentry/vfs/lock/lock.go new file mode 100644 index 000000000..724dfe743 --- /dev/null +++ b/pkg/sentry/vfs/lock/lock.go @@ -0,0 +1,72 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package lock provides POSIX and BSD style file locking for VFS2 file +// implementations. +// +// The actual implementations can be found in the lock package under +// sentry/fs/lock. +package lock + +import ( + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/syserror" +) + +// FileLocks supports POSIX and BSD style locks, which correspond to fcntl(2) +// and flock(2) respectively in Linux. It can be embedded into various file +// implementations for VFS2 that support locking. +// +// Note that in Linux these two types of locks are _not_ cooperative, because +// race and deadlock conditions make merging them prohibitive. We do the same +// and keep them oblivious to each other. +type FileLocks struct { + // bsd is a set of BSD-style advisory file wide locks, see flock(2). + bsd fslock.Locks + + // posix is a set of POSIX-style regional advisory locks, see fcntl(2). + posix fslock.Locks +} + +// LockBSD tries to acquire a BSD-style lock on the entire file. +func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) { + return nil + } + return syserror.ErrWouldBlock +} + +// UnlockBSD releases a BSD-style lock on the entire file. +// +// This operation is always successful, even if there did not exist a lock on +// the requested region held by uid in the first place. +func (fl *FileLocks) UnlockBSD(uid fslock.UniqueID) { + fl.bsd.UnlockRegion(uid, fslock.LockRange{0, fslock.LockEOF}) +} + +// LockPOSIX tries to acquire a POSIX-style lock on a file region. +func (fl *FileLocks) LockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { + if fl.posix.LockRegion(uid, t, rng, block) { + return nil + } + return syserror.ErrWouldBlock +} + +// UnlockPOSIX releases a POSIX-style lock on a file region. +// +// This operation is always successful, even if there did not exist a lock on +// the requested region held by uid in the first place. +func (fl *FileLocks) UnlockPOSIX(uid fslock.UniqueID, rng fslock.LockRange) { + fl.posix.UnlockRegion(uid, rng) +} diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 00177b371..1fbb420f9 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -19,7 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" ) @@ -423,7 +423,8 @@ func (mntns *MountNamespace) IncRef() { } // DecRef decrements mntns' reference count. -func (mntns *MountNamespace) DecRef(vfs *VirtualFilesystem) { +func (mntns *MountNamespace) DecRef() { + vfs := mntns.root.fs.VirtualFilesystem() if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 { vfs.mountMu.Lock() vfs.mounts.seq.BeginWrite() diff --git a/pkg/sentry/vfs/pathname.go b/pkg/sentry/vfs/pathname.go index cf80df90e..b318c681a 100644 --- a/pkg/sentry/vfs/pathname.go +++ b/pkg/sentry/vfs/pathname.go @@ -15,8 +15,8 @@ package vfs import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/vfs/testutil.go b/pkg/sentry/vfs/testutil.go deleted file mode 100644 index ee5c8b9e2..000000000 --- a/pkg/sentry/vfs/testutil.go +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" -) - -// FDTestFilesystemType is a test-only FilesystemType that produces Filesystems -// for which all FilesystemImpl methods taking a path return EPERM. It is used -// to produce Mounts and Dentries for testing of FileDescriptionImpls that do -// not depend on their originating Filesystem. -type FDTestFilesystemType struct{} - -// FDTestFilesystem is a test-only FilesystemImpl produced by -// FDTestFilesystemType. -type FDTestFilesystem struct { - vfsfs Filesystem -} - -// GetFilesystem implements FilesystemType.GetFilesystem. -func (fstype FDTestFilesystemType) GetFilesystem(ctx context.Context, vfsObj *VirtualFilesystem, creds *auth.Credentials, source string, opts GetFilesystemOptions) (*Filesystem, *Dentry, error) { - var fs FDTestFilesystem - fs.vfsfs.Init(vfsObj, &fs) - return &fs.vfsfs, fs.NewDentry(), nil -} - -// Release implements FilesystemImpl.Release. -func (fs *FDTestFilesystem) Release() { -} - -// Sync implements FilesystemImpl.Sync. -func (fs *FDTestFilesystem) Sync(ctx context.Context) error { - return nil -} - -// GetDentryAt implements FilesystemImpl.GetDentryAt. -func (fs *FDTestFilesystem) GetDentryAt(ctx context.Context, rp *ResolvingPath, opts GetDentryOptions) (*Dentry, error) { - return nil, syserror.EPERM -} - -// GetParentDentryAt implements FilesystemImpl.GetParentDentryAt. -func (fs *FDTestFilesystem) GetParentDentryAt(ctx context.Context, rp *ResolvingPath) (*Dentry, error) { - return nil, syserror.EPERM -} - -// LinkAt implements FilesystemImpl.LinkAt. -func (fs *FDTestFilesystem) LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error { - return syserror.EPERM -} - -// MkdirAt implements FilesystemImpl.MkdirAt. -func (fs *FDTestFilesystem) MkdirAt(ctx context.Context, rp *ResolvingPath, opts MkdirOptions) error { - return syserror.EPERM -} - -// MknodAt implements FilesystemImpl.MknodAt. -func (fs *FDTestFilesystem) MknodAt(ctx context.Context, rp *ResolvingPath, opts MknodOptions) error { - return syserror.EPERM -} - -// OpenAt implements FilesystemImpl.OpenAt. -func (fs *FDTestFilesystem) OpenAt(ctx context.Context, rp *ResolvingPath, opts OpenOptions) (*FileDescription, error) { - return nil, syserror.EPERM -} - -// ReadlinkAt implements FilesystemImpl.ReadlinkAt. -func (fs *FDTestFilesystem) ReadlinkAt(ctx context.Context, rp *ResolvingPath) (string, error) { - return "", syserror.EPERM -} - -// RenameAt implements FilesystemImpl.RenameAt. -func (fs *FDTestFilesystem) RenameAt(ctx context.Context, rp *ResolvingPath, oldParentVD VirtualDentry, oldName string, opts RenameOptions) error { - return syserror.EPERM -} - -// RmdirAt implements FilesystemImpl.RmdirAt. -func (fs *FDTestFilesystem) RmdirAt(ctx context.Context, rp *ResolvingPath) error { - return syserror.EPERM -} - -// SetStatAt implements FilesystemImpl.SetStatAt. -func (fs *FDTestFilesystem) SetStatAt(ctx context.Context, rp *ResolvingPath, opts SetStatOptions) error { - return syserror.EPERM -} - -// StatAt implements FilesystemImpl.StatAt. -func (fs *FDTestFilesystem) StatAt(ctx context.Context, rp *ResolvingPath, opts StatOptions) (linux.Statx, error) { - return linux.Statx{}, syserror.EPERM -} - -// StatFSAt implements FilesystemImpl.StatFSAt. -func (fs *FDTestFilesystem) StatFSAt(ctx context.Context, rp *ResolvingPath) (linux.Statfs, error) { - return linux.Statfs{}, syserror.EPERM -} - -// SymlinkAt implements FilesystemImpl.SymlinkAt. -func (fs *FDTestFilesystem) SymlinkAt(ctx context.Context, rp *ResolvingPath, target string) error { - return syserror.EPERM -} - -// UnlinkAt implements FilesystemImpl.UnlinkAt. -func (fs *FDTestFilesystem) UnlinkAt(ctx context.Context, rp *ResolvingPath) error { - return syserror.EPERM -} - -// ListxattrAt implements FilesystemImpl.ListxattrAt. -func (fs *FDTestFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath) ([]string, error) { - return nil, syserror.EPERM -} - -// GetxattrAt implements FilesystemImpl.GetxattrAt. -func (fs *FDTestFilesystem) GetxattrAt(ctx context.Context, rp *ResolvingPath, name string) (string, error) { - return "", syserror.EPERM -} - -// SetxattrAt implements FilesystemImpl.SetxattrAt. -func (fs *FDTestFilesystem) SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error { - return syserror.EPERM -} - -// RemovexattrAt implements FilesystemImpl.RemovexattrAt. -func (fs *FDTestFilesystem) RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error { - return syserror.EPERM -} - -// PrependPath implements FilesystemImpl.PrependPath. -func (fs *FDTestFilesystem) PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error { - b.PrependComponent(fmt.Sprintf("vfs.fdTestDentry:%p", vd.dentry.impl.(*fdTestDentry))) - return PrependPathSyntheticError{} -} - -type fdTestDentry struct { - vfsd Dentry -} - -// NewDentry returns a new Dentry. -func (fs *FDTestFilesystem) NewDentry() *Dentry { - var d fdTestDentry - d.vfsd.Init(&d) - return &d.vfsd -} - -// IncRef implements DentryImpl.IncRef. -func (d *fdTestDentry) IncRef() { -} - -// TryIncRef implements DentryImpl.TryIncRef. -func (d *fdTestDentry) TryIncRef() bool { - return true -} - -// DecRef implements DentryImpl.DecRef. -func (d *fdTestDentry) DecRef() { -} diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 1f6f56293..908c69f91 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -16,23 +16,27 @@ // // Lock order: // -// FilesystemImpl/FileDescriptionImpl locks -// VirtualFilesystem.mountMu -// Dentry.mu -// Locks acquired by FilesystemImpls between Prepare{Delete,Rename}Dentry and Commit{Delete,Rename*}Dentry -// VirtualFilesystem.filesystemsMu +// EpollInstance.interestMu +// FileDescription.epollMu +// FilesystemImpl/FileDescriptionImpl locks +// VirtualFilesystem.mountMu +// Dentry.mu +// Locks acquired by FilesystemImpls between Prepare{Delete,Rename}Dentry and Commit{Delete,Rename*}Dentry +// VirtualFilesystem.filesystemsMu +// EpollInstance.mu // VirtualFilesystem.fsTypesMu // // Locking Dentry.mu in multiple Dentries requires holding -// VirtualFilesystem.mountMu. +// VirtualFilesystem.mountMu. Locking EpollInstance.interestMu in multiple +// EpollInstances requires holding epollCycleMu. package vfs import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -75,6 +79,14 @@ type VirtualFilesystem struct { // mountpoints is analogous to Linux's mountpoint_hashtable. mountpoints map[*Dentry]map[*Mount]struct{} + // anonMount is a Mount, not included in mounts or mountpoints, + // representing an anonFilesystem. anonMount is used to back + // VirtualDentries returned by VirtualFilesystem.NewAnonVirtualDentry(). + // anonMount is immutable. + // + // anonMount is analogous to Linux's anon_inode_mnt. + anonMount *Mount + // devices contains all registered Devices. devices is protected by // devicesMu. devicesMu sync.RWMutex @@ -110,6 +122,22 @@ func New() *VirtualFilesystem { filesystems: make(map[*Filesystem]struct{}), } vfs.mounts.Init() + + // Construct vfs.anonMount. + anonfsDevMinor, err := vfs.GetAnonBlockDevMinor() + if err != nil { + panic(fmt.Sprintf("VirtualFilesystem.GetAnonBlockDevMinor() failed during VirtualFilesystem construction: %v", err)) + } + anonfs := anonFilesystem{ + devMinor: anonfsDevMinor, + } + anonfs.vfsfs.Init(vfs, &anonfs) + vfs.anonMount = &Mount{ + vfs: vfs, + fs: &anonfs.vfsfs, + refs: 1, + } + return vfs } |