diff options
Diffstat (limited to 'pkg')
25 files changed, 412 insertions, 112 deletions
diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go index 6fbdd668d..c3ab15a4f 100644 --- a/pkg/abi/linux/file.go +++ b/pkg/abi/linux/file.go @@ -24,27 +24,23 @@ import ( // Constants for open(2). const ( - O_ACCMODE = 000000003 - O_RDONLY = 000000000 - O_WRONLY = 000000001 - O_RDWR = 000000002 - O_CREAT = 000000100 - O_EXCL = 000000200 - O_NOCTTY = 000000400 - O_TRUNC = 000001000 - O_APPEND = 000002000 - O_NONBLOCK = 000004000 - O_DSYNC = 000010000 - O_ASYNC = 000020000 - O_DIRECT = 000040000 - O_LARGEFILE = 000100000 - O_DIRECTORY = 000200000 - O_NOFOLLOW = 000400000 - O_NOATIME = 001000000 - O_CLOEXEC = 002000000 - O_SYNC = 004000000 // __O_SYNC in Linux - O_PATH = 010000000 - O_TMPFILE = 020000000 // __O_TMPFILE in Linux + O_ACCMODE = 000000003 + O_RDONLY = 000000000 + O_WRONLY = 000000001 + O_RDWR = 000000002 + O_CREAT = 000000100 + O_EXCL = 000000200 + O_NOCTTY = 000000400 + O_TRUNC = 000001000 + O_APPEND = 000002000 + O_NONBLOCK = 000004000 + O_DSYNC = 000010000 + O_ASYNC = 000020000 + O_NOATIME = 001000000 + O_CLOEXEC = 002000000 + O_SYNC = 004000000 // __O_SYNC in Linux + O_PATH = 010000000 + O_TMPFILE = 020000000 // __O_TMPFILE in Linux ) // Constants for fstatat(2). diff --git a/pkg/abi/linux/file_amd64.go b/pkg/abi/linux/file_amd64.go index 74c554be6..9d307e840 100644 --- a/pkg/abi/linux/file_amd64.go +++ b/pkg/abi/linux/file_amd64.go @@ -14,6 +14,14 @@ package linux +// Constants for open(2). +const ( + O_DIRECT = 000040000 + O_LARGEFILE = 000100000 + O_DIRECTORY = 000200000 + O_NOFOLLOW = 000400000 +) + // Stat represents struct stat. type Stat struct { Dev uint64 diff --git a/pkg/abi/linux/file_arm64.go b/pkg/abi/linux/file_arm64.go index f16c07589..26a54f416 100644 --- a/pkg/abi/linux/file_arm64.go +++ b/pkg/abi/linux/file_arm64.go @@ -14,6 +14,14 @@ package linux +// Constants for open(2). +const ( + O_DIRECTORY = 000040000 + O_NOFOLLOW = 000100000 + O_DIRECT = 000200000 + O_LARGEFILE = 000400000 +) + // Stat represents struct stat. type Stat struct { Dev uint64 diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index 4cad55327..f7702f8f4 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -198,7 +198,7 @@ type overlayEntry struct { upper *Inode // dirCacheMu protects dirCache. - dirCacheMu sync.DowngradableRWMutex `state:"nosave"` + dirCacheMu sync.RWMutex `state:"nosave"` // dirCache is cache of DentAttrs from upper and lower Inodes. dirCache *SortedDentryMap diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go index 8608805bf..191b39970 100644 --- a/pkg/sentry/fsimpl/ext/inode.go +++ b/pkg/sentry/fsimpl/ext/inode.go @@ -157,7 +157,9 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*v switch in.impl.(type) { case *regularFile: var fd regularFileFD - fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) + if err := fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } return &fd.vfsfd, nil case *directory: // Can't open directories writably. This check is not necessary for a read @@ -166,7 +168,9 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*v return nil, syserror.EISDIR } var fd directoryFD - fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) + if err := fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } return &fd.vfsfd, nil case *symlink: if flags&linux.O_PATH == 0 { diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index 606ca692d..75624e0b1 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -55,7 +55,9 @@ func (f *DynamicBytesFile) Init(creds *auth.Credentials, ino uint64, data vfs.Dy // Open implements Inode.Open. func (f *DynamicBytesFile) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { fd := &DynamicBytesFD{} - fd.Init(rp.Mount(), vfsd, f.data, flags) + if err := fd.Init(rp.Mount(), vfsd, f.data, flags); err != nil { + return nil, err + } return &fd.vfsfd, nil } @@ -80,10 +82,13 @@ type DynamicBytesFD struct { } // Init initializes a DynamicBytesFD. -func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, flags uint32) { +func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, flags uint32) error { + if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil { + return err + } fd.inode = d.Impl().(*Dentry).inode fd.SetDataSource(data) - fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}) + return nil } // Seek implements vfs.FileDescriptionImpl.Seek. diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index bcf069b5f..5fa1fa67b 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -43,9 +43,16 @@ type GenericDirectoryFD struct { } // Init initializes a GenericDirectoryFD. -func (fd *GenericDirectoryFD) Init(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, flags uint32) { +func (fd *GenericDirectoryFD) Init(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, flags uint32) error { + if vfs.AccessTypesForOpenFlags(flags)&vfs.MayWrite != 0 { + // Can't open directories for writing. + return syserror.EISDIR + } + if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil { + return err + } fd.children = children - fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}) + return nil } // VFSFileDescription returns a pointer to the vfs.FileDescription representing diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index a5fdfbde5..aa3fe76ee 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -115,7 +115,9 @@ func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMod func (d *readonlyDir) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { fd := &kernfs.GenericDirectoryFD{} - fd.Init(rp.Mount(), vfsd, &d.OrderedChildren, flags) + if err := fd.Init(rp.Mount(), vfsd, &d.OrderedChildren, flags); err != nil { + return nil, err + } return fd.VFSFileDescription(), nil } @@ -225,7 +227,9 @@ func TestReadStaticFile(t *testing.T) { defer sys.Destroy() pop := sys.PathOpAtRoot("file1") - fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, &vfs.OpenOptions{}) + fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, &vfs.OpenOptions{ + Flags: linux.O_RDONLY, + }) if err != nil { t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) } @@ -258,7 +262,9 @@ func TestCreateNewFileInStaticDir(t *testing.T) { // Close the file. The file should persist. fd.DecRef() - fd, err = sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, &vfs.OpenOptions{}) + fd, err = sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, &vfs.OpenOptions{ + Flags: linux.O_RDONLY, + }) if err != nil { t.Fatalf("OpenAt(pop:%+v) = %+v failed: %v", pop, fd, err) } @@ -272,7 +278,9 @@ func TestDirFDReadWrite(t *testing.T) { defer sys.Destroy() pop := sys.PathOpAtRoot("/") - fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, &vfs.OpenOptions{}) + fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, &vfs.OpenOptions{ + Flags: linux.O_RDONLY, + }) if err != nil { t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) } @@ -282,7 +290,7 @@ func TestDirFDReadWrite(t *testing.T) { if _, err := fd.Read(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); err != syserror.EISDIR { t.Fatalf("Read for directory FD failed with unexpected error: %v", err) } - if _, err := fd.Write(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); err != syserror.EISDIR { + if _, err := fd.Write(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); err != syserror.EBADF { t.Fatalf("Write for directory FD failed with unexpected error: %v", err) } } diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 4cd7e9aea..a9f66a42a 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -337,19 +337,12 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, return nil, err } } - mnt := rp.Mount() switch impl := d.inode.impl.(type) { case *regularFile: var fd regularFileFD - fd.readable = vfs.MayReadFileWithOpenFlags(flags) - fd.writable = vfs.MayWriteFileWithOpenFlags(flags) - if fd.writable { - if err := mnt.CheckBeginWrite(); err != nil { - return nil, err - } - // mnt.EndWrite() is called by regularFileFD.Release(). + if err := fd.vfsfd.Init(&fd, flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err } - fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) if flags&linux.O_TRUNC != 0 { impl.mu.Lock() impl.data.Truncate(0, impl.memFile) @@ -363,7 +356,9 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, return nil, syserror.EISDIR } var fd directoryFD - fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) + if err := fd.vfsfd.Init(&fd, flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } return &fd.vfsfd, nil case *symlink: // Can't open symlinks without O_PATH (which is unimplemented). diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go index 40bde54de..482aabd52 100644 --- a/pkg/sentry/fsimpl/tmpfs/named_pipe.go +++ b/pkg/sentry/fsimpl/tmpfs/named_pipe.go @@ -50,11 +50,10 @@ type namedPipeFD struct { func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { var err error var fd namedPipeFD - fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, rp, vfsd, &fd.vfsfd, flags) + fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, vfsd, &fd.vfsfd, flags) if err != nil { return nil, err } - mnt := rp.Mount() - fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) + fd.vfsfd.Init(&fd, flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}) return &fd.vfsfd, nil } diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 5fa70cc6d..7c633c1b0 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -101,10 +101,6 @@ func (rf *regularFile) truncate(size uint64) (bool, error) { type regularFileFD struct { fileDescription - // These are immutable. - readable bool - writable bool - // off is the file offset. off is accessed using atomic memory operations. // offMu serializes operations that may mutate off. off int64 @@ -113,16 +109,11 @@ type regularFileFD struct { // Release implements vfs.FileDescriptionImpl.Release. func (fd *regularFileFD) Release() { - if fd.writable { - fd.vfsfd.VirtualDentry().Mount().EndWrite() - } + // noop } // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - if !fd.readable { - return 0, syserror.EINVAL - } if offset < 0 { return 0, syserror.EINVAL } @@ -147,9 +138,6 @@ func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts // PWrite implements vfs.FileDescriptionImpl.PWrite. func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - if !fd.writable { - return 0, syserror.EINVAL - } if offset < 0 { return 0, syserror.EINVAL } diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index bf7461cbb..6f83e3cee 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -66,7 +66,7 @@ func NewVFSPipe(sizeBytes, atomicIOBytes int64) *VFSPipe { // for read and write will succeed both in blocking and nonblocking mode. POSIX // leaves this behavior undefined. This can be used to open a FIFO for writing // while there are no readers available." - fifo(7) -func (vp *VFSPipe) NewVFSPipeFD(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, vfsfd *vfs.FileDescription, flags uint32) (*VFSPipeFD, error) { +func (vp *VFSPipe) NewVFSPipeFD(ctx context.Context, vfsd *vfs.Dentry, vfsfd *vfs.FileDescription, flags uint32) (*VFSPipeFD, error) { vp.mu.Lock() defer vp.mu.Unlock() @@ -76,7 +76,7 @@ func (vp *VFSPipe) NewVFSPipeFD(ctx context.Context, rp *vfs.ResolvingPath, vfsd return nil, syserror.EINVAL } - vfd, err := vp.open(rp, vfsd, vfsfd, flags) + vfd, err := vp.open(vfsd, vfsfd, flags) if err != nil { return nil, err } @@ -118,19 +118,13 @@ func (vp *VFSPipe) NewVFSPipeFD(ctx context.Context, rp *vfs.ResolvingPath, vfsd } // Preconditions: vp.mu must be held. -func (vp *VFSPipe) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, vfsfd *vfs.FileDescription, flags uint32) (*VFSPipeFD, error) { +func (vp *VFSPipe) open(vfsd *vfs.Dentry, vfsfd *vfs.FileDescription, flags uint32) (*VFSPipeFD, error) { var fd VFSPipeFD fd.flags = flags fd.readable = vfs.MayReadFileWithOpenFlags(flags) fd.writable = vfs.MayWriteFileWithOpenFlags(flags) fd.vfsfd = vfsfd fd.pipe = &vp.pipe - if fd.writable { - // The corresponding Mount.EndWrite() is in VFSPipe.Release(). - if err := rp.Mount().CheckBeginWrite(); err != nil { - return nil, err - } - } switch { case fd.readable && fd.writable: diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index fa86ebced..78cc9e6e4 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -80,7 +80,7 @@ type MemoryManager struct { users int32 // mappingMu is analogous to Linux's struct mm_struct::mmap_sem. - mappingMu sync.DowngradableRWMutex `state:"nosave"` + mappingMu sync.RWMutex `state:"nosave"` // vmas stores virtual memory areas. Since vmas are stored by value, // clients should usually use vmaIterator.ValuePtr() instead of @@ -123,7 +123,7 @@ type MemoryManager struct { // activeMu is loosely analogous to Linux's struct // mm_struct::page_table_lock. - activeMu sync.DowngradableRWMutex `state:"nosave"` + activeMu sync.RWMutex `state:"nosave"` // pmas stores platform mapping areas used to implement vmas. Since pmas // are stored by value, clients should usually use pmaIterator.ValuePtr() diff --git a/pkg/sentry/syscalls/linux/sys_utsname.go b/pkg/sentry/syscalls/linux/sys_utsname.go index 748e8dd8d..a393e28c1 100644 --- a/pkg/sentry/syscalls/linux/sys_utsname.go +++ b/pkg/sentry/syscalls/linux/sys_utsname.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build amd64 +// +build amd64 arm64 package linux @@ -35,7 +35,15 @@ func Uname(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall copy(u.Nodename[:], uts.HostName()) copy(u.Release[:], version.Release) copy(u.Version[:], version.Version) - copy(u.Machine[:], "x86_64") // build tag above. + // build tag above. + switch t.SyscallTable().Arch { + case arch.AMD64: + copy(u.Machine[:], "x86_64") + case arch.ARM64: + copy(u.Machine[:], "aarch64") + default: + copy(u.Machine[:], "unknown") + } copy(u.Domainname[:], uts.DomainName()) // Copy out the result. diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 6afe280bc..51c95c2d9 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -49,8 +49,23 @@ type FileDescription struct { // A reference is held on vd. vd is immutable. vd VirtualDentry + // opts contains options passed to FileDescription.Init(). opts is + // immutable. opts FileDescriptionOptions + // readable is MayReadFileWithOpenFlags(statusFlags). readable is + // immutable. + // + // readable is analogous to Linux's FMODE_READ. + readable bool + + // writable is MayWriteFileWithOpenFlags(statusFlags). If writable is true, + // the FileDescription holds a write count on vd.mount. writable is + // immutable. + // + // writable is analogous to Linux's FMODE_WRITE. + writable bool + // impl is the FileDescriptionImpl associated with this Filesystem. impl is // immutable. This should be the last field in FileDescription. impl FileDescriptionImpl @@ -77,10 +92,17 @@ type FileDescriptionOptions struct { UseDentryMetadata bool } -// Init must be called before first use of fd. It takes references on mnt and -// d. statusFlags is the initial file description status flags, which is -// usually the full set of flags passed to open(2). -func (fd *FileDescription) Init(impl FileDescriptionImpl, statusFlags uint32, mnt *Mount, d *Dentry, opts *FileDescriptionOptions) { +// Init must be called before first use of fd. If it succeeds, it takes +// references on mnt and d. statusFlags is the initial file description status +// flags, which is usually the full set of flags passed to open(2). +func (fd *FileDescription) Init(impl FileDescriptionImpl, statusFlags uint32, mnt *Mount, d *Dentry, opts *FileDescriptionOptions) error { + writable := MayWriteFileWithOpenFlags(statusFlags) + if writable { + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + } + fd.refs = 1 fd.statusFlags = statusFlags | linux.O_LARGEFILE fd.vd = VirtualDentry{ @@ -89,7 +111,10 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, statusFlags uint32, mn } fd.vd.IncRef() fd.opts = *opts + fd.readable = MayReadFileWithOpenFlags(statusFlags) + fd.writable = writable fd.impl = impl + return nil } // IncRef increments fd's reference count. @@ -117,6 +142,9 @@ func (fd *FileDescription) TryIncRef() bool { func (fd *FileDescription) DecRef() { if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 { fd.impl.Release() + if fd.writable { + fd.vd.mount.EndWrite() + } fd.vd.DecRef() } else if refs < 0 { panic("FileDescription.DecRef() called without holding a reference") @@ -194,6 +222,16 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede return nil } +// IsReadable returns true if fd was opened for reading. +func (fd *FileDescription) IsReadable() bool { + return fd.readable +} + +// IsWritable returns true if fd was opened for writing. +func (fd *FileDescription) IsWritable() bool { + return fd.writable +} + // Impl returns the FileDescriptionImpl associated with fd. func (fd *FileDescription) Impl() FileDescriptionImpl { return fd.impl @@ -241,6 +279,8 @@ type FileDescriptionImpl interface { // Errors: // // - If opts.Flags specifies unsupported options, PRead returns EOPNOTSUPP. + // + // Preconditions: The FileDescription was opened for reading. PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) // Read is similar to PRead, but does not specify an offset. @@ -254,6 +294,8 @@ type FileDescriptionImpl interface { // Errors: // // - If opts.Flags specifies unsupported options, Read returns EOPNOTSUPP. + // + // Preconditions: The FileDescription was opened for reading. Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) // PWrite writes src to the file, starting at the given offset, and returns @@ -268,6 +310,8 @@ type FileDescriptionImpl interface { // // - If opts.Flags specifies unsupported options, PWrite returns // EOPNOTSUPP. + // + // Preconditions: The FileDescription was opened for writing. PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) // Write is similar to PWrite, but does not specify an offset, which is @@ -281,6 +325,8 @@ type FileDescriptionImpl interface { // Errors: // // - If opts.Flags specifies unsupported options, Write returns EOPNOTSUPP. + // + // Preconditions: The FileDescription was opened for writing. Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) // IterDirents invokes cb on each entry in the directory represented by the @@ -411,11 +457,17 @@ func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { // offset, and returns the number of bytes read. PRead is permitted to return // partial reads with a nil error. func (fd *FileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { + if !fd.readable { + return 0, syserror.EBADF + } return fd.impl.PRead(ctx, dst, offset, opts) } // Read is similar to PRead, but does not specify an offset. func (fd *FileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { + if !fd.readable { + return 0, syserror.EBADF + } return fd.impl.Read(ctx, dst, opts) } @@ -423,11 +475,17 @@ func (fd *FileDescription) Read(ctx context.Context, dst usermem.IOSequence, opt // offset, and returns the number of bytes written. PWrite is permitted to // return partial writes with a nil error. func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { + if !fd.writable { + return 0, syserror.EBADF + } return fd.impl.PWrite(ctx, src, offset, opts) } // Write is similar to PWrite, but does not specify an offset. func (fd *FileDescription) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { + if !fd.writable { + return 0, syserror.EBADF + } return fd.impl.Write(ctx, src, opts) } diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go index d279d05ca..f664581f4 100644 --- a/pkg/sentry/vfs/permissions.go +++ b/pkg/sentry/vfs/permissions.go @@ -94,14 +94,13 @@ func GenericCheckPermissions(creds *auth.Credentials, ats AccessTypes, isDir boo // the set of accesses permitted for the opened file: // // - O_TRUNC causes MayWrite to be set in the returned AccessTypes (since it -// mutates the file), but does not permit the opened to write to the file +// mutates the file), but does not permit writing to the open file description // thereafter. // // - "Linux reserves the special, nonstandard access mode 3 (binary 11) in // flags to mean: check for read and write permission on the file and return a // file descriptor that can't be used for reading or writing." - open(2). Thus -// AccessTypesForOpenFlags returns MayRead|MayWrite in this case, but -// filesystems are responsible for ensuring that access is denied. +// AccessTypesForOpenFlags returns MayRead|MayWrite in this case. // // Use May{Read,Write}FileWithOpenFlags() for these checks instead. func AccessTypesForOpenFlags(flags uint32) AccessTypes { diff --git a/pkg/sync/BUILD b/pkg/sync/BUILD index e8cd16b8f..97c4b3b1e 100644 --- a/pkg/sync/BUILD +++ b/pkg/sync/BUILD @@ -38,6 +38,7 @@ go_library( "race_unsafe.go", "seqcount.go", "syncutil.go", + "tmutex_unsafe.go", ], importpath = "gvisor.dev/gvisor/pkg/sync", ) @@ -48,6 +49,7 @@ go_test( srcs = [ "downgradable_rwmutex_test.go", "seqcount_test.go", + "tmutex_test.go", ], embed = [":sync"], ) diff --git a/pkg/sync/aliases.go b/pkg/sync/aliases.go index 20c7ca041..d2d7132fa 100644 --- a/pkg/sync/aliases.go +++ b/pkg/sync/aliases.go @@ -11,12 +11,6 @@ import ( // Aliases of standard library types. type ( - // Mutex is an alias of sync.Mutex. - Mutex = sync.Mutex - - // RWMutex is an alias of sync.RWMutex. - RWMutex = sync.RWMutex - // Cond is an alias of sync.Cond. Cond = sync.Cond diff --git a/pkg/sync/downgradable_rwmutex_test.go b/pkg/sync/downgradable_rwmutex_test.go index f04496bc5..ce667e825 100644 --- a/pkg/sync/downgradable_rwmutex_test.go +++ b/pkg/sync/downgradable_rwmutex_test.go @@ -18,7 +18,7 @@ import ( "testing" ) -func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { +func parallelReader(m *RWMutex, clocked, cunlock, cdone chan bool) { m.RLock() clocked <- true <-cunlock @@ -28,7 +28,7 @@ func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { func doTestParallelReaders(numReaders, gomaxprocs int) { runtime.GOMAXPROCS(gomaxprocs) - var m DowngradableRWMutex + var m RWMutex clocked := make(chan bool) cunlock := make(chan bool) cdone := make(chan bool) @@ -55,7 +55,7 @@ func TestParallelReaders(t *testing.T) { doTestParallelReaders(4, 2) } -func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { +func reader(rwm *RWMutex, numIterations int, activity *int32, cdone chan bool) { for i := 0; i < numIterations; i++ { rwm.RLock() n := atomic.AddInt32(activity, 1) @@ -70,7 +70,7 @@ func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone cdone <- true } -func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { +func writer(rwm *RWMutex, numIterations int, activity *int32, cdone chan bool) { for i := 0; i < numIterations; i++ { rwm.Lock() n := atomic.AddInt32(activity, 10000) @@ -85,7 +85,7 @@ func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone cdone <- true } -func downgradingWriter(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { +func downgradingWriter(rwm *RWMutex, numIterations int, activity *int32, cdone chan bool) { for i := 0; i < numIterations; i++ { rwm.Lock() n := atomic.AddInt32(activity, 10000) @@ -112,7 +112,7 @@ func HammerDowngradableRWMutex(gomaxprocs, numReaders, numIterations int) { runtime.GOMAXPROCS(gomaxprocs) // Number of active readers + 10000 * number of active writers. var activity int32 - var rwm DowngradableRWMutex + var rwm RWMutex cdone := make(chan bool) go writer(&rwm, numIterations, &activity, cdone) go downgradingWriter(&rwm, numIterations, &activity, cdone) @@ -148,3 +148,58 @@ func TestDowngradableRWMutex(t *testing.T) { HammerDowngradableRWMutex(10, 10, n) HammerDowngradableRWMutex(10, 5, n) } + +func TestRWDoubleTryLock(t *testing.T) { + var rwm RWMutex + if !rwm.TryLock() { + t.Fatal("failed to aquire lock") + } + if rwm.TryLock() { + t.Fatal("unexpectedly succeeded in aquiring locked mutex") + } +} + +func TestRWTryLockAfterLock(t *testing.T) { + var rwm RWMutex + rwm.Lock() + if rwm.TryLock() { + t.Fatal("unexpectedly succeeded in aquiring locked mutex") + } +} + +func TestRWTryLockUnlock(t *testing.T) { + var rwm RWMutex + if !rwm.TryLock() { + t.Fatal("failed to aquire lock") + } + rwm.Unlock() + if !rwm.TryLock() { + t.Fatal("failed to aquire lock after unlock") + } +} + +func TestTryRLockAfterLock(t *testing.T) { + var rwm RWMutex + rwm.Lock() + if rwm.TryRLock() { + t.Fatal("unexpectedly succeeded in aquiring locked mutex") + } +} + +func TestTryLockAfterRLock(t *testing.T) { + var rwm RWMutex + rwm.RLock() + if rwm.TryLock() { + t.Fatal("unexpectedly succeeded in aquiring locked mutex") + } +} + +func TestDoubleTryRLock(t *testing.T) { + var rwm RWMutex + if !rwm.TryRLock() { + t.Fatal("failed to aquire lock") + } + if !rwm.TryRLock() { + t.Fatal("failed to read aquire read locked lock") + } +} diff --git a/pkg/sync/downgradable_rwmutex_unsafe.go b/pkg/sync/downgradable_rwmutex_unsafe.go index 9bb55cd3a..ea6cdc447 100644 --- a/pkg/sync/downgradable_rwmutex_unsafe.go +++ b/pkg/sync/downgradable_rwmutex_unsafe.go @@ -19,7 +19,6 @@ package sync import ( - "sync" "sync/atomic" "unsafe" ) @@ -30,20 +29,45 @@ func runtimeSemacquire(s *uint32) //go:linkname runtimeSemrelease sync.runtime_Semrelease func runtimeSemrelease(s *uint32, handoff bool, skipframes int) -// DowngradableRWMutex is identical to sync.RWMutex, but adds the DowngradeLock -// method. -type DowngradableRWMutex struct { - w sync.Mutex // held if there are pending writers - writerSem uint32 // semaphore for writers to wait for completing readers - readerSem uint32 // semaphore for readers to wait for completing writers - readerCount int32 // number of pending readers - readerWait int32 // number of departing readers +// RWMutex is identical to sync.RWMutex, but adds the DowngradeLock, +// TryLock and TryRLock methods. +type RWMutex struct { + w Mutex // held if there are pending writers + writerSem uint32 // semaphore for writers to wait for completing readers + readerSem uint32 // semaphore for readers to wait for completing writers + readerCount int32 // number of pending readers + readerWait int32 // number of departing readers } const rwmutexMaxReaders = 1 << 30 +// TryRLock locks rw for reading. It returns true if it succeeds and false +// otherwise. It does not block. +func (rw *RWMutex) TryRLock() bool { + if RaceEnabled { + RaceDisable() + } + for { + rc := atomic.LoadInt32(&rw.readerCount) + if rc < 0 { + if RaceEnabled { + RaceEnable() + } + return false + } + if !atomic.CompareAndSwapInt32(&rw.readerCount, rc, rc+1) { + continue + } + if RaceEnabled { + RaceEnable() + RaceAcquire(unsafe.Pointer(&rw.readerSem)) + } + return true + } +} + // RLock locks rw for reading. -func (rw *DowngradableRWMutex) RLock() { +func (rw *RWMutex) RLock() { if RaceEnabled { RaceDisable() } @@ -58,14 +82,14 @@ func (rw *DowngradableRWMutex) RLock() { } // RUnlock undoes a single RLock call. -func (rw *DowngradableRWMutex) RUnlock() { +func (rw *RWMutex) RUnlock() { if RaceEnabled { RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) RaceDisable() } if r := atomic.AddInt32(&rw.readerCount, -1); r < 0 { if r+1 == 0 || r+1 == -rwmutexMaxReaders { - panic("RUnlock of unlocked DowngradableRWMutex") + panic("RUnlock of unlocked RWMutex") } // A writer is pending. if atomic.AddInt32(&rw.readerWait, -1) == 0 { @@ -78,8 +102,36 @@ func (rw *DowngradableRWMutex) RUnlock() { } } +// TryLock locks rw for writing. It returns true if it succeeds and false +// otherwise. It does not block. +func (rw *RWMutex) TryLock() bool { + if RaceEnabled { + RaceDisable() + } + // First, resolve competition with other writers. + if !rw.w.TryLock() { + if RaceEnabled { + RaceEnable() + } + return false + } + // Only proceed if there are no readers. + if !atomic.CompareAndSwapInt32(&rw.readerCount, 0, -rwmutexMaxReaders) { + rw.w.Unlock() + if RaceEnabled { + RaceEnable() + } + return false + } + if RaceEnabled { + RaceEnable() + RaceAcquire(unsafe.Pointer(&rw.writerSem)) + } + return true +} + // Lock locks rw for writing. -func (rw *DowngradableRWMutex) Lock() { +func (rw *RWMutex) Lock() { if RaceEnabled { RaceDisable() } @@ -98,7 +150,7 @@ func (rw *DowngradableRWMutex) Lock() { } // Unlock unlocks rw for writing. -func (rw *DowngradableRWMutex) Unlock() { +func (rw *RWMutex) Unlock() { if RaceEnabled { RaceRelease(unsafe.Pointer(&rw.writerSem)) RaceRelease(unsafe.Pointer(&rw.readerSem)) @@ -107,7 +159,7 @@ func (rw *DowngradableRWMutex) Unlock() { // Announce to readers there is no active writer. r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders) if r >= rwmutexMaxReaders { - panic("Unlock of unlocked DowngradableRWMutex") + panic("Unlock of unlocked RWMutex") } // Unblock blocked readers, if any. for i := 0; i < int(r); i++ { @@ -121,7 +173,7 @@ func (rw *DowngradableRWMutex) Unlock() { } // DowngradeLock atomically unlocks rw for writing and locks it for reading. -func (rw *DowngradableRWMutex) DowngradeLock() { +func (rw *RWMutex) DowngradeLock() { if RaceEnabled { RaceRelease(unsafe.Pointer(&rw.readerSem)) RaceDisable() @@ -129,7 +181,7 @@ func (rw *DowngradableRWMutex) DowngradeLock() { // Announce to readers there is no active writer and one additional reader. r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders+1) if r >= rwmutexMaxReaders+1 { - panic("DowngradeLock of unlocked DowngradableRWMutex") + panic("DowngradeLock of unlocked RWMutex") } // Unblock blocked readers, if any. Note that this loop starts as 1 since r // includes this goroutine. diff --git a/pkg/sync/tmutex_test.go b/pkg/sync/tmutex_test.go new file mode 100644 index 000000000..0838248b4 --- /dev/null +++ b/pkg/sync/tmutex_test.go @@ -0,0 +1,71 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sync + +import ( + "sync" + "testing" + "unsafe" +) + +// TestStructSize verifies that syncMutex's size hasn't drifted from the +// standard library's version. +// +// The correctness of this package relies on these remaining in sync. +func TestStructSize(t *testing.T) { + const ( + got = unsafe.Sizeof(syncMutex{}) + want = unsafe.Sizeof(sync.Mutex{}) + ) + if got != want { + t.Errorf("got sizeof(syncMutex) = %d, want = sizeof(sync.Mutex) = %d", got, want) + } +} + +// TestFieldValues verifies that the semantics of syncMutex.state from the +// standard library's implementation. +// +// The correctness of this package relies on these remaining in sync. +func TestFieldValues(t *testing.T) { + var m Mutex + m.Lock() + if got := *m.state(); got != mutexLocked { + t.Errorf("got locked sync.Mutex.state = %d, want = %d", got, mutexLocked) + } + m.Unlock() + if got := *m.state(); got != mutexUnlocked { + t.Errorf("got unlocked sync.Mutex.state = %d, want = %d", got, mutexUnlocked) + } +} + +func TestDoubleTryLock(t *testing.T) { + var m Mutex + if !m.TryLock() { + t.Fatal("failed to aquire lock") + } + if m.TryLock() { + t.Fatal("unexpectedly succeeded in aquiring locked mutex") + } +} + +func TestTryLockAfterLock(t *testing.T) { + var m Mutex + m.Lock() + if m.TryLock() { + t.Fatal("unexpectedly succeeded in aquiring locked mutex") + } +} + +func TestTryLockUnlock(t *testing.T) { + var m Mutex + if !m.TryLock() { + t.Fatal("failed to aquire lock") + } + m.Unlock() + if !m.TryLock() { + t.Fatal("failed to aquire lock after unlock") + } +} diff --git a/pkg/sync/tmutex_unsafe.go b/pkg/sync/tmutex_unsafe.go new file mode 100644 index 000000000..3dd15578b --- /dev/null +++ b/pkg/sync/tmutex_unsafe.go @@ -0,0 +1,49 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.13 +// +build !go1.15 + +// When updating the build constraint (above), check that syncMutex matches the +// standard library sync.Mutex definition. + +package sync + +import ( + "sync" + "sync/atomic" + "unsafe" +) + +// Mutex is a try lock. +type Mutex struct { + sync.Mutex +} + +type syncMutex struct { + state int32 + sema uint32 +} + +func (m *Mutex) state() *int32 { + return &(*syncMutex)(unsafe.Pointer(&m.Mutex)).state +} + +const ( + mutexUnlocked = 0 + mutexLocked = 1 +) + +// TryLock tries to aquire the mutex. It returns true if it succeeds and false +// otherwise. TryLock does not block. +func (m *Mutex) TryLock() bool { + if atomic.CompareAndSwapInt32(m.state(), mutexUnlocked, mutexLocked) { + if RaceEnabled { + RaceAcquire(unsafe.Pointer(&m.Mutex)) + } + return true + } + return false +} diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go index a2f44b496..3bba4028b 100644 --- a/pkg/tcpip/adapters/gonet/gonet.go +++ b/pkg/tcpip/adapters/gonet/gonet.go @@ -622,7 +622,7 @@ func (c *PacketConn) RemoteAddr() net.Addr { if err != nil { return nil } - return fullToTCPAddr(a) + return fullToUDPAddr(a) } // Read implements net.Conn.Read diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go index 2239c1e66..0ab089208 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/main.go +++ b/pkg/tcpip/sample/tun_tcp_connect/main.go @@ -164,7 +164,7 @@ func main() { // Create TCP endpoint. var wq waiter.Queue ep, e := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &wq) - if err != nil { + if e != nil { log.Fatal(e) } diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go index bca73cbb1..9e37cab18 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/main.go +++ b/pkg/tcpip/sample/tun_tcp_echo/main.go @@ -168,7 +168,7 @@ func main() { // Create TCP endpoint, bind it, then start listening. var wq waiter.Queue ep, e := s.NewEndpoint(tcp.ProtocolNumber, proto, &wq) - if err != nil { + if e != nil { log.Fatal(e) } |