From 1c8a014e7e129d6a49c1280e28434354881ace94 Mon Sep 17 00:00:00 2001 From: "Zyad A. Ali" Date: Tue, 20 Jul 2021 18:51:39 +0200 Subject: Create mq package. Create package mq to implement POSIX message queues, and define initial struct definitions. Updates #136 --- pkg/sentry/kernel/mq/mq.go | 96 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 pkg/sentry/kernel/mq/mq.go (limited to 'pkg/sentry/kernel/mq/mq.go') diff --git a/pkg/sentry/kernel/mq/mq.go b/pkg/sentry/kernel/mq/mq.go new file mode 100644 index 000000000..df9bdc267 --- /dev/null +++ b/pkg/sentry/kernel/mq/mq.go @@ -0,0 +1,96 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mq provides an implementation for POSIX message queues. +package mq + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/waiter" +) + +const ( + maxPriority = linux.MQ_PRIO_MAX - 1 // Highest possible message priority. +) + +// Queue represents a POSIX message queue. +// +// +stateify savable +type Queue struct { + // owner is the registry's owner. Immutable. + owner fs.FileOwner + + // perms is the registry's access permissions. Immutable. + perms fs.FilePermissions + + // mu protects all the fields below. + mu sync.Mutex `state:"nosave"` + + // senders is a queue of currently blocked senders. 
Senders are notified + // when space is available in the queue for a new message. + senders waiter.Queue + + // receivers is a queue of currently blocked receivers. Receivers are + // notified when a new message is inserted in the queue. + receivers waiter.Queue + + // messages is a list of messages currently in the queue. + messages msgList + + // subscriber represents a task registered to receive async notification + // from this queue. + subscriber Subscriber + + // nonBlock is true if this queue is non-blocking. + nonBlock bool + + // messageCount is the number of messages currently in the queue. + messageCount int64 + + // maxMessageCount is the maximum number of messages that the queue can + // hold. + maxMessageCount int64 + + // maxMessageSize is the maximum size of a message held by the queue. + maxMessageSize uint64 + + // byteCount is the number of bytes of data in all messages in the queue. + byteCount uint64 +} + +// Message holds a message exchanged through a Queue via mq_timedsend(2) and +// mq_timedreceive(2), and additional info relating to the message. +// +// +stateify savable +type Message struct { + msgEntry + + // Text is the message's sent content. + Text string + + // Size is the message's size in bytes. + Size uint64 + + // Priority is the message's priority. + Priority uint32 +} + +// Subscriber represents a task registered for async notification from a Queue. +// +// +stateify savable +type Subscriber struct { + // TODO: Add fields when mq_notify(2) is implemented. +} -- cgit v1.2.3 From 0061d0e4e5d74efce8af8d706437cba3d040cd5f Mon Sep 17 00:00:00 2001 From: "Zyad A. Ali" Date: Tue, 20 Jul 2021 20:31:46 +0200 Subject: Implement queueInode and queueFD in mqfs. Implement inode and file description representing a POSIX message queue, and other utilities needed to implement file operations.
Updates #136 --- pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 5 + pkg/sentry/fsimpl/mqfs/BUILD | 6 +- pkg/sentry/fsimpl/mqfs/inodes.go | 91 ---------------- pkg/sentry/fsimpl/mqfs/queue.go | 145 +++++++++++++++++++++++++ pkg/sentry/fsimpl/mqfs/root.go | 89 +++++++++++++++ pkg/sentry/kernel/mq/BUILD | 1 + pkg/sentry/kernel/mq/mq.go | 84 +++++++++++++- 7 files changed, 328 insertions(+), 93 deletions(-) delete mode 100644 pkg/sentry/fsimpl/mqfs/inodes.go create mode 100644 pkg/sentry/fsimpl/mqfs/queue.go create mode 100644 pkg/sentry/fsimpl/mqfs/root.go (limited to 'pkg/sentry/kernel/mq/mq.go') diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index 9d7526e47..652ade564 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -74,6 +74,11 @@ func (*DynamicBytesFile) SetStat(context.Context, *vfs.Filesystem, *auth.Credent return linuxerr.EPERM } +// Locks returns the file locks for this file. +func (f *DynamicBytesFile) Locks() *vfs.FileLocks { + return &f.locks +} + // DynamicBytesFD implements vfs.FileDescriptionImpl for an FD backed by a // DynamicBytesFile. // diff --git a/pkg/sentry/fsimpl/mqfs/BUILD b/pkg/sentry/fsimpl/mqfs/BUILD index afe1f3cd5..6b22ffabd 100644 --- a/pkg/sentry/fsimpl/mqfs/BUILD +++ b/pkg/sentry/fsimpl/mqfs/BUILD @@ -18,7 +18,8 @@ go_library( name = "mqfs", srcs = [ "mqfs.go", - "inodes.go", + "root.go", + "queue.go", "root_inode_refs.go", ], visibility = ["//pkg/sentry:internal"], @@ -29,6 +30,9 @@ go_library( "//pkg/refsvfs2", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/mq", "//pkg/sentry/vfs", + "//pkg/usermem", + "//pkg/waiter", ], ) diff --git a/pkg/sentry/fsimpl/mqfs/inodes.go b/pkg/sentry/fsimpl/mqfs/inodes.go deleted file mode 100644 index 702db59ee..000000000 --- a/pkg/sentry/fsimpl/mqfs/inodes.go +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2021 The gVisor Authors. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mqfs - -import ( - "bytes" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// rootInode represents inode for filesystem's root directory (/dev/mqueue). -// -// +stateify savable -type rootInode struct { - rootInodeRefs - kernfs.InodeAlwaysValid - kernfs.InodeAttrs - kernfs.InodeDirectoryNoNewChildren - kernfs.InodeNotSymlink - kernfs.InodeTemporary - kernfs.OrderedChildren - implStatFS - - locks vfs.FileLocks -} - -var _ kernfs.Inode = (*rootInode)(nil) - -// newRootInode returns a new, initialized rootInode. -func (fs *filesystem) newRootInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode { - inode := &rootInode{} - inode.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555)) - inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true}) - inode.InitRefs() - return inode -} - -// Open implements kernfs.Inode.Open. 
-func (i *rootInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), d, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{ - SeekEnd: kernfs.SeekEndZero, - }) - if err != nil { - return nil, err - } - return fd.VFSFileDescription(), nil -} - -// DecRef implements kernfs.Inode.DecRef. -func (i *rootInode) DecRef(ctx context.Context) { - i.rootInodeRefs.DecRef(func() { i.Destroy(ctx) }) -} - -// Rename implements Inode.Rename and overrides OrderedChildren.Rename. mqueue -// filesystem allows files to be unlinked, but not renamed. -func (i *rootInode) Rename(ctx context.Context, oldname, newname string, child, dstDir kernfs.Inode) error { - return linuxerr.EPERM -} - -// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. -func (*rootInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return linuxerr.EPERM -} - -// implStatFS provides an implementation of kernfs.Inode.StatFS for message -// queues to be embedded in inodes. -// -// +stateify savable -type implStatFS struct{} - -// StatFS implements kernfs.Inode.StatFS. -func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { - return vfs.GenericStatFS(linux.MQUEUE_MAGIC), nil -} diff --git a/pkg/sentry/fsimpl/mqfs/queue.go b/pkg/sentry/fsimpl/mqfs/queue.go new file mode 100644 index 000000000..a8e9bc722 --- /dev/null +++ b/pkg/sentry/fsimpl/mqfs/queue.go @@ -0,0 +1,145 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mqfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/mq" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +// queueInode represents an inode for a message queue (/dev/mqueue/[name]). +// +// +stateify savable +type queueInode struct { + kernfs.DynamicBytesFile + + // queue is the message queue backing this inode. + queue *mq.Queue +} + +var _ kernfs.Inode = (*queueInode)(nil) + +// newQueueInode returns a new, initialized queueInode. +func (fs *filesystem) newQueueInode(ctx context.Context, creds *auth.Credentials, q *mq.Queue, perm linux.FileMode) kernfs.Inode { + inode := &queueInode{queue: q} + inode.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), q, perm) + return inode +} + +// Keep implements kernfs.Inode.Keep. +func (q *queueInode) Keep() bool { + // Return true so that the fs keeps newly created dentries. This is done + // because inodes returned by root.Lookup are not temporary, they exist + // in the fs, and refer to message queues. + return true +} + +// queueFD implements vfs.FileDescriptionImpl for FD backed by a POSIX message +// queue. It's mostly similar to DynamicBytesFD, but implements more operations. 
+// +// +stateify savable +type queueFD struct { + vfs.FileDescriptionDefaultImpl + vfs.DynamicBytesFileDescriptionImpl + vfs.LockFD + + vfsfd vfs.FileDescription + inode kernfs.Inode + + // queue is the queue backing this fd. + queue *mq.Queue +} + +// Init initializes a queueFD. Mostly copied from DynamicBytesFD.Init, but uses +// the queueFD as FileDescriptionImpl. +func (fd *queueFD) Init(m *vfs.Mount, d *kernfs.Dentry, data vfs.DynamicBytesSource, locks *vfs.FileLocks, flags uint32) error { + fd.LockFD.Init(locks) + if err := fd.vfsfd.Init(fd, flags, m, d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil { + return err + } + fd.inode = d.Inode() + fd.SetDataSource(data) + return nil +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *queueFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + return fd.DynamicBytesFileDescriptionImpl.Seek(ctx, offset, whence) +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *queueFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + return fd.DynamicBytesFileDescriptionImpl.Read(ctx, dst, opts) +} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *queueFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + return fd.DynamicBytesFileDescriptionImpl.PRead(ctx, dst, offset, opts) +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *queueFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + return fd.DynamicBytesFileDescriptionImpl.Write(ctx, src, opts) +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *queueFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + return fd.DynamicBytesFileDescriptionImpl.PWrite(ctx, src, offset, opts) +} + +// Release implements vfs.FileDescriptionImpl.Release. 
+func (fd *queueFD) Release(context.Context) {} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (fd *queueFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + fs := fd.vfsfd.VirtualDentry().Mount().Filesystem() + return fd.inode.Stat(ctx, fs, opts) +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (fd *queueFD) SetStat(context.Context, vfs.SetStatOptions) error { + // DynamicBytesFiles are immutable. + return linuxerr.EPERM +} + +// OnClose implements FileDescriptionImpl.OnClose similar to +// ipc/mqueue.c::mqueue_flush_file. +func (fd *queueFD) OnClose(ctx context.Context) error { + fd.queue.Flush(ctx) + return nil +} + +// Readiness implements waiter.Waitable.Readiness similar to +// ipc/mqueue.c::mqueue_poll_file. +func (fd *queueFD) Readiness(mask waiter.EventMask) waiter.EventMask { + return fd.queue.Readiness(mask) +} + +// EventRegister implements Waitable.EventRegister. +func (fd *queueFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + fd.queue.EventRegister(e, mask) +} + +// EventUnregister implements Waitable.EventUnregister. +func (fd *queueFD) EventUnregister(e *waiter.Entry) { + fd.queue.EventUnregister(e) +} diff --git a/pkg/sentry/fsimpl/mqfs/root.go b/pkg/sentry/fsimpl/mqfs/root.go new file mode 100644 index 000000000..37b5749fb --- /dev/null +++ b/pkg/sentry/fsimpl/mqfs/root.go @@ -0,0 +1,89 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mqfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// rootInode represents inode for filesystem's root directory (/dev/mqueue). +// +// +stateify savable +type rootInode struct { + rootInodeRefs + kernfs.InodeAlwaysValid + kernfs.InodeAttrs + kernfs.InodeDirectoryNoNewChildren + kernfs.InodeNotSymlink + kernfs.InodeTemporary + kernfs.OrderedChildren + implStatFS + + locks vfs.FileLocks +} + +var _ kernfs.Inode = (*rootInode)(nil) + +// newRootInode returns a new, initialized rootInode. +func (fs *filesystem) newRootInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode { + inode := &rootInode{} + inode.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555)) + inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true}) + inode.InitRefs() + return inode +} + +// Open implements kernfs.Inode.Open. +func (i *rootInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), d, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{ + SeekEnd: kernfs.SeekEndZero, + }) + if err != nil { + return nil, err + } + return fd.VFSFileDescription(), nil +} + +// DecRef implements kernfs.Inode.DecRef. +func (i *rootInode) DecRef(ctx context.Context) { + i.rootInodeRefs.DecRef(func() { i.Destroy(ctx) }) +} + +// Rename implements Inode.Rename and overrides OrderedChildren.Rename. mqueue +// filesystem allows files to be unlinked, but not renamed. 
+func (i *rootInode) Rename(ctx context.Context, oldname, newname string, child, dstDir kernfs.Inode) error { + return linuxerr.EPERM +} + +// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. +func (*rootInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { + return linuxerr.EPERM +} + +// implStatFS provides an implementation of kernfs.Inode.StatFS for message +// queues to be embedded in inodes. +// +// +stateify savable +type implStatFS struct{} + +// StatFS implements kernfs.Inode.StatFS. +func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { + return vfs.GenericStatFS(linux.MQUEUE_MAGIC), nil +} diff --git a/pkg/sentry/kernel/mq/BUILD b/pkg/sentry/kernel/mq/BUILD index ec9cd18a9..b4e17b582 100644 --- a/pkg/sentry/kernel/mq/BUILD +++ b/pkg/sentry/kernel/mq/BUILD @@ -24,6 +24,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/sentry/fs", "//pkg/sync", "//pkg/waiter", diff --git a/pkg/sentry/kernel/mq/mq.go b/pkg/sentry/kernel/mq/mq.go index df9bdc267..29a46e8a9 100644 --- a/pkg/sentry/kernel/mq/mq.go +++ b/pkg/sentry/kernel/mq/mq.go @@ -16,7 +16,11 @@ package mq import ( + "bytes" + "fmt" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" @@ -52,7 +56,7 @@ type Queue struct { // subscriber represents a task registered to receive async notification // from this queue. - subscriber Subscriber + subscriber *Subscriber // nonBlock is true if this queue is non-blocking. nonBlock bool @@ -93,4 +97,82 @@ type Message struct { // +stateify savable type Subscriber struct { // TODO: Add fields when mq_notify(2) is implemented. + + // pid is the PID of the registered task. + pid int32 +} + +// Generate implements vfs.DynamicBytesSource.Generate. 
Queue is used as a +// dynamic bytes source for mqfs's queueInode. +func (q *Queue) Generate(ctx context.Context, buf *bytes.Buffer) error { + q.mu.Lock() + defer q.mu.Unlock() + + var ( + pid int32 + method int + sigNumber int + ) + if q.subscriber != nil { + pid = q.subscriber.pid + // TODO: add method and sigNumber when mq_notify(2) is implemented. + } + + buf.WriteString( + fmt.Sprintf("QSIZE:%-10d NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n", + q.byteCount, method, sigNumber, pid), + ) + return nil +} + +// Flush checks if the calling process has attached a notification request to +// this queue, if yes, then the request is removed, and another process can +// attach a request. +func (q *Queue) Flush(ctx context.Context) { + q.mu.Lock() + defer q.mu.Unlock() + + pid, ok := context.ThreadGroupIDFromContext(ctx) + if ok { + if q.subscriber != nil && pid == q.subscriber.pid { + q.subscriber = nil + } + } +} + +// Readiness implements Waitable.Readiness. +func (q *Queue) Readiness(mask waiter.EventMask) waiter.EventMask { + q.mu.Lock() + defer q.mu.Unlock() + + events := waiter.EventMask(0) + if q.messageCount > 0 { + events |= waiter.ReadableEvents + } + if q.messageCount < q.maxMessageCount { + events |= waiter.WritableEvents + } + return events & mask +} + +// EventRegister implements Waitable.EventRegister. +func (q *Queue) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + q.mu.Lock() + defer q.mu.Unlock() + + if mask&waiter.WritableEvents != 0 { + q.senders.EventRegister(e, waiter.EventOut) + } + if mask&waiter.ReadableEvents != 0 { + q.receivers.EventRegister(e, waiter.EventIn) + } +} + +// EventUnregister implements Waitable.EventUnregister. +func (q *Queue) EventUnregister(e *waiter.Entry) { + q.mu.Lock() + defer q.mu.Unlock() + + q.senders.EventUnregister(e) + q.receivers.EventUnregister(e) } -- cgit v1.2.3 From e452ecd49526f4a0bbacc462840fbc6e88781e36 Mon Sep 17 00:00:00 2001 From: "Zyad A. 
Ali" Date: Sat, 24 Jul 2021 19:15:54 +0200 Subject: Create mq.Registry and mqfs.RegistryImpl. Define a POSIX message queue Registry and RegistryImpl in mq package, implement RegistryImpl in mqfs, and add a Registry object to IPCNamespace initialized at filesystem creation. Updates #136 --- pkg/sentry/fsimpl/mqfs/BUILD | 2 + pkg/sentry/fsimpl/mqfs/mqfs.go | 4 +- pkg/sentry/fsimpl/mqfs/registry.go | 121 +++++++++++++++++++++++++++++++++++++ pkg/sentry/kernel/BUILD | 1 + pkg/sentry/kernel/ipc_namespace.go | 25 ++++++++ pkg/sentry/kernel/mq/BUILD | 1 + pkg/sentry/kernel/mq/mq.go | 51 +++++++++++++++- 7 files changed, 202 insertions(+), 3 deletions(-) create mode 100644 pkg/sentry/fsimpl/mqfs/registry.go (limited to 'pkg/sentry/kernel/mq/mq.go') diff --git a/pkg/sentry/fsimpl/mqfs/BUILD b/pkg/sentry/fsimpl/mqfs/BUILD index 6b22ffabd..6892c6c25 100644 --- a/pkg/sentry/fsimpl/mqfs/BUILD +++ b/pkg/sentry/fsimpl/mqfs/BUILD @@ -20,6 +20,7 @@ go_library( "mqfs.go", "root.go", "queue.go", + "registry.go", "root_inode_refs.go", ], visibility = ["//pkg/sentry:internal"], @@ -32,6 +33,7 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/mq", "//pkg/sentry/vfs", + "//pkg/sync", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fsimpl/mqfs/mqfs.go b/pkg/sentry/fsimpl/mqfs/mqfs.go index 18bc66134..a92012deb 100644 --- a/pkg/sentry/fsimpl/mqfs/mqfs.go +++ b/pkg/sentry/fsimpl/mqfs/mqfs.go @@ -28,7 +28,7 @@ import ( ) const ( - fsName = "mqueue" + Name = "mqueue" defaultMaxCachedDentries = uint64(1000) ) @@ -39,7 +39,7 @@ type FilesystemType struct{} // Name implements vfs.FilesystemType.Name. func (FilesystemType) Name() string { - return fsName + return Name } // Release implements vfs.FilesystemType.Release. diff --git a/pkg/sentry/fsimpl/mqfs/registry.go b/pkg/sentry/fsimpl/mqfs/registry.go new file mode 100644 index 000000000..3875b39ee --- /dev/null +++ b/pkg/sentry/fsimpl/mqfs/registry.go @@ -0,0 +1,121 @@ +// Copyright 2021 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mqfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/mq" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" +) + +// RegistryImpl implements mq.RegistryImpl. It implements the interface using +// the message queue filesystem, and is provided to mq.Registry at +// initialization. +// +// +stateify savable +type RegistryImpl struct { + // mu protects all fields below. + mu sync.Mutex + + // root is the root dentry of the mq filesystem. Its main usage is to + // retrieve the root inode, which we use to add, remove, and lookup message + // queues. + // + // We hold a reference on root and release when the registry is destroyed. + root *kernfs.Dentry + + // fs is the filesystem backing this registry, used mainly to initialize + // new inodes. + fs *filesystem + + // mount is the mount point used for this filesystem. + mount *vfs.Mount +} + +// NewRegistryImpl returns a new, initialized RegistryImpl, and takes a +// reference on root. +func NewRegistryImpl(root *kernfs.Dentry, fs *filesystem) *RegistryImpl { + root.IncRef() + return &RegistryImpl{ + root: root, + fs: fs, + } +} + +// Lookup implements mq.RegistryImpl.Lookup.
+func (r *RegistryImpl) Lookup(ctx context.Context, name string) *mq.Queue { + r.mu.Lock() + defer r.mu.Unlock() + + inode, err := r.lookup(ctx, name) + if err != nil { + return nil + } + return inode.(*queueInode).queue +} + +// New implements mq.RegistryImpl.New. +func (r *RegistryImpl) New(ctx context.Context, name string, q *mq.Queue, perm linux.FileMode) (*vfs.FileDescription, error) { + r.mu.Lock() + defer r.mu.Unlock() + + root := r.root.Inode().(*rootInode) + qInode := r.fs.newQueueInode(ctx, auth.CredentialsFromContext(ctx), q, perm).(*queueInode) + err := root.Insert(name, qInode) + if err != nil { + return nil, err + } + + fd := &queueFD{queue: q} + err = fd.Init(r.mount, r.root, qInode.data, &qInode.locks, 0 /* flags */) + if err != nil { + return nil, err + } + return fd.VFSFileDescription(), nil +} + +// Unlink implements mq.RegistryImpl.Unlink. +func (r *RegistryImpl) Unlink(ctx context.Context, name string) error { + r.mu.Lock() + defer r.mu.Unlock() + + root := r.root.Inode().(*rootInode) + inode, err := r.lookup(ctx, name) + if err != nil { + return err + } + return root.Unlink(ctx, name, inode) +} + +// lookup retrieves a kernfs.Inode using a name. +// +// Precondition: r.mu must be held. +func (r *RegistryImpl) lookup(ctx context.Context, name string) (kernfs.Inode, error) { + inode := r.root.Inode().(*rootInode) + lookup, err := inode.Lookup(ctx, name) + if err != nil { + return nil, err + } + return lookup, nil +} + +// Destroy implements mq.RegistryImpl.Destroy.
+func (r *RegistryImpl) Destroy(ctx context.Context) { + r.root.DecRef(ctx) +} diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index c0f13bf52..e91338da7 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -257,6 +257,7 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/futex", + "//pkg/sentry/kernel/mq", "//pkg/sentry/kernel/msgqueue", "//pkg/sentry/kernel/sched", "//pkg/sentry/kernel/semaphore", diff --git a/pkg/sentry/kernel/ipc_namespace.go b/pkg/sentry/kernel/ipc_namespace.go index 0b101b1bb..aa9c3fb31 100644 --- a/pkg/sentry/kernel/ipc_namespace.go +++ b/pkg/sentry/kernel/ipc_namespace.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/mq" "gvisor.dev/gvisor/pkg/sentry/kernel/msgqueue" "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" @@ -31,9 +32,17 @@ type IPCNamespace struct { // User namespace which owns this IPC namespace. Immutable. userNS *auth.UserNamespace + // System V utilities. queues *msgqueue.Registry semaphores *semaphore.Registry shms *shm.Registry + + // posixQueues is a POSIX message queue registry. + // + // posixQueues is somewhat equivalent to Linux's ipc_namespace.mq_mnt. + // Unlike SysV utilities, mq.Registry is not map-based, but is backed by + // a virtual filesystem. + posixQueues *mq.Registry } // NewIPCNamespace creates a new IPC namespace. @@ -63,10 +72,26 @@ func (i *IPCNamespace) ShmRegistry() *shm.Registry { return i.shms } +// SetPosixQueues sets value of posixQueues if the value is currently nil, +// otherwise returns without doing anything. +func (i *IPCNamespace) SetPosixQueues(r *mq.Registry) { + if i.posixQueues == nil { + i.posixQueues = r + } +} + +// PosixQueues returns the posix message queue registry for this namespace.
+func (i *IPCNamespace) PosixQueues() *mq.Registry { + return i.posixQueues +} + // DecRef implements refsvfs2.RefCounter.DecRef. func (i *IPCNamespace) DecRef(ctx context.Context) { i.IPCNamespaceRefs.DecRef(func() { i.shms.Release(ctx) + if i.posixQueues != nil { + i.posixQueues.Destroy(ctx) + } }) } diff --git a/pkg/sentry/kernel/mq/BUILD b/pkg/sentry/kernel/mq/BUILD index b4e17b582..7b00b8346 100644 --- a/pkg/sentry/kernel/mq/BUILD +++ b/pkg/sentry/kernel/mq/BUILD @@ -26,6 +26,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/sentry/fs", + "//pkg/sentry/vfs", "//pkg/sync", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/mq/mq.go b/pkg/sentry/kernel/mq/mq.go index 29a46e8a9..be46f78c8 100644 --- a/pkg/sentry/kernel/mq/mq.go +++ b/pkg/sentry/kernel/mq/mq.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) @@ -30,6 +31,54 @@ const ( maxPriority = linux.MQ_PRIO_MAX - 1 // Highest possible message priority. ) +// Registry is a POSIX message queue registry. +// +// Unlike SysV utilities, Registry is not map-based. It uses a provided +// RegistryImpl backed by a virtual filesystem to implement registry operations. +// +// +stateify savable +type Registry struct { + // impl is an implementation of several message queue utilities needed by + // the registry. impl should be provided by mqfs. + impl RegistryImpl +} + +// RegistryImpl defines utilities needed by a Registry to provide actual +// registry implementation. It works mainly as an abstraction layer used by +// Registry to avoid dealing directly with the filesystem. RegistryImpl should +// be implemented by mqfs and provided to Registry at initialization. +type RegistryImpl interface { + // Lookup returns the queue with the given name, nil if non exists. 
+ Lookup(context.Context, string) *Queue + + // New creates a new inode and file description using the given queue, + // inserts the inode into the filesystem tree with the given name, and + // returns the file description. An error is returned if creation fails, or + // if the name already exists. + New(context.Context, string, *Queue, linux.FileMode) (*vfs.FileDescription, error) + + // Unlink removes the queue with given name from the registry, and returns + // an error if the name doesn't exist. + Unlink(context.Context, string) error + + // Destroy destroys the registry. + Destroy(context.Context) +} + +// NewRegistry returns a new, initialized message queue registry. NewRegistry +// should be called when a new message queue filesystem is created, once per +// IPCNamespace. +func NewRegistry(impl RegistryImpl) *Registry { + return &Registry{ + impl: impl, + } +} + +// Destroy destroys the registry and releases all held references. +func (r *Registry) Destroy(ctx context.Context) { + r.impl.Destroy(ctx) +} + // Queue represents a POSIX message queue. // // +stateify savable @@ -103,7 +152,7 @@ type Subscriber struct { } // Generate implements vfs.DynamicBytesSource.Generate. Queue is used as a -// dynamic bytes source for mqfs's queueInode. +// DynamicBytesSource for mqfs's queueInode. func (q *Queue) Generate(ctx context.Context, buf *bytes.Buffer) error { q.mu.Lock() defer q.mu.Unlock() -- cgit v1.2.3 From 229c01552e2b819c2fa6bf1f5aa017cff366869e Mon Sep 17 00:00:00 2001 From: "Zyad A. Ali" Date: Thu, 29 Jul 2021 17:33:45 +0200 Subject: Move filesystem creation from GetFilesystem to RegistryImpl. Move root dentry and filesystem creation from GetFilesystem to NewRegistryImpl, create IPCNamespace.InitPosixQueues to create a new mqueue filesystem for each ipc namespace, and update GetFilesystem to retrieve fs and root dentry from IPCNamespace and return them.
Updates #136 --- pkg/sentry/fsimpl/mqfs/BUILD | 1 + pkg/sentry/fsimpl/mqfs/mqfs.go | 61 ++++++++++++++++++++++++++++---------- pkg/sentry/fsimpl/mqfs/registry.go | 34 ++++++++++++++++----- pkg/sentry/kernel/BUILD | 1 + pkg/sentry/kernel/ipc_namespace.go | 21 +++++++++---- pkg/sentry/kernel/mq/mq.go | 5 ++++ 6 files changed, 96 insertions(+), 27 deletions(-) (limited to 'pkg/sentry/kernel/mq/mq.go') diff --git a/pkg/sentry/fsimpl/mqfs/BUILD b/pkg/sentry/fsimpl/mqfs/BUILD index 6892c6c25..e688b9b48 100644 --- a/pkg/sentry/fsimpl/mqfs/BUILD +++ b/pkg/sentry/fsimpl/mqfs/BUILD @@ -31,6 +31,7 @@ go_library( "//pkg/refsvfs2", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/ipc", "//pkg/sentry/kernel/mq", "//pkg/sentry/vfs", "//pkg/sync", diff --git a/pkg/sentry/fsimpl/mqfs/mqfs.go b/pkg/sentry/fsimpl/mqfs/mqfs.go index a92012deb..ed559cd13 100644 --- a/pkg/sentry/fsimpl/mqfs/mqfs.go +++ b/pkg/sentry/fsimpl/mqfs/mqfs.go @@ -24,6 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" + "gvisor.dev/gvisor/pkg/sentry/kernel/mq" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -47,28 +49,32 @@ func (FilesystemType) Release(ctx context.Context) {} // GetFilesystem implements vfs.FilesystemType.GetFilesystem. func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - devMinor, err := vfsObj.GetAnonBlockDevMinor() - if err != nil { - return nil, nil, err + // mqfs is initialized only once per ipc namespace. Each ipc namespace has + // a POSIX message registry with a root dentry, filesystem, and a + // disconnected mount. 
We want the fs to be consistent for all processes in + // the same ipc namespace, so instead of creating a new fs and root dentry, + // we retrieve them using IPCNamespace.PosixQueues and use them. + + i := ipcNamespaceFromContext(ctx) + if i == nil { + return nil, nil, fmt.Errorf("mqfs.FilesystemType.GetFilesystem: ipc namespace doesn't exist") + } + defer i.DecRef(ctx) + + registry := i.PosixQueues() + if registry == nil { + return nil, nil, fmt.Errorf("mqfs.FilesystemType.GetFilesystem: ipc namespace doesn't have a POSIX registry") } + impl := registry.Impl().(*RegistryImpl) maxCachedDentries, err := maxCachedDentries(ctx, vfs.GenericParseMountOptions(opts.Data)) if err != nil { return nil, nil, err } + impl.fs.MaxCachedDentries = maxCachedDentries - fs := &filesystem{ - devMinor: devMinor, - Filesystem: kernfs.Filesystem{ - MaxCachedDentries: maxCachedDentries, - }, - } - fs.VFSFilesystem().Init(vfsObj, &ft, fs) - - var dentry kernfs.Dentry - dentry.InitRoot(&fs.Filesystem, fs.newRootInode(ctx, creds)) - - return fs.VFSFilesystem(), dentry.VFSDentry(), nil + impl.root.IncRef() + return impl.fs.VFSFilesystem(), impl.root.VFSDentry(), nil } // maxCachedDentries checks mopts for dentry_cache_limit. If a value is @@ -93,15 +99,40 @@ func maxCachedDentries(ctx context.Context, mopts map[string]string) (_ uint64, type filesystem struct { kernfs.Filesystem devMinor uint32 + + // root is the filesystem's root dentry. Since we take a reference on it in + // GetFilesystem, we should release it when the fs is released. + root *kernfs.Dentry } // Release implements vfs.FilesystemImpl.Release. func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) fs.Filesystem.Release(ctx) + fs.root.DecRef(ctx) } // MountOptions implements vfs.FilesystemImpl.MountOptions.
func (fs *filesystem) MountOptions() string { return fmt.Sprintf("dentry_cache_limit=%d", fs.MaxCachedDentries) } + +// ipcNamespace defines functions we need from kernel.IPCNamespace. We redefine +// ipcNamespace along with ipcNamespaceFromContext to avoid circular dependency +// with package sentry/kernel. +type ipcNamespace interface { + // PosixQueues returns a POSIX message queue registry. + PosixQueues() *mq.Registry + + // DecRef decrements ipcNamespace's number of references. + DecRef(ctx context.Context) +} + +// ipcNamespaceFromContext returns the IPC namespace in which ctx is executing. +// Copied from package sentry/kernel. +func ipcNamespaceFromContext(ctx context.Context) ipcNamespace { + if v := ctx.Value(ipc.CtxIPCNamespace); v != nil { + return v.(ipcNamespace) + } + return nil +} diff --git a/pkg/sentry/fsimpl/mqfs/registry.go b/pkg/sentry/fsimpl/mqfs/registry.go index 3875b39ee..9361b7eb4 100644 --- a/pkg/sentry/fsimpl/mqfs/registry.go +++ b/pkg/sentry/fsimpl/mqfs/registry.go @@ -50,12 +50,32 @@ type RegistryImpl struct { // NewRegistryImpl returns a new, initialized RegistryImpl, and takes a // reference on root. 
-func NewRegistryImpl(root *kernfs.Dentry, fs *filesystem) *RegistryImpl { - root.IncRef() - return &RegistryImpl{ - root: root, - fs: fs, +func NewRegistryImpl(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) (*RegistryImpl, error) { + devMinor, err := vfsObj.GetAnonBlockDevMinor() + if err != nil { + return nil, err + } + + var dentry kernfs.Dentry + fs := &filesystem{ + devMinor: devMinor, + root: &dentry, + } + fs.VFSFilesystem().Init(vfsObj, &FilesystemType{}, fs) + + dentry.InitRoot(&fs.Filesystem, fs.newRootInode(ctx, creds)) + dentry.IncRef() + + mount, err := vfsObj.NewDisconnectedMount(fs.VFSFilesystem(), dentry.VFSDentry(), &vfs.MountOptions{}) + if err != nil { + return nil, err } + + return &RegistryImpl{ + root: &dentry, + fs: fs, + mount: mount, + }, nil } // Lookup implements mq.RegistryImpl.Lookup. @@ -83,11 +103,11 @@ func (r *RegistryImpl) New(ctx context.Context, name string, q *mq.Queue, perm l } fd := &queueFD{queue: q} - err = fd.Init(r.mount, r.root, qInode.data, &qInode.locks, 0 /* flags */) + err = fd.Init(r.mount, r.root, q, qInode.Locks(), 0 /* flags */) if err != nil { return nil, err } - return fd.VFSFileDescription(), nil + return &fd.vfsfd, nil } // Unlink implements mq.RegistryImpl.Unlink. 
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 6ff3deb97..9f30a7706 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -249,6 +249,7 @@ go_library( "//pkg/sentry/fs/timerfd", "//pkg/sentry/fsbridge", "//pkg/sentry/fsimpl/kernfs", + "//pkg/sentry/fsimpl/mqfs", "//pkg/sentry/fsimpl/pipefs", "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/fsimpl/timerfd", diff --git a/pkg/sentry/kernel/ipc_namespace.go b/pkg/sentry/kernel/ipc_namespace.go index aa9c3fb31..11b4545c6 100644 --- a/pkg/sentry/kernel/ipc_namespace.go +++ b/pkg/sentry/kernel/ipc_namespace.go @@ -15,12 +15,16 @@ package kernel import ( + "fmt" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/mqfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/mq" "gvisor.dev/gvisor/pkg/sentry/kernel/msgqueue" "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" + "gvisor.dev/gvisor/pkg/sentry/vfs" ) // IPCNamespace represents an IPC namespace. @@ -72,12 +76,19 @@ func (i *IPCNamespace) ShmRegistry() *shm.Registry { return i.shms } -// SetPosixQueues sets value of posixQueues if the value is currently nil, -// otherwise returns without doing anything. -func (i *IPCNamespace) SetPosixQueues(r *mq.Registry) { - if i.posixQueues == nil { - i.posixQueues = r +// InitPosixQueues creates a new POSIX queue registry, and returns an error if +// the registry was previously initialized. +func (i *IPCNamespace) InitPosixQueues(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error { + if i.posixQueues != nil { + return fmt.Errorf("IPCNamespace.InitPosixQueues: already initialized") + } + + impl, err := mqfs.NewRegistryImpl(ctx, vfsObj, creds) + if err != nil { + return err } + i.posixQueues = mq.NewRegistry(impl) + return nil } // PosixQueues returns the posix message queue registry for this namespace. 
diff --git a/pkg/sentry/kernel/mq/mq.go b/pkg/sentry/kernel/mq/mq.go index be46f78c8..739ea2f1c 100644 --- a/pkg/sentry/kernel/mq/mq.go +++ b/pkg/sentry/kernel/mq/mq.go @@ -79,6 +79,11 @@ func (r *Registry) Destroy(ctx context.Context) { r.impl.Destroy(ctx) } +// Impl returns RegistryImpl inside r. +func (r *Registry) Impl() RegistryImpl { + return r.impl +} + // Queue represents a POSIX message queue. // // +stateify savable -- cgit v1.2.3 From 7508a0efeeef19a3e08e06e80be8258743438412 Mon Sep 17 00:00:00 2001 From: "Zyad A. Ali" Date: Thu, 29 Jul 2021 22:17:26 +0200 Subject: Define mq.View and use it for mqfs.queueFD. View makes it easier to handle O_RDONLY, O_WRONLY, and ORDWR options in mq_open(2). Updates #136 --- pkg/sentry/fsimpl/mqfs/queue.go | 4 ++-- pkg/sentry/kernel/mq/mq.go | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'pkg/sentry/kernel/mq/mq.go') diff --git a/pkg/sentry/fsimpl/mqfs/queue.go b/pkg/sentry/fsimpl/mqfs/queue.go index a8e9bc722..933dbc6ed 100644 --- a/pkg/sentry/fsimpl/mqfs/queue.go +++ b/pkg/sentry/fsimpl/mqfs/queue.go @@ -65,8 +65,8 @@ type queueFD struct { vfsfd vfs.FileDescription inode kernfs.Inode - // queue is the queue backing this fd. - queue *mq.Queue + // queue is a view into the queue backing this fd. + queue mq.View } // Init initializes a queueFD. Mostly copied from DynamicBytesFD.Init, but uses diff --git a/pkg/sentry/kernel/mq/mq.go b/pkg/sentry/kernel/mq/mq.go index 739ea2f1c..954883c5f 100644 --- a/pkg/sentry/kernel/mq/mq.go +++ b/pkg/sentry/kernel/mq/mq.go @@ -129,6 +129,21 @@ type Queue struct { byteCount uint64 } +// View is a view into a message queue. Views should only be used in file +// descriptions, but not inodes, because we use inodes to retreive the actual +// queue, and only FDs are responsible for providing user functionality. +type View interface { + // TODO: Add Send and Receive when mq_timedsend(2) and mq_timedreceive(2) + // are implemented. 
+ + // Flush checks if the calling process has attached a notification request + // to this queue, if yes, then the request is removed, and another process + // can attach a request. + Flush(ctx context.Context) + + waiter.Waitable +} + // Message holds a message exchanged through a Queue via mq_timedsend(2) and // mq_timedreceive(2), and additional info relating to the message. // @@ -179,9 +194,7 @@ func (q *Queue) Generate(ctx context.Context, buf *bytes.Buffer) error { return nil } -// Flush checks if the calling process has attached a notification request to -// this queue, if yes, then the request is removed, and another process can -// attach a request. +// Flush implements View.Flush. func (q *Queue) Flush(ctx context.Context) { q.mu.Lock() defer q.mu.Unlock() -- cgit v1.2.3 From 13d36561b8a9cab6cf20b4b5053752955f451518 Mon Sep 17 00:00:00 2001 From: "Zyad A. Ali" Date: Fri, 30 Jul 2021 16:57:59 +0200 Subject: Return FDs in RegistryImpl functions and use Views. Update RegistryImpl functions to return file descriptions, instead of queues, and use Views in queue inodes. 
Updates #136 --- pkg/sentry/fsimpl/mqfs/BUILD | 1 + pkg/sentry/fsimpl/mqfs/registry.go | 75 ++++++++++++++++++++++++++++++-------- pkg/sentry/kernel/mq/BUILD | 2 + pkg/sentry/kernel/mq/mq.go | 64 +++++++++++++++++++++++++++++--- 4 files changed, 122 insertions(+), 20 deletions(-) (limited to 'pkg/sentry/kernel/mq/mq.go') diff --git a/pkg/sentry/fsimpl/mqfs/BUILD b/pkg/sentry/fsimpl/mqfs/BUILD index e688b9b48..ef843015d 100644 --- a/pkg/sentry/fsimpl/mqfs/BUILD +++ b/pkg/sentry/fsimpl/mqfs/BUILD @@ -29,6 +29,7 @@ go_library( "//pkg/context", "//pkg/errors/linuxerr", "//pkg/refsvfs2", + "//pkg/sentry/fs", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/ipc", diff --git a/pkg/sentry/fsimpl/mqfs/registry.go b/pkg/sentry/fsimpl/mqfs/registry.go index 9361b7eb4..89ffaaf04 100644 --- a/pkg/sentry/fsimpl/mqfs/registry.go +++ b/pkg/sentry/fsimpl/mqfs/registry.go @@ -17,6 +17,8 @@ package mqfs import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/mq" @@ -78,20 +80,32 @@ func NewRegistryImpl(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds * }, nil } -// Lookup implements mq.RegistryImpl.Lookup. -func (r *RegistryImpl) Lookup(ctx context.Context, name string) *mq.Queue { +// Get implements mq.RegistryImpl.Get. 
+func (r *RegistryImpl) Get(ctx context.Context, name string, rOnly, wOnly, readWrite, block bool, flags uint32) (*vfs.FileDescription, bool, error) { r.mu.Lock() defer r.mu.Unlock() inode, err := r.lookup(ctx, name) if err != nil { - return nil + return nil, false, nil } - return inode.(*queueInode).queue + + qInode := inode.(*queueInode) + if !qInode.queue.HasPermissions(auth.CredentialsFromContext(ctx), perm(rOnly, wOnly, readWrite)) { + // "The queue exists, but the caller does not have permission to + // open it in the specified mode." + return nil, false, linuxerr.EACCES + } + + fd, err := r.newFD(qInode.queue, qInode, rOnly, wOnly, readWrite, block, flags) + if err != nil { + return nil, false, err + } + return fd, true, nil } // New implements mq.RegistryImpl.New. -func (r *RegistryImpl) New(ctx context.Context, name string, q *mq.Queue, perm linux.FileMode) (*vfs.FileDescription, error) { +func (r *RegistryImpl) New(ctx context.Context, name string, q *mq.Queue, rOnly, wOnly, readWrite, block bool, perm linux.FileMode, flags uint32) (*vfs.FileDescription, error) { r.mu.Lock() defer r.mu.Unlock() @@ -101,13 +115,7 @@ func (r *RegistryImpl) New(ctx context.Context, name string, q *mq.Queue, perm l if err != nil { return nil, err } - - fd := &queueFD{queue: q} - err = fd.Init(r.mount, r.root, q, qInode.Locks(), 0 /* flags */) - if err != nil { - return nil, err - } - return &fd.vfsfd, nil + return r.newFD(q, qInode, rOnly, wOnly, readWrite, block, flags) } // Unlink implements mq.RegistryImpl.Unlink. 
@@ -115,6 +123,11 @@ func (r *RegistryImpl) Unlink(ctx context.Context, name string) error { r.mu.Lock() defer r.mu.Unlock() + creds := auth.CredentialsFromContext(ctx) + if err := r.root.Inode().CheckPermissions(ctx, creds, vfs.MayWrite|vfs.MayExec); err != nil { + return err + } + root := r.root.Inode().(*rootInode) inode, err := r.lookup(ctx, name) if err != nil { @@ -123,6 +136,11 @@ func (r *RegistryImpl) Unlink(ctx context.Context, name string) error { return root.Unlink(ctx, name, inode) } +// Destroy implements mq.RegistryImpl.Destroy. +func (r *RegistryImpl) Destroy(ctx context.Context) { + r.root.DecRef(ctx) +} + // lookup retreives a kernfs.Inode using a name. // // Precondition: r.mu must be held. @@ -135,7 +153,34 @@ func (r *RegistryImpl) lookup(ctx context.Context, name string) (kernfs.Inode, e return lookup, nil } -// Destroy implements mq.RegistryImpl.Destroy. -func (r *RegistryImpl) Destroy(ctx context.Context) { - r.root.DecRef(ctx) +// newFD returns a new file description created using the given queue and inode. +func (r *RegistryImpl) newFD(q *mq.Queue, inode *queueInode, rOnly, wOnly, readWrite, block bool, flags uint32) (*vfs.FileDescription, error) { + view, err := mq.NewView(q, rOnly, wOnly, readWrite, block) + if err != nil { + return nil, err + } + + var dentry kernfs.Dentry + dentry.Init(&r.fs.Filesystem, inode) + + fd := &queueFD{queue: view} + err = fd.Init(r.mount, &dentry, inode.queue, inode.Locks(), flags) + if err != nil { + return nil, err + } + return &fd.vfsfd, nil +} + +// perm returns a permission mask created using given flags. +func perm(rOnly, wOnly, readWrite bool) fs.PermMask { + switch { + case readWrite: + return fs.PermMask{Read: true, Write: true} + case wOnly: + return fs.PermMask{Write: true} + case rOnly: + return fs.PermMask{Read: true} + default: + return fs.PermMask{} // Can't happen, see NewView. 
+ } } diff --git a/pkg/sentry/kernel/mq/BUILD b/pkg/sentry/kernel/mq/BUILD index 7b00b8346..fefac3ba5 100644 --- a/pkg/sentry/kernel/mq/BUILD +++ b/pkg/sentry/kernel/mq/BUILD @@ -25,7 +25,9 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/fs", + "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", "//pkg/sync", "//pkg/waiter", diff --git a/pkg/sentry/kernel/mq/mq.go b/pkg/sentry/kernel/mq/mq.go index 954883c5f..217478dca 100644 --- a/pkg/sentry/kernel/mq/mq.go +++ b/pkg/sentry/kernel/mq/mq.go @@ -21,13 +21,16 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) const ( + MaxName = 255 // Maximum size for a queue name. maxPriority = linux.MQ_PRIO_MAX - 1 // Highest possible message priority. ) @@ -48,18 +51,20 @@ type Registry struct { // Registry to avoid dealing directly with the filesystem. RegistryImpl should // be implemented by mqfs and provided to Registry at initialization. type RegistryImpl interface { - // Lookup returns the queue with the given name, nil if non exists. - Lookup(context.Context, string) *Queue + // Get searchs for a queue with the given name, if it exists, the queue is + // used to create a new FD, return it and return true. If the queue doesn't + // exist, return false and no error. An error is returned if creation fails. + Get(ctx context.Context, name string, rOnly, wOnly, readWrite, block bool, flags uint32) (*vfs.FileDescription, bool, error) // New creates a new inode and file description using the given queue, - // inserts the inode into the filesystem tree with the given name, and + // inserts the inode into the filesystem tree using the given name, and // returns the file description. 
An error is returned if creation fails, or // if the name already exists. - New(context.Context, string, *Queue, linux.FileMode) (*vfs.FileDescription, error) + New(ctx context.Context, name string, q *Queue, rOnly, wOnly, readWrite, block bool, perm linux.FileMode, flags uint32) (*vfs.FileDescription, error) // Unlink removes the queue with given name from the registry, and returns // an error if the name doesn't exist. - Unlink(context.Context, string) error + Unlink(ctx context.Context, name string) error // Destroy destroys the registry. Destroy(context.Context) @@ -144,6 +149,43 @@ type View interface { waiter.Waitable } +// ReaderWriter provides a send and receive view into a queue. +type ReaderWriter struct { + *Queue + + block bool +} + +// Reader provides a send-only view into a queue. +type Reader struct { + *Queue + + block bool +} + +// Writer provides a receive-only view into a queue. +type Writer struct { + *Queue + + block bool +} + +// NewView creates a new view into a queue and returns it. +func NewView(q *Queue, rOnly, wOnly, readWrite, block bool) (View, error) { + switch { + case readWrite: + return ReaderWriter{Queue: q, block: block}, nil + case wOnly: + return Writer{Queue: q, block: block}, nil + case rOnly: + return Reader{Queue: q, block: block}, nil + default: + // This case can't happen, due to O_RDONLY flag being 0 and O_WRONLY + // being 1, so one of them must be true. + return nil, linuxerr.EINVAL + } +} + // Message holds a message exchanged through a Queue via mq_timedsend(2) and // mq_timedreceive(2), and additional info relating to the message. // @@ -243,3 +285,15 @@ func (q *Queue) EventUnregister(e *waiter.Entry) { q.senders.EventUnregister(e) q.receivers.EventUnregister(e) } + +// HasPermissions returns true if the given credentials meet the access +// permissions required by the queue. 
+func (q *Queue) HasPermissions(creds *auth.Credentials, req fs.PermMask) bool { + p := q.perms.Other + if q.owner.UID == creds.EffectiveKUID { + p = q.perms.User + } else if creds.InGroup(q.owner.GID) { + p = q.perms.Group + } + return p.SupersetOf(req) +} -- cgit v1.2.3 From 7df562d47337b29e6ac53c06c120cd4d88dd5da3 Mon Sep 17 00:00:00 2001 From: "Zyad A. Ali" Date: Fri, 30 Jul 2021 22:15:29 +0200 Subject: Implement Registry.FindOrCreate. FindOrCreate implements the behaviour of mq_open(2). Updates #136 --- pkg/sentry/fsimpl/mqfs/registry.go | 24 +++--- pkg/sentry/kernel/ipc_namespace.go | 2 +- pkg/sentry/kernel/mq/mq.go | 166 ++++++++++++++++++++++++++++++++++--- 3 files changed, 167 insertions(+), 25 deletions(-) (limited to 'pkg/sentry/kernel/mq/mq.go') diff --git a/pkg/sentry/fsimpl/mqfs/registry.go b/pkg/sentry/fsimpl/mqfs/registry.go index 89ffaaf04..ea1f2981c 100644 --- a/pkg/sentry/fsimpl/mqfs/registry.go +++ b/pkg/sentry/fsimpl/mqfs/registry.go @@ -81,7 +81,7 @@ func NewRegistryImpl(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds * } // Get implements mq.RegistryImpl.Get. -func (r *RegistryImpl) Get(ctx context.Context, name string, rOnly, wOnly, readWrite, block bool, flags uint32) (*vfs.FileDescription, bool, error) { +func (r *RegistryImpl) Get(ctx context.Context, name string, access mq.AccessType, block bool, flags uint32) (*vfs.FileDescription, bool, error) { r.mu.Lock() defer r.mu.Unlock() @@ -91,13 +91,13 @@ func (r *RegistryImpl) Get(ctx context.Context, name string, rOnly, wOnly, readW } qInode := inode.(*queueInode) - if !qInode.queue.HasPermissions(auth.CredentialsFromContext(ctx), perm(rOnly, wOnly, readWrite)) { + if !qInode.queue.HasPermissions(auth.CredentialsFromContext(ctx), perm(access)) { // "The queue exists, but the caller does not have permission to // open it in the specified mode." 
return nil, false, linuxerr.EACCES } - fd, err := r.newFD(qInode.queue, qInode, rOnly, wOnly, readWrite, block, flags) + fd, err := r.newFD(qInode.queue, qInode, access, block, flags) if err != nil { return nil, false, err } @@ -105,7 +105,7 @@ func (r *RegistryImpl) Get(ctx context.Context, name string, rOnly, wOnly, readW } // New implements mq.RegistryImpl.New. -func (r *RegistryImpl) New(ctx context.Context, name string, q *mq.Queue, rOnly, wOnly, readWrite, block bool, perm linux.FileMode, flags uint32) (*vfs.FileDescription, error) { +func (r *RegistryImpl) New(ctx context.Context, name string, q *mq.Queue, access mq.AccessType, block bool, perm linux.FileMode, flags uint32) (*vfs.FileDescription, error) { r.mu.Lock() defer r.mu.Unlock() @@ -115,7 +115,7 @@ func (r *RegistryImpl) New(ctx context.Context, name string, q *mq.Queue, rOnly, if err != nil { return nil, err } - return r.newFD(q, qInode, rOnly, wOnly, readWrite, block, flags) + return r.newFD(q, qInode, access, block, flags) } // Unlink implements mq.RegistryImpl.Unlink. @@ -154,8 +154,8 @@ func (r *RegistryImpl) lookup(ctx context.Context, name string) (kernfs.Inode, e } // newFD returns a new file description created using the given queue and inode. -func (r *RegistryImpl) newFD(q *mq.Queue, inode *queueInode, rOnly, wOnly, readWrite, block bool, flags uint32) (*vfs.FileDescription, error) { - view, err := mq.NewView(q, rOnly, wOnly, readWrite, block) +func (r *RegistryImpl) newFD(q *mq.Queue, inode *queueInode, access mq.AccessType, block bool, flags uint32) (*vfs.FileDescription, error) { + view, err := mq.NewView(q, access, block) if err != nil { return nil, err } @@ -172,13 +172,13 @@ func (r *RegistryImpl) newFD(q *mq.Queue, inode *queueInode, rOnly, wOnly, readW } // perm returns a permission mask created using given flags. 
-func perm(rOnly, wOnly, readWrite bool) fs.PermMask { - switch { - case readWrite: +func perm(access mq.AccessType) fs.PermMask { + switch access { + case mq.ReadWrite: return fs.PermMask{Read: true, Write: true} - case wOnly: + case mq.WriteOnly: return fs.PermMask{Write: true} - case rOnly: + case mq.ReadOnly: return fs.PermMask{Read: true} default: return fs.PermMask{} // Can't happen, see NewView. diff --git a/pkg/sentry/kernel/ipc_namespace.go b/pkg/sentry/kernel/ipc_namespace.go index 429a4b983..50b4e015e 100644 --- a/pkg/sentry/kernel/ipc_namespace.go +++ b/pkg/sentry/kernel/ipc_namespace.go @@ -87,7 +87,7 @@ func (i *IPCNamespace) InitPosixQueues(ctx context.Context, vfsObj *vfs.VirtualF if err != nil { return err } - i.posixQueues = mq.NewRegistry(impl) + i.posixQueues = mq.NewRegistry(i.userNS, impl) return nil } diff --git a/pkg/sentry/kernel/mq/mq.go b/pkg/sentry/kernel/mq/mq.go index 217478dca..c21cc9d47 100644 --- a/pkg/sentry/kernel/mq/mq.go +++ b/pkg/sentry/kernel/mq/mq.go @@ -18,6 +18,7 @@ package mq import ( "bytes" "fmt" + "strings" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -29,9 +30,31 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// AccessType is the access type passed to mq_open. +type AccessType int + +// Possible access types. +const ( + ReadOnly AccessType = iota + WriteOnly + ReadWrite +) + const ( MaxName = 255 // Maximum size for a queue name. maxPriority = linux.MQ_PRIO_MAX - 1 // Highest possible message priority. + + maxQueuesDefault = linux.DFLT_QUEUESMAX // Default max number of queues. + + maxMsgDefault = linux.DFLT_MSG // Default max number of messages per queue. + maxMsgMin = linux.MIN_MSGMAX // Min value for max number of messages per queue. + maxMsgLimit = linux.DFLT_MSGMAX // Limit for max number of messages per queue. + maxMsgHardLimit = linux.HARD_MSGMAX // Hard limit for max number of messages per queue. + + msgSizeDefault = linux.DFLT_MSGSIZE // Default max message size. 
+ msgSizeMin = linux.MIN_MSGSIZEMAX // Min value for max message size. + msgSizeLimit = linux.DFLT_MSGSIZEMAX // Limit for max message size. + msgSizeHardLimit = linux.HARD_MSGSIZEMAX // Hard limit for max message size. ) // Registry is a POSIX message queue registry. @@ -41,6 +64,12 @@ const ( // // +stateify savable type Registry struct { + // userNS is the user namespace containing this registry. Immutable. + userNS *auth.UserNamespace + + // mu protects all fields below. + mu sync.Mutex `state:"nosave"` + // impl is an implementation of several message queue utilities needed by // the registry. impl should be provided by mqfs. impl RegistryImpl @@ -54,13 +83,13 @@ type RegistryImpl interface { // Get searchs for a queue with the given name, if it exists, the queue is // used to create a new FD, return it and return true. If the queue doesn't // exist, return false and no error. An error is returned if creation fails. - Get(ctx context.Context, name string, rOnly, wOnly, readWrite, block bool, flags uint32) (*vfs.FileDescription, bool, error) + Get(ctx context.Context, name string, access AccessType, block bool, flags uint32) (*vfs.FileDescription, bool, error) // New creates a new inode and file description using the given queue, // inserts the inode into the filesystem tree using the given name, and // returns the file description. An error is returned if creation fails, or // if the name already exists. - New(ctx context.Context, name string, q *Queue, rOnly, wOnly, readWrite, block bool, perm linux.FileMode, flags uint32) (*vfs.FileDescription, error) + New(ctx context.Context, name string, q *Queue, access AccessType, block bool, perm linux.FileMode, flags uint32) (*vfs.FileDescription, error) // Unlink removes the queue with given name from the registry, and returns // an error if the name doesn't exist. @@ -73,12 +102,128 @@ type RegistryImpl interface { // NewRegistry returns a new, initialized message queue registry. 
NewRegistry // should be called when a new message queue filesystem is created, once per // IPCNamespace. -func NewRegistry(impl RegistryImpl) *Registry { +func NewRegistry(userNS *auth.UserNamespace, impl RegistryImpl) *Registry { return &Registry{ - impl: impl, + userNS: userNS, + impl: impl, } } +// OpenOpts holds the options passed to FindOrCreate. +type OpenOpts struct { + Name string + Access AccessType + Create bool + Exclusive bool + Block bool +} + +// FindOrCreate creates a new POSIX message queue or opens an existing queue. +// See mq_open(2). +func (r *Registry) FindOrCreate(ctx context.Context, opts OpenOpts, perm linux.FileMode, attr *linux.MqAttr) (*vfs.FileDescription, error) { + // mq_overview(7) mentions that: "Each message queue is identified by a name + // of the form '/somename'", but the mq_open(3) man pages mention: + // "The mq_open() library function is implemented on top of a system call + // of the same name. The library function performs the check that the + // name starts with a slash (/), giving the EINVAL error if it does not. + // The kernel system call expects name to contain no preceding slash, so + // the C library function passes name without the preceding slash (i.e., + // name+1) to the system call." + // So we don't need to check it. + + if len(opts.Name) == 0 { + return nil, linuxerr.ENOENT + } + if len(opts.Name) > MaxName { + return nil, linuxerr.ENAMETOOLONG + } + if strings.ContainsRune(opts.Name, '/') { + return nil, linuxerr.EACCES + } + if opts.Name == "." || opts.Name == ".." { + return nil, linuxerr.EINVAL + } + + // Construct status flags. 
+ var flags uint32 + if opts.Block { + flags = linux.O_NONBLOCK + } + switch opts.Access { + case ReadOnly: + flags = flags | linux.O_RDONLY + case WriteOnly: + flags = flags | linux.O_WRONLY + case ReadWrite: + flags = flags | linux.O_RDWR + } + + r.mu.Lock() + defer r.mu.Unlock() + fd, ok, err := r.impl.Get(ctx, opts.Name, opts.Access, opts.Block, flags) + if err != nil { + return nil, err + } + + if ok { + if opts.Create && opts.Exclusive { + // "Both O_CREAT and O_EXCL were specified in oflag, but a queue + // with this name already exists." + return nil, linuxerr.EEXIST + } + return fd, nil + } + + if !opts.Create { + // "The O_CREAT flag was not specified in oflag, and no queue with this name + // exists." + return nil, linuxerr.ENOENT + } + + q, err := r.newQueueLocked(auth.CredentialsFromContext(ctx), fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(perm), attr) + if err != nil { + return nil, err + } + return r.impl.New(ctx, opts.Name, q, opts.Access, opts.Block, perm, flags) +} + +// newQueueLocked creates a new queue using the given attributes. If attr is nil +// return a queue with default values, otherwise use attr to create a new queue, +// and return an error if attributes are invalid. +func (r *Registry) newQueueLocked(creds *auth.Credentials, owner fs.FileOwner, perms fs.FilePermissions, attr *linux.MqAttr) (*Queue, error) { + if attr == nil { + return &Queue{ + owner: owner, + perms: perms, + maxMessageCount: int64(maxMsgDefault), + maxMessageSize: uint64(msgSizeDefault), + }, nil + } + + // "O_CREAT was specified in oflag, and attr was not NULL, but + // attr->mq_maxmsg or attr->mq_msqsize was invalid. Both of these fields + // these fields must be greater than zero. In a process that is + // unprivileged (does not have the CAP_SYS_RESOURCE capability), + // attr->mq_maxmsg must be less than or equal to the msg_max limit, and + // attr->mq_msgsize must be less than or equal to the msgsize_max limit. 
+ // In addition, even in a privileged process, attr->mq_maxmsg cannot + // exceed the HARD_MAX limit." - man mq_open(3). + if attr.MqMaxmsg <= 0 || attr.MqMsgsize <= 0 { + return nil, linuxerr.EINVAL + } + + if attr.MqMaxmsg > maxMsgHardLimit || (!creds.HasCapabilityIn(linux.CAP_SYS_RESOURCE, r.userNS) && (attr.MqMaxmsg > maxMsgLimit || attr.MqMsgsize > msgSizeLimit)) { + return nil, linuxerr.EINVAL + } + + return &Queue{ + owner: owner, + perms: perms, + maxMessageCount: attr.MqMaxmsg, + maxMessageSize: uint64(attr.MqMsgsize), + }, nil +} + // Destroy destroys the registry and releases all held references. func (r *Registry) Destroy(ctx context.Context) { r.impl.Destroy(ctx) @@ -117,9 +262,6 @@ type Queue struct { // from this queue. subscriber *Subscriber - // nonBlock is true if this queue is non-blocking. - nonBlock bool - // messageCount is the number of messages currently in the queue. messageCount int64 @@ -171,13 +313,13 @@ type Writer struct { } // NewView creates a new view into a queue and returns it. -func NewView(q *Queue, rOnly, wOnly, readWrite, block bool) (View, error) { - switch { - case readWrite: +func NewView(q *Queue, access AccessType, block bool) (View, error) { + switch access { + case ReadWrite: return ReaderWriter{Queue: q, block: block}, nil - case wOnly: + case WriteOnly: return Writer{Queue: q, block: block}, nil - case rOnly: + case ReadOnly: return Reader{Queue: q, block: block}, nil default: // This case can't happen, due to O_RDONLY flag being 0 and O_WRONLY -- cgit v1.2.3 From 9bde727f4f2e5b7cf52211a3a4fe71c7a0e4f1ea Mon Sep 17 00:00:00 2001 From: "Zyad A. Ali" Date: Fri, 30 Jul 2021 22:24:54 +0200 Subject: Implement Registry.Remove. Remove implements the behaviour of mq_unlink(2). 
Updates #136 --- pkg/sentry/kernel/mq/mq.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'pkg/sentry/kernel/mq/mq.go') diff --git a/pkg/sentry/kernel/mq/mq.go b/pkg/sentry/kernel/mq/mq.go index c21cc9d47..a7c787081 100644 --- a/pkg/sentry/kernel/mq/mq.go +++ b/pkg/sentry/kernel/mq/mq.go @@ -224,8 +224,22 @@ func (r *Registry) newQueueLocked(creds *auth.Credentials, owner fs.FileOwner, p }, nil } +// Remove removes the queue with the given name from the registry. See +// mq_unlink(2). +func (r *Registry) Remove(ctx context.Context, name string) error { + if len(name) > MaxName { + return linuxerr.ENAMETOOLONG + } + + r.mu.Lock() + defer r.mu.Unlock() + return r.impl.Unlink(ctx, name) +} + // Destroy destroys the registry and releases all held references. func (r *Registry) Destroy(ctx context.Context) { + r.mu.Lock() + defer r.mu.Unlock() r.impl.Destroy(ctx) } -- cgit v1.2.3