summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/sentry/kernel/ipc/BUILD4
-rw-r--r--pkg/sentry/kernel/ipc/object.go22
-rw-r--r--pkg/sentry/kernel/ipc/registry.go196
-rw-r--r--pkg/sentry/kernel/semaphore/BUILD12
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore.go172
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore_test.go5
-rw-r--r--pkg/sentry/kernel/shm/shm.go169
-rw-r--r--pkg/sentry/syscalls/linux/sys_sem.go2
8 files changed, 378 insertions, 204 deletions
diff --git a/pkg/sentry/kernel/ipc/BUILD b/pkg/sentry/kernel/ipc/BUILD
index 14f73ee17..e42a94e15 100644
--- a/pkg/sentry/kernel/ipc/BUILD
+++ b/pkg/sentry/kernel/ipc/BUILD
@@ -6,10 +6,14 @@ go_library(
name = "ipc",
srcs = [
"object.go",
+ "registry.go",
],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/context",
+ "//pkg/errors/linuxerr",
+ "//pkg/log",
"//pkg/sentry/fs",
"//pkg/sentry/kernel/auth",
],
diff --git a/pkg/sentry/kernel/ipc/object.go b/pkg/sentry/kernel/ipc/object.go
index 769ff44e1..387b35e7e 100644
--- a/pkg/sentry/kernel/ipc/object.go
+++ b/pkg/sentry/kernel/ipc/object.go
@@ -13,6 +13,8 @@
// limitations under the License.
// Package ipc defines functionality and utilities common to sysvipc mechanisms.
+//
+// Lock ordering: [shm/semaphore/msgqueue].Registry.mu -> Mechanism
package ipc
import (
@@ -29,6 +31,8 @@ type ID int32
// Object represents an abstract IPC object with fields common to all IPC
// mechanisms.
+//
+// +stateify savable
type Object struct {
// User namespace which owns the IPC namespace which owns the IPC object.
// Immutable.
@@ -54,22 +58,26 @@ type Object struct {
// be looked at as a container for an ipc.Object, which is by definition a fully
// functional SysV object.
type Mechanism interface {
- // Object returns a pointer to the mechanism's ipc.Object. Mechanism.Lock,
- // and Mechanism.Unlock should be used when the object is used.
- Object() *Object
-
// Lock behaves the same as Mutex.Lock on the mechanism.
Lock()
// Unlock behaves the same as Mutex.Unlock on the mechanism.
Unlock()
+
+ // Object returns a pointer to the mechanism's ipc.Object. Mechanism.Lock,
+ // and Mechanism.Unlock should be used when the object is used.
+ Object() *Object
+
+ // Destroy destroys the mechanism.
+ Destroy()
}
-// NewObject returns a new, initialized ipc.Object.
-func NewObject(un *auth.UserNamespace, id ID, key Key, creator, owner fs.FileOwner, perms fs.FilePermissions) *Object {
+// NewObject returns a new, initialized ipc.Object. The newly returned object
+// doesn't have a valid ID. When the object is registered, the registry assigns
+// it a new unique ID.
+func NewObject(un *auth.UserNamespace, key Key, creator, owner fs.FileOwner, perms fs.FilePermissions) *Object {
return &Object{
UserNS: un,
- ID: id,
Key: key,
Creator: creator,
Owner: owner,
diff --git a/pkg/sentry/kernel/ipc/registry.go b/pkg/sentry/kernel/ipc/registry.go
new file mode 100644
index 000000000..91de19070
--- /dev/null
+++ b/pkg/sentry/kernel/ipc/registry.go
@@ -0,0 +1,196 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ipc
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// Registry is similar to Object, but for registries. It represent an abstract
+// SysV IPC registry with fields common to all SysV registries. Registry is not
+// thread-safe, and should be protected using a mutex.
+//
+// +stateify savable
+type Registry struct {
+ // UserNS owning the IPC namespace this registry belongs to. Immutable.
+ UserNS *auth.UserNamespace
+
+ // objects is a map of IDs to IPC mechanisms.
+ objects map[ID]Mechanism
+
+ // KeysToIDs maps a lookup key to an ID.
+ keysToIDs map[Key]ID
+
+ // lastIDUsed is used to find the next available ID for object creation.
+ lastIDUsed ID
+}
+
+// NewRegistry return a new, initialized ipc.Registry.
+func NewRegistry(userNS *auth.UserNamespace) *Registry {
+ return &Registry{
+ UserNS: userNS,
+ objects: make(map[ID]Mechanism),
+ keysToIDs: make(map[Key]ID),
+ }
+}
+
+// Find uses key to search for and return a SysV mechanism. Find returns an
+// error if an object is found by shouldn't be, or if the user doesn't have
+// permission to use the object. If no object is found, Find checks create
+// flag, and returns an error only if it's false.
+func (r *Registry) Find(ctx context.Context, key Key, mode linux.FileMode, create, exclusive bool) (Mechanism, error) {
+ if id, ok := r.keysToIDs[key]; ok {
+ mech := r.objects[id]
+ mech.Lock()
+ defer mech.Unlock()
+
+ obj := mech.Object()
+ creds := auth.CredentialsFromContext(ctx)
+ if !obj.CheckPermissions(creds, fs.PermsFromMode(mode)) {
+ // The [calling process / user] does not have permission to access
+ // the set, and does not have the CAP_IPC_OWNER capability in the
+ // user namespace that governs its IPC namespace.
+ return nil, linuxerr.EACCES
+ }
+
+ if create && exclusive {
+ // IPC_CREAT and IPC_EXCL were specified, but an object already
+ // exists for key.
+ return nil, linuxerr.EEXIST
+ }
+ return mech, nil
+ }
+
+ if !create {
+ // No object exists for key and msgflg did not specify IPC_CREAT.
+ return nil, linuxerr.ENOENT
+ }
+
+ return nil, nil
+}
+
+// Register adds the given object into Registry.Objects, and assigns it a new
+// ID. It returns an error if all IDs are exhausted.
+func (r *Registry) Register(m Mechanism) error {
+ id, err := r.newID()
+ if err != nil {
+ return err
+ }
+
+ obj := m.Object()
+ obj.ID = id
+
+ r.objects[id] = m
+ r.keysToIDs[obj.Key] = id
+
+ return nil
+}
+
+// newID finds the first unused ID in the registry, and returns an error if
+// non is found.
+func (r *Registry) newID() (ID, error) {
+ // Find the next available ID.
+ for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
+ // Handle wrap around.
+ if id < 0 {
+ id = 0
+ continue
+ }
+ if r.objects[id] == nil {
+ r.lastIDUsed = id
+ return id, nil
+ }
+ }
+
+ log.Warningf("ids exhausted, they may be leaking")
+
+ // The man pages for shmget(2) mention that ENOSPC should be used if "All
+ // possible shared memory IDs have been taken (SHMMNI)". Other SysV
+ // mechanisms don't have a specific errno for running out of IDs, but they
+ // return ENOSPC if the max number of objects is exceeded, so we assume that
+ // it's the same case.
+ return 0, linuxerr.ENOSPC
+}
+
+// Remove removes the mechanism with the given id from the registry, and calls
+// mechanism.Destroy to perform mechanism-specific removal.
+func (r *Registry) Remove(id ID, creds *auth.Credentials) error {
+ mech := r.objects[id]
+ if mech == nil {
+ return linuxerr.EINVAL
+ }
+
+ mech.Lock()
+ defer mech.Unlock()
+
+ obj := mech.Object()
+
+ // The effective user ID of the calling process must match the creator or
+ // owner of the [mechanism], or the caller must be privileged.
+ if !obj.CheckOwnership(creds) {
+ return linuxerr.EPERM
+ }
+
+ delete(r.objects, obj.ID)
+ delete(r.keysToIDs, obj.Key)
+ mech.Destroy()
+
+ return nil
+}
+
+// ForAllObjects executes a given function for all given objects.
+func (r *Registry) ForAllObjects(f func(o Mechanism)) {
+ for _, o := range r.objects {
+ f(o)
+ }
+}
+
+// FindByID returns the mechanism with the given ID, nil if non exists.
+func (r *Registry) FindByID(id ID) Mechanism {
+ return r.objects[id]
+}
+
+// DissociateKey removes the association between a mechanism and its key
+// (deletes it from r.keysToIDs), preventing it from being discovered by any new
+// process, but not necessarily destroying it. If the given key doesn't exist,
+// nothing is changed.
+func (r *Registry) DissociateKey(key Key) {
+ delete(r.keysToIDs, key)
+}
+
+// DissociateID removes the association between a mechanism and its ID (deletes
+// it from r.objects). An ID can't be removed unless the associated key is
+// removed already, this is done to prevent the users from acquiring nil a
+// Mechanism.
+//
+// Precondition: must be preceded by a call to r.DissociateKey.
+func (r *Registry) DissociateID(id ID) {
+ delete(r.objects, id)
+}
+
+// ObjectCount returns the number of registered objects.
+func (r *Registry) ObjectCount() int {
+ return len(r.objects)
+}
+
+// LastIDUsed returns the last used ID.
+func (r *Registry) LastIDUsed() ID {
+ return r.lastIDUsed
+}
diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD
index 5dd607953..2ae08ed12 100644
--- a/pkg/sentry/kernel/semaphore/BUILD
+++ b/pkg/sentry/kernel/semaphore/BUILD
@@ -26,7 +26,6 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/errors/linuxerr",
- "//pkg/log",
"//pkg/sentry/fs",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/ipc",
@@ -42,10 +41,11 @@ go_test(
srcs = ["semaphore_test.go"],
library = ":semaphore",
deps = [
- "//pkg/abi/linux",
- "//pkg/context",
- "//pkg/sentry/contexttest",
- "//pkg/sentry/kernel/auth",
- "//pkg/syserror",
+ "//pkg/abi/linux", # keep
+ "//pkg/context", # keep
+ "//pkg/sentry/contexttest", # keep
+ "//pkg/sentry/kernel/auth", # keep
+ "//pkg/sentry/kernel/ipc", # keep
+ "//pkg/syserror", # keep
],
)
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index d609d88e2..b7879d284 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
- "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
@@ -48,12 +47,12 @@ const (
//
// +stateify savable
type Registry struct {
- // userNS owning the ipc name this registry belongs to. Immutable.
- userNS *auth.UserNamespace
// mu protects all fields below.
- mu sync.Mutex `state:"nosave"`
- semaphores map[ipc.ID]*Set
- lastIDUsed ipc.ID
+ mu sync.Mutex `state:"nosave"`
+
+ // reg defines basic fields and operations needed for all SysV registries.
+ reg *ipc.Registry
+
// indexes maintains a mapping between a set's index in virtual array and
// its identifier.
indexes map[int32]ipc.ID
@@ -69,7 +68,6 @@ type Set struct {
// mu protects all fields below.
mu sync.Mutex `state:"nosave"`
- // obj defines basic fields that should be included in all SysV IPC objects.
obj *ipc.Object
opTime ktime.Time
@@ -109,9 +107,8 @@ type waiter struct {
// NewRegistry creates a new semaphore set registry.
func NewRegistry(userNS *auth.UserNamespace) *Registry {
return &Registry{
- userNS: userNS,
- semaphores: make(map[ipc.ID]*Set),
- indexes: make(map[int32]ipc.ID),
+ reg: ipc.NewRegistry(userNS),
+ indexes: make(map[int32]ipc.ID),
}
}
@@ -129,31 +126,19 @@ func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, m
defer r.mu.Unlock()
if !private {
- // Look up an existing semaphore.
- if set := r.findByKey(key); set != nil {
- set.mu.Lock()
- defer set.mu.Unlock()
-
- // Check that caller can access semaphore set.
- creds := auth.CredentialsFromContext(ctx)
- if !set.obj.CheckPermissions(creds, fs.PermsFromMode(mode)) {
- return nil, linuxerr.EACCES
- }
+ set, err := r.reg.Find(ctx, key, mode, create, exclusive)
+ if err != nil {
+ return nil, err
+ }
- // Validate parameters.
+ // Validate semaphore-specific parameters.
+ if set != nil {
+ set := set.(*Set)
if nsems > int32(set.Size()) {
return nil, linuxerr.EINVAL
}
- if create && exclusive {
- return nil, linuxerr.EEXIST
- }
return set, nil
}
-
- if !create {
- // Semaphore not found and should not be created.
- return nil, syserror.ENOENT
- }
}
// Zero is only valid if an existing set is found.
@@ -163,9 +148,9 @@ func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, m
// Apply system limits.
//
- // Map semaphores and map indexes in a registry are of the same size,
- // check map semaphores only here for the system limit.
- if len(r.semaphores) >= setsMax {
+ // Map reg.objects and map indexes in a registry are of the same size,
+ // check map reg.objects only here for the system limit.
+ if r.reg.ObjectCount() >= setsMax {
return nil, syserror.ENOSPC
}
if r.totalSems() > int(semsTotalMax-nsems) {
@@ -173,9 +158,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, m
}
// Finally create a new set.
- owner := fs.FileOwnerFromContext(ctx)
- perms := fs.FilePermsFromMode(mode)
- return r.newSet(ctx, key, owner, owner, perms, nsems)
+ return r.newSetLocked(ctx, key, fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode), nsems)
}
// IPCInfo returns information about system-wide semaphore limits and parameters.
@@ -202,7 +185,7 @@ func (r *Registry) SemInfo() *linux.SemInfo {
defer r.mu.Unlock()
info := r.IPCInfo()
- info.SemUsz = uint32(len(r.semaphores))
+ info.SemUsz = uint32(r.reg.ObjectCount())
info.SemAem = uint32(r.totalSems())
return info
@@ -225,74 +208,59 @@ func (r *Registry) HighestIndex() int32 {
return highestIndex
}
-// RemoveID removes set with give 'id' from the registry and marks the set as
+// Remove removes set with give 'id' from the registry and marks the set as
// dead. All waiters will be awakened and fail.
-func (r *Registry) RemoveID(id ipc.ID, creds *auth.Credentials) error {
+func (r *Registry) Remove(id ipc.ID, creds *auth.Credentials) error {
r.mu.Lock()
defer r.mu.Unlock()
- set := r.semaphores[id]
- if set == nil {
- return linuxerr.EINVAL
- }
+ r.reg.Remove(id, creds)
+
index, found := r.findIndexByID(id)
if !found {
// Inconsistent state.
panic(fmt.Sprintf("unable to find an index for ID: %d", id))
}
-
- set.mu.Lock()
- defer set.mu.Unlock()
-
- // "The effective user ID of the calling process must match the creator or
- // owner of the semaphore set, or the caller must be privileged."
- if !set.obj.CheckOwnership(creds) {
- return linuxerr.EACCES
- }
-
- delete(r.semaphores, set.obj.ID)
delete(r.indexes, index)
- set.destroy()
+
return nil
}
-func (r *Registry) newSet(ctx context.Context, key ipc.Key, owner, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) {
- // Find the next available ID.
- for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
- // Handle wrap around.
- if id < 0 {
- id = 0
- continue
- }
- if r.semaphores[id] == nil {
- index, found := r.findFirstAvailableIndex()
- if !found {
- panic("unable to find an available index")
- }
- r.indexes[index] = id
- r.lastIDUsed = id
-
- set := &Set{
- registry: r,
- obj: ipc.NewObject(r.userNS, ipc.ID(id), ipc.Key(key), creator, owner, perms),
- changeTime: ktime.NowFromContext(ctx),
- sems: make([]sem, nsems),
- }
- r.semaphores[id] = set
+// newSetLocked creates a new Set using given fields. An error is returned if there
+// are no more available identifiers.
+//
+// Precondition: r.mu must be held.
+func (r *Registry) newSetLocked(ctx context.Context, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) {
+ set := &Set{
+ registry: r,
+ obj: ipc.NewObject(r.reg.UserNS, ipc.Key(key), creator, creator, perms),
+ changeTime: ktime.NowFromContext(ctx),
+ sems: make([]sem, nsems),
+ }
- return set, nil
- }
+ err := r.reg.Register(set)
+ if err != nil {
+ return nil, err
+ }
+
+ index, found := r.findFirstAvailableIndex()
+ if !found {
+ panic("unable to find an available index")
}
+ r.indexes[index] = set.obj.ID
- log.Warningf("Semaphore map is full, they must be leaking")
- return nil, syserror.ENOMEM
+ return set, nil
}
// FindByID looks up a set given an ID.
func (r *Registry) FindByID(id ipc.ID) *Set {
r.mu.Lock()
defer r.mu.Unlock()
- return r.semaphores[id]
+ mech := r.reg.FindByID(id)
+ if mech == nil {
+ return nil
+ }
+ return mech.(*Set)
}
// FindByIndex looks up a set given an index.
@@ -304,16 +272,7 @@ func (r *Registry) FindByIndex(index int32) *Set {
if !present {
return nil
}
- return r.semaphores[id]
-}
-
-func (r *Registry) findByKey(key ipc.Key) *Set {
- for _, v := range r.semaphores {
- if v.obj.Key == key {
- return v
- }
- }
- return nil
+ return r.reg.FindByID(id).(*Set)
}
func (r *Registry) findIndexByID(id ipc.ID) (int32, bool) {
@@ -336,9 +295,11 @@ func (r *Registry) findFirstAvailableIndex() (int32, bool) {
func (r *Registry) totalSems() int {
totalSems := 0
- for _, v := range r.semaphores {
- totalSems += v.Size()
- }
+ r.reg.ForAllObjects(
+ func(o ipc.Mechanism) {
+ totalSems += o.(*Set).Size()
+ },
+ )
return totalSems
}
@@ -347,6 +308,23 @@ func (s *Set) ID() ipc.ID {
return s.obj.ID
}
+// Object implements ipc.Mechanism.Object.
+func (s *Set) Object() *ipc.Object {
+ return s.obj
+}
+
+// Lock implements ipc.Mechanism.Lock.
+func (s *Set) Lock() {
+ s.mu.Lock()
+}
+
+// Unlock implements ipc.mechanism.Unlock.
+//
+// +checklocksignore
+func (s *Set) Unlock() {
+ s.mu.Unlock()
+}
+
func (s *Set) findSem(num int32) *sem {
if num < 0 || int(num) >= s.Size() {
return nil
@@ -671,10 +649,10 @@ func (s *Set) AbortWait(num int32, ch chan struct{}) {
// Waiter may not be found in case it raced with wakeWaiters().
}
-// destroy destroys the set.
+// Destroy implements ipc.Mechanism.Destroy.
//
// Preconditions: Caller must hold 's.mu'.
-func (s *Set) destroy() {
+func (s *Set) Destroy() {
// Notify all waiters. They will fail on the next attempt to execute
// operations and return error.
s.dead = true
diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go
index 0c7abb68e..2e4ab8121 100644
--- a/pkg/sentry/kernel/semaphore/semaphore_test.go
+++ b/pkg/sentry/kernel/semaphore/semaphore_test.go
@@ -139,6 +139,7 @@ func TestUnregister(t *testing.T) {
ctx := contexttest.Context(t)
r := NewRegistry(auth.NewRootUserNamespace())
set, err := r.FindOrCreate(ctx, 123, 2, linux.FileMode(0x600), true, true, true)
+
if err != nil {
t.Fatalf("FindOrCreate() failed, err: %v", err)
}
@@ -156,8 +157,8 @@ func TestUnregister(t *testing.T) {
}
creds := auth.CredentialsFromContext(ctx)
- if err := r.RemoveID(set.obj.ID, creds); err != nil {
- t.Fatalf("RemoveID(%d) failed, err: %v", set.obj.ID, err)
+ if err := r.Remove(set.obj.ID, creds); err != nil {
+ t.Fatalf("Remove(%d) failed, err: %v", set.obj.ID, err)
}
if !set.dead {
t.Fatalf("set is not dead: %+v", set)
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index 459ae8c30..2abf467d7 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -64,38 +64,34 @@ type Registry struct {
// mu protects all fields below.
mu sync.Mutex `state:"nosave"`
- // shms maps segment ids to segments.
+ // reg defines basic fields and operations needed for all SysV registries.
//
- // shms holds all referenced segments, which are removed on the last
+ // Withing reg, there are two maps, Objects and KeysToIDs.
+ //
+ // reg.objects holds all referenced segments, which are removed on the last
// DecRef. Thus, it cannot itself hold a reference on the Shm.
//
// Since removal only occurs after the last (unlocked) DecRef, there
// exists a short window during which a Shm still exists in Shm, but is
// unreferenced. Users must use TryIncRef to determine if the Shm is
// still valid.
- shms map[ipc.ID]*Shm
-
- // keysToShms maps segment keys to segments.
//
- // Shms in keysToShms are guaranteed to be referenced, as they are
+ // keysToIDs maps segment keys to IDs.
+ //
+ // Shms in keysToIDs are guaranteed to be referenced, as they are
// removed by disassociateKey before the last DecRef.
- keysToShms map[ipc.Key]*Shm
+ reg *ipc.Registry
// Sum of the sizes of all existing segments rounded up to page size, in
// units of page size.
totalPages uint64
-
- // ID assigned to the last created segment. Used to quickly find the next
- // unused ID.
- lastIDUsed ipc.ID
}
// NewRegistry creates a new shm registry.
func NewRegistry(userNS *auth.UserNamespace) *Registry {
return &Registry{
- userNS: userNS,
- shms: make(map[ipc.ID]*Shm),
- keysToShms: make(map[ipc.Key]*Shm),
+ userNS: userNS,
+ reg: ipc.NewRegistry(userNS),
}
}
@@ -105,9 +101,14 @@ func NewRegistry(userNS *auth.UserNamespace) *Registry {
func (r *Registry) FindByID(id ipc.ID) *Shm {
r.mu.Lock()
defer r.mu.Unlock()
- s := r.shms[id]
+ mech := r.reg.FindByID(id)
+ if mech == nil {
+ return nil
+ }
+ s := mech.(*Shm)
+
// Take a reference on s. If TryIncRef fails, s has reached the last
- // DecRef, but hasn't quite been removed from r.shms yet.
+ // DecRef, but hasn't quite been removed from r.reg.objects yet.
if s != nil && s.TryIncRef() {
return s
}
@@ -125,7 +126,7 @@ func (r *Registry) dissociateKey(s *Shm) {
s.mu.Lock()
defer s.mu.Unlock()
if s.obj.Key != linux.IPC_PRIVATE {
- delete(r.keysToShms, s.obj.Key)
+ r.reg.DissociateKey(s.obj.Key)
s.obj.Key = linux.IPC_PRIVATE
}
}
@@ -147,50 +148,29 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz
r.mu.Lock()
defer r.mu.Unlock()
- if len(r.shms) >= linux.SHMMNI {
+ if r.reg.ObjectCount() >= linux.SHMMNI {
// "All possible shared memory IDs have been taken (SHMMNI) ..."
// - man shmget(2)
return nil, syserror.ENOSPC
}
if !private {
- // Look up an existing segment.
- if shm := r.keysToShms[key]; shm != nil {
- shm.mu.Lock()
- defer shm.mu.Unlock()
-
- // Check that caller can access the segment.
- creds := auth.CredentialsFromContext(ctx)
- if !shm.obj.CheckPermissions(creds, fs.PermsFromMode(mode)) {
- // "The user does not have permission to access the shared
- // memory segment, and does not have the CAP_IPC_OWNER
- // capability in the user namespace that governs its IPC
- // namespace." - man shmget(2)
- return nil, linuxerr.EACCES
- }
+ shm, err := r.reg.Find(ctx, key, mode, create, exclusive)
+ if err != nil {
+ return nil, err
+ }
+ // Validate shm-specific parameters.
+ if shm != nil {
+ shm := shm.(*Shm)
if size > shm.size {
// "A segment for the given key exists, but size is greater than
// the size of that segment." - man shmget(2)
return nil, linuxerr.EINVAL
}
-
- if create && exclusive {
- // "IPC_CREAT and IPC_EXCL were specified in shmflg, but a
- // shared memory segment already exists for key."
- // - man shmget(2)
- return nil, linuxerr.EEXIST
- }
-
shm.IncRef()
return shm, nil
}
-
- if !create {
- // "No segment exists for the given key, and IPC_CREAT was not
- // specified." - man shmget(2)
- return nil, syserror.ENOENT
- }
}
var sizeAligned uint64
@@ -208,9 +188,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz
}
// Need to create a new segment.
- creator := fs.FileOwnerFromContext(ctx)
- perms := fs.FilePermsFromMode(mode)
- s, err := r.newShm(ctx, pid, key, creator, perms, size)
+ s, err := r.newShmLocked(ctx, pid, key, fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode), size)
if err != nil {
return nil, err
}
@@ -220,10 +198,10 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz
return s, nil
}
-// newShm creates a new segment in the registry.
+// newShmLocked creates a new segment in the registry.
//
// Precondition: Caller must hold r.mu.
-func (r *Registry) newShm(ctx context.Context, pid int32, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) {
+func (r *Registry) newShmLocked(ctx context.Context, pid int32, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) {
mfp := pgalloc.MemoryFileProviderFromContext(ctx)
if mfp == nil {
panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider))
@@ -235,39 +213,24 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key ipc.Key, creator f
return nil, err
}
- // Find the next available ID.
- for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
- // Handle wrap around.
- if id < 0 {
- id = 0
- continue
- }
- if r.shms[id] == nil {
- r.lastIDUsed = id
-
- shm := &Shm{
- mfp: mfp,
- registry: r,
- size: size,
- effectiveSize: effectiveSize,
- obj: ipc.NewObject(r.userNS, ipc.ID(id), ipc.Key(key), creator, creator, perms),
- fr: fr,
- creatorPID: pid,
- changeTime: ktime.NowFromContext(ctx),
- }
- shm.InitRefs()
-
- r.shms[id] = shm
- r.keysToShms[key] = shm
-
- r.totalPages += effectiveSize / hostarch.PageSize
+ shm := &Shm{
+ mfp: mfp,
+ registry: r,
+ size: size,
+ effectiveSize: effectiveSize,
+ obj: ipc.NewObject(r.reg.UserNS, ipc.Key(key), creator, creator, perms),
+ fr: fr,
+ creatorPID: pid,
+ changeTime: ktime.NowFromContext(ctx),
+ }
+ shm.InitRefs()
- return shm, nil
- }
+ if err := r.reg.Register(shm); err != nil {
+ return nil, err
}
+ r.totalPages += effectiveSize / hostarch.PageSize
- log.Warningf("Shm ids exhuasted, they may be leaking")
- return nil, syserror.ENOSPC
+ return shm, nil
}
// IPCInfo reports global parameters for sysv shared memory segments on this
@@ -289,7 +252,7 @@ func (r *Registry) ShmInfo() *linux.ShmInfo {
defer r.mu.Unlock()
return &linux.ShmInfo{
- UsedIDs: int32(r.lastIDUsed),
+ UsedIDs: int32(r.reg.LastIDUsed()),
ShmTot: r.totalPages,
ShmRss: r.totalPages, // We could probably get a better estimate from memory accounting.
ShmSwp: 0, // No reclaim at the moment.
@@ -310,7 +273,7 @@ func (r *Registry) remove(s *Shm) {
panic(fmt.Sprintf("Attempted to remove %s from the registry whose key is still associated", s.debugLocked()))
}
- delete(r.shms, s.obj.ID)
+ r.reg.DissociateID(s.obj.ID)
r.totalPages -= s.effectiveSize / hostarch.PageSize
}
@@ -322,13 +285,16 @@ func (r *Registry) Release(ctx context.Context) {
// the IPC namespace containing it has no more references.
toRelease := make([]*Shm, 0)
r.mu.Lock()
- for _, s := range r.keysToShms {
- s.mu.Lock()
- if !s.pendingDestruction {
- toRelease = append(toRelease, s)
- }
- s.mu.Unlock()
- }
+ r.reg.ForAllObjects(
+ func(o ipc.Mechanism) {
+ s := o.(*Shm)
+ s.mu.Lock()
+ if !s.pendingDestruction {
+ toRelease = append(toRelease, s)
+ }
+ s.mu.Unlock()
+ },
+ )
r.mu.Unlock()
for _, s := range toRelease {
@@ -383,7 +349,6 @@ type Shm struct {
// mu protects all fields below.
mu sync.Mutex `state:"nosave"`
- // obj defines basic fields that should be included in all SysV IPC objects.
obj *ipc.Object
// attachTime is updated on every successful shmat.
@@ -412,6 +377,28 @@ func (s *Shm) ID() ipc.ID {
return s.obj.ID
}
+// Object implements ipc.Mechanism.Object.
+func (s *Shm) Object() *ipc.Object {
+ return s.obj
+}
+
+// Destroy implements ipc.Mechanism.Destroy. No work is performed on shm.Destroy
+// because a different removal mechanism is used in shm. See Shm.MarkDestroyed.
+func (s *Shm) Destroy() {
+}
+
+// Lock implements ipc.Mechanism.Lock.
+func (s *Shm) Lock() {
+ s.mu.Lock()
+}
+
+// Unlock implements ipc.mechanism.Unlock.
+//
+// +checklocksignore
+func (s *Shm) Unlock() {
+ s.mu.Unlock()
+}
+
// Precondition: Caller must hold s.mu.
func (s *Shm) debugLocked() string {
return fmt.Sprintf("Shm{id: %d, key: %d, size: %d bytes, refs: %d, destroyed: %v}",
diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go
index 4755bef18..f61cc466c 100644
--- a/pkg/sentry/syscalls/linux/sys_sem.go
+++ b/pkg/sentry/syscalls/linux/sys_sem.go
@@ -240,7 +240,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
func remove(t *kernel.Task, id ipc.ID) error {
r := t.IPCNamespace().SemaphoreRegistry()
creds := auth.CredentialsFromContext(t)
- return r.RemoveID(id, creds)
+ return r.Remove(id, creds)
}
func ipcSet(t *kernel.Task, id ipc.ID, uid auth.UID, gid auth.GID, perms fs.FilePermissions) error {