diff options
-rw-r--r-- | pkg/sentry/kernel/ipc/BUILD | 4 | ||||
-rw-r--r-- | pkg/sentry/kernel/ipc/object.go | 22 | ||||
-rw-r--r-- | pkg/sentry/kernel/ipc/registry.go | 196 | ||||
-rw-r--r-- | pkg/sentry/kernel/semaphore/BUILD | 12 | ||||
-rw-r--r-- | pkg/sentry/kernel/semaphore/semaphore.go | 172 | ||||
-rw-r--r-- | pkg/sentry/kernel/semaphore/semaphore_test.go | 5 | ||||
-rw-r--r-- | pkg/sentry/kernel/shm/shm.go | 169 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_sem.go | 2 |
8 files changed, 378 insertions, 204 deletions
diff --git a/pkg/sentry/kernel/ipc/BUILD b/pkg/sentry/kernel/ipc/BUILD index 14f73ee17..e42a94e15 100644 --- a/pkg/sentry/kernel/ipc/BUILD +++ b/pkg/sentry/kernel/ipc/BUILD @@ -6,10 +6,14 @@ go_library( name = "ipc", srcs = [ "object.go", + "registry.go", ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", + "//pkg/errors/linuxerr", + "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", ], diff --git a/pkg/sentry/kernel/ipc/object.go b/pkg/sentry/kernel/ipc/object.go index 769ff44e1..387b35e7e 100644 --- a/pkg/sentry/kernel/ipc/object.go +++ b/pkg/sentry/kernel/ipc/object.go @@ -13,6 +13,8 @@ // limitations under the License. // Package ipc defines functionality and utilities common to sysvipc mechanisms. +// +// Lock ordering: [shm/semaphore/msgqueue].Registry.mu -> Mechanism package ipc import ( @@ -29,6 +31,8 @@ type ID int32 // Object represents an abstract IPC object with fields common to all IPC // mechanisms. +// +// +stateify savable type Object struct { // User namespace which owns the IPC namespace which owns the IPC object. // Immutable. @@ -54,22 +58,26 @@ type Object struct { // be looked at as a container for an ipc.Object, which is by definition a fully // functional SysV object. type Mechanism interface { - // Object returns a pointer to the mechanism's ipc.Object. Mechanism.Lock, - // and Mechanism.Unlock should be used when the object is used. - Object() *Object - // Lock behaves the same as Mutex.Lock on the mechanism. Lock() // Unlock behaves the same as Mutex.Unlock on the mechanism. Unlock() + + // Object returns a pointer to the mechanism's ipc.Object. Mechanism.Lock, + // and Mechanism.Unlock should be used when the object is used. + Object() *Object + + // Destroy destroys the mechanism. + Destroy() } -// NewObject returns a new, initialized ipc.Object. -func NewObject(un *auth.UserNamespace, id ID, key Key, creator, owner fs.FileOwner, perms fs.FilePermissions) *Object { +// NewObject returns a new, initialized ipc.Object. The newly returned object +// doesn't have a valid ID. When the object is registered, the registry assigns +// it a new unique ID. +func NewObject(un *auth.UserNamespace, key Key, creator, owner fs.FileOwner, perms fs.FilePermissions) *Object { return &Object{ UserNS: un, - ID: id, Key: key, Creator: creator, Owner: owner, diff --git a/pkg/sentry/kernel/ipc/registry.go b/pkg/sentry/kernel/ipc/registry.go new file mode 100644 index 000000000..91de19070 --- /dev/null +++ b/pkg/sentry/kernel/ipc/registry.go @@ -0,0 +1,196 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipc + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +// Registry is similar to Object, but for registries. It represent an abstract +// SysV IPC registry with fields common to all SysV registries. Registry is not +// thread-safe, and should be protected using a mutex. +// +// +stateify savable +type Registry struct { + // UserNS owning the IPC namespace this registry belongs to. Immutable. + UserNS *auth.UserNamespace + + // objects is a map of IDs to IPC mechanisms. + objects map[ID]Mechanism + + // KeysToIDs maps a lookup key to an ID. + keysToIDs map[Key]ID + + // lastIDUsed is used to find the next available ID for object creation. + lastIDUsed ID +} + +// NewRegistry return a new, initialized ipc.Registry. +func NewRegistry(userNS *auth.UserNamespace) *Registry { + return &Registry{ + UserNS: userNS, + objects: make(map[ID]Mechanism), + keysToIDs: make(map[Key]ID), + } +} + +// Find uses key to search for and return a SysV mechanism. Find returns an +// error if an object is found by shouldn't be, or if the user doesn't have +// permission to use the object. If no object is found, Find checks create +// flag, and returns an error only if it's false. +func (r *Registry) Find(ctx context.Context, key Key, mode linux.FileMode, create, exclusive bool) (Mechanism, error) { + if id, ok := r.keysToIDs[key]; ok { + mech := r.objects[id] + mech.Lock() + defer mech.Unlock() + + obj := mech.Object() + creds := auth.CredentialsFromContext(ctx) + if !obj.CheckPermissions(creds, fs.PermsFromMode(mode)) { + // The [calling process / user] does not have permission to access + // the set, and does not have the CAP_IPC_OWNER capability in the + // user namespace that governs its IPC namespace. + return nil, linuxerr.EACCES + } + + if create && exclusive { + // IPC_CREAT and IPC_EXCL were specified, but an object already + // exists for key. + return nil, linuxerr.EEXIST + } + return mech, nil + } + + if !create { + // No object exists for key and msgflg did not specify IPC_CREAT. + return nil, linuxerr.ENOENT + } + + return nil, nil +} + +// Register adds the given object into Registry.Objects, and assigns it a new +// ID. It returns an error if all IDs are exhausted. +func (r *Registry) Register(m Mechanism) error { + id, err := r.newID() + if err != nil { + return err + } + + obj := m.Object() + obj.ID = id + + r.objects[id] = m + r.keysToIDs[obj.Key] = id + + return nil +} + +// newID finds the first unused ID in the registry, and returns an error if +// non is found. +func (r *Registry) newID() (ID, error) { + // Find the next available ID. + for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { + // Handle wrap around. + if id < 0 { + id = 0 + continue + } + if r.objects[id] == nil { + r.lastIDUsed = id + return id, nil + } + } + + log.Warningf("ids exhausted, they may be leaking") + + // The man pages for shmget(2) mention that ENOSPC should be used if "All + // possible shared memory IDs have been taken (SHMMNI)". Other SysV + // mechanisms don't have a specific errno for running out of IDs, but they + // return ENOSPC if the max number of objects is exceeded, so we assume that + // it's the same case. + return 0, linuxerr.ENOSPC +} + +// Remove removes the mechanism with the given id from the registry, and calls +// mechanism.Destroy to perform mechanism-specific removal. +func (r *Registry) Remove(id ID, creds *auth.Credentials) error { + mech := r.objects[id] + if mech == nil { + return linuxerr.EINVAL + } + + mech.Lock() + defer mech.Unlock() + + obj := mech.Object() + + // The effective user ID of the calling process must match the creator or + // owner of the [mechanism], or the caller must be privileged. + if !obj.CheckOwnership(creds) { + return linuxerr.EPERM + } + + delete(r.objects, obj.ID) + delete(r.keysToIDs, obj.Key) + mech.Destroy() + + return nil +} + +// ForAllObjects executes a given function for all given objects. +func (r *Registry) ForAllObjects(f func(o Mechanism)) { + for _, o := range r.objects { + f(o) + } +} + +// FindByID returns the mechanism with the given ID, nil if non exists. +func (r *Registry) FindByID(id ID) Mechanism { + return r.objects[id] +} + +// DissociateKey removes the association between a mechanism and its key +// (deletes it from r.keysToIDs), preventing it from being discovered by any new +// process, but not necessarily destroying it. If the given key doesn't exist, +// nothing is changed. +func (r *Registry) DissociateKey(key Key) { + delete(r.keysToIDs, key) +} + +// DissociateID removes the association between a mechanism and its ID (deletes +// it from r.objects). An ID can't be removed unless the associated key is +// removed already, this is done to prevent the users from acquiring nil a +// Mechanism. +// +// Precondition: must be preceded by a call to r.DissociateKey. +func (r *Registry) DissociateID(id ID) { + delete(r.objects, id) +} + +// ObjectCount returns the number of registered objects. +func (r *Registry) ObjectCount() int { + return len(r.objects) +} + +// LastIDUsed returns the last used ID. +func (r *Registry) LastIDUsed() ID { + return r.lastIDUsed +} diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index 5dd607953..2ae08ed12 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -26,7 +26,6 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/errors/linuxerr", - "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/ipc", @@ -42,10 +41,11 @@ go_test( srcs = ["semaphore_test.go"], library = ":semaphore", deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/contexttest", - "//pkg/sentry/kernel/auth", - "//pkg/syserror", + "//pkg/abi/linux", # keep + "//pkg/context", # keep + "//pkg/sentry/contexttest", # keep + "//pkg/sentry/kernel/auth", # keep + "//pkg/sentry/kernel/ipc", # keep + "//pkg/syserror", # keep ], ) diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index d609d88e2..b7879d284 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -21,7 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" @@ -48,12 +47,12 @@ const ( // // +stateify savable type Registry struct { - // userNS owning the ipc name this registry belongs to. Immutable. - userNS *auth.UserNamespace // mu protects all fields below. - mu sync.Mutex `state:"nosave"` - semaphores map[ipc.ID]*Set - lastIDUsed ipc.ID + mu sync.Mutex `state:"nosave"` + + // reg defines basic fields and operations needed for all SysV registries. + reg *ipc.Registry + // indexes maintains a mapping between a set's index in virtual array and // its identifier. indexes map[int32]ipc.ID @@ -69,7 +68,6 @@ type Set struct { // mu protects all fields below. mu sync.Mutex `state:"nosave"` - // obj defines basic fields that should be included in all SysV IPC objects. obj *ipc.Object opTime ktime.Time @@ -109,9 +107,8 @@ type waiter struct { // NewRegistry creates a new semaphore set registry. func NewRegistry(userNS *auth.UserNamespace) *Registry { return &Registry{ - userNS: userNS, - semaphores: make(map[ipc.ID]*Set), - indexes: make(map[int32]ipc.ID), + reg: ipc.NewRegistry(userNS), + indexes: make(map[int32]ipc.ID), } } @@ -129,31 +126,19 @@ func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, m defer r.mu.Unlock() if !private { - // Look up an existing semaphore. - if set := r.findByKey(key); set != nil { - set.mu.Lock() - defer set.mu.Unlock() - - // Check that caller can access semaphore set. - creds := auth.CredentialsFromContext(ctx) - if !set.obj.CheckPermissions(creds, fs.PermsFromMode(mode)) { - return nil, linuxerr.EACCES - } + set, err := r.reg.Find(ctx, key, mode, create, exclusive) + if err != nil { + return nil, err + } - // Validate parameters. + // Validate semaphore-specific parameters. + if set != nil { + set := set.(*Set) if nsems > int32(set.Size()) { return nil, linuxerr.EINVAL } - if create && exclusive { - return nil, linuxerr.EEXIST - } return set, nil } - - if !create { - // Semaphore not found and should not be created. - return nil, syserror.ENOENT - } } // Zero is only valid if an existing set is found. @@ -163,9 +148,9 @@ func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, m // Apply system limits. // - // Map semaphores and map indexes in a registry are of the same size, - // check map semaphores only here for the system limit. - if len(r.semaphores) >= setsMax { + // Map reg.objects and map indexes in a registry are of the same size, + // check map reg.objects only here for the system limit. + if r.reg.ObjectCount() >= setsMax { return nil, syserror.ENOSPC } if r.totalSems() > int(semsTotalMax-nsems) { @@ -173,9 +158,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, m } // Finally create a new set. - owner := fs.FileOwnerFromContext(ctx) - perms := fs.FilePermsFromMode(mode) - return r.newSet(ctx, key, owner, owner, perms, nsems) + return r.newSetLocked(ctx, key, fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode), nsems) } // IPCInfo returns information about system-wide semaphore limits and parameters. @@ -202,7 +185,7 @@ func (r *Registry) SemInfo() *linux.SemInfo { defer r.mu.Unlock() info := r.IPCInfo() - info.SemUsz = uint32(len(r.semaphores)) + info.SemUsz = uint32(r.reg.ObjectCount()) info.SemAem = uint32(r.totalSems()) return info @@ -225,74 +208,59 @@ func (r *Registry) HighestIndex() int32 { return highestIndex } -// RemoveID removes set with give 'id' from the registry and marks the set as +// Remove removes set with give 'id' from the registry and marks the set as // dead. All waiters will be awakened and fail. -func (r *Registry) RemoveID(id ipc.ID, creds *auth.Credentials) error { +func (r *Registry) Remove(id ipc.ID, creds *auth.Credentials) error { r.mu.Lock() defer r.mu.Unlock() - set := r.semaphores[id] - if set == nil { - return linuxerr.EINVAL - } + r.reg.Remove(id, creds) + index, found := r.findIndexByID(id) if !found { // Inconsistent state. panic(fmt.Sprintf("unable to find an index for ID: %d", id)) } - - set.mu.Lock() - defer set.mu.Unlock() - - // "The effective user ID of the calling process must match the creator or - // owner of the semaphore set, or the caller must be privileged." - if !set.obj.CheckOwnership(creds) { - return linuxerr.EACCES - } - - delete(r.semaphores, set.obj.ID) delete(r.indexes, index) - set.destroy() + return nil } -func (r *Registry) newSet(ctx context.Context, key ipc.Key, owner, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) { - // Find the next available ID. - for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { - // Handle wrap around. - if id < 0 { - id = 0 - continue - } - if r.semaphores[id] == nil { - index, found := r.findFirstAvailableIndex() - if !found { - panic("unable to find an available index") - } - r.indexes[index] = id - r.lastIDUsed = id - - set := &Set{ - registry: r, - obj: ipc.NewObject(r.userNS, ipc.ID(id), ipc.Key(key), creator, owner, perms), - changeTime: ktime.NowFromContext(ctx), - sems: make([]sem, nsems), - } - r.semaphores[id] = set +// newSetLocked creates a new Set using given fields. An error is returned if there +// are no more available identifiers. +// +// Precondition: r.mu must be held. +func (r *Registry) newSetLocked(ctx context.Context, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) { + set := &Set{ + registry: r, + obj: ipc.NewObject(r.reg.UserNS, ipc.Key(key), creator, creator, perms), + changeTime: ktime.NowFromContext(ctx), + sems: make([]sem, nsems), + } - return set, nil - } + err := r.reg.Register(set) + if err != nil { + return nil, err + } + + index, found := r.findFirstAvailableIndex() + if !found { + panic("unable to find an available index") } + r.indexes[index] = set.obj.ID - log.Warningf("Semaphore map is full, they must be leaking") - return nil, syserror.ENOMEM + return set, nil } // FindByID looks up a set given an ID. func (r *Registry) FindByID(id ipc.ID) *Set { r.mu.Lock() defer r.mu.Unlock() - return r.semaphores[id] + mech := r.reg.FindByID(id) + if mech == nil { + return nil + } + return mech.(*Set) } // FindByIndex looks up a set given an index. @@ -304,16 +272,7 @@ func (r *Registry) FindByIndex(index int32) *Set { if !present { return nil } - return r.semaphores[id] -} - -func (r *Registry) findByKey(key ipc.Key) *Set { - for _, v := range r.semaphores { - if v.obj.Key == key { - return v - } - } - return nil + return r.reg.FindByID(id).(*Set) } func (r *Registry) findIndexByID(id ipc.ID) (int32, bool) { @@ -336,9 +295,11 @@ func (r *Registry) findFirstAvailableIndex() (int32, bool) { func (r *Registry) totalSems() int { totalSems := 0 - for _, v := range r.semaphores { - totalSems += v.Size() - } + r.reg.ForAllObjects( + func(o ipc.Mechanism) { + totalSems += o.(*Set).Size() + }, + ) return totalSems } @@ -347,6 +308,23 @@ func (s *Set) ID() ipc.ID { return s.obj.ID } +// Object implements ipc.Mechanism.Object. +func (s *Set) Object() *ipc.Object { + return s.obj +} + +// Lock implements ipc.Mechanism.Lock. +func (s *Set) Lock() { + s.mu.Lock() +} + +// Unlock implements ipc.mechanism.Unlock. +// +// +checklocksignore +func (s *Set) Unlock() { + s.mu.Unlock() +} + func (s *Set) findSem(num int32) *sem { if num < 0 || int(num) >= s.Size() { return nil @@ -671,10 +649,10 @@ func (s *Set) AbortWait(num int32, ch chan struct{}) { // Waiter may not be found in case it raced with wakeWaiters(). } -// destroy destroys the set. +// Destroy implements ipc.Mechanism.Destroy. // // Preconditions: Caller must hold 's.mu'. -func (s *Set) destroy() { +func (s *Set) Destroy() { // Notify all waiters. They will fail on the next attempt to execute // operations and return error. s.dead = true diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go index 0c7abb68e..2e4ab8121 100644 --- a/pkg/sentry/kernel/semaphore/semaphore_test.go +++ b/pkg/sentry/kernel/semaphore/semaphore_test.go @@ -139,6 +139,7 @@ func TestUnregister(t *testing.T) { ctx := contexttest.Context(t) r := NewRegistry(auth.NewRootUserNamespace()) set, err := r.FindOrCreate(ctx, 123, 2, linux.FileMode(0x600), true, true, true) + if err != nil { t.Fatalf("FindOrCreate() failed, err: %v", err) } @@ -156,8 +157,8 @@ func TestUnregister(t *testing.T) { } creds := auth.CredentialsFromContext(ctx) - if err := r.RemoveID(set.obj.ID, creds); err != nil { - t.Fatalf("RemoveID(%d) failed, err: %v", set.obj.ID, err) + if err := r.Remove(set.obj.ID, creds); err != nil { + t.Fatalf("Remove(%d) failed, err: %v", set.obj.ID, err) } if !set.dead { t.Fatalf("set is not dead: %+v", set) diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index 459ae8c30..2abf467d7 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -64,38 +64,34 @@ type Registry struct { // mu protects all fields below. mu sync.Mutex `state:"nosave"` - // shms maps segment ids to segments. + // reg defines basic fields and operations needed for all SysV registries. // - // shms holds all referenced segments, which are removed on the last + // Withing reg, there are two maps, Objects and KeysToIDs. + // + // reg.objects holds all referenced segments, which are removed on the last // DecRef. Thus, it cannot itself hold a reference on the Shm. // // Since removal only occurs after the last (unlocked) DecRef, there // exists a short window during which a Shm still exists in Shm, but is // unreferenced. Users must use TryIncRef to determine if the Shm is // still valid. - shms map[ipc.ID]*Shm - - // keysToShms maps segment keys to segments. // - // Shms in keysToShms are guaranteed to be referenced, as they are + // keysToIDs maps segment keys to IDs. + // + // Shms in keysToIDs are guaranteed to be referenced, as they are // removed by disassociateKey before the last DecRef. - keysToShms map[ipc.Key]*Shm + reg *ipc.Registry // Sum of the sizes of all existing segments rounded up to page size, in // units of page size. totalPages uint64 - - // ID assigned to the last created segment. Used to quickly find the next - // unused ID. - lastIDUsed ipc.ID } // NewRegistry creates a new shm registry. func NewRegistry(userNS *auth.UserNamespace) *Registry { return &Registry{ - userNS: userNS, - shms: make(map[ipc.ID]*Shm), - keysToShms: make(map[ipc.Key]*Shm), + userNS: userNS, + reg: ipc.NewRegistry(userNS), } } @@ -105,9 +101,14 @@ func NewRegistry(userNS *auth.UserNamespace) *Registry { func (r *Registry) FindByID(id ipc.ID) *Shm { r.mu.Lock() defer r.mu.Unlock() - s := r.shms[id] + mech := r.reg.FindByID(id) + if mech == nil { + return nil + } + s := mech.(*Shm) + // Take a reference on s. If TryIncRef fails, s has reached the last - // DecRef, but hasn't quite been removed from r.shms yet. + // DecRef, but hasn't quite been removed from r.reg.objects yet. if s != nil && s.TryIncRef() { return s } @@ -125,7 +126,7 @@ func (r *Registry) dissociateKey(s *Shm) { s.mu.Lock() defer s.mu.Unlock() if s.obj.Key != linux.IPC_PRIVATE { - delete(r.keysToShms, s.obj.Key) + r.reg.DissociateKey(s.obj.Key) s.obj.Key = linux.IPC_PRIVATE } } @@ -147,50 +148,29 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz r.mu.Lock() defer r.mu.Unlock() - if len(r.shms) >= linux.SHMMNI { + if r.reg.ObjectCount() >= linux.SHMMNI { // "All possible shared memory IDs have been taken (SHMMNI) ..." // - man shmget(2) return nil, syserror.ENOSPC } if !private { - // Look up an existing segment. - if shm := r.keysToShms[key]; shm != nil { - shm.mu.Lock() - defer shm.mu.Unlock() - - // Check that caller can access the segment. - creds := auth.CredentialsFromContext(ctx) - if !shm.obj.CheckPermissions(creds, fs.PermsFromMode(mode)) { - // "The user does not have permission to access the shared - // memory segment, and does not have the CAP_IPC_OWNER - // capability in the user namespace that governs its IPC - // namespace." - man shmget(2) - return nil, linuxerr.EACCES - } + shm, err := r.reg.Find(ctx, key, mode, create, exclusive) + if err != nil { + return nil, err + } + // Validate shm-specific parameters. + if shm != nil { + shm := shm.(*Shm) if size > shm.size { // "A segment for the given key exists, but size is greater than // the size of that segment." - man shmget(2) return nil, linuxerr.EINVAL } - - if create && exclusive { - // "IPC_CREAT and IPC_EXCL were specified in shmflg, but a - // shared memory segment already exists for key." - // - man shmget(2) - return nil, linuxerr.EEXIST - } - shm.IncRef() return shm, nil } - - if !create { - // "No segment exists for the given key, and IPC_CREAT was not - // specified." - man shmget(2) - return nil, syserror.ENOENT - } } var sizeAligned uint64 @@ -208,9 +188,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz } // Need to create a new segment. - creator := fs.FileOwnerFromContext(ctx) - perms := fs.FilePermsFromMode(mode) - s, err := r.newShm(ctx, pid, key, creator, perms, size) + s, err := r.newShmLocked(ctx, pid, key, fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode), size) if err != nil { return nil, err } @@ -220,10 +198,10 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz return s, nil } -// newShm creates a new segment in the registry. +// newShmLocked creates a new segment in the registry. // // Precondition: Caller must hold r.mu. -func (r *Registry) newShm(ctx context.Context, pid int32, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) { +func (r *Registry) newShmLocked(ctx context.Context, pid int32, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) { mfp := pgalloc.MemoryFileProviderFromContext(ctx) if mfp == nil { panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider)) @@ -235,39 +213,24 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key ipc.Key, creator f return nil, err } - // Find the next available ID. - for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { - // Handle wrap around. - if id < 0 { - id = 0 - continue - } - if r.shms[id] == nil { - r.lastIDUsed = id - - shm := &Shm{ - mfp: mfp, - registry: r, - size: size, - effectiveSize: effectiveSize, - obj: ipc.NewObject(r.userNS, ipc.ID(id), ipc.Key(key), creator, creator, perms), - fr: fr, - creatorPID: pid, - changeTime: ktime.NowFromContext(ctx), - } - shm.InitRefs() - - r.shms[id] = shm - r.keysToShms[key] = shm - - r.totalPages += effectiveSize / hostarch.PageSize + shm := &Shm{ + mfp: mfp, + registry: r, + size: size, + effectiveSize: effectiveSize, + obj: ipc.NewObject(r.reg.UserNS, ipc.Key(key), creator, creator, perms), + fr: fr, + creatorPID: pid, + changeTime: ktime.NowFromContext(ctx), + } + shm.InitRefs() - return shm, nil - } + if err := r.reg.Register(shm); err != nil { + return nil, err } + r.totalPages += effectiveSize / hostarch.PageSize - log.Warningf("Shm ids exhuasted, they may be leaking") - return nil, syserror.ENOSPC + return shm, nil } // IPCInfo reports global parameters for sysv shared memory segments on this @@ -289,7 +252,7 @@ func (r *Registry) ShmInfo() *linux.ShmInfo { defer r.mu.Unlock() return &linux.ShmInfo{ - UsedIDs: int32(r.lastIDUsed), + UsedIDs: int32(r.reg.LastIDUsed()), ShmTot: r.totalPages, ShmRss: r.totalPages, // We could probably get a better estimate from memory accounting. ShmSwp: 0, // No reclaim at the moment. @@ -310,7 +273,7 @@ func (r *Registry) remove(s *Shm) { panic(fmt.Sprintf("Attempted to remove %s from the registry whose key is still associated", s.debugLocked())) } - delete(r.shms, s.obj.ID) + r.reg.DissociateID(s.obj.ID) r.totalPages -= s.effectiveSize / hostarch.PageSize } @@ -322,13 +285,16 @@ func (r *Registry) Release(ctx context.Context) { // the IPC namespace containing it has no more references. toRelease := make([]*Shm, 0) r.mu.Lock() - for _, s := range r.keysToShms { - s.mu.Lock() - if !s.pendingDestruction { - toRelease = append(toRelease, s) - } - s.mu.Unlock() - } + r.reg.ForAllObjects( + func(o ipc.Mechanism) { + s := o.(*Shm) + s.mu.Lock() + if !s.pendingDestruction { + toRelease = append(toRelease, s) + } + s.mu.Unlock() + }, + ) r.mu.Unlock() for _, s := range toRelease { @@ -383,7 +349,6 @@ type Shm struct { // mu protects all fields below. mu sync.Mutex `state:"nosave"` - // obj defines basic fields that should be included in all SysV IPC objects. obj *ipc.Object // attachTime is updated on every successful shmat. @@ -412,6 +377,28 @@ func (s *Shm) ID() ipc.ID { return s.obj.ID } +// Object implements ipc.Mechanism.Object. +func (s *Shm) Object() *ipc.Object { + return s.obj +} + +// Destroy implements ipc.Mechanism.Destroy. No work is performed on shm.Destroy +// because a different removal mechanism is used in shm. See Shm.MarkDestroyed. +func (s *Shm) Destroy() { +} + +// Lock implements ipc.Mechanism.Lock. +func (s *Shm) Lock() { + s.mu.Lock() +} + +// Unlock implements ipc.mechanism.Unlock. +// +// +checklocksignore +func (s *Shm) Unlock() { + s.mu.Unlock() +} + // Precondition: Caller must hold s.mu. func (s *Shm) debugLocked() string { return fmt.Sprintf("Shm{id: %d, key: %d, size: %d bytes, refs: %d, destroyed: %v}", diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go index 4755bef18..f61cc466c 100644 --- a/pkg/sentry/syscalls/linux/sys_sem.go +++ b/pkg/sentry/syscalls/linux/sys_sem.go @@ -240,7 +240,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal func remove(t *kernel.Task, id ipc.ID) error { r := t.IPCNamespace().SemaphoreRegistry() creds := auth.CredentialsFromContext(t) - return r.RemoveID(id, creds) + return r.Remove(id, creds) } func ipcSet(t *kernel.Task, id ipc.ID, uid auth.UID, gid auth.GID, perms fs.FilePermissions) error { |