summary | refs | log | tree | commit | diff | homepage
path: root/pkg/sentry/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r-- pkg/sentry/kernel/ipc/BUILD | 16
-rw-r--r-- pkg/sentry/kernel/ipc/object.go | 107
-rw-r--r-- pkg/sentry/kernel/semaphore/BUILD | 1
-rw-r--r-- pkg/sentry/kernel/semaphore/semaphore.go | 132
-rw-r--r-- pkg/sentry/kernel/semaphore/semaphore_test.go | 19
-rw-r--r-- pkg/sentry/kernel/shm/BUILD | 1
-rw-r--r-- pkg/sentry/kernel/shm/shm.go | 156
7 files changed, 238 insertions, 194 deletions
diff --git a/pkg/sentry/kernel/ipc/BUILD b/pkg/sentry/kernel/ipc/BUILD
new file mode 100644
index 000000000..14f73ee17
--- /dev/null
+++ b/pkg/sentry/kernel/ipc/BUILD
@@ -0,0 +1,16 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "ipc",
+ srcs = [
+ "object.go",
+ ],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/sentry/fs",
+ "//pkg/sentry/kernel/auth",
+ ],
+)
diff --git a/pkg/sentry/kernel/ipc/object.go b/pkg/sentry/kernel/ipc/object.go
new file mode 100644
index 000000000..769ff44e1
--- /dev/null
+++ b/pkg/sentry/kernel/ipc/object.go
@@ -0,0 +1,107 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package ipc defines functionality and utilities common to sysvipc mechanisms.
+package ipc
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// Key is a user-provided identifier for IPC objects, analogous to a file name.
+type Key int32
+
+// ID is a kernel identifier for IPC objects, an opaque handle analogous to an fd.
+type ID int32
+
+// Object holds the fields common to all SysV IPC mechanisms. It has no lock of
+// its own; mutable fields should be accessed under the containing mechanism's lock.
+type Object struct {
+ // UserNS is the user namespace which owns the IPC namespace which owns the
+ // IPC object. Immutable.
+ UserNS *auth.UserNamespace
+
+ // ID is a kernel identifier for the IPC object. Immutable.
+ ID ID
+
+ // Key is a user-provided identifier for the IPC object. Mutable: shm resets it to linux.IPC_PRIVATE when dissociating the key.
+ Key Key
+
+ // Creator is the user who created the IPC object. Immutable.
+ Creator fs.FileOwner
+
+ // Owner is the current owner of the IPC object. Mutable.
+ Owner fs.FileOwner
+
+ // Perms is the access permissions for the IPC object. Mutable.
+ Perms fs.FilePermissions
+}
+
+// Mechanism represents a SysV mechanism that holds an IPC object. It can also
+// be viewed as a container for an ipc.Object together with the lock that
+// should be held while that object is used.
+type Mechanism interface {
+ // Object returns a pointer to the mechanism's ipc.Object. Mechanism.Lock
+ // and Mechanism.Unlock should be used when the object is used.
+ Object() *Object
+
+ // Lock behaves the same as Mutex.Lock on the mechanism.
+ Lock()
+
+ // Unlock behaves the same as Mutex.Unlock on the mechanism.
+ Unlock()
+}
+
+// NewObject returns a new ipc.Object populated with the given user namespace, ID, key, creator, owner and permissions.
+func NewObject(un *auth.UserNamespace, id ID, key Key, creator, owner fs.FileOwner, perms fs.FilePermissions) *Object {
+ return &Object{
+ UserNS: un,
+ ID: id,
+ Key: key,
+ Creator: creator,
+ Owner: owner,
+ Perms: perms,
+ }
+}
+
+// CheckOwnership reports whether creds' effective KUID matches the object's
+// owner or creator, or creds has CAP_SYS_ADMIN in o.UserNS. See ipc/util.c:ipcctl_obtain_check() in Linux.
+func (o *Object) CheckOwnership(creds *auth.Credentials) bool {
+ if o.Owner.UID == creds.EffectiveKUID || o.Creator.UID == creds.EffectiveKUID {
+ return true
+ }
+
+ // Tasks with CAP_SYS_ADMIN may bypass ownership checks. Strangely, Linux
+ // doesn't use CAP_IPC_OWNER for this despite CAP_IPC_OWNER being documented
+ // for use to "override IPC ownership checks".
+ return creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, o.UserNS)
+}
+
+// CheckPermissions reports whether the permission class that applies to creds
+// (user, group or other) grants req, or creds has CAP_IPC_OWNER in o.UserNS. See ipc/util.c:ipcperms() in Linux.
+func (o *Object) CheckPermissions(creds *auth.Credentials, req fs.PermMask) bool {
+ p := o.Perms.Other // Start from "other"; narrowed to user or group below.
+ if o.Owner.UID == creds.EffectiveKUID {
+ p = o.Perms.User
+ } else if creds.InGroup(o.Owner.GID) {
+ p = o.Perms.Group
+ }
+
+ if p.SupersetOf(req) {
+ return true
+ }
+ return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, o.UserNS) // Tasks with CAP_IPC_OWNER may bypass permission checks.
+}
diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD
index a787c00a8..5dd607953 100644
--- a/pkg/sentry/kernel/semaphore/BUILD
+++ b/pkg/sentry/kernel/semaphore/BUILD
@@ -29,6 +29,7 @@ go_library(
"//pkg/log",
"//pkg/sentry/fs",
"//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/ipc",
"//pkg/sentry/kernel/time",
"//pkg/sync",
"//pkg/syserror",
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index 485c3a788..d609d88e2 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
@@ -51,11 +52,11 @@ type Registry struct {
userNS *auth.UserNamespace
// mu protects all fields below.
mu sync.Mutex `state:"nosave"`
- semaphores map[int32]*Set
- lastIDUsed int32
+ semaphores map[ipc.ID]*Set
+ lastIDUsed ipc.ID
// indexes maintains a mapping between a set's index in virtual array and
// its identifier.
- indexes map[int32]int32
+ indexes map[int32]ipc.ID
}
// Set represents a set of semaphores that can be operated atomically.
@@ -65,19 +66,12 @@ type Set struct {
// registry owning this sem set. Immutable.
registry *Registry
- // Id is a handle that identifies the set.
- ID int32
-
- // key is an user provided key that can be shared between processes.
- key int32
+ // mu protects all fields below.
+ mu sync.Mutex `state:"nosave"`
- // creator is the user that created the set. Immutable.
- creator fs.FileOwner
+ // obj defines basic fields that should be included in all SysV IPC objects.
+ obj *ipc.Object
- // mu protects all fields below.
- mu sync.Mutex `state:"nosave"`
- owner fs.FileOwner
- perms fs.FilePermissions
opTime ktime.Time
changeTime ktime.Time
@@ -116,8 +110,8 @@ type waiter struct {
func NewRegistry(userNS *auth.UserNamespace) *Registry {
return &Registry{
userNS: userNS,
- semaphores: make(map[int32]*Set),
- indexes: make(map[int32]int32),
+ semaphores: make(map[ipc.ID]*Set),
+ indexes: make(map[int32]ipc.ID),
}
}
@@ -126,7 +120,7 @@ func NewRegistry(userNS *auth.UserNamespace) *Registry {
// a new set is always created. If create is false, it fails if a set cannot
// be found. If exclusive is true, it fails if a set with the same key already
// exists.
-func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) {
+func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) {
if nsems < 0 || nsems > semsMax {
return nil, linuxerr.EINVAL
}
@@ -142,7 +136,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu
// Check that caller can access semaphore set.
creds := auth.CredentialsFromContext(ctx)
- if !set.checkPerms(creds, fs.PermsFromMode(mode)) {
+ if !set.obj.CheckPermissions(creds, fs.PermsFromMode(mode)) {
return nil, linuxerr.EACCES
}
@@ -233,7 +227,7 @@ func (r *Registry) HighestIndex() int32 {
// RemoveID removes set with give 'id' from the registry and marks the set as
// dead. All waiters will be awakened and fail.
-func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error {
+func (r *Registry) RemoveID(id ipc.ID, creds *auth.Credentials) error {
r.mu.Lock()
defer r.mu.Unlock()
@@ -252,27 +246,17 @@ func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error {
// "The effective user ID of the calling process must match the creator or
// owner of the semaphore set, or the caller must be privileged."
- if !set.checkCredentials(creds) && !set.checkCapability(creds) {
+ if !set.obj.CheckOwnership(creds) {
return linuxerr.EACCES
}
- delete(r.semaphores, set.ID)
+ delete(r.semaphores, set.obj.ID)
delete(r.indexes, index)
set.destroy()
return nil
}
-func (r *Registry) newSet(ctx context.Context, key int32, owner, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) {
- set := &Set{
- registry: r,
- key: key,
- owner: owner,
- creator: owner,
- perms: perms,
- changeTime: ktime.NowFromContext(ctx),
- sems: make([]sem, nsems),
- }
-
+func (r *Registry) newSet(ctx context.Context, key ipc.Key, owner, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) {
// Find the next available ID.
for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
// Handle wrap around.
@@ -287,8 +271,15 @@ func (r *Registry) newSet(ctx context.Context, key int32, owner, creator fs.File
}
r.indexes[index] = id
r.lastIDUsed = id
+
+ set := &Set{
+ registry: r,
+ obj: ipc.NewObject(r.userNS, ipc.ID(id), ipc.Key(key), creator, owner, perms),
+ changeTime: ktime.NowFromContext(ctx),
+ sems: make([]sem, nsems),
+ }
r.semaphores[id] = set
- set.ID = id
+
return set, nil
}
}
@@ -298,7 +289,7 @@ func (r *Registry) newSet(ctx context.Context, key int32, owner, creator fs.File
}
// FindByID looks up a set given an ID.
-func (r *Registry) FindByID(id int32) *Set {
+func (r *Registry) FindByID(id ipc.ID) *Set {
r.mu.Lock()
defer r.mu.Unlock()
return r.semaphores[id]
@@ -316,16 +307,16 @@ func (r *Registry) FindByIndex(index int32) *Set {
return r.semaphores[id]
}
-func (r *Registry) findByKey(key int32) *Set {
+func (r *Registry) findByKey(key ipc.Key) *Set {
for _, v := range r.semaphores {
- if v.key == key {
+ if v.obj.Key == key {
return v
}
}
return nil
}
-func (r *Registry) findIndexByID(id int32) (int32, bool) {
+func (r *Registry) findIndexByID(id ipc.ID) (int32, bool) {
for k, v := range r.indexes {
if v == id {
return k, true
@@ -351,6 +342,11 @@ func (r *Registry) totalSems() int {
return totalSems
}
+// ID returns the semaphore set's ID.
+func (s *Set) ID() ipc.ID {
+ return s.obj.ID
+}
+
func (s *Set) findSem(num int32) *sem {
if num < 0 || int(num) >= s.Size() {
return nil
@@ -370,12 +366,12 @@ func (s *Set) Change(ctx context.Context, creds *auth.Credentials, owner fs.File
// "The effective UID of the calling process must match the owner or creator
// of the semaphore set, or the caller must be privileged."
- if !s.checkCredentials(creds) && !s.checkCapability(creds) {
+ if !s.obj.CheckOwnership(creds) {
return linuxerr.EACCES
}
- s.owner = owner
- s.perms = perms
+ s.obj.Owner = owner
+ s.obj.Perms = perms
s.changeTime = ktime.NowFromContext(ctx)
return nil
}
@@ -395,18 +391,18 @@ func (s *Set) semStat(creds *auth.Credentials, permMask fs.PermMask) (*linux.Sem
s.mu.Lock()
defer s.mu.Unlock()
- if !s.checkPerms(creds, permMask) {
+ if !s.obj.CheckPermissions(creds, permMask) {
return nil, linuxerr.EACCES
}
return &linux.SemidDS{
SemPerm: linux.IPCPerm{
- Key: uint32(s.key),
- UID: uint32(creds.UserNamespace.MapFromKUID(s.owner.UID)),
- GID: uint32(creds.UserNamespace.MapFromKGID(s.owner.GID)),
- CUID: uint32(creds.UserNamespace.MapFromKUID(s.creator.UID)),
- CGID: uint32(creds.UserNamespace.MapFromKGID(s.creator.GID)),
- Mode: uint16(s.perms.LinuxMode()),
+ Key: uint32(s.obj.Key),
+ UID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Owner.UID)),
+ GID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Owner.GID)),
+ CUID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Creator.UID)),
+ CGID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Creator.GID)),
+ Mode: uint16(s.obj.Perms.LinuxMode()),
Seq: 0, // IPC sequence not supported.
},
SemOTime: s.opTime.TimeT(),
@@ -425,7 +421,7 @@ func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Cred
defer s.mu.Unlock()
// "The calling process must have alter permission on the semaphore set."
- if !s.checkPerms(creds, fs.PermMask{Write: true}) {
+ if !s.obj.CheckPermissions(creds, fs.PermMask{Write: true}) {
return linuxerr.EACCES
}
@@ -461,7 +457,7 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti
defer s.mu.Unlock()
// "The calling process must have alter permission on the semaphore set."
- if !s.checkPerms(creds, fs.PermMask{Write: true}) {
+ if !s.obj.CheckPermissions(creds, fs.PermMask{Write: true}) {
return linuxerr.EACCES
}
@@ -483,7 +479,7 @@ func (s *Set) GetVal(num int32, creds *auth.Credentials) (int16, error) {
defer s.mu.Unlock()
// "The calling process must have read permission on the semaphore set."
- if !s.checkPerms(creds, fs.PermMask{Read: true}) {
+ if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
return 0, linuxerr.EACCES
}
@@ -500,7 +496,7 @@ func (s *Set) GetValAll(creds *auth.Credentials) ([]uint16, error) {
defer s.mu.Unlock()
// "The calling process must have read permission on the semaphore set."
- if !s.checkPerms(creds, fs.PermMask{Read: true}) {
+ if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
return nil, linuxerr.EACCES
}
@@ -517,7 +513,7 @@ func (s *Set) GetPID(num int32, creds *auth.Credentials) (int32, error) {
defer s.mu.Unlock()
// "The calling process must have read permission on the semaphore set."
- if !s.checkPerms(creds, fs.PermMask{Read: true}) {
+ if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
return 0, linuxerr.EACCES
}
@@ -533,7 +529,7 @@ func (s *Set) countWaiters(num int32, creds *auth.Credentials, pred func(w *wait
defer s.mu.Unlock()
// The calling process must have read permission on the semaphore set.
- if !s.checkPerms(creds, fs.PermMask{Read: true}) {
+ if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
return 0, linuxerr.EACCES
}
@@ -589,7 +585,7 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr
}
}
- if !s.checkPerms(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) {
+ if !s.obj.CheckPermissions(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) {
return nil, 0, linuxerr.EACCES
}
@@ -675,34 +671,6 @@ func (s *Set) AbortWait(num int32, ch chan struct{}) {
// Waiter may not be found in case it raced with wakeWaiters().
}
-func (s *Set) checkCredentials(creds *auth.Credentials) bool {
- return s.owner.UID == creds.EffectiveKUID ||
- s.owner.GID == creds.EffectiveKGID ||
- s.creator.UID == creds.EffectiveKUID ||
- s.creator.GID == creds.EffectiveKGID
-}
-
-func (s *Set) checkCapability(creds *auth.Credentials) bool {
- return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, s.registry.userNS) && creds.UserNamespace.MapFromKUID(s.owner.UID).Ok()
-}
-
-func (s *Set) checkPerms(creds *auth.Credentials, reqPerms fs.PermMask) bool {
- // Are we owner, or in group, or other?
- p := s.perms.Other
- if s.owner.UID == creds.EffectiveKUID {
- p = s.perms.User
- } else if creds.InGroup(s.owner.GID) {
- p = s.perms.Group
- }
-
- // Are permissions satisfied without capability checks?
- if p.SupersetOf(reqPerms) {
- return true
- }
-
- return s.checkCapability(creds)
-}
-
// destroy destroys the set.
//
// Preconditions: Caller must hold 's.mu'.
diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go
index e47acefdf..0c7abb68e 100644
--- a/pkg/sentry/kernel/semaphore/semaphore_test.go
+++ b/pkg/sentry/kernel/semaphore/semaphore_test.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -55,7 +56,7 @@ func signalled(ch chan struct{}) bool {
func TestBasic(t *testing.T) {
ctx := contexttest.Context(t)
- set := &Set{ID: 123, sems: make([]sem, 1)}
+ set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)}
ops := []linux.Sembuf{
{SemOp: 1},
}
@@ -76,7 +77,7 @@ func TestBasic(t *testing.T) {
func TestWaitForZero(t *testing.T) {
ctx := contexttest.Context(t)
- set := &Set{ID: 123, sems: make([]sem, 1)}
+ set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)}
ops := []linux.Sembuf{
{SemOp: 0},
}
@@ -115,7 +116,7 @@ func TestWaitForZero(t *testing.T) {
func TestNoWait(t *testing.T) {
ctx := contexttest.Context(t)
- set := &Set{ID: 123, sems: make([]sem, 1)}
+ set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)}
ops := []linux.Sembuf{
{SemOp: 1},
}
@@ -141,8 +142,8 @@ func TestUnregister(t *testing.T) {
if err != nil {
t.Fatalf("FindOrCreate() failed, err: %v", err)
}
- if got := r.FindByID(set.ID); got.ID != set.ID {
- t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.ID, got, set)
+ if got := r.FindByID(set.obj.ID); got.obj.ID != set.obj.ID {
+ t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.obj.ID, got, set)
}
ops := []linux.Sembuf{
@@ -155,14 +156,14 @@ func TestUnregister(t *testing.T) {
}
creds := auth.CredentialsFromContext(ctx)
- if err := r.RemoveID(set.ID, creds); err != nil {
- t.Fatalf("RemoveID(%d) failed, err: %v", set.ID, err)
+ if err := r.RemoveID(set.obj.ID, creds); err != nil {
+ t.Fatalf("RemoveID(%d) failed, err: %v", set.obj.ID, err)
}
if !set.dead {
t.Fatalf("set is not dead: %+v", set)
}
- if got := r.FindByID(set.ID); got != nil {
- t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.ID, got)
+ if got := r.FindByID(set.obj.ID); got != nil {
+ t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.obj.ID, got)
}
for i, ch := range chs {
if !signalled(ch) {
diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD
index 5b69333fe..4e8deac4c 100644
--- a/pkg/sentry/kernel/shm/BUILD
+++ b/pkg/sentry/kernel/shm/BUILD
@@ -36,6 +36,7 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/ipc",
"//pkg/sentry/kernel/time",
"//pkg/sentry/memmap",
"//pkg/sentry/pgalloc",
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index f7ac4c2b2..459ae8c30 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -43,6 +43,7 @@ import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
@@ -51,12 +52,6 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
)
-// Key represents a shm segment key. Analogous to a file name.
-type Key int32
-
-// ID represents the opaque handle for a shm segment. Analogous to an fd.
-type ID int32
-
// Registry tracks all shared memory segments in an IPC namespace. The registry
// provides the mechanisms for creating and finding segments, and reporting
// global shm parameters.
@@ -78,13 +73,13 @@ type Registry struct {
// exists a short window during which a Shm still exists in Shm, but is
// unreferenced. Users must use TryIncRef to determine if the Shm is
// still valid.
- shms map[ID]*Shm
+ shms map[ipc.ID]*Shm
// keysToShms maps segment keys to segments.
//
// Shms in keysToShms are guaranteed to be referenced, as they are
// removed by disassociateKey before the last DecRef.
- keysToShms map[Key]*Shm
+ keysToShms map[ipc.Key]*Shm
// Sum of the sizes of all existing segments rounded up to page size, in
// units of page size.
@@ -92,22 +87,22 @@ type Registry struct {
// ID assigned to the last created segment. Used to quickly find the next
// unused ID.
- lastIDUsed ID
+ lastIDUsed ipc.ID
}
// NewRegistry creates a new shm registry.
func NewRegistry(userNS *auth.UserNamespace) *Registry {
return &Registry{
userNS: userNS,
- shms: make(map[ID]*Shm),
- keysToShms: make(map[Key]*Shm),
+ shms: make(map[ipc.ID]*Shm),
+ keysToShms: make(map[ipc.Key]*Shm),
}
}
// FindByID looks up a segment given an ID.
//
// FindByID returns a reference on Shm.
-func (r *Registry) FindByID(id ID) *Shm {
+func (r *Registry) FindByID(id ipc.ID) *Shm {
r.mu.Lock()
defer r.mu.Unlock()
s := r.shms[id]
@@ -129,9 +124,9 @@ func (r *Registry) dissociateKey(s *Shm) {
defer r.mu.Unlock()
s.mu.Lock()
defer s.mu.Unlock()
- if s.key != linux.IPC_PRIVATE {
- delete(r.keysToShms, s.key)
- s.key = linux.IPC_PRIVATE
+ if s.obj.Key != linux.IPC_PRIVATE {
+ delete(r.keysToShms, s.obj.Key)
+ s.obj.Key = linux.IPC_PRIVATE
}
}
@@ -139,7 +134,7 @@ func (r *Registry) dissociateKey(s *Shm) {
// analogous to open(2).
//
// FindOrCreate returns a reference on Shm.
-func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size uint64, mode linux.FileMode, private, create, exclusive bool) (*Shm, error) {
+func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, size uint64, mode linux.FileMode, private, create, exclusive bool) (*Shm, error) {
if (create || private) && (size < linux.SHMMIN || size > linux.SHMMAX) {
// "A new segment was to be created and size is less than SHMMIN or
// greater than SHMMAX." - man shmget(2)
@@ -165,7 +160,8 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui
defer shm.mu.Unlock()
// Check that caller can access the segment.
- if !shm.checkPermissions(ctx, fs.PermsFromMode(mode)) {
+ creds := auth.CredentialsFromContext(ctx)
+ if !shm.obj.CheckPermissions(creds, fs.PermsFromMode(mode)) {
// "The user does not have permission to access the shared
// memory segment, and does not have the CAP_IPC_OWNER
// capability in the user namespace that governs its IPC
@@ -227,7 +223,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui
// newShm creates a new segment in the registry.
//
// Precondition: Caller must hold r.mu.
-func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) {
+func (r *Registry) newShm(ctx context.Context, pid int32, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) {
mfp := pgalloc.MemoryFileProviderFromContext(ctx)
if mfp == nil {
panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider))
@@ -239,21 +235,6 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi
return nil, err
}
- shm := &Shm{
- mfp: mfp,
- registry: r,
- creator: creator,
- size: size,
- effectiveSize: effectiveSize,
- fr: fr,
- key: key,
- perms: perms,
- owner: creator,
- creatorPID: pid,
- changeTime: ktime.NowFromContext(ctx),
- }
- shm.InitRefs()
-
// Find the next available ID.
for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
// Handle wrap around.
@@ -264,7 +245,18 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi
if r.shms[id] == nil {
r.lastIDUsed = id
- shm.ID = id
+ shm := &Shm{
+ mfp: mfp,
+ registry: r,
+ size: size,
+ effectiveSize: effectiveSize,
+ obj: ipc.NewObject(r.userNS, ipc.ID(id), ipc.Key(key), creator, creator, perms),
+ fr: fr,
+ creatorPID: pid,
+ changeTime: ktime.NowFromContext(ctx),
+ }
+ shm.InitRefs()
+
r.shms[id] = shm
r.keysToShms[key] = shm
@@ -314,11 +306,11 @@ func (r *Registry) remove(s *Shm) {
s.mu.Lock()
defer s.mu.Unlock()
- if s.key != linux.IPC_PRIVATE {
+ if s.obj.Key != linux.IPC_PRIVATE {
panic(fmt.Sprintf("Attempted to remove %s from the registry whose key is still associated", s.debugLocked()))
}
- delete(r.shms, s.ID)
+ delete(r.shms, s.obj.ID)
r.totalPages -= s.effectiveSize / hostarch.PageSize
}
@@ -374,12 +366,6 @@ type Shm struct {
// registry points to the shm registry containing this segment. Immutable.
registry *Registry
- // ID is the kernel identifier for this segment. Immutable.
- ID ID
-
- // creator is the user that created the segment. Immutable.
- creator fs.FileOwner
-
// size is the requested size of the segment at creation, in
// bytes. Immutable.
size uint64
@@ -397,14 +383,9 @@ type Shm struct {
// mu protects all fields below.
mu sync.Mutex `state:"nosave"`
- // key is the public identifier for this segment.
- key Key
+ // obj defines basic fields that should be included in all SysV IPC objects.
+ obj *ipc.Object
- // perms is the access permissions for the segment.
- perms fs.FilePermissions
-
- // owner of this segment.
- owner fs.FileOwner
// attachTime is updated on every successful shmat.
attachTime ktime.Time
// detachTime is updated on every successful shmdt.
@@ -426,17 +407,22 @@ type Shm struct {
pendingDestruction bool
}
+// ID returns the segment's ID.
+func (s *Shm) ID() ipc.ID {
+ return s.obj.ID
+}
+
// Precondition: Caller must hold s.mu.
func (s *Shm) debugLocked() string {
return fmt.Sprintf("Shm{id: %d, key: %d, size: %d bytes, refs: %d, destroyed: %v}",
- s.ID, s.key, s.size, s.ReadRefs(), s.pendingDestruction)
+ s.obj.ID, s.obj.Key, s.size, s.ReadRefs(), s.pendingDestruction)
}
// MappedName implements memmap.MappingIdentity.MappedName.
func (s *Shm) MappedName(ctx context.Context) string {
s.mu.Lock()
defer s.mu.Unlock()
- return fmt.Sprintf("SYSV%08d", s.key)
+ return fmt.Sprintf("SYSV%08d", s.obj.Key)
}
// DeviceID implements memmap.MappingIdentity.DeviceID.
@@ -448,7 +434,7 @@ func (s *Shm) DeviceID() uint64 {
func (s *Shm) InodeID() uint64 {
// "shmid gets reported as "inode#" in /proc/pid/maps. proc-ps tools use
// this. Changing this will break them." -- Linux, ipc/shm.c:newseg()
- return uint64(s.ID)
+ return uint64(s.obj.ID)
}
// DecRef drops a reference on s.
@@ -551,7 +537,8 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts Atta
return memmap.MMapOpts{}, syserror.EIDRM
}
- if !s.checkPermissions(ctx, fs.PermMask{
+ creds := auth.CredentialsFromContext(ctx)
+ if !s.obj.CheckPermissions(creds, fs.PermMask{
Read: true,
Write: !opts.Readonly,
Execute: opts.Execute,
@@ -591,7 +578,8 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) {
// "The caller must have read permission on the shared memory segment."
// - man shmctl(2)
- if !s.checkPermissions(ctx, fs.PermMask{Read: true}) {
+ creds := auth.CredentialsFromContext(ctx)
+ if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
// "IPC_STAT or SHM_STAT is requested and shm_perm.mode does not allow
// read access for shmid, and the calling process does not have the
// CAP_IPC_OWNER capability in the user namespace that governs its IPC
@@ -603,7 +591,6 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) {
if s.pendingDestruction {
mode |= linux.SHM_DEST
}
- creds := auth.CredentialsFromContext(ctx)
// Use the reference count as a rudimentary count of the number of
// attaches. We exclude:
@@ -620,12 +607,12 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) {
ds := &linux.ShmidDS{
ShmPerm: linux.IPCPerm{
- Key: uint32(s.key),
- UID: uint32(creds.UserNamespace.MapFromKUID(s.owner.UID)),
- GID: uint32(creds.UserNamespace.MapFromKGID(s.owner.GID)),
- CUID: uint32(creds.UserNamespace.MapFromKUID(s.creator.UID)),
- CGID: uint32(creds.UserNamespace.MapFromKGID(s.creator.GID)),
- Mode: mode | uint16(s.perms.LinuxMode()),
+ Key: uint32(s.obj.Key),
+ UID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Owner.UID)),
+ GID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Owner.GID)),
+ CUID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Creator.UID)),
+ CGID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Creator.GID)),
+ Mode: mode | uint16(s.obj.Perms.LinuxMode()),
Seq: 0, // IPC sequences not supported.
},
ShmSegsz: s.size,
@@ -645,11 +632,11 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error {
s.mu.Lock()
defer s.mu.Unlock()
- if !s.checkOwnership(ctx) {
+ creds := auth.CredentialsFromContext(ctx)
+ if !s.obj.CheckOwnership(creds) {
return linuxerr.EPERM
}
- creds := auth.CredentialsFromContext(ctx)
uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID))
gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID))
if !uid.Ok() || !gid.Ok() {
@@ -659,10 +646,10 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error {
// User may only modify the lower 9 bits of the mode. All the other bits are
// always 0 for the underlying inode.
mode := linux.FileMode(ds.ShmPerm.Mode & 0x1ff)
- s.perms = fs.FilePermsFromMode(mode)
+ s.obj.Perms = fs.FilePermsFromMode(mode)
- s.owner.UID = uid
- s.owner.GID = gid
+ s.obj.Owner.UID = uid
+ s.obj.Owner.GID = gid
s.changeTime = ktime.NowFromContext(ctx)
return nil
@@ -691,40 +678,3 @@ func (s *Shm) MarkDestroyed(ctx context.Context) {
s.DecRef(ctx)
return
}
-
-// checkOwnership verifies whether a segment may be accessed by ctx as an
-// owner. See ipc/util.c:ipcctl_pre_down_nolock() in Linux.
-//
-// Precondition: Caller must hold s.mu.
-func (s *Shm) checkOwnership(ctx context.Context) bool {
- creds := auth.CredentialsFromContext(ctx)
- if s.owner.UID == creds.EffectiveKUID || s.creator.UID == creds.EffectiveKUID {
- return true
- }
-
- // Tasks with CAP_SYS_ADMIN may bypass ownership checks. Strangely, Linux
- // doesn't use CAP_IPC_OWNER for this despite CAP_IPC_OWNER being documented
- // for use to "override IPC ownership checks".
- return creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, s.registry.userNS)
-}
-
-// checkPermissions verifies whether a segment is accessible by ctx for access
-// described by req. See ipc/util.c:ipcperms() in Linux.
-//
-// Precondition: Caller must hold s.mu.
-func (s *Shm) checkPermissions(ctx context.Context, req fs.PermMask) bool {
- creds := auth.CredentialsFromContext(ctx)
-
- p := s.perms.Other
- if s.owner.UID == creds.EffectiveKUID {
- p = s.perms.User
- } else if creds.InGroup(s.owner.GID) {
- p = s.perms.Group
- }
- if p.SupersetOf(req) {
- return true
- }
-
- // Tasks with CAP_IPC_OWNER may bypass permission checks.
- return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, s.registry.userNS)
-}