diff options
author | Zyad A. Ali <zyad.ali.me@gmail.com> | 2021-05-28 21:37:47 +0200 |
---|---|---|
committer | Zyad A. Ali <zyad.ali.me@gmail.com> | 2021-07-13 22:09:41 +0200 |
commit | 7a73169229bd856eca6febebc6170cbfff582a4a (patch) | |
tree | d1f32686776a4d2d2bcf7576cb8e34e951b58292 | |
parent | c16e69a9d5ec3422b648a6d32842442925285a29 (diff) |
Create ipc package and ipc.Object.
Create ipc.Object to define fields and functionality used in SysV
mechanisms, and have them use it.
-rw-r--r-- | pkg/sentry/kernel/ipc/BUILD | 16 | ||||
-rw-r--r-- | pkg/sentry/kernel/ipc/object.go | 107 | ||||
-rw-r--r-- | pkg/sentry/kernel/semaphore/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/kernel/semaphore/semaphore.go | 132 | ||||
-rw-r--r-- | pkg/sentry/kernel/semaphore/semaphore_test.go | 19 | ||||
-rw-r--r-- | pkg/sentry/kernel/shm/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/kernel/shm/shm.go | 156 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_sem.go | 41 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_shm.go | 11 |
10 files changed, 266 insertions, 219 deletions
diff --git a/pkg/sentry/kernel/ipc/BUILD b/pkg/sentry/kernel/ipc/BUILD new file mode 100644 index 000000000..14f73ee17 --- /dev/null +++ b/pkg/sentry/kernel/ipc/BUILD @@ -0,0 +1,16 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "ipc", + srcs = [ + "object.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/sentry/fs", + "//pkg/sentry/kernel/auth", + ], +) diff --git a/pkg/sentry/kernel/ipc/object.go b/pkg/sentry/kernel/ipc/object.go new file mode 100644 index 000000000..769ff44e1 --- /dev/null +++ b/pkg/sentry/kernel/ipc/object.go @@ -0,0 +1,107 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ipc defines functionality and utilities common to sysvipc mechanisms. +package ipc + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +// Key is a user-provided identifier for IPC objects. +type Key int32 + +// ID is a kernel identifier for IPC objects. +type ID int32 + +// Object represents an abstract IPC object with fields common to all IPC +// mechanisms. +type Object struct { + // User namespace which owns the IPC namespace which owns the IPC object. + // Immutable. + UserNS *auth.UserNamespace + + // ID is a kernel identifier for the IPC object. Immutable. + ID ID + + // Key is a user-provided identifier for the IPC object. Immutable. + Key Key + + // Creator is the user who created the IPC object. Immutable. + Creator fs.FileOwner + + // Owner is the current owner of the IPC object. + Owner fs.FileOwner + + // Perms is the access permissions the IPC object. + Perms fs.FilePermissions +} + +// Mechanism represents a SysV mechanism that holds an IPC object. It can also +// be looked at as a container for an ipc.Object, which is by definition a fully +// functional SysV object. +type Mechanism interface { + // Object returns a pointer to the mechanism's ipc.Object. Mechanism.Lock, + // and Mechanism.Unlock should be used when the object is used. + Object() *Object + + // Lock behaves the same as Mutex.Lock on the mechanism. + Lock() + + // Unlock behaves the same as Mutex.Unlock on the mechanism. + Unlock() +} + +// NewObject returns a new, initialized ipc.Object. +func NewObject(un *auth.UserNamespace, id ID, key Key, creator, owner fs.FileOwner, perms fs.FilePermissions) *Object { + return &Object{ + UserNS: un, + ID: id, + Key: key, + Creator: creator, + Owner: owner, + Perms: perms, + } +} + +// CheckOwnership verifies whether an IPC object may be accessed using creds as +// an owner. See ipc/util.c:ipcctl_obtain_check() in Linux. +func (o *Object) CheckOwnership(creds *auth.Credentials) bool { + if o.Owner.UID == creds.EffectiveKUID || o.Creator.UID == creds.EffectiveKUID { + return true + } + + // Tasks with CAP_SYS_ADMIN may bypass ownership checks. Strangely, Linux + // doesn't use CAP_IPC_OWNER for this despite CAP_IPC_OWNER being documented + // for use to "override IPC ownership checks". + return creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, o.UserNS) +} + +// CheckPermissions verifies whether an IPC object is accessible using creds for +// access described by req. See ipc/util.c:ipcperms() in Linux. +func (o *Object) CheckPermissions(creds *auth.Credentials, req fs.PermMask) bool { + p := o.Perms.Other + if o.Owner.UID == creds.EffectiveKUID { + p = o.Perms.User + } else if creds.InGroup(o.Owner.GID) { + p = o.Perms.Group + } + + if p.SupersetOf(req) { + return true + } + return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, o.UserNS) +} diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index a787c00a8..5dd607953 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -29,6 +29,7 @@ go_library( "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/ipc", "//pkg/sentry/kernel/time", "//pkg/sync", "//pkg/syserror", diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index 485c3a788..d609d88e2 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -51,11 +52,11 @@ type Registry struct { userNS *auth.UserNamespace // mu protects all fields below. mu sync.Mutex `state:"nosave"` - semaphores map[int32]*Set - lastIDUsed int32 + semaphores map[ipc.ID]*Set + lastIDUsed ipc.ID // indexes maintains a mapping between a set's index in virtual array and // its identifier. - indexes map[int32]int32 + indexes map[int32]ipc.ID } // Set represents a set of semaphores that can be operated atomically. @@ -65,19 +66,12 @@ type Set struct { // registry owning this sem set. Immutable. registry *Registry - // Id is a handle that identifies the set. - ID int32 - - // key is an user provided key that can be shared between processes. - key int32 + // mu protects all fields below. + mu sync.Mutex `state:"nosave"` - // creator is the user that created the set. Immutable. - creator fs.FileOwner + // obj defines basic fields that should be included in all SysV IPC objects. + obj *ipc.Object - // mu protects all fields below. - mu sync.Mutex `state:"nosave"` - owner fs.FileOwner - perms fs.FilePermissions opTime ktime.Time changeTime ktime.Time @@ -116,8 +110,8 @@ type waiter struct { func NewRegistry(userNS *auth.UserNamespace) *Registry { return &Registry{ userNS: userNS, - semaphores: make(map[int32]*Set), - indexes: make(map[int32]int32), + semaphores: make(map[ipc.ID]*Set), + indexes: make(map[int32]ipc.ID), } } @@ -126,7 +120,7 @@ func NewRegistry(userNS *auth.UserNamespace) *Registry { // a new set is always created. If create is false, it fails if a set cannot // be found. If exclusive is true, it fails if a set with the same key already // exists. -func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) { +func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) { if nsems < 0 || nsems > semsMax { return nil, linuxerr.EINVAL } @@ -142,7 +136,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu // Check that caller can access semaphore set. creds := auth.CredentialsFromContext(ctx) - if !set.checkPerms(creds, fs.PermsFromMode(mode)) { + if !set.obj.CheckPermissions(creds, fs.PermsFromMode(mode)) { return nil, linuxerr.EACCES } @@ -233,7 +227,7 @@ func (r *Registry) HighestIndex() int32 { // RemoveID removes set with give 'id' from the registry and marks the set as // dead. All waiters will be awakened and fail. -func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error { +func (r *Registry) RemoveID(id ipc.ID, creds *auth.Credentials) error { r.mu.Lock() defer r.mu.Unlock() @@ -252,27 +246,17 @@ func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error { // "The effective user ID of the calling process must match the creator or // owner of the semaphore set, or the caller must be privileged." - if !set.checkCredentials(creds) && !set.checkCapability(creds) { + if !set.obj.CheckOwnership(creds) { return linuxerr.EACCES } - delete(r.semaphores, set.ID) + delete(r.semaphores, set.obj.ID) delete(r.indexes, index) set.destroy() return nil } -func (r *Registry) newSet(ctx context.Context, key int32, owner, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) { - set := &Set{ - registry: r, - key: key, - owner: owner, - creator: owner, - perms: perms, - changeTime: ktime.NowFromContext(ctx), - sems: make([]sem, nsems), - } - +func (r *Registry) newSet(ctx context.Context, key ipc.Key, owner, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) { // Find the next available ID. for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { // Handle wrap around. @@ -287,8 +271,15 @@ func (r *Registry) newSet(ctx context.Context, key int32, owner, creator fs.File } r.indexes[index] = id r.lastIDUsed = id + + set := &Set{ + registry: r, + obj: ipc.NewObject(r.userNS, ipc.ID(id), ipc.Key(key), creator, owner, perms), + changeTime: ktime.NowFromContext(ctx), + sems: make([]sem, nsems), + } r.semaphores[id] = set - set.ID = id + return set, nil } } @@ -298,7 +289,7 @@ func (r *Registry) newSet(ctx context.Context, key int32, owner, creator fs.File } // FindByID looks up a set given an ID. -func (r *Registry) FindByID(id int32) *Set { +func (r *Registry) FindByID(id ipc.ID) *Set { r.mu.Lock() defer r.mu.Unlock() return r.semaphores[id] @@ -316,16 +307,16 @@ func (r *Registry) FindByIndex(index int32) *Set { return r.semaphores[id] } -func (r *Registry) findByKey(key int32) *Set { +func (r *Registry) findByKey(key ipc.Key) *Set { for _, v := range r.semaphores { - if v.key == key { + if v.obj.Key == key { return v } } return nil } -func (r *Registry) findIndexByID(id int32) (int32, bool) { +func (r *Registry) findIndexByID(id ipc.ID) (int32, bool) { for k, v := range r.indexes { if v == id { return k, true @@ -351,6 +342,11 @@ func (r *Registry) totalSems() int { return totalSems } +// ID returns semaphore's ID. +func (s *Set) ID() ipc.ID { + return s.obj.ID +} + func (s *Set) findSem(num int32) *sem { if num < 0 || int(num) >= s.Size() { return nil @@ -370,12 +366,12 @@ func (s *Set) Change(ctx context.Context, creds *auth.Credentials, owner fs.File // "The effective UID of the calling process must match the owner or creator // of the semaphore set, or the caller must be privileged." - if !s.checkCredentials(creds) && !s.checkCapability(creds) { + if !s.obj.CheckOwnership(creds) { return linuxerr.EACCES } - s.owner = owner - s.perms = perms + s.obj.Owner = owner + s.obj.Perms = perms s.changeTime = ktime.NowFromContext(ctx) return nil } @@ -395,18 +391,18 @@ func (s *Set) semStat(creds *auth.Credentials, permMask fs.PermMask) (*linux.Sem s.mu.Lock() defer s.mu.Unlock() - if !s.checkPerms(creds, permMask) { + if !s.obj.CheckPermissions(creds, permMask) { return nil, linuxerr.EACCES } return &linux.SemidDS{ SemPerm: linux.IPCPerm{ - Key: uint32(s.key), - UID: uint32(creds.UserNamespace.MapFromKUID(s.owner.UID)), - GID: uint32(creds.UserNamespace.MapFromKGID(s.owner.GID)), - CUID: uint32(creds.UserNamespace.MapFromKUID(s.creator.UID)), - CGID: uint32(creds.UserNamespace.MapFromKGID(s.creator.GID)), - Mode: uint16(s.perms.LinuxMode()), + Key: uint32(s.obj.Key), + UID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Owner.UID)), + GID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Owner.GID)), + CUID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Creator.UID)), + CGID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Creator.GID)), + Mode: uint16(s.obj.Perms.LinuxMode()), Seq: 0, // IPC sequence not supported. }, SemOTime: s.opTime.TimeT(), @@ -425,7 +421,7 @@ func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Cred defer s.mu.Unlock() // "The calling process must have alter permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Write: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Write: true}) { return linuxerr.EACCES } @@ -461,7 +457,7 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti defer s.mu.Unlock() // "The calling process must have alter permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Write: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Write: true}) { return linuxerr.EACCES } @@ -483,7 +479,7 @@ func (s *Set) GetVal(num int32, creds *auth.Credentials) (int16, error) { defer s.mu.Unlock() // "The calling process must have read permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Read: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { return 0, linuxerr.EACCES } @@ -500,7 +496,7 @@ func (s *Set) GetValAll(creds *auth.Credentials) ([]uint16, error) { defer s.mu.Unlock() // "The calling process must have read permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Read: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { return nil, linuxerr.EACCES } @@ -517,7 +513,7 @@ func (s *Set) GetPID(num int32, creds *auth.Credentials) (int32, error) { defer s.mu.Unlock() // "The calling process must have read permission on the semaphore set." - if !s.checkPerms(creds, fs.PermMask{Read: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { return 0, linuxerr.EACCES } @@ -533,7 +529,7 @@ func (s *Set) countWaiters(num int32, creds *auth.Credentials, pred func(w *wait defer s.mu.Unlock() // The calling process must have read permission on the semaphore set. - if !s.checkPerms(creds, fs.PermMask{Read: true}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { return 0, linuxerr.EACCES } @@ -589,7 +585,7 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr } } - if !s.checkPerms(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) { + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) { return nil, 0, linuxerr.EACCES } @@ -675,34 +671,6 @@ func (s *Set) AbortWait(num int32, ch chan struct{}) { // Waiter may not be found in case it raced with wakeWaiters(). } -func (s *Set) checkCredentials(creds *auth.Credentials) bool { - return s.owner.UID == creds.EffectiveKUID || - s.owner.GID == creds.EffectiveKGID || - s.creator.UID == creds.EffectiveKUID || - s.creator.GID == creds.EffectiveKGID -} - -func (s *Set) checkCapability(creds *auth.Credentials) bool { - return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, s.registry.userNS) && creds.UserNamespace.MapFromKUID(s.owner.UID).Ok() -} - -func (s *Set) checkPerms(creds *auth.Credentials, reqPerms fs.PermMask) bool { - // Are we owner, or in group, or other? - p := s.perms.Other - if s.owner.UID == creds.EffectiveKUID { - p = s.perms.User - } else if creds.InGroup(s.owner.GID) { - p = s.perms.Group - } - - // Are permissions satisfied without capability checks? - if p.SupersetOf(reqPerms) { - return true - } - - return s.checkCapability(creds) -} - // destroy destroys the set. // // Preconditions: Caller must hold 's.mu'. diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go index e47acefdf..0c7abb68e 100644 --- a/pkg/sentry/kernel/semaphore/semaphore_test.go +++ b/pkg/sentry/kernel/semaphore/semaphore_test.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" "gvisor.dev/gvisor/pkg/syserror" ) @@ -55,7 +56,7 @@ func signalled(ch chan struct{}) bool { func TestBasic(t *testing.T) { ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} + set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)} ops := []linux.Sembuf{ {SemOp: 1}, } @@ -76,7 +77,7 @@ func TestBasic(t *testing.T) { func TestWaitForZero(t *testing.T) { ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} + set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)} ops := []linux.Sembuf{ {SemOp: 0}, } @@ -115,7 +116,7 @@ func TestWaitForZero(t *testing.T) { func TestNoWait(t *testing.T) { ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} + set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)} ops := []linux.Sembuf{ {SemOp: 1}, } @@ -141,8 +142,8 @@ func TestUnregister(t *testing.T) { if err != nil { t.Fatalf("FindOrCreate() failed, err: %v", err) } - if got := r.FindByID(set.ID); got.ID != set.ID { - t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.ID, got, set) + if got := r.FindByID(set.obj.ID); got.obj.ID != set.obj.ID { + t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.obj.ID, got, set) } ops := []linux.Sembuf{ @@ -155,14 +156,14 @@ func TestUnregister(t *testing.T) { } creds := auth.CredentialsFromContext(ctx) - if err := r.RemoveID(set.ID, creds); err != nil { - t.Fatalf("RemoveID(%d) failed, err: %v", set.ID, err) + if err := r.RemoveID(set.obj.ID, creds); err != nil { + t.Fatalf("RemoveID(%d) failed, err: %v", set.obj.ID, err) } if !set.dead { t.Fatalf("set is not dead: %+v", set) } - if got := r.FindByID(set.ID); got != nil { - t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.ID, got) + if got := r.FindByID(set.obj.ID); got != nil { + t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.obj.ID, got) } for i, ch := range chs { if !signalled(ch) { diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index 5b69333fe..4e8deac4c 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -36,6 +36,7 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/ipc", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index f7ac4c2b2..459ae8c30 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -43,6 +43,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -51,12 +52,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) -// Key represents a shm segment key. Analogous to a file name. -type Key int32 - -// ID represents the opaque handle for a shm segment. Analogous to an fd. -type ID int32 - // Registry tracks all shared memory segments in an IPC namespace. The registry // provides the mechanisms for creating and finding segments, and reporting // global shm parameters. @@ -78,13 +73,13 @@ type Registry struct { // exists a short window during which a Shm still exists in Shm, but is // unreferenced. Users must use TryIncRef to determine if the Shm is // still valid. - shms map[ID]*Shm + shms map[ipc.ID]*Shm // keysToShms maps segment keys to segments. // // Shms in keysToShms are guaranteed to be referenced, as they are // removed by disassociateKey before the last DecRef. - keysToShms map[Key]*Shm + keysToShms map[ipc.Key]*Shm // Sum of the sizes of all existing segments rounded up to page size, in // units of page size. @@ -92,22 +87,22 @@ type Registry struct { // ID assigned to the last created segment. Used to quickly find the next // unused ID. - lastIDUsed ID + lastIDUsed ipc.ID } // NewRegistry creates a new shm registry. func NewRegistry(userNS *auth.UserNamespace) *Registry { return &Registry{ userNS: userNS, - shms: make(map[ID]*Shm), - keysToShms: make(map[Key]*Shm), + shms: make(map[ipc.ID]*Shm), + keysToShms: make(map[ipc.Key]*Shm), } } // FindByID looks up a segment given an ID. // // FindByID returns a reference on Shm. -func (r *Registry) FindByID(id ID) *Shm { +func (r *Registry) FindByID(id ipc.ID) *Shm { r.mu.Lock() defer r.mu.Unlock() s := r.shms[id] @@ -129,9 +124,9 @@ func (r *Registry) dissociateKey(s *Shm) { defer r.mu.Unlock() s.mu.Lock() defer s.mu.Unlock() - if s.key != linux.IPC_PRIVATE { - delete(r.keysToShms, s.key) - s.key = linux.IPC_PRIVATE + if s.obj.Key != linux.IPC_PRIVATE { + delete(r.keysToShms, s.obj.Key) + s.obj.Key = linux.IPC_PRIVATE } } @@ -139,7 +134,7 @@ func (r *Registry) dissociateKey(s *Shm) { // analogous to open(2). // // FindOrCreate returns a reference on Shm. -func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size uint64, mode linux.FileMode, private, create, exclusive bool) (*Shm, error) { +func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, size uint64, mode linux.FileMode, private, create, exclusive bool) (*Shm, error) { if (create || private) && (size < linux.SHMMIN || size > linux.SHMMAX) { // "A new segment was to be created and size is less than SHMMIN or // greater than SHMMAX." - man shmget(2) @@ -165,7 +160,8 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui defer shm.mu.Unlock() // Check that caller can access the segment. - if !shm.checkPermissions(ctx, fs.PermsFromMode(mode)) { + creds := auth.CredentialsFromContext(ctx) + if !shm.obj.CheckPermissions(creds, fs.PermsFromMode(mode)) { // "The user does not have permission to access the shared // memory segment, and does not have the CAP_IPC_OWNER // capability in the user namespace that governs its IPC @@ -227,7 +223,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui // newShm creates a new segment in the registry. // // Precondition: Caller must hold r.mu. -func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) { +func (r *Registry) newShm(ctx context.Context, pid int32, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) { mfp := pgalloc.MemoryFileProviderFromContext(ctx) if mfp == nil { panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider)) @@ -239,21 +235,6 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi return nil, err } - shm := &Shm{ - mfp: mfp, - registry: r, - creator: creator, - size: size, - effectiveSize: effectiveSize, - fr: fr, - key: key, - perms: perms, - owner: creator, - creatorPID: pid, - changeTime: ktime.NowFromContext(ctx), - } - shm.InitRefs() - // Find the next available ID. for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { // Handle wrap around. @@ -264,7 +245,18 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi if r.shms[id] == nil { r.lastIDUsed = id - shm.ID = id + shm := &Shm{ + mfp: mfp, + registry: r, + size: size, + effectiveSize: effectiveSize, + obj: ipc.NewObject(r.userNS, ipc.ID(id), ipc.Key(key), creator, creator, perms), + fr: fr, + creatorPID: pid, + changeTime: ktime.NowFromContext(ctx), + } + shm.InitRefs() + r.shms[id] = shm r.keysToShms[key] = shm @@ -314,11 +306,11 @@ func (r *Registry) remove(s *Shm) { s.mu.Lock() defer s.mu.Unlock() - if s.key != linux.IPC_PRIVATE { + if s.obj.Key != linux.IPC_PRIVATE { panic(fmt.Sprintf("Attempted to remove %s from the registry whose key is still associated", s.debugLocked())) } - delete(r.shms, s.ID) + delete(r.shms, s.obj.ID) r.totalPages -= s.effectiveSize / hostarch.PageSize } @@ -374,12 +366,6 @@ type Shm struct { // registry points to the shm registry containing this segment. Immutable. registry *Registry - // ID is the kernel identifier for this segment. Immutable. - ID ID - - // creator is the user that created the segment. Immutable. - creator fs.FileOwner - // size is the requested size of the segment at creation, in // bytes. Immutable. size uint64 @@ -397,14 +383,9 @@ type Shm struct { // mu protects all fields below. mu sync.Mutex `state:"nosave"` - // key is the public identifier for this segment. - key Key + // obj defines basic fields that should be included in all SysV IPC objects. + obj *ipc.Object - // perms is the access permissions for the segment. - perms fs.FilePermissions - - // owner of this segment. - owner fs.FileOwner // attachTime is updated on every successful shmat. attachTime ktime.Time // detachTime is updated on every successful shmdt. @@ -426,17 +407,22 @@ type Shm struct { pendingDestruction bool } +// ID returns object's ID. +func (s *Shm) ID() ipc.ID { + return s.obj.ID +} + // Precondition: Caller must hold s.mu. func (s *Shm) debugLocked() string { return fmt.Sprintf("Shm{id: %d, key: %d, size: %d bytes, refs: %d, destroyed: %v}", - s.ID, s.key, s.size, s.ReadRefs(), s.pendingDestruction) + s.obj.ID, s.obj.Key, s.size, s.ReadRefs(), s.pendingDestruction) } // MappedName implements memmap.MappingIdentity.MappedName. func (s *Shm) MappedName(ctx context.Context) string { s.mu.Lock() defer s.mu.Unlock() - return fmt.Sprintf("SYSV%08d", s.key) + return fmt.Sprintf("SYSV%08d", s.obj.Key) } // DeviceID implements memmap.MappingIdentity.DeviceID. @@ -448,7 +434,7 @@ func (s *Shm) DeviceID() uint64 { func (s *Shm) InodeID() uint64 { // "shmid gets reported as "inode#" in /proc/pid/maps. proc-ps tools use // this. Changing this will break them." -- Linux, ipc/shm.c:newseg() - return uint64(s.ID) + return uint64(s.obj.ID) } // DecRef drops a reference on s. @@ -551,7 +537,8 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts Atta return memmap.MMapOpts{}, syserror.EIDRM } - if !s.checkPermissions(ctx, fs.PermMask{ + creds := auth.CredentialsFromContext(ctx) + if !s.obj.CheckPermissions(creds, fs.PermMask{ Read: true, Write: !opts.Readonly, Execute: opts.Execute, @@ -591,7 +578,8 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) { // "The caller must have read permission on the shared memory segment." // - man shmctl(2) - if !s.checkPermissions(ctx, fs.PermMask{Read: true}) { + creds := auth.CredentialsFromContext(ctx) + if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) { // "IPC_STAT or SHM_STAT is requested and shm_perm.mode does not allow // read access for shmid, and the calling process does not have the // CAP_IPC_OWNER capability in the user namespace that governs its IPC @@ -603,7 +591,6 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) { if s.pendingDestruction { mode |= linux.SHM_DEST } - creds := auth.CredentialsFromContext(ctx) // Use the reference count as a rudimentary count of the number of // attaches. We exclude: @@ -620,12 +607,12 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) { ds := &linux.ShmidDS{ ShmPerm: linux.IPCPerm{ - Key: uint32(s.key), - UID: uint32(creds.UserNamespace.MapFromKUID(s.owner.UID)), - GID: uint32(creds.UserNamespace.MapFromKGID(s.owner.GID)), - CUID: uint32(creds.UserNamespace.MapFromKUID(s.creator.UID)), - CGID: uint32(creds.UserNamespace.MapFromKGID(s.creator.GID)), - Mode: mode | uint16(s.perms.LinuxMode()), + Key: uint32(s.obj.Key), + UID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Owner.UID)), + GID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Owner.GID)), + CUID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Creator.UID)), + CGID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Creator.GID)), + Mode: mode | uint16(s.obj.Perms.LinuxMode()), Seq: 0, // IPC sequences not supported. }, ShmSegsz: s.size, @@ -645,11 +632,11 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error { s.mu.Lock() defer s.mu.Unlock() - if !s.checkOwnership(ctx) { + creds := auth.CredentialsFromContext(ctx) + if !s.obj.CheckOwnership(creds) { return linuxerr.EPERM } - creds := auth.CredentialsFromContext(ctx) uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID)) gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID)) if !uid.Ok() || !gid.Ok() { @@ -659,10 +646,10 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error { // User may only modify the lower 9 bits of the mode. All the other bits are // always 0 for the underlying inode. mode := linux.FileMode(ds.ShmPerm.Mode & 0x1ff) - s.perms = fs.FilePermsFromMode(mode) + s.obj.Perms = fs.FilePermsFromMode(mode) - s.owner.UID = uid - s.owner.GID = gid + s.obj.Owner.UID = uid + s.obj.Owner.GID = gid s.changeTime = ktime.NowFromContext(ctx) return nil @@ -691,40 +678,3 @@ func (s *Shm) MarkDestroyed(ctx context.Context) { s.DecRef(ctx) return } - -// checkOwnership verifies whether a segment may be accessed by ctx as an -// owner. See ipc/util.c:ipcctl_pre_down_nolock() in Linux. -// -// Precondition: Caller must hold s.mu. -func (s *Shm) checkOwnership(ctx context.Context) bool { - creds := auth.CredentialsFromContext(ctx) - if s.owner.UID == creds.EffectiveKUID || s.creator.UID == creds.EffectiveKUID { - return true - } - - // Tasks with CAP_SYS_ADMIN may bypass ownership checks. Strangely, Linux - // doesn't use CAP_IPC_OWNER for this despite CAP_IPC_OWNER being documented - // for use to "override IPC ownership checks". - return creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, s.registry.userNS) -} - -// checkPermissions verifies whether a segment is accessible by ctx for access -// described by req. See ipc/util.c:ipcperms() in Linux. -// -// Precondition: Caller must hold s.mu. -func (s *Shm) checkPermissions(ctx context.Context, req fs.PermMask) bool { - creds := auth.CredentialsFromContext(ctx) - - p := s.perms.Other - if s.owner.UID == creds.EffectiveKUID { - p = s.perms.User - } else if creds.InGroup(s.owner.GID) { - p = s.perms.Group - } - if p.SupersetOf(req) { - return true - } - - // Tasks with CAP_IPC_OWNER may bypass permission checks. - return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, s.registry.userNS) -} diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index a2f612f45..7941b2be9 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -84,6 +84,7 @@ go_library( "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/eventfd", "//pkg/sentry/kernel/fasync", + "//pkg/sentry/kernel/ipc", "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/sched", "//pkg/sentry/kernel/shm", diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go index 30919eb2f..4755bef18 100644 --- a/pkg/sentry/syscalls/linux/sys_sem.go +++ b/pkg/sentry/syscalls/linux/sys_sem.go @@ -26,13 +26,14 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" ) const opsMax = 500 // SEMOPM // Semget handles: semget(key_t key, int nsems, int semflg) func Semget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - key := args[0].Int() + key := ipc.Key(args[0].Int()) nsems := args[1].Int() flag := args[2].Int() @@ -46,7 +47,7 @@ func Semget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, err } - return uintptr(set.ID), nil, nil + return uintptr(set.ID()), nil, nil } // Semtimedop handles: semop(int semid, struct sembuf *sops, size_t nsops, const struct timespec *timeout) @@ -56,7 +57,7 @@ func Semtimedop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return Semop(t, args) } - id := args[0].Int() + id := ipc.ID(args[0].Int()) sembufAddr := args[1].Pointer() nsops := args[2].SizeT() timespecAddr := args[3].Pointer() @@ -91,7 +92,7 @@ func Semtimedop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Semop handles: semop(int semid, struct sembuf *sops, size_t nsops) func Semop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - id := args[0].Int() + id := ipc.ID(args[0].Int()) sembufAddr := args[1].Pointer() nsops := args[2].SizeT() @@ -109,7 +110,7 @@ func Semop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, semTimedOp(t, id, ops, false, time.Second) } -func semTimedOp(t *kernel.Task, id int32, ops []linux.Sembuf, haveTimeout bool, timeout time.Duration) error { +func semTimedOp(t *kernel.Task, id ipc.ID, ops []linux.Sembuf, haveTimeout bool, timeout time.Duration) error { set := t.IPCNamespace().SemaphoreRegistry().FindByID(id) if set == nil { @@ -131,7 +132,7 @@ func semTimedOp(t *kernel.Task, id int32, ops []linux.Sembuf, haveTimeout bool, // Semctl handles: semctl(int semid, int semnum, int cmd, ...) func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - id := args[0].Int() + id := ipc.ID(args[0].Int()) num := args[1].Int() cmd := args[2].Int() @@ -210,7 +211,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case linux.SEM_STAT: arg := args[3].Pointer() // id is an index in SEM_STAT. - semid, ds, err := semStat(t, id) + semid, ds, err := semStat(t, int32(id)) if err != nil { return 0, nil, err } @@ -222,7 +223,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case linux.SEM_STAT_ANY: arg := args[3].Pointer() // id is an index in SEM_STAT. - semid, ds, err := semStatAny(t, id) + semid, ds, err := semStatAny(t, int32(id)) if err != nil { return 0, nil, err } @@ -236,13 +237,13 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } } -func remove(t *kernel.Task, id int32) error { +func remove(t *kernel.Task, id ipc.ID) error { r := t.IPCNamespace().SemaphoreRegistry() creds := auth.CredentialsFromContext(t) return r.RemoveID(id, creds) } -func ipcSet(t *kernel.Task, id int32, uid auth.UID, gid auth.GID, perms fs.FilePermissions) error { +func ipcSet(t *kernel.Task, id ipc.ID, uid auth.UID, gid auth.GID, perms fs.FilePermissions) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -262,7 +263,7 @@ func ipcSet(t *kernel.Task, id int32, uid auth.UID, gid auth.GID, perms fs.FileP return set.Change(t, creds, owner, perms) } -func ipcStat(t *kernel.Task, id int32) (*linux.SemidDS, error) { +func ipcStat(t *kernel.Task, id ipc.ID) (*linux.SemidDS, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -283,7 +284,7 @@ func semStat(t *kernel.Task, index int32) (int32, *linux.SemidDS, error) { if err != nil { return 0, ds, err } - return set.ID, ds, nil + return int32(set.ID()), ds, nil } func semStatAny(t *kernel.Task, index int32) (int32, *linux.SemidDS, error) { @@ -296,10 +297,10 @@ func semStatAny(t *kernel.Task, index int32) (int32, *linux.SemidDS, error) { if err != nil { return 0, ds, err } - return set.ID, ds, nil + return int32(set.ID()), ds, nil } -func setVal(t *kernel.Task, id int32, num int32, val int16) error { +func setVal(t *kernel.Task, id ipc.ID, num int32, val int16) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -310,7 +311,7 @@ func setVal(t *kernel.Task, id int32, num int32, val int16) error { return set.SetVal(t, num, val, creds, int32(pid)) } -func setValAll(t *kernel.Task, id int32, array hostarch.Addr) error { +func setValAll(t *kernel.Task, id ipc.ID, array hostarch.Addr) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -325,7 +326,7 @@ func setValAll(t *kernel.Task, id int32, array hostarch.Addr) error { return set.SetValAll(t, vals, creds, int32(pid)) } -func getVal(t *kernel.Task, id int32, num int32) (int16, error) { +func getVal(t *kernel.Task, id ipc.ID, num int32) (int16, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -335,7 +336,7 @@ func getVal(t *kernel.Task, id int32, num int32) (int16, error) { return set.GetVal(num, creds) } -func getValAll(t *kernel.Task, id int32, array hostarch.Addr) error { +func getValAll(t *kernel.Task, id ipc.ID, array hostarch.Addr) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -350,7 +351,7 @@ func getValAll(t *kernel.Task, id int32, array hostarch.Addr) error { return err } -func getPID(t *kernel.Task, id int32, num int32) (int32, error) { +func getPID(t *kernel.Task, id ipc.ID, num int32) (int32, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -369,7 +370,7 @@ func getPID(t *kernel.Task, id int32, num int32) (int32, error) { return int32(tg.ID()), nil } -func getZCnt(t *kernel.Task, id int32, num int32) (uint16, error) { +func getZCnt(t *kernel.Task, id ipc.ID, num int32) (uint16, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -379,7 +380,7 @@ func getZCnt(t *kernel.Task, id int32, num int32) (uint16, error) { return set.CountZeroWaiters(num, creds) } -func getNCnt(t *kernel.Task, id int32, num int32) (uint16, error) { +func getNCnt(t *kernel.Task, id ipc.ID, num int32) (uint16, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { diff --git a/pkg/sentry/syscalls/linux/sys_shm.go b/pkg/sentry/syscalls/linux/sys_shm.go index 3e3a952ce..840540506 100644 --- a/pkg/sentry/syscalls/linux/sys_shm.go +++ b/pkg/sentry/syscalls/linux/sys_shm.go @@ -19,12 +19,13 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" ) // Shmget implements shmget(2). func Shmget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - key := shm.Key(args[0].Int()) + key := ipc.Key(args[0].Int()) size := uint64(args[1].SizeT()) flag := args[2].Int() @@ -40,13 +41,13 @@ func Shmget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } defer segment.DecRef(t) - return uintptr(segment.ID), nil, nil + return uintptr(segment.ID()), nil, nil } // findSegment retrives a shm segment by the given id. // // findSegment returns a reference on Shm. -func findSegment(t *kernel.Task, id shm.ID) (*shm.Shm, error) { +func findSegment(t *kernel.Task, id ipc.ID) (*shm.Shm, error) { r := t.IPCNamespace().ShmRegistry() segment := r.FindByID(id) if segment == nil { @@ -58,7 +59,7 @@ func findSegment(t *kernel.Task, id shm.ID) (*shm.Shm, error) { // Shmat implements shmat(2). func Shmat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - id := shm.ID(args[0].Int()) + id := ipc.ID(args[0].Int()) addr := args[1].Pointer() flag := args[2].Int() @@ -89,7 +90,7 @@ func Shmdt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Shmctl implements shmctl(2). func Shmctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - id := shm.ID(args[0].Int()) + id := ipc.ID(args[0].Int()) cmd := args[1].Int() buf := args[2].Pointer() |