diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/abi/linux/fcntl.go | 3 | ||||
-rw-r--r-- | pkg/abi/linux/linux_abi_autogen_unsafe.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/lock/lock.go | 153 | ||||
-rw-r--r-- | pkg/sentry/fs/lock/lock_set_functions.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fs/lock/lock_state_autogen.go | 27 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/gofer.go | 8 | ||||
-rw-r--r-- | pkg/sentry/kernel/threads.go | 12 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_file.go | 16 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/fd.go | 58 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/lock.go | 4 | ||||
-rw-r--r-- | pkg/sentry/vfs/file_description.go | 20 | ||||
-rw-r--r-- | pkg/sentry/vfs/file_description_impl_util.go | 22 | ||||
-rw-r--r-- | pkg/sentry/vfs/lock.go | 35 |
13 files changed, 279 insertions, 88 deletions
diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go index d1ca56370..b84d7c048 100644 --- a/pkg/abi/linux/fcntl.go +++ b/pkg/abi/linux/fcntl.go @@ -21,6 +21,7 @@ const ( F_SETFD = 2 F_GETFL = 3 F_SETFL = 4 + F_GETLK = 5 F_SETLK = 6 F_SETLKW = 7 F_SETOWN = 8 @@ -55,7 +56,7 @@ type Flock struct { _ [4]byte Start int64 Len int64 - Pid int32 + PID int32 _ [4]byte } diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go index 7d571c7a7..4960295ae 100644 --- a/pkg/abi/linux/linux_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go @@ -1284,7 +1284,7 @@ func (f *Flock) MarshalBytes(dst []byte) { dst = dst[8:] usermem.ByteOrder.PutUint64(dst[:8], uint64(f.Len)) dst = dst[8:] - usermem.ByteOrder.PutUint32(dst[:4], uint32(f.Pid)) + usermem.ByteOrder.PutUint32(dst[:4], uint32(f.PID)) dst = dst[4:] // Padding: dst[:sizeof(byte)*4] ~= [4]byte{0} dst = dst[1*(4):] @@ -1302,7 +1302,7 @@ func (f *Flock) UnmarshalBytes(src []byte) { src = src[8:] f.Len = int64(usermem.ByteOrder.Uint64(src[:8])) src = src[8:] - f.Pid = int32(usermem.ByteOrder.Uint32(src[:4])) + f.PID = int32(usermem.ByteOrder.Uint32(src[:4])) src = src[4:] // Padding: ~ copy([4]byte(f._), src[:sizeof(byte)*4]) src = src[1*(4):] diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go index 8a5d9c7eb..57686ce07 100644 --- a/pkg/sentry/fs/lock/lock.go +++ b/pkg/sentry/fs/lock/lock.go @@ -54,6 +54,8 @@ import ( "math" "syscall" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) @@ -83,6 +85,17 @@ const ( // offset 0 to LockEOF. const LockEOF = math.MaxUint64 +// OwnerInfo describes the owner of a lock. +// +// TODO(gvisor.dev/issue/5264): We may need to add other fields in the future +// (e.g., Linux's file_lock.fl_flags to support open file-descriptor locks). +// +// +stateify savable +type OwnerInfo struct { + // PID is the process ID of the lock owner. + PID int32 +} + // Lock is a regional file lock. It consists of either a single writer // or a set of readers. // @@ -92,14 +105,20 @@ const LockEOF = math.MaxUint64 // A Lock may be downgraded from a write lock to a read lock only if // the write lock's uid is the same as the read lock. // +// Accesses to Lock are synchronized through the Locks object to which it +// belongs. +// // +stateify savable type Lock struct { // Readers are the set of read lock holders identified by UniqueID. - // If len(Readers) > 0 then HasWriter must be false. - Readers map[UniqueID]bool + // If len(Readers) > 0 then Writer must be nil. + Readers map[UniqueID]OwnerInfo // Writer holds the writer unique ID. It's nil if there are no writers. Writer UniqueID + + // WriterInfo describes the writer. It is only meaningful if Writer != nil. + WriterInfo OwnerInfo } // Locks is a thread-safe wrapper around a LockSet. @@ -135,14 +154,14 @@ const ( // acquiring the lock in a non-blocking mode or "interrupted" if in a blocking mode. // Blocker is the interface used to provide blocking behavior, passing a nil Blocker // will result in non-blocking behavior. -func (l *Locks) LockRegion(uid UniqueID, t LockType, r LockRange, block Blocker) bool { +func (l *Locks) LockRegion(uid UniqueID, ownerPID int32, t LockType, r LockRange, block Blocker) bool { for { l.mu.Lock() // Blocking locks must run in a loop because we'll be woken up whenever an unlock event // happens for this lock. We will then attempt to take the lock again and if it fails // continue blocking. - res := l.locks.lock(uid, t, r) + res := l.locks.lock(uid, ownerPID, t, r) if !res && block != nil { e, ch := waiter.NewChannelEntry(nil) l.blockedQueue.EventRegister(&e, EventMaskAll) @@ -161,6 +180,14 @@ func (l *Locks) LockRegion(uid UniqueID, t LockType, r LockRange, block Blocker) } } +// LockRegionVFS1 is a wrapper around LockRegion for VFS1, which does not implement +// F_GETLK (and does not care about storing PIDs as a result). +// +// TODO(gvisor.dev/issue/1624): Delete. +func (l *Locks) LockRegionVFS1(uid UniqueID, t LockType, r LockRange, block Blocker) bool { + return l.LockRegion(uid, 0 /* ownerPID */, t, r, block) +} + // UnlockRegion attempts to release a lock for the uid on a region of a file. // This operation is always successful, even if there did not exist a lock on // the requested region held by uid in the first place. @@ -175,13 +202,14 @@ func (l *Locks) UnlockRegion(uid UniqueID, r LockRange) { // makeLock returns a new typed Lock that has either uid as its only reader // or uid as its only writer. -func makeLock(uid UniqueID, t LockType) Lock { - value := Lock{Readers: make(map[UniqueID]bool)} +func makeLock(uid UniqueID, ownerPID int32, t LockType) Lock { + value := Lock{Readers: make(map[UniqueID]OwnerInfo)} switch t { case ReadLock: - value.Readers[uid] = true + value.Readers[uid] = OwnerInfo{PID: ownerPID} case WriteLock: value.Writer = uid + value.WriterInfo = OwnerInfo{PID: ownerPID} default: panic(fmt.Sprintf("makeLock: invalid lock type %d", t)) } @@ -190,17 +218,20 @@ func makeLock(uid UniqueID, t LockType) Lock { // isHeld returns true if uid is a holder of Lock. func (l Lock) isHeld(uid UniqueID) bool { - return l.Writer == uid || l.Readers[uid] + if _, ok := l.Readers[uid]; ok { + return true + } + return l.Writer == uid } // lock sets uid as a holder of a typed lock on Lock. // // Preconditions: canLock is true for the range containing this Lock. -func (l *Lock) lock(uid UniqueID, t LockType) { +func (l *Lock) lock(uid UniqueID, ownerPID int32, t LockType) { switch t { case ReadLock: // If we are already a reader, then this is a no-op. - if l.Readers[uid] { + if _, ok := l.Readers[uid]; ok { return } // We cannot downgrade a write lock to a read lock unless the @@ -210,11 +241,11 @@ func (l *Lock) lock(uid UniqueID, t LockType) { panic(fmt.Sprintf("lock: cannot downgrade write lock to read lock for uid %d, writer is %d", uid, l.Writer)) } // Ensure that there is only one reader if upgrading. - l.Readers = make(map[UniqueID]bool) + l.Readers = make(map[UniqueID]OwnerInfo) // Ensure that there is no longer a writer. l.Writer = nil } - l.Readers[uid] = true + l.Readers[uid] = OwnerInfo{PID: ownerPID} return case WriteLock: // If we are already the writer, then this is a no-op. @@ -228,13 +259,14 @@ func (l *Lock) lock(uid UniqueID, t LockType) { if readers != 1 { panic(fmt.Sprintf("lock: cannot upgrade read lock to write lock for uid %d, too many readers %v", uid, l.Readers)) } - if !l.Readers[uid] { + if _, ok := l.Readers[uid]; !ok { panic(fmt.Sprintf("lock: cannot upgrade read lock to write lock for uid %d, conflicting reader %v", uid, l.Readers)) } } // Ensure that there is only a writer. - l.Readers = make(map[UniqueID]bool) + l.Readers = make(map[UniqueID]OwnerInfo) l.Writer = uid + l.WriterInfo = OwnerInfo{PID: ownerPID} default: panic(fmt.Sprintf("lock: invalid lock type %d", t)) } @@ -247,7 +279,7 @@ func (l LockSet) lockable(r LockRange, check func(value Lock) bool) bool { // Get our starting point. seg := l.LowerBoundSegment(r.Start) for seg.Ok() && seg.Start() < r.End { - // Note that we don't care about overruning the end of the + // Note that we don't care about overrunning the end of the // last segment because if everything checks out we'll just // split the last segment. if !check(seg.Value()) { @@ -281,7 +313,7 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool { if value.Writer == nil { // Then this uid can only take a write lock if this is a private // upgrade, meaning that the only reader is uid. - return len(value.Readers) == 1 && value.Readers[uid] + return value.isOnlyReader(uid) } // If the uid is already a writer on this region, then // adding a write lock would be a no-op. @@ -292,11 +324,19 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool { } } +func (l *Lock) isOnlyReader(uid UniqueID) bool { + if len(l.Readers) != 1 { + return false + } + _, ok := l.Readers[uid] + return ok +} + // lock returns true if uid took a lock of type t on the entire range of // LockRange. // // Preconditions: r.Start <= r.End (will panic otherwise). -func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool { +func (l *LockSet) lock(uid UniqueID, ownerPID int32, t LockType, r LockRange) bool { if r.Start > r.End { panic(fmt.Sprintf("lock: r.Start %d > r.End %d", r.Start, r.End)) } @@ -317,7 +357,7 @@ func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool { seg, gap := l.Find(r.Start) if gap.Ok() { // Fill in the gap and get the next segment to modify. - seg = l.Insert(gap, gap.Range().Intersect(r), makeLock(uid, t)).NextSegment() + seg = l.Insert(gap, gap.Range().Intersect(r), makeLock(uid, ownerPID, t)).NextSegment() } else if seg.Start() < r.Start { // Get our first segment to modify. _, seg = l.Split(seg, r.Start) @@ -331,12 +371,12 @@ func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool { // Set the lock on the segment. This is guaranteed to // always be safe, given canLock above. value := seg.ValuePtr() - value.lock(uid, t) + value.lock(uid, ownerPID, t) // Fill subsequent gaps. gap = seg.NextGap() if gr := gap.Range().Intersect(r); gr.Length() > 0 { - seg = l.Insert(gap, gr, makeLock(uid, t)).NextSegment() + seg = l.Insert(gap, gr, makeLock(uid, ownerPID, t)).NextSegment() } else { seg = gap.NextSegment() } @@ -380,7 +420,7 @@ func (l *LockSet) unlock(uid UniqueID, r LockRange) { // only ever be one writer and no readers, then this // lock should always be removed from the set. remove = true - } else if value.Readers[uid] { + } else if _, ok := value.Readers[uid]; ok { // If uid is the last reader, then just remove the entire // segment. if len(value.Readers) == 1 { @@ -390,7 +430,7 @@ func (l *LockSet) unlock(uid UniqueID, r LockRange) { // affecting any other segment's readers. To do // this, we need to make a copy of the Readers map // and not add this uid. - newValue := Lock{Readers: make(map[UniqueID]bool)} + newValue := Lock{Readers: make(map[UniqueID]OwnerInfo)} for k, v := range value.Readers { if k != uid { newValue.Readers[k] = v @@ -451,3 +491,72 @@ func ComputeRange(start, length, offset int64) (LockRange, error) { // Offset is guaranteed to be positive at this point. return LockRange{Start: uint64(offset), End: end}, nil } + +// TestRegion checks whether the lock holder identified by uid can hold a lock +// of type t on range r. It returns a Flock struct representing this +// information as the F_GETLK fcntl does. +// +// Note that the PID returned in the flock structure is relative to the root PID +// namespace. It needs to be converted to the caller's PID namespace before +// returning to userspace. +// +// TODO(gvisor.dev/issue/5264): we don't support OFD locks through fcntl, which +// would return a struct with pid = -1. +func (l *Locks) TestRegion(ctx context.Context, uid UniqueID, t LockType, r LockRange) linux.Flock { + f := linux.Flock{Type: linux.F_UNLCK} + switch t { + case ReadLock: + l.testRegion(r, func(lock Lock, start, length uint64) bool { + if lock.Writer == nil || lock.Writer == uid { + return true + } + f.Type = linux.F_WRLCK + f.PID = lock.WriterInfo.PID + f.Start = int64(start) + f.Len = int64(length) + return false + }) + case WriteLock: + l.testRegion(r, func(lock Lock, start, length uint64) bool { + if lock.Writer == nil { + for k, v := range lock.Readers { + if k != uid { + // Stop at the first conflict detected. + f.Type = linux.F_RDLCK + f.PID = v.PID + f.Start = int64(start) + f.Len = int64(length) + return false + } + } + return true + } + if lock.Writer == uid { + return true + } + f.Type = linux.F_WRLCK + f.PID = lock.WriterInfo.PID + f.Start = int64(start) + f.Len = int64(length) + return false + }) + default: + panic(fmt.Sprintf("TestRegion: invalid lock type %d", t)) + } + return f +} + +func (l *Locks) testRegion(r LockRange, check func(lock Lock, start, length uint64) bool) { + l.mu.Lock() + defer l.mu.Unlock() + + seg := l.locks.LowerBoundSegment(r.Start) + for seg.Ok() && seg.Start() < r.End { + lock := seg.Value() + if !check(lock, seg.Start(), seg.End()-seg.Start()) { + // Stop at the first conflict detected. + return + } + seg = seg.NextSegment() + } +} diff --git a/pkg/sentry/fs/lock/lock_set_functions.go b/pkg/sentry/fs/lock/lock_set_functions.go index 50a16e662..dcc17c0dc 100644 --- a/pkg/sentry/fs/lock/lock_set_functions.go +++ b/pkg/sentry/fs/lock/lock_set_functions.go @@ -40,7 +40,7 @@ func (lockSetFunctions) Merge(r1 LockRange, val1 Lock, r2 LockRange, val2 Lock) return Lock{}, false } for k := range val1.Readers { - if !val2.Readers[k] { + if _, ok := val2.Readers[k]; !ok { return Lock{}, false } } @@ -53,11 +53,12 @@ func (lockSetFunctions) Merge(r1 LockRange, val1 Lock, r2 LockRange, val2 Lock) func (lockSetFunctions) Split(r LockRange, val Lock, split uint64) (Lock, Lock) { // Copy the segment so that split segments don't contain map references // to other segments. - val0 := Lock{Readers: make(map[UniqueID]bool)} + val0 := Lock{Readers: make(map[UniqueID]OwnerInfo)} for k, v := range val.Readers { val0.Readers[k] = v } val0.Writer = val.Writer + val0.WriterInfo = val.WriterInfo return val, val0 } diff --git a/pkg/sentry/fs/lock/lock_state_autogen.go b/pkg/sentry/fs/lock/lock_state_autogen.go index 707a7518f..49db04cfb 100644 --- a/pkg/sentry/fs/lock/lock_state_autogen.go +++ b/pkg/sentry/fs/lock/lock_state_autogen.go @@ -6,6 +6,29 @@ import ( "gvisor.dev/gvisor/pkg/state" ) +func (o *OwnerInfo) StateTypeName() string { + return "pkg/sentry/fs/lock.OwnerInfo" +} + +func (o *OwnerInfo) StateFields() []string { + return []string{ + "PID", + } +} + +func (o *OwnerInfo) beforeSave() {} + +func (o *OwnerInfo) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.PID) +} + +func (o *OwnerInfo) afterLoad() {} + +func (o *OwnerInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.PID) +} + func (l *Lock) StateTypeName() string { return "pkg/sentry/fs/lock.Lock" } @@ -14,6 +37,7 @@ func (l *Lock) StateFields() []string { return []string{ "Readers", "Writer", + "WriterInfo", } } @@ -23,6 +47,7 @@ func (l *Lock) StateSave(stateSinkObject state.Sink) { l.beforeSave() stateSinkObject.Save(0, &l.Readers) stateSinkObject.Save(1, &l.Writer) + stateSinkObject.Save(2, &l.WriterInfo) } func (l *Lock) afterLoad() {} @@ -30,6 +55,7 @@ func (l *Lock) afterLoad() {} func (l *Lock) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &l.Readers) stateSourceObject.Load(1, &l.Writer) + stateSourceObject.Load(2, &l.WriterInfo) } func (l *Locks) StateTypeName() string { @@ -182,6 +208,7 @@ func (l *LockSegmentDataSlices) StateLoad(stateSourceObject state.Source) { } func init() { + state.Register((*OwnerInfo)(nil)) state.Register((*Lock)(nil)) state.Register((*Locks)(nil)) state.Register((*LockRange)(nil)) diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index a43364b55..98f7bc52f 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -1944,19 +1944,19 @@ func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error { } // LockBSD implements vfs.FileDescriptionImpl.LockBSD. -func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { +func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error { fd.lockLogging.Do(func() { log.Infof("File lock using gofer file handled internally.") }) - return fd.LockFD.LockBSD(ctx, uid, t, block) + return fd.LockFD.LockBSD(ctx, uid, ownerPID, t, block) } // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { +func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { fd.lockLogging.Do(func() { log.Infof("Range lock using gofer file handled internally.") }) - return fd.Locks().LockPOSIX(ctx, uid, t, r, block) + return fd.Locks().LockPOSIX(ctx, uid, ownerPID, t, r, block) } // UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index fdadb52c0..e9da99067 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -216,7 +216,7 @@ func (ns *PIDNamespace) TaskWithID(tid ThreadID) *Task { return t } -// ThreadGroupWithID returns the thread group lead by the task with thread ID +// ThreadGroupWithID returns the thread group led by the task with thread ID // tid in PID namespace ns. If no task has that TID, or if the task with that // TID is not a thread group leader, ThreadGroupWithID returns nil. func (ns *PIDNamespace) ThreadGroupWithID(tid ThreadID) *ThreadGroup { @@ -292,6 +292,11 @@ func (ns *PIDNamespace) UserNamespace() *auth.UserNamespace { return ns.userns } +// Root returns the root PID namespace of ns. +func (ns *PIDNamespace) Root() *PIDNamespace { + return ns.owner.Root +} + // A threadGroupNode defines the relationship between a thread group and the // rest of the system. Conceptually, threadGroupNode is data belonging to the // owning TaskSet, as if TaskSet contained a field `nodes @@ -485,3 +490,8 @@ func (t *Task) Parent() *Task { func (t *Task) ThreadID() ThreadID { return t.tg.pidns.IDOfTask(t) } + +// TGIDInRoot returns t's TGID in the root PID namespace. +func (t *Task) TGIDInRoot() ThreadID { + return t.tg.pidns.owner.Root.IDOfThreadGroup(t.tg) +} diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index c33571f43..a6253626e 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -1014,12 +1014,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, nil) { return 0, nil, syserror.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, t) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, t) { return 0, nil, syserror.EINTR } } @@ -1030,12 +1030,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, nil) { return 0, nil, syserror.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, t) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, t) { return 0, nil, syserror.EINTR } } @@ -2167,24 +2167,24 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.LOCK_EX: if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, nil) { return 0, nil, syserror.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, t) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, t) { return 0, nil, syserror.EINTR } } case linux.LOCK_SH: if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, nil) { return 0, nil, syserror.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, t) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, t) { return 0, nil, syserror.EINTR } } diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 9281952cb..e39f074f2 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -205,8 +205,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } err := tmpfs.AddSeals(file, args[2].Uint()) return 0, nil, err - case linux.F_SETLK, linux.F_SETLKW: - return 0, nil, posixLock(t, args, file, cmd) + case linux.F_SETLK: + return 0, nil, posixLock(t, args, file, false /* blocking */) + case linux.F_SETLKW: + return 0, nil, posixLock(t, args, file, true /* blocking */) + case linux.F_GETLK: + return 0, nil, posixTestLock(t, args, file) case linux.F_GETSIG: a := file.AsyncHandler() if a == nil { @@ -292,7 +296,49 @@ func setAsyncOwner(t *kernel.Task, fd int, file *vfs.FileDescription, ownerType, } } -func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, cmd int32) error { +func posixTestLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription) error { + // Copy in the lock request. + flockAddr := args[2].Pointer() + var flock linux.Flock + if _, err := flock.CopyIn(t, flockAddr); err != nil { + return err + } + var typ lock.LockType + switch flock.Type { + case linux.F_RDLCK: + typ = lock.ReadLock + case linux.F_WRLCK: + typ = lock.WriteLock + default: + return syserror.EINVAL + } + r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence) + if err != nil { + return err + } + + newFlock, err := file.TestPOSIX(t, t.FDTable(), typ, r) + if err != nil { + return err + } + newFlock.PID = translatePID(t.PIDNamespace().Root(), t.PIDNamespace(), newFlock.PID) + if _, err = newFlock.CopyOut(t, flockAddr); err != nil { + return err + } + return nil +} + +// translatePID translates a pid from one namespace to another. Note that this +// may race with task termination/creation, in which case the original task +// corresponding to pid may no longer exist. This is used to implement the +// F_GETLK fcntl, which has the same potential race in Linux as well (i.e., +// there is no synchronization between retrieving the lock PID and translating +// it). See fs/locks.c:posix_lock_to_flock. +func translatePID(old, new *kernel.PIDNamespace, pid int32) int32 { + return int32(new.IDOfTask(old.TaskWithID(kernel.ThreadID(pid)))) +} + +func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, blocking bool) error { // Copy in the lock request. flockAddr := args[2].Pointer() var flock linux.Flock @@ -301,7 +347,7 @@ func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescrip } var blocker lock.Blocker - if cmd == linux.F_SETLKW { + if blocking { blocker = t } @@ -315,13 +361,13 @@ func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescrip if !file.IsReadable() { return syserror.EBADF } - return file.LockPOSIX(t, t.FDTable(), lock.ReadLock, r, blocker) + return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.ReadLock, r, blocker) case linux.F_WRLCK: if !file.IsWritable() { return syserror.EBADF } - return file.LockPOSIX(t, t.FDTable(), lock.WriteLock, r, blocker) + return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.WriteLock, r, blocker) case linux.F_UNLCK: return file.UnlockPOSIX(t, t.FDTable(), r) diff --git a/pkg/sentry/syscalls/linux/vfs2/lock.go b/pkg/sentry/syscalls/linux/vfs2/lock.go index b910b5a74..d1452a04d 100644 --- a/pkg/sentry/syscalls/linux/vfs2/lock.go +++ b/pkg/sentry/syscalls/linux/vfs2/lock.go @@ -44,11 +44,11 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall switch operation { case linux.LOCK_EX: - if err := file.LockBSD(t, lock.WriteLock, blocker); err != nil { + if err := file.LockBSD(t, int32(t.TGIDInRoot()), lock.WriteLock, blocker); err != nil { return 0, nil, err } case linux.LOCK_SH: - if err := file.LockBSD(t, lock.ReadLock, blocker); err != nil { + if err := file.LockBSD(t, int32(t.TGIDInRoot()), lock.ReadLock, blocker); err != nil { return 0, nil, err } case linux.LOCK_UN: diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index dbf8425b4..eb6c2e36b 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -448,16 +448,19 @@ type FileDescriptionImpl interface { RemoveXattr(ctx context.Context, name string) error // LockBSD tries to acquire a BSD-style advisory file lock. - LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error + LockBSD(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, block lock.Blocker) error // UnlockBSD releases a BSD-style advisory file lock. UnlockBSD(ctx context.Context, uid lock.UniqueID) error // LockPOSIX tries to acquire a POSIX-style advisory file lock. - LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange, block lock.Blocker) error + LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block lock.Blocker) error // UnlockPOSIX releases a POSIX-style advisory file lock. UnlockPOSIX(ctx context.Context, uid lock.UniqueID, ComputeLockRange lock.LockRange) error + + // TestPOSIX returns information about whether the specified lock can be held, in the style of the F_GETLK fcntl. + TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error) } // Dirent holds the information contained in struct linux_dirent64. @@ -791,9 +794,9 @@ func (fd *FileDescription) Msync(ctx context.Context, mr memmap.MappableRange) e } // LockBSD tries to acquire a BSD-style advisory file lock. -func (fd *FileDescription) LockBSD(ctx context.Context, lockType lock.LockType, blocker lock.Blocker) error { +func (fd *FileDescription) LockBSD(ctx context.Context, ownerPID int32, lockType lock.LockType, blocker lock.Blocker) error { atomic.StoreUint32(&fd.usedLockBSD, 1) - return fd.impl.LockBSD(ctx, fd, lockType, blocker) + return fd.impl.LockBSD(ctx, fd, ownerPID, lockType, blocker) } // UnlockBSD releases a BSD-style advisory file lock. @@ -802,8 +805,8 @@ func (fd *FileDescription) UnlockBSD(ctx context.Context) error { } // LockPOSIX locks a POSIX-style file range lock. -func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange, block lock.Blocker) error { - return fd.impl.LockPOSIX(ctx, uid, t, r, block) +func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block lock.Blocker) error { + return fd.impl.LockPOSIX(ctx, uid, ownerPID, t, r, block) } // UnlockPOSIX unlocks a POSIX-style file range lock. @@ -811,6 +814,11 @@ func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, r return fd.impl.UnlockPOSIX(ctx, uid, r) } +// TestPOSIX returns information about whether the specified lock can be held. +func (fd *FileDescription) TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error) { + return fd.impl.TestPOSIX(ctx, uid, t, r) +} + // ComputeLockRange computes the range of a file lock based on the given values. func (fd *FileDescription) ComputeLockRange(ctx context.Context, start uint64, length uint64, whence int16) (lock.LockRange, error) { var off int64 diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index 9d13e2ccb..eb7d2fd3b 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -419,8 +419,8 @@ func (fd *LockFD) Locks() *FileLocks { } // LockBSD implements vfs.FileDescriptionImpl.LockBSD. -func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { - return fd.locks.LockBSD(uid, t, block) +func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error { + return fd.locks.LockBSD(ctx, uid, ownerPID, t, block) } // UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. @@ -430,8 +430,8 @@ func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { } // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { - return fd.locks.LockPOSIX(ctx, uid, t, r, block) +func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { + return fd.locks.LockPOSIX(ctx, uid, ownerPID, t, r, block) } // UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. @@ -439,6 +439,11 @@ func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock return fd.locks.UnlockPOSIX(ctx, uid, r) } +// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX. +func (fd *LockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) { + return fd.locks.TestPOSIX(ctx, uid, t, r) +} + // NoLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface // returning ENOLCK. // @@ -446,7 +451,7 @@ func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock type NoLockFD struct{} // LockBSD implements vfs.FileDescriptionImpl.LockBSD. -func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { +func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error { return syserror.ENOLCK } @@ -456,7 +461,7 @@ func (NoLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { } // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { +func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { return syserror.ENOLCK } @@ -464,3 +469,8 @@ func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.Loc func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error { return syserror.ENOLCK } + +// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX. +func (NoLockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) { + return linux.Flock{}, syserror.ENOLCK +} diff --git a/pkg/sentry/vfs/lock.go b/pkg/sentry/vfs/lock.go index bcff35792..cbe4d8c2d 100644 --- a/pkg/sentry/vfs/lock.go +++ b/pkg/sentry/vfs/lock.go @@ -39,8 +39,8 @@ type FileLocks struct { } // LockBSD tries to acquire a BSD-style lock on the entire file. -func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { - if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) { +func (fl *FileLocks) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerID int32, t fslock.LockType, block fslock.Blocker) error { + if fl.bsd.LockRegion(uid, ownerID, t, fslock.LockRange{0, fslock.LockEOF}, block) { return nil } @@ -61,8 +61,8 @@ func (fl *FileLocks) UnlockBSD(uid fslock.UniqueID) { } // LockPOSIX tries to acquire a POSIX-style lock on a file region. -func (fl *FileLocks) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { - if fl.posix.LockRegion(uid, t, r, block) { +func (fl *FileLocks) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { + if fl.posix.LockRegion(uid, ownerPID, t, r, block) { return nil } @@ -83,28 +83,7 @@ func (fl *FileLocks) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fsl return nil } -func computeRange(ctx context.Context, fd *FileDescription, start uint64, length uint64, whence int16) (fslock.LockRange, error) { - var off int64 - switch whence { - case linux.SEEK_SET: - off = 0 - case linux.SEEK_CUR: - // Note that Linux does not hold any mutexes while retrieving the file - // offset, see fs/locks.c:flock_to_posix_lock and fs/locks.c:fcntl_setlk. - curOff, err := fd.Seek(ctx, 0, linux.SEEK_CUR) - if err != nil { - return fslock.LockRange{}, err - } - off = curOff - case linux.SEEK_END: - stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_SIZE}) - if err != nil { - return fslock.LockRange{}, err - } - off = int64(stat.Size) - default: - return fslock.LockRange{}, syserror.EINVAL - } - - return fslock.ComputeRange(int64(start), int64(length), off) +// TestPOSIX returns information about whether the specified lock can be held, in the style of the F_GETLK fcntl. +func (fl *FileLocks) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) { + return fl.posix.TestRegion(ctx, uid, t, r), nil } |