-rw-r--r--  pkg/abi/linux/fcntl.go                         3
-rw-r--r--  pkg/abi/linux/linux_abi_autogen_unsafe.go      4
-rw-r--r--  pkg/sentry/fs/lock/lock.go                   153
-rw-r--r--  pkg/sentry/fs/lock/lock_set_functions.go       5
-rw-r--r--  pkg/sentry/fs/lock/lock_state_autogen.go      27
-rw-r--r--  pkg/sentry/fsimpl/gofer/gofer.go               8
-rw-r--r--  pkg/sentry/kernel/threads.go                  12
-rw-r--r--  pkg/sentry/syscalls/linux/sys_file.go         16
-rw-r--r--  pkg/sentry/syscalls/linux/vfs2/fd.go          58
-rw-r--r--  pkg/sentry/syscalls/linux/vfs2/lock.go         4
-rw-r--r--  pkg/sentry/vfs/file_description.go            20
-rw-r--r--  pkg/sentry/vfs/file_description_impl_util.go  22
-rw-r--r--  pkg/sentry/vfs/lock.go                        35
13 files changed, 279 insertions(+), 88 deletions(-)
diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go
index d1ca56370..b84d7c048 100644
--- a/pkg/abi/linux/fcntl.go
+++ b/pkg/abi/linux/fcntl.go
@@ -21,6 +21,7 @@ const (
F_SETFD = 2
F_GETFL = 3
F_SETFL = 4
+ F_GETLK = 5
F_SETLK = 6
F_SETLKW = 7
F_SETOWN = 8
@@ -55,7 +56,7 @@ type Flock struct {
_ [4]byte
Start int64
Len int64
- Pid int32
+ PID int32
_ [4]byte
}
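
For reference, a guest program exercises the new F_GETLK command roughly as in the sketch below. This is illustrative only and not part of the change; the file path and the use of golang.org/x/sys/unix are assumptions.

package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	f, err := os.OpenFile("/tmp/lockfile", os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Ask whether a whole-file write lock could be taken (Len == 0 means "to EOF").
	fl := unix.Flock_t{Type: unix.F_WRLCK, Whence: unix.SEEK_SET, Start: 0, Len: 0}
	if err := unix.FcntlFlock(f.Fd(), unix.F_GETLK, &fl); err != nil {
		panic(err)
	}
	if fl.Type == unix.F_UNLCK {
		fmt.Println("no conflicting lock")
	} else {
		// Pid is reported relative to the caller's PID namespace; see the
		// translation in pkg/sentry/syscalls/linux/vfs2/fd.go below.
		fmt.Printf("conflict: type=%d pid=%d start=%d len=%d\n", fl.Type, fl.Pid, fl.Start, fl.Len)
	}
}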
diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go
index 7d571c7a7..4960295ae 100644
--- a/pkg/abi/linux/linux_abi_autogen_unsafe.go
+++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go
@@ -1284,7 +1284,7 @@ func (f *Flock) MarshalBytes(dst []byte) {
dst = dst[8:]
usermem.ByteOrder.PutUint64(dst[:8], uint64(f.Len))
dst = dst[8:]
- usermem.ByteOrder.PutUint32(dst[:4], uint32(f.Pid))
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.PID))
dst = dst[4:]
// Padding: dst[:sizeof(byte)*4] ~= [4]byte{0}
dst = dst[1*(4):]
@@ -1302,7 +1302,7 @@ func (f *Flock) UnmarshalBytes(src []byte) {
src = src[8:]
f.Len = int64(usermem.ByteOrder.Uint64(src[:8]))
src = src[8:]
- f.Pid = int32(usermem.ByteOrder.Uint32(src[:4]))
+ f.PID = int32(usermem.ByteOrder.Uint32(src[:4]))
src = src[4:]
// Padding: ~ copy([4]byte(f._), src[:sizeof(byte)*4])
src = src[1*(4):]
diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go
index 8a5d9c7eb..57686ce07 100644
--- a/pkg/sentry/fs/lock/lock.go
+++ b/pkg/sentry/fs/lock/lock.go
@@ -54,6 +54,8 @@ import (
"math"
"syscall"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -83,6 +85,17 @@ const (
// offset 0 to LockEOF.
const LockEOF = math.MaxUint64
+// OwnerInfo describes the owner of a lock.
+//
+// TODO(gvisor.dev/issue/5264): We may need to add other fields in the future
+// (e.g., Linux's file_lock.fl_flags to support open file-descriptor locks).
+//
+// +stateify savable
+type OwnerInfo struct {
+ // PID is the process ID of the lock owner.
+ PID int32
+}
+
// Lock is a regional file lock. It consists of either a single writer
// or a set of readers.
//
@@ -92,14 +105,20 @@ const LockEOF = math.MaxUint64
// A Lock may be downgraded from a write lock to a read lock only if
// the write lock's uid is the same as the read lock.
//
+// Accesses to Lock are synchronized through the Locks object to which it
+// belongs.
+//
// +stateify savable
type Lock struct {
// Readers are the set of read lock holders identified by UniqueID.
- // If len(Readers) > 0 then HasWriter must be false.
- Readers map[UniqueID]bool
+ // If len(Readers) > 0 then Writer must be nil.
+ Readers map[UniqueID]OwnerInfo
// Writer holds the writer unique ID. It's nil if there are no writers.
Writer UniqueID
+
+ // WriterInfo describes the writer. It is only meaningful if Writer != nil.
+ WriterInfo OwnerInfo
}
// Locks is a thread-safe wrapper around a LockSet.
@@ -135,14 +154,14 @@ const (
// acquiring the lock in a non-blocking mode or "interrupted" if in a blocking mode.
// Blocker is the interface used to provide blocking behavior, passing a nil Blocker
// will result in non-blocking behavior.
-func (l *Locks) LockRegion(uid UniqueID, t LockType, r LockRange, block Blocker) bool {
+func (l *Locks) LockRegion(uid UniqueID, ownerPID int32, t LockType, r LockRange, block Blocker) bool {
for {
l.mu.Lock()
// Blocking locks must run in a loop because we'll be woken up whenever an unlock event
// happens for this lock. We will then attempt to take the lock again and if it fails
// continue blocking.
- res := l.locks.lock(uid, t, r)
+ res := l.locks.lock(uid, ownerPID, t, r)
if !res && block != nil {
e, ch := waiter.NewChannelEntry(nil)
l.blockedQueue.EventRegister(&e, EventMaskAll)
@@ -161,6 +180,14 @@ func (l *Locks) LockRegion(uid UniqueID, t LockType, r LockRange, block Blocker)
}
}
+// LockRegionVFS1 is a wrapper around LockRegion for VFS1, which does not implement
+// F_GETLK (and does not care about storing PIDs as a result).
+//
+// TODO(gvisor.dev/issue/1624): Delete.
+func (l *Locks) LockRegionVFS1(uid UniqueID, t LockType, r LockRange, block Blocker) bool {
+ return l.LockRegion(uid, 0 /* ownerPID */, t, r, block)
+}
+
// UnlockRegion attempts to release a lock for the uid on a region of a file.
// This operation is always successful, even if there did not exist a lock on
// the requested region held by uid in the first place.
@@ -175,13 +202,14 @@ func (l *Locks) UnlockRegion(uid UniqueID, r LockRange) {
// makeLock returns a new typed Lock that has either uid as its only reader
// or uid as its only writer.
-func makeLock(uid UniqueID, t LockType) Lock {
- value := Lock{Readers: make(map[UniqueID]bool)}
+func makeLock(uid UniqueID, ownerPID int32, t LockType) Lock {
+ value := Lock{Readers: make(map[UniqueID]OwnerInfo)}
switch t {
case ReadLock:
- value.Readers[uid] = true
+ value.Readers[uid] = OwnerInfo{PID: ownerPID}
case WriteLock:
value.Writer = uid
+ value.WriterInfo = OwnerInfo{PID: ownerPID}
default:
panic(fmt.Sprintf("makeLock: invalid lock type %d", t))
}
@@ -190,17 +218,20 @@ func makeLock(uid UniqueID, t LockType) Lock {
// isHeld returns true if uid is a holder of Lock.
func (l Lock) isHeld(uid UniqueID) bool {
- return l.Writer == uid || l.Readers[uid]
+ if _, ok := l.Readers[uid]; ok {
+ return true
+ }
+ return l.Writer == uid
}
// lock sets uid as a holder of a typed lock on Lock.
//
// Preconditions: canLock is true for the range containing this Lock.
-func (l *Lock) lock(uid UniqueID, t LockType) {
+func (l *Lock) lock(uid UniqueID, ownerPID int32, t LockType) {
switch t {
case ReadLock:
// If we are already a reader, then this is a no-op.
- if l.Readers[uid] {
+ if _, ok := l.Readers[uid]; ok {
return
}
// We cannot downgrade a write lock to a read lock unless the
@@ -210,11 +241,11 @@ func (l *Lock) lock(uid UniqueID, t LockType) {
panic(fmt.Sprintf("lock: cannot downgrade write lock to read lock for uid %d, writer is %d", uid, l.Writer))
}
// Ensure that there is only one reader if upgrading.
- l.Readers = make(map[UniqueID]bool)
+ l.Readers = make(map[UniqueID]OwnerInfo)
// Ensure that there is no longer a writer.
l.Writer = nil
}
- l.Readers[uid] = true
+ l.Readers[uid] = OwnerInfo{PID: ownerPID}
return
case WriteLock:
// If we are already the writer, then this is a no-op.
@@ -228,13 +259,14 @@ func (l *Lock) lock(uid UniqueID, t LockType) {
if readers != 1 {
panic(fmt.Sprintf("lock: cannot upgrade read lock to write lock for uid %d, too many readers %v", uid, l.Readers))
}
- if !l.Readers[uid] {
+ if _, ok := l.Readers[uid]; !ok {
panic(fmt.Sprintf("lock: cannot upgrade read lock to write lock for uid %d, conflicting reader %v", uid, l.Readers))
}
}
// Ensure that there is only a writer.
- l.Readers = make(map[UniqueID]bool)
+ l.Readers = make(map[UniqueID]OwnerInfo)
l.Writer = uid
+ l.WriterInfo = OwnerInfo{PID: ownerPID}
default:
panic(fmt.Sprintf("lock: invalid lock type %d", t))
}
@@ -247,7 +279,7 @@ func (l LockSet) lockable(r LockRange, check func(value Lock) bool) bool {
// Get our starting point.
seg := l.LowerBoundSegment(r.Start)
for seg.Ok() && seg.Start() < r.End {
- // Note that we don't care about overruning the end of the
+ // Note that we don't care about overrunning the end of the
// last segment because if everything checks out we'll just
// split the last segment.
if !check(seg.Value()) {
@@ -281,7 +313,7 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool {
if value.Writer == nil {
// Then this uid can only take a write lock if this is a private
// upgrade, meaning that the only reader is uid.
- return len(value.Readers) == 1 && value.Readers[uid]
+ return value.isOnlyReader(uid)
}
// If the uid is already a writer on this region, then
// adding a write lock would be a no-op.
@@ -292,11 +324,19 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool {
}
}
+func (l *Lock) isOnlyReader(uid UniqueID) bool {
+ if len(l.Readers) != 1 {
+ return false
+ }
+ _, ok := l.Readers[uid]
+ return ok
+}
+
// lock returns true if uid took a lock of type t on the entire range of
// LockRange.
//
// Preconditions: r.Start <= r.End (will panic otherwise).
-func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool {
+func (l *LockSet) lock(uid UniqueID, ownerPID int32, t LockType, r LockRange) bool {
if r.Start > r.End {
panic(fmt.Sprintf("lock: r.Start %d > r.End %d", r.Start, r.End))
}
@@ -317,7 +357,7 @@ func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool {
seg, gap := l.Find(r.Start)
if gap.Ok() {
// Fill in the gap and get the next segment to modify.
- seg = l.Insert(gap, gap.Range().Intersect(r), makeLock(uid, t)).NextSegment()
+ seg = l.Insert(gap, gap.Range().Intersect(r), makeLock(uid, ownerPID, t)).NextSegment()
} else if seg.Start() < r.Start {
// Get our first segment to modify.
_, seg = l.Split(seg, r.Start)
@@ -331,12 +371,12 @@ func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool {
// Set the lock on the segment. This is guaranteed to
// always be safe, given canLock above.
value := seg.ValuePtr()
- value.lock(uid, t)
+ value.lock(uid, ownerPID, t)
// Fill subsequent gaps.
gap = seg.NextGap()
if gr := gap.Range().Intersect(r); gr.Length() > 0 {
- seg = l.Insert(gap, gr, makeLock(uid, t)).NextSegment()
+ seg = l.Insert(gap, gr, makeLock(uid, ownerPID, t)).NextSegment()
} else {
seg = gap.NextSegment()
}
@@ -380,7 +420,7 @@ func (l *LockSet) unlock(uid UniqueID, r LockRange) {
// only ever be one writer and no readers, then this
// lock should always be removed from the set.
remove = true
- } else if value.Readers[uid] {
+ } else if _, ok := value.Readers[uid]; ok {
// If uid is the last reader, then just remove the entire
// segment.
if len(value.Readers) == 1 {
@@ -390,7 +430,7 @@ func (l *LockSet) unlock(uid UniqueID, r LockRange) {
// affecting any other segment's readers. To do
// this, we need to make a copy of the Readers map
// and not add this uid.
- newValue := Lock{Readers: make(map[UniqueID]bool)}
+ newValue := Lock{Readers: make(map[UniqueID]OwnerInfo)}
for k, v := range value.Readers {
if k != uid {
newValue.Readers[k] = v
@@ -451,3 +491,72 @@ func ComputeRange(start, length, offset int64) (LockRange, error) {
// Offset is guaranteed to be positive at this point.
return LockRange{Start: uint64(offset), End: end}, nil
}
+
+// TestRegion checks whether the lock holder identified by uid can hold a lock
+// of type t on range r. It returns a Flock struct representing this
+// information as the F_GETLK fcntl does.
+//
+// Note that the PID returned in the flock structure is relative to the root PID
+// namespace. It needs to be converted to the caller's PID namespace before
+// returning to userspace.
+//
+// TODO(gvisor.dev/issue/5264): we don't support OFD locks through fcntl, which
+// would return a struct with pid = -1.
+func (l *Locks) TestRegion(ctx context.Context, uid UniqueID, t LockType, r LockRange) linux.Flock {
+ f := linux.Flock{Type: linux.F_UNLCK}
+ switch t {
+ case ReadLock:
+ l.testRegion(r, func(lock Lock, start, length uint64) bool {
+ if lock.Writer == nil || lock.Writer == uid {
+ return true
+ }
+ f.Type = linux.F_WRLCK
+ f.PID = lock.WriterInfo.PID
+ f.Start = int64(start)
+ f.Len = int64(length)
+ return false
+ })
+ case WriteLock:
+ l.testRegion(r, func(lock Lock, start, length uint64) bool {
+ if lock.Writer == nil {
+ for k, v := range lock.Readers {
+ if k != uid {
+ // Stop at the first conflict detected.
+ f.Type = linux.F_RDLCK
+ f.PID = v.PID
+ f.Start = int64(start)
+ f.Len = int64(length)
+ return false
+ }
+ }
+ return true
+ }
+ if lock.Writer == uid {
+ return true
+ }
+ f.Type = linux.F_WRLCK
+ f.PID = lock.WriterInfo.PID
+ f.Start = int64(start)
+ f.Len = int64(length)
+ return false
+ })
+ default:
+ panic(fmt.Sprintf("TestRegion: invalid lock type %d", t))
+ }
+ return f
+}
+
+func (l *Locks) testRegion(r LockRange, check func(lock Lock, start, length uint64) bool) {
+ l.mu.Lock()
+ defer l.mu.Unlock()
+
+ seg := l.locks.LowerBoundSegment(r.Start)
+ for seg.Ok() && seg.Start() < r.End {
+ lock := seg.Value()
+ if !check(lock, seg.Start(), seg.End()-seg.Start()) {
+ // Stop at the first conflict detected.
+ return
+ }
+ seg = seg.NextSegment()
+ }
+}
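
A minimal sketch of how the new ownerPID parameter and TestRegion fit together is below. It is illustrative only: the string owner IDs and the use of context.Background are assumptions (UniqueID is an opaque identifier in this package), and the signatures are the ones introduced in this file.

package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/sentry/fs/lock"
)

func main() {
	var locks lock.Locks
	whole := lock.LockRange{Start: 0, End: lock.LockEOF}

	// Owner "a", whose process has root-namespace PID 100, takes a whole-file
	// write lock. A nil Blocker makes the call non-blocking.
	if !locks.LockRegion("a", 100 /* ownerPID */, lock.WriteLock, whole, nil) {
		panic("unexpected conflict")
	}

	// Owner "b" asks whether it could take a read lock. TestRegion answers in
	// F_GETLK style, reporting the conflicting writer's type, PID, and range.
	fl := locks.TestRegion(context.Background(), "b", lock.ReadLock, whole)
	fmt.Println(fl.Type == linux.F_WRLCK, fl.PID) // true 100
}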
diff --git a/pkg/sentry/fs/lock/lock_set_functions.go b/pkg/sentry/fs/lock/lock_set_functions.go
index 50a16e662..dcc17c0dc 100644
--- a/pkg/sentry/fs/lock/lock_set_functions.go
+++ b/pkg/sentry/fs/lock/lock_set_functions.go
@@ -40,7 +40,7 @@ func (lockSetFunctions) Merge(r1 LockRange, val1 Lock, r2 LockRange, val2 Lock)
return Lock{}, false
}
for k := range val1.Readers {
- if !val2.Readers[k] {
+ if _, ok := val2.Readers[k]; !ok {
return Lock{}, false
}
}
@@ -53,11 +53,12 @@ func (lockSetFunctions) Merge(r1 LockRange, val1 Lock, r2 LockRange, val2 Lock)
func (lockSetFunctions) Split(r LockRange, val Lock, split uint64) (Lock, Lock) {
// Copy the segment so that split segments don't contain map references
// to other segments.
- val0 := Lock{Readers: make(map[UniqueID]bool)}
+ val0 := Lock{Readers: make(map[UniqueID]OwnerInfo)}
for k, v := range val.Readers {
val0.Readers[k] = v
}
val0.Writer = val.Writer
+ val0.WriterInfo = val.WriterInfo
return val, val0
}
diff --git a/pkg/sentry/fs/lock/lock_state_autogen.go b/pkg/sentry/fs/lock/lock_state_autogen.go
index 707a7518f..49db04cfb 100644
--- a/pkg/sentry/fs/lock/lock_state_autogen.go
+++ b/pkg/sentry/fs/lock/lock_state_autogen.go
@@ -6,6 +6,29 @@ import (
"gvisor.dev/gvisor/pkg/state"
)
+func (o *OwnerInfo) StateTypeName() string {
+ return "pkg/sentry/fs/lock.OwnerInfo"
+}
+
+func (o *OwnerInfo) StateFields() []string {
+ return []string{
+ "PID",
+ }
+}
+
+func (o *OwnerInfo) beforeSave() {}
+
+func (o *OwnerInfo) StateSave(stateSinkObject state.Sink) {
+ o.beforeSave()
+ stateSinkObject.Save(0, &o.PID)
+}
+
+func (o *OwnerInfo) afterLoad() {}
+
+func (o *OwnerInfo) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &o.PID)
+}
+
func (l *Lock) StateTypeName() string {
return "pkg/sentry/fs/lock.Lock"
}
@@ -14,6 +37,7 @@ func (l *Lock) StateFields() []string {
return []string{
"Readers",
"Writer",
+ "WriterInfo",
}
}
@@ -23,6 +47,7 @@ func (l *Lock) StateSave(stateSinkObject state.Sink) {
l.beforeSave()
stateSinkObject.Save(0, &l.Readers)
stateSinkObject.Save(1, &l.Writer)
+ stateSinkObject.Save(2, &l.WriterInfo)
}
func (l *Lock) afterLoad() {}
@@ -30,6 +55,7 @@ func (l *Lock) afterLoad() {}
func (l *Lock) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &l.Readers)
stateSourceObject.Load(1, &l.Writer)
+ stateSourceObject.Load(2, &l.WriterInfo)
}
func (l *Locks) StateTypeName() string {
@@ -182,6 +208,7 @@ func (l *LockSegmentDataSlices) StateLoad(stateSourceObject state.Source) {
}
func init() {
+ state.Register((*OwnerInfo)(nil))
state.Register((*Lock)(nil))
state.Register((*Locks)(nil))
state.Register((*LockRange)(nil))
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index a43364b55..98f7bc52f 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1944,19 +1944,19 @@ func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error {
}
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
-func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
+func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
fd.lockLogging.Do(func() {
log.Infof("File lock using gofer file handled internally.")
})
- return fd.LockFD.LockBSD(ctx, uid, t, block)
+ return fd.LockFD.LockBSD(ctx, uid, ownerPID, t, block)
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
+func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
fd.lockLogging.Do(func() {
log.Infof("Range lock using gofer file handled internally.")
})
- return fd.Locks().LockPOSIX(ctx, uid, t, r, block)
+ return fd.Locks().LockPOSIX(ctx, uid, ownerPID, t, r, block)
}
// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index fdadb52c0..e9da99067 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -216,7 +216,7 @@ func (ns *PIDNamespace) TaskWithID(tid ThreadID) *Task {
return t
}
-// ThreadGroupWithID returns the thread group lead by the task with thread ID
+// ThreadGroupWithID returns the thread group led by the task with thread ID
// tid in PID namespace ns. If no task has that TID, or if the task with that
// TID is not a thread group leader, ThreadGroupWithID returns nil.
func (ns *PIDNamespace) ThreadGroupWithID(tid ThreadID) *ThreadGroup {
@@ -292,6 +292,11 @@ func (ns *PIDNamespace) UserNamespace() *auth.UserNamespace {
return ns.userns
}
+// Root returns the root PID namespace of ns.
+func (ns *PIDNamespace) Root() *PIDNamespace {
+ return ns.owner.Root
+}
+
// A threadGroupNode defines the relationship between a thread group and the
// rest of the system. Conceptually, threadGroupNode is data belonging to the
// owning TaskSet, as if TaskSet contained a field `nodes
@@ -485,3 +490,8 @@ func (t *Task) Parent() *Task {
func (t *Task) ThreadID() ThreadID {
return t.tg.pidns.IDOfTask(t)
}
+
+// TGIDInRoot returns t's TGID in the root PID namespace.
+func (t *Task) TGIDInRoot() ThreadID {
+ return t.tg.pidns.owner.Root.IDOfThreadGroup(t.tg)
+}
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index c33571f43..a6253626e 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -1014,12 +1014,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
if cmd == linux.F_SETLK {
// Non-blocking lock, provide a nil lock.Blocker.
- if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, nil) {
+ if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, nil) {
return 0, nil, syserror.EAGAIN
}
} else {
// Blocking lock, pass in the task to satisfy the lock.Blocker interface.
- if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, t) {
+ if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, t) {
return 0, nil, syserror.EINTR
}
}
@@ -1030,12 +1030,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
if cmd == linux.F_SETLK {
// Non-blocking lock, provide a nil lock.Blocker.
- if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, nil) {
+ if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, nil) {
return 0, nil, syserror.EAGAIN
}
} else {
// Blocking lock, pass in the task to satisfy the lock.Blocker interface.
- if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, t) {
+ if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, t) {
return 0, nil, syserror.EINTR
}
}
@@ -2167,24 +2167,24 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
case linux.LOCK_EX:
if nonblocking {
// Since we're nonblocking we pass a nil lock.Blocker implementation.
- if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, nil) {
+ if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, nil) {
return 0, nil, syserror.EWOULDBLOCK
}
} else {
// Because we're blocking we will pass the task to satisfy the lock.Blocker interface.
- if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, t) {
+ if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, t) {
return 0, nil, syserror.EINTR
}
}
case linux.LOCK_SH:
if nonblocking {
// Since we're nonblocking we pass a nil lock.Blocker implementation.
- if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, nil) {
+ if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, nil) {
return 0, nil, syserror.EWOULDBLOCK
}
} else {
// Because we're blocking we will pass the task to satisfy the lock.Blocker interface.
- if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, t) {
+ if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, t) {
return 0, nil, syserror.EINTR
}
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index 9281952cb..e39f074f2 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -205,8 +205,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
err := tmpfs.AddSeals(file, args[2].Uint())
return 0, nil, err
- case linux.F_SETLK, linux.F_SETLKW:
- return 0, nil, posixLock(t, args, file, cmd)
+ case linux.F_SETLK:
+ return 0, nil, posixLock(t, args, file, false /* blocking */)
+ case linux.F_SETLKW:
+ return 0, nil, posixLock(t, args, file, true /* blocking */)
+ case linux.F_GETLK:
+ return 0, nil, posixTestLock(t, args, file)
case linux.F_GETSIG:
a := file.AsyncHandler()
if a == nil {
@@ -292,7 +296,49 @@ func setAsyncOwner(t *kernel.Task, fd int, file *vfs.FileDescription, ownerType,
}
}
-func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, cmd int32) error {
+func posixTestLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription) error {
+ // Copy in the lock request.
+ flockAddr := args[2].Pointer()
+ var flock linux.Flock
+ if _, err := flock.CopyIn(t, flockAddr); err != nil {
+ return err
+ }
+ var typ lock.LockType
+ switch flock.Type {
+ case linux.F_RDLCK:
+ typ = lock.ReadLock
+ case linux.F_WRLCK:
+ typ = lock.WriteLock
+ default:
+ return syserror.EINVAL
+ }
+ r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence)
+ if err != nil {
+ return err
+ }
+
+ newFlock, err := file.TestPOSIX(t, t.FDTable(), typ, r)
+ if err != nil {
+ return err
+ }
+ newFlock.PID = translatePID(t.PIDNamespace().Root(), t.PIDNamespace(), newFlock.PID)
+ if _, err = newFlock.CopyOut(t, flockAddr); err != nil {
+ return err
+ }
+ return nil
+}
+
+// translatePID translates a pid from one namespace to another. Note that this
+// may race with task termination/creation, in which case the original task
+// corresponding to pid may no longer exist. This is used to implement the
+// F_GETLK fcntl, which has the same potential race in Linux as well (i.e.,
+// there is no synchronization between retrieving the lock PID and translating
+// it). See fs/locks.c:posix_lock_to_flock.
+func translatePID(old, new *kernel.PIDNamespace, pid int32) int32 {
+ return int32(new.IDOfTask(old.TaskWithID(kernel.ThreadID(pid))))
+}
+
+func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, blocking bool) error {
// Copy in the lock request.
flockAddr := args[2].Pointer()
var flock linux.Flock
@@ -301,7 +347,7 @@ func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescrip
}
var blocker lock.Blocker
- if cmd == linux.F_SETLKW {
+ if blocking {
blocker = t
}
@@ -315,13 +361,13 @@ func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescrip
if !file.IsReadable() {
return syserror.EBADF
}
- return file.LockPOSIX(t, t.FDTable(), lock.ReadLock, r, blocker)
+ return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.ReadLock, r, blocker)
case linux.F_WRLCK:
if !file.IsWritable() {
return syserror.EBADF
}
- return file.LockPOSIX(t, t.FDTable(), lock.WriteLock, r, blocker)
+ return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.WriteLock, r, blocker)
case linux.F_UNLCK:
return file.UnlockPOSIX(t, t.FDTable(), r)
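
One consequence of the uid check in TestRegion (uid is the caller's FDTable here) is that F_GETLK reports no conflict for a lock the caller itself holds, matching Linux. A hedged guest-side sketch, again assuming a hypothetical /tmp/lockfile and golang.org/x/sys/unix:

package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	f, err := os.OpenFile("/tmp/lockfile", os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Take a whole-file write lock.
	set := unix.Flock_t{Type: unix.F_WRLCK, Whence: unix.SEEK_SET}
	if err := unix.FcntlFlock(f.Fd(), unix.F_SETLK, &set); err != nil {
		panic(err)
	}

	// F_GETLK from the same owner: the caller could place this lock (it already
	// holds it), so Type comes back as F_UNLCK.
	get := unix.Flock_t{Type: unix.F_WRLCK, Whence: unix.SEEK_SET}
	if err := unix.FcntlFlock(f.Fd(), unix.F_GETLK, &get); err != nil {
		panic(err)
	}
	fmt.Println(get.Type == unix.F_UNLCK) // true
}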
diff --git a/pkg/sentry/syscalls/linux/vfs2/lock.go b/pkg/sentry/syscalls/linux/vfs2/lock.go
index b910b5a74..d1452a04d 100644
--- a/pkg/sentry/syscalls/linux/vfs2/lock.go
+++ b/pkg/sentry/syscalls/linux/vfs2/lock.go
@@ -44,11 +44,11 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
switch operation {
case linux.LOCK_EX:
- if err := file.LockBSD(t, lock.WriteLock, blocker); err != nil {
+ if err := file.LockBSD(t, int32(t.TGIDInRoot()), lock.WriteLock, blocker); err != nil {
return 0, nil, err
}
case linux.LOCK_SH:
- if err := file.LockBSD(t, lock.ReadLock, blocker); err != nil {
+ if err := file.LockBSD(t, int32(t.TGIDInRoot()), lock.ReadLock, blocker); err != nil {
return 0, nil, err
}
case linux.LOCK_UN:
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index dbf8425b4..eb6c2e36b 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -448,16 +448,19 @@ type FileDescriptionImpl interface {
RemoveXattr(ctx context.Context, name string) error
// LockBSD tries to acquire a BSD-style advisory file lock.
- LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error
+ LockBSD(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, block lock.Blocker) error
// UnlockBSD releases a BSD-style advisory file lock.
UnlockBSD(ctx context.Context, uid lock.UniqueID) error
// LockPOSIX tries to acquire a POSIX-style advisory file lock.
- LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange, block lock.Blocker) error
+ LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block lock.Blocker) error
// UnlockPOSIX releases a POSIX-style advisory file lock.
UnlockPOSIX(ctx context.Context, uid lock.UniqueID, ComputeLockRange lock.LockRange) error
+
+ // TestPOSIX returns information about whether the specified lock can be held, in the style of the F_GETLK fcntl.
+ TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error)
}
// Dirent holds the information contained in struct linux_dirent64.
@@ -791,9 +794,9 @@ func (fd *FileDescription) Msync(ctx context.Context, mr memmap.MappableRange) e
}
// LockBSD tries to acquire a BSD-style advisory file lock.
-func (fd *FileDescription) LockBSD(ctx context.Context, lockType lock.LockType, blocker lock.Blocker) error {
+func (fd *FileDescription) LockBSD(ctx context.Context, ownerPID int32, lockType lock.LockType, blocker lock.Blocker) error {
atomic.StoreUint32(&fd.usedLockBSD, 1)
- return fd.impl.LockBSD(ctx, fd, lockType, blocker)
+ return fd.impl.LockBSD(ctx, fd, ownerPID, lockType, blocker)
}
// UnlockBSD releases a BSD-style advisory file lock.
@@ -802,8 +805,8 @@ func (fd *FileDescription) UnlockBSD(ctx context.Context) error {
}
// LockPOSIX locks a POSIX-style file range lock.
-func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange, block lock.Blocker) error {
- return fd.impl.LockPOSIX(ctx, uid, t, r, block)
+func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block lock.Blocker) error {
+ return fd.impl.LockPOSIX(ctx, uid, ownerPID, t, r, block)
}
// UnlockPOSIX unlocks a POSIX-style file range lock.
@@ -811,6 +814,11 @@ func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, r
return fd.impl.UnlockPOSIX(ctx, uid, r)
}
+// TestPOSIX returns information about whether the specified lock can be held.
+func (fd *FileDescription) TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error) {
+ return fd.impl.TestPOSIX(ctx, uid, t, r)
+}
+
// ComputeLockRange computes the range of a file lock based on the given values.
func (fd *FileDescription) ComputeLockRange(ctx context.Context, start uint64, length uint64, whence int16) (lock.LockRange, error) {
var off int64
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 9d13e2ccb..eb7d2fd3b 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -419,8 +419,8 @@ func (fd *LockFD) Locks() *FileLocks {
}
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
-func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
- return fd.locks.LockBSD(uid, t, block)
+func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
+ return fd.locks.LockBSD(ctx, uid, ownerPID, t, block)
}
// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD.
@@ -430,8 +430,8 @@ func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
- return fd.locks.LockPOSIX(ctx, uid, t, r, block)
+func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
+ return fd.locks.LockPOSIX(ctx, uid, ownerPID, t, r, block)
}
// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
@@ -439,6 +439,11 @@ func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock
return fd.locks.UnlockPOSIX(ctx, uid, r)
}
+// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX.
+func (fd *LockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
+ return fd.locks.TestPOSIX(ctx, uid, t, r)
+}
+
// NoLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface
// returning ENOLCK.
//
@@ -446,7 +451,7 @@ func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock
type NoLockFD struct{}
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
-func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
+func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
return syserror.ENOLCK
}
@@ -456,7 +461,7 @@ func (NoLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
+func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
return syserror.ENOLCK
}
@@ -464,3 +469,8 @@ func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.Loc
func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
return syserror.ENOLCK
}
+
+// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX.
+func (NoLockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
+ return linux.Flock{}, syserror.ENOLCK
+}
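
For context on how these helpers are consumed: a file description implementation typically embeds LockFD (or NoLockFD when locking is unsupported) and wires it to the inode's shared FileLocks, so that LockBSD/LockPOSIX/TestPOSIX calls from different FDs on the same file contend with each other. The sketch below uses hypothetical names (myfs, myFD, newMyFD) and assumes LockFD's existing Init helper; it is not part of this change.

package myfs

import "gvisor.dev/gvisor/pkg/sentry/vfs"

// myFD is a hypothetical file description type.
type myFD struct {
	vfsfd vfs.FileDescription
	vfs.FileDescriptionDefaultImpl
	vfs.LockFD
}

// newMyFD associates the FD with the inode's shared lock state.
func newMyFD(inodeLocks *vfs.FileLocks) *myFD {
	fd := &myFD{}
	fd.LockFD.Init(inodeLocks)
	return fd
}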
diff --git a/pkg/sentry/vfs/lock.go b/pkg/sentry/vfs/lock.go
index bcff35792..cbe4d8c2d 100644
--- a/pkg/sentry/vfs/lock.go
+++ b/pkg/sentry/vfs/lock.go
@@ -39,8 +39,8 @@ type FileLocks struct {
}
// LockBSD tries to acquire a BSD-style lock on the entire file.
-func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
- if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) {
+func (fl *FileLocks) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
+ if fl.bsd.LockRegion(uid, ownerPID, t, fslock.LockRange{0, fslock.LockEOF}, block) {
return nil
}
@@ -61,8 +61,8 @@ func (fl *FileLocks) UnlockBSD(uid fslock.UniqueID) {
}
// LockPOSIX tries to acquire a POSIX-style lock on a file region.
-func (fl *FileLocks) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
- if fl.posix.LockRegion(uid, t, r, block) {
+func (fl *FileLocks) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
+ if fl.posix.LockRegion(uid, ownerPID, t, r, block) {
return nil
}
@@ -83,28 +83,7 @@ func (fl *FileLocks) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fsl
return nil
}
-func computeRange(ctx context.Context, fd *FileDescription, start uint64, length uint64, whence int16) (fslock.LockRange, error) {
- var off int64
- switch whence {
- case linux.SEEK_SET:
- off = 0
- case linux.SEEK_CUR:
- // Note that Linux does not hold any mutexes while retrieving the file
- // offset, see fs/locks.c:flock_to_posix_lock and fs/locks.c:fcntl_setlk.
- curOff, err := fd.Seek(ctx, 0, linux.SEEK_CUR)
- if err != nil {
- return fslock.LockRange{}, err
- }
- off = curOff
- case linux.SEEK_END:
- stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_SIZE})
- if err != nil {
- return fslock.LockRange{}, err
- }
- off = int64(stat.Size)
- default:
- return fslock.LockRange{}, syserror.EINVAL
- }
-
- return fslock.ComputeRange(int64(start), int64(length), off)
+// TestPOSIX returns information about whether the specified lock can be held, in the style of the F_GETLK fcntl.
+func (fl *FileLocks) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
+ return fl.posix.TestRegion(ctx, uid, t, r), nil
}