Diffstat (limited to 'pkg/sentry')
-rw-r--r--  pkg/sentry/fs/fdpipe/pipe_state.go  4
-rw-r--r--  pkg/sentry/fs/fs.go  2
-rw-r--r--  pkg/sentry/fs/gofer/inode_state.go  4
-rw-r--r--  pkg/sentry/fs/host/socket.go  2
-rw-r--r--  pkg/sentry/fs/lock/BUILD  2
-rw-r--r--  pkg/sentry/fs/lock/lock.go  153
-rw-r--r--  pkg/sentry/fs/lock/lock_set_functions.go  5
-rw-r--r--  pkg/sentry/fs/lock/lock_test.go  124
-rw-r--r--  pkg/sentry/fsimpl/devpts/master.go  11
-rw-r--r--  pkg/sentry/fsimpl/devpts/replica.go  11
-rw-r--r--  pkg/sentry/fsimpl/ext/directory.go  11
-rw-r--r--  pkg/sentry/fsimpl/ext/regular_file.go  11
-rw-r--r--  pkg/sentry/fsimpl/gofer/directory.go  5
-rw-r--r--  pkg/sentry/fsimpl/gofer/filesystem.go  14
-rw-r--r--  pkg/sentry/fsimpl/gofer/gofer.go  12
-rw-r--r--  pkg/sentry/fsimpl/host/host.go  11
-rw-r--r--  pkg/sentry/fsimpl/host/socket.go  2
-rw-r--r--  pkg/sentry/fsimpl/host/tty.go  11
-rw-r--r--  pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go  11
-rw-r--r--  pkg/sentry/fsimpl/kernfs/fd_impl_util.go  11
-rw-r--r--  pkg/sentry/fsimpl/kernfs/inode_impl_util.go  5
-rw-r--r--  pkg/sentry/fsimpl/overlay/overlay.go  11
-rw-r--r--  pkg/sentry/fsimpl/proc/task_files.go  21
-rw-r--r--  pkg/sentry/fsimpl/proc/tasks_test.go  6
-rw-r--r--  pkg/sentry/fsimpl/tmpfs/regular_file_test.go  18
-rw-r--r--  pkg/sentry/fsimpl/tmpfs/tmpfs.go  11
-rw-r--r--  pkg/sentry/fsimpl/verity/verity.go  24
-rw-r--r--  pkg/sentry/fsimpl/verity/verity_test.go  4
-rw-r--r--  pkg/sentry/kernel/BUILD  1
-rw-r--r--  pkg/sentry/kernel/fd_table.go  12
-rw-r--r--  pkg/sentry/kernel/kernel.go  4
-rw-r--r--  pkg/sentry/kernel/pipe/BUILD  1
-rw-r--r--  pkg/sentry/kernel/pipe/vfs.go  11
-rw-r--r--  pkg/sentry/kernel/task_block.go  2
-rw-r--r--  pkg/sentry/kernel/threads.go  12
-rw-r--r--  pkg/sentry/mm/procfs.go  7
-rw-r--r--  pkg/sentry/mm/syscalls.go  27
-rw-r--r--  pkg/sentry/platform/ptrace/subprocess.go  12
-rw-r--r--  pkg/sentry/platform/ptrace/subprocess_amd64.go  3
-rw-r--r--  pkg/sentry/platform/ptrace/subprocess_arm64.go  36
-rw-r--r--  pkg/sentry/socket/hostinet/BUILD  1
-rw-r--r--  pkg/sentry/socket/hostinet/socket.go  4
-rw-r--r--  pkg/sentry/socket/hostinet/socket_vfs2.go  11
-rw-r--r--  pkg/sentry/socket/netfilter/ipv4.go  30
-rw-r--r--  pkg/sentry/socket/netfilter/ipv6.go  31
-rw-r--r--  pkg/sentry/socket/netfilter/netfilter.go  9
-rw-r--r--  pkg/sentry/socket/netfilter/owner_matcher.go  2
-rw-r--r--  pkg/sentry/socket/netfilter/tcp_matcher.go  2
-rw-r--r--  pkg/sentry/socket/netfilter/udp_matcher.go  2
-rw-r--r--  pkg/sentry/socket/netlink/BUILD  1
-rw-r--r--  pkg/sentry/socket/netlink/socket_vfs2.go  11
-rw-r--r--  pkg/sentry/socket/netstack/BUILD  2
-rw-r--r--  pkg/sentry/socket/netstack/netstack.go  265
-rw-r--r--  pkg/sentry/socket/netstack/netstack_vfs2.go  25
-rw-r--r--  pkg/sentry/socket/netstack/provider.go  2
-rw-r--r--  pkg/sentry/socket/netstack/provider_vfs2.go  2
-rw-r--r--  pkg/sentry/socket/unix/transport/connectioned.go  8
-rw-r--r--  pkg/sentry/socket/unix/transport/connectionless.go  2
-rw-r--r--  pkg/sentry/socket/unix/transport/unix.go  87
-rw-r--r--  pkg/sentry/socket/unix/unix.go  3
-rw-r--r--  pkg/sentry/socket/unix/unix_vfs2.go  11
-rw-r--r--  pkg/sentry/syscalls/linux/linux64.go  4
-rw-r--r--  pkg/sentry/syscalls/linux/sys_file.go  16
-rw-r--r--  pkg/sentry/syscalls/linux/vfs2/fd.go  78
-rw-r--r--  pkg/sentry/syscalls/linux/vfs2/ioctl.go  4
-rw-r--r--  pkg/sentry/syscalls/linux/vfs2/lock.go  4
-rw-r--r--  pkg/sentry/syscalls/linux/vfs2/read_write.go  39
-rw-r--r--  pkg/sentry/syscalls/linux/vfs2/setstat.go  13
-rw-r--r--  pkg/sentry/syscalls/linux/vfs2/splice.go  12
-rw-r--r--  pkg/sentry/syscalls/linux/vfs2/sync.go  4
-rw-r--r--  pkg/sentry/vfs/BUILD  1
-rw-r--r--  pkg/sentry/vfs/file_description.go  84
-rw-r--r--  pkg/sentry/vfs/file_description_impl_util.go  30
-rw-r--r--  pkg/sentry/vfs/lock.go  47
-rw-r--r--  pkg/sentry/vfs/opath.go  139
-rw-r--r--  pkg/sentry/vfs/options.go  2
-rw-r--r--  pkg/sentry/vfs/vfs.go  12
77 files changed, 903 insertions, 742 deletions
diff --git a/pkg/sentry/fs/fdpipe/pipe_state.go b/pkg/sentry/fs/fdpipe/pipe_state.go
index af8230a7d..387f713aa 100644
--- a/pkg/sentry/fs/fdpipe/pipe_state.go
+++ b/pkg/sentry/fs/fdpipe/pipe_state.go
@@ -34,7 +34,9 @@ func (p *pipeOperations) beforeSave() {
} else if p.flags.Write {
file, err := p.opener.NonBlockingOpen(context.Background(), fs.PermMask{Write: true})
if err != nil {
- panic(fs.ErrSaveRejection{fmt.Errorf("write-only pipe end cannot be re-opened as %v: %v", p, err)})
+ panic(&fs.ErrSaveRejection{
+ Err: fmt.Errorf("write-only pipe end cannot be re-opened as %#v: %w", p, err),
+ })
}
file.Close()
}
diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go
index a020da53b..44587bb37 100644
--- a/pkg/sentry/fs/fs.go
+++ b/pkg/sentry/fs/fs.go
@@ -144,7 +144,7 @@ type ErrSaveRejection struct {
}
// Error returns a sensible description of the save rejection error.
-func (e ErrSaveRejection) Error() string {
+func (e *ErrSaveRejection) Error() string {
return "save rejected due to unsupported file system state: " + e.Err.Error()
}
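
[Editor's note] The two hunks above make ErrSaveRejection consistently pointer-valued and wrap the underlying cause with %w. A self-contained sketch of what that buys code that recovers the panic; the type below is a toy copy for illustration, not the fs package itself:

package main

import (
	"errors"
	"fmt"
	"io"
)

// ErrSaveRejection mirrors pkg/sentry/fs.ErrSaveRejection for illustration only.
type ErrSaveRejection struct{ Err error }

// Error uses a pointer receiver, matching the change above, so only
// *ErrSaveRejection satisfies the error interface.
func (e *ErrSaveRejection) Error() string {
	return "save rejected due to unsupported file system state: " + e.Err.Error()
}

func main() {
	// beforeSave hooks now panic with a *ErrSaveRejection whose cause is wrapped via %w.
	rej := &ErrSaveRejection{
		Err: fmt.Errorf("write-only pipe end cannot be re-opened: %w", io.ErrClosedPipe),
	}
	var err error = rej // the pointer satisfies error
	fmt.Println(err)
	fmt.Println(errors.Is(rej.Err, io.ErrClosedPipe)) // true, thanks to %w
}
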
diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go
index a3402e343..141e3c27f 100644
--- a/pkg/sentry/fs/gofer/inode_state.go
+++ b/pkg/sentry/fs/gofer/inode_state.go
@@ -67,7 +67,9 @@ func (i *inodeFileState) beforeSave() {
if i.sattr.Type == fs.RegularFile {
uattr, err := i.unstableAttr(&dummyClockContext{context.Background()})
if err != nil {
- panic(fs.ErrSaveRejection{fmt.Errorf("failed to get unstable atttribute of %s: %v", i.s.inodeMappings[i.sattr.InodeID], err)})
+ panic(&fs.ErrSaveRejection{
+ Err: fmt.Errorf("failed to get unstable atttribute of %s: %w", i.s.inodeMappings[i.sattr.InodeID], err),
+ })
}
i.savedUAttr = &uattr
}
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index a2f3d5918..07b4fb70f 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -257,7 +257,7 @@ func (c *ConnectedEndpoint) Passcred() bool {
}
// GetLocalAddress implements transport.ConnectedEndpoint.GetLocalAddress.
-func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
return tcpip.FullAddress{Addr: tcpip.Address(c.path)}, nil
}
diff --git a/pkg/sentry/fs/lock/BUILD b/pkg/sentry/fs/lock/BUILD
index ae3331737..4d3b216d8 100644
--- a/pkg/sentry/fs/lock/BUILD
+++ b/pkg/sentry/fs/lock/BUILD
@@ -41,6 +41,8 @@ go_library(
],
visibility = ["//pkg/sentry:internal"],
deps = [
+ "//pkg/abi/linux",
+ "//pkg/context",
"//pkg/log",
"//pkg/sync",
"//pkg/waiter",
diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go
index 8a5d9c7eb..57686ce07 100644
--- a/pkg/sentry/fs/lock/lock.go
+++ b/pkg/sentry/fs/lock/lock.go
@@ -54,6 +54,8 @@ import (
"math"
"syscall"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -83,6 +85,17 @@ const (
// offset 0 to LockEOF.
const LockEOF = math.MaxUint64
+// OwnerInfo describes the owner of a lock.
+//
+// TODO(gvisor.dev/issue/5264): We may need to add other fields in the future
+// (e.g., Linux's file_lock.fl_flags to support open file-descriptor locks).
+//
+// +stateify savable
+type OwnerInfo struct {
+ // PID is the process ID of the lock owner.
+ PID int32
+}
+
// Lock is a regional file lock. It consists of either a single writer
// or a set of readers.
//
@@ -92,14 +105,20 @@ const LockEOF = math.MaxUint64
// A Lock may be downgraded from a write lock to a read lock only if
// the write lock's uid is the same as the read lock.
//
+// Accesses to Lock are synchronized through the Locks object to which it
+// belongs.
+//
// +stateify savable
type Lock struct {
// Readers are the set of read lock holders identified by UniqueID.
- // If len(Readers) > 0 then HasWriter must be false.
- Readers map[UniqueID]bool
+ // If len(Readers) > 0 then Writer must be nil.
+ Readers map[UniqueID]OwnerInfo
// Writer holds the writer unique ID. It's nil if there are no writers.
Writer UniqueID
+
+ // WriterInfo describes the writer. It is only meaningful if Writer != nil.
+ WriterInfo OwnerInfo
}
// Locks is a thread-safe wrapper around a LockSet.
@@ -135,14 +154,14 @@ const (
// acquiring the lock in a non-blocking mode or "interrupted" if in a blocking mode.
// Blocker is the interface used to provide blocking behavior, passing a nil Blocker
// will result in non-blocking behavior.
-func (l *Locks) LockRegion(uid UniqueID, t LockType, r LockRange, block Blocker) bool {
+func (l *Locks) LockRegion(uid UniqueID, ownerPID int32, t LockType, r LockRange, block Blocker) bool {
for {
l.mu.Lock()
// Blocking locks must run in a loop because we'll be woken up whenever an unlock event
// happens for this lock. We will then attempt to take the lock again and if it fails
// continue blocking.
- res := l.locks.lock(uid, t, r)
+ res := l.locks.lock(uid, ownerPID, t, r)
if !res && block != nil {
e, ch := waiter.NewChannelEntry(nil)
l.blockedQueue.EventRegister(&e, EventMaskAll)
@@ -161,6 +180,14 @@ func (l *Locks) LockRegion(uid UniqueID, t LockType, r LockRange, block Blocker)
}
}
+// LockRegionVFS1 is a wrapper around LockRegion for VFS1, which does not implement
+// F_GETLK (and does not care about storing PIDs as a result).
+//
+// TODO(gvisor.dev/issue/1624): Delete.
+func (l *Locks) LockRegionVFS1(uid UniqueID, t LockType, r LockRange, block Blocker) bool {
+ return l.LockRegion(uid, 0 /* ownerPID */, t, r, block)
+}
+
// UnlockRegion attempts to release a lock for the uid on a region of a file.
// This operation is always successful, even if there did not exist a lock on
// the requested region held by uid in the first place.
@@ -175,13 +202,14 @@ func (l *Locks) UnlockRegion(uid UniqueID, r LockRange) {
// makeLock returns a new typed Lock that has either uid as its only reader
// or uid as its only writer.
-func makeLock(uid UniqueID, t LockType) Lock {
- value := Lock{Readers: make(map[UniqueID]bool)}
+func makeLock(uid UniqueID, ownerPID int32, t LockType) Lock {
+ value := Lock{Readers: make(map[UniqueID]OwnerInfo)}
switch t {
case ReadLock:
- value.Readers[uid] = true
+ value.Readers[uid] = OwnerInfo{PID: ownerPID}
case WriteLock:
value.Writer = uid
+ value.WriterInfo = OwnerInfo{PID: ownerPID}
default:
panic(fmt.Sprintf("makeLock: invalid lock type %d", t))
}
@@ -190,17 +218,20 @@ func makeLock(uid UniqueID, t LockType) Lock {
// isHeld returns true if uid is a holder of Lock.
func (l Lock) isHeld(uid UniqueID) bool {
- return l.Writer == uid || l.Readers[uid]
+ if _, ok := l.Readers[uid]; ok {
+ return true
+ }
+ return l.Writer == uid
}
// lock sets uid as a holder of a typed lock on Lock.
//
// Preconditions: canLock is true for the range containing this Lock.
-func (l *Lock) lock(uid UniqueID, t LockType) {
+func (l *Lock) lock(uid UniqueID, ownerPID int32, t LockType) {
switch t {
case ReadLock:
// If we are already a reader, then this is a no-op.
- if l.Readers[uid] {
+ if _, ok := l.Readers[uid]; ok {
return
}
// We cannot downgrade a write lock to a read lock unless the
@@ -210,11 +241,11 @@ func (l *Lock) lock(uid UniqueID, t LockType) {
panic(fmt.Sprintf("lock: cannot downgrade write lock to read lock for uid %d, writer is %d", uid, l.Writer))
}
// Ensure that there is only one reader if upgrading.
- l.Readers = make(map[UniqueID]bool)
+ l.Readers = make(map[UniqueID]OwnerInfo)
// Ensure that there is no longer a writer.
l.Writer = nil
}
- l.Readers[uid] = true
+ l.Readers[uid] = OwnerInfo{PID: ownerPID}
return
case WriteLock:
// If we are already the writer, then this is a no-op.
@@ -228,13 +259,14 @@ func (l *Lock) lock(uid UniqueID, t LockType) {
if readers != 1 {
panic(fmt.Sprintf("lock: cannot upgrade read lock to write lock for uid %d, too many readers %v", uid, l.Readers))
}
- if !l.Readers[uid] {
+ if _, ok := l.Readers[uid]; !ok {
panic(fmt.Sprintf("lock: cannot upgrade read lock to write lock for uid %d, conflicting reader %v", uid, l.Readers))
}
}
// Ensure that there is only a writer.
- l.Readers = make(map[UniqueID]bool)
+ l.Readers = make(map[UniqueID]OwnerInfo)
l.Writer = uid
+ l.WriterInfo = OwnerInfo{PID: ownerPID}
default:
panic(fmt.Sprintf("lock: invalid lock type %d", t))
}
@@ -247,7 +279,7 @@ func (l LockSet) lockable(r LockRange, check func(value Lock) bool) bool {
// Get our starting point.
seg := l.LowerBoundSegment(r.Start)
for seg.Ok() && seg.Start() < r.End {
- // Note that we don't care about overruning the end of the
+ // Note that we don't care about overrunning the end of the
// last segment because if everything checks out we'll just
// split the last segment.
if !check(seg.Value()) {
@@ -281,7 +313,7 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool {
if value.Writer == nil {
// Then this uid can only take a write lock if this is a private
// upgrade, meaning that the only reader is uid.
- return len(value.Readers) == 1 && value.Readers[uid]
+ return value.isOnlyReader(uid)
}
// If the uid is already a writer on this region, then
// adding a write lock would be a no-op.
@@ -292,11 +324,19 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool {
}
}
+func (l *Lock) isOnlyReader(uid UniqueID) bool {
+ if len(l.Readers) != 1 {
+ return false
+ }
+ _, ok := l.Readers[uid]
+ return ok
+}
+
// lock returns true if uid took a lock of type t on the entire range of
// LockRange.
//
// Preconditions: r.Start <= r.End (will panic otherwise).
-func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool {
+func (l *LockSet) lock(uid UniqueID, ownerPID int32, t LockType, r LockRange) bool {
if r.Start > r.End {
panic(fmt.Sprintf("lock: r.Start %d > r.End %d", r.Start, r.End))
}
@@ -317,7 +357,7 @@ func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool {
seg, gap := l.Find(r.Start)
if gap.Ok() {
// Fill in the gap and get the next segment to modify.
- seg = l.Insert(gap, gap.Range().Intersect(r), makeLock(uid, t)).NextSegment()
+ seg = l.Insert(gap, gap.Range().Intersect(r), makeLock(uid, ownerPID, t)).NextSegment()
} else if seg.Start() < r.Start {
// Get our first segment to modify.
_, seg = l.Split(seg, r.Start)
@@ -331,12 +371,12 @@ func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool {
// Set the lock on the segment. This is guaranteed to
// always be safe, given canLock above.
value := seg.ValuePtr()
- value.lock(uid, t)
+ value.lock(uid, ownerPID, t)
// Fill subsequent gaps.
gap = seg.NextGap()
if gr := gap.Range().Intersect(r); gr.Length() > 0 {
- seg = l.Insert(gap, gr, makeLock(uid, t)).NextSegment()
+ seg = l.Insert(gap, gr, makeLock(uid, ownerPID, t)).NextSegment()
} else {
seg = gap.NextSegment()
}
@@ -380,7 +420,7 @@ func (l *LockSet) unlock(uid UniqueID, r LockRange) {
// only ever be one writer and no readers, then this
// lock should always be removed from the set.
remove = true
- } else if value.Readers[uid] {
+ } else if _, ok := value.Readers[uid]; ok {
// If uid is the last reader, then just remove the entire
// segment.
if len(value.Readers) == 1 {
@@ -390,7 +430,7 @@ func (l *LockSet) unlock(uid UniqueID, r LockRange) {
// affecting any other segment's readers. To do
// this, we need to make a copy of the Readers map
// and not add this uid.
- newValue := Lock{Readers: make(map[UniqueID]bool)}
+ newValue := Lock{Readers: make(map[UniqueID]OwnerInfo)}
for k, v := range value.Readers {
if k != uid {
newValue.Readers[k] = v
@@ -451,3 +491,72 @@ func ComputeRange(start, length, offset int64) (LockRange, error) {
// Offset is guaranteed to be positive at this point.
return LockRange{Start: uint64(offset), End: end}, nil
}
+
+// TestRegion checks whether the lock holder identified by uid can hold a lock
+// of type t on range r. It returns a Flock struct representing this
+// information as the F_GETLK fcntl does.
+//
+// Note that the PID returned in the flock structure is relative to the root PID
+// namespace. It needs to be converted to the caller's PID namespace before
+// returning to userspace.
+//
+// TODO(gvisor.dev/issue/5264): we don't support OFD locks through fcntl, which
+// would return a struct with pid = -1.
+func (l *Locks) TestRegion(ctx context.Context, uid UniqueID, t LockType, r LockRange) linux.Flock {
+ f := linux.Flock{Type: linux.F_UNLCK}
+ switch t {
+ case ReadLock:
+ l.testRegion(r, func(lock Lock, start, length uint64) bool {
+ if lock.Writer == nil || lock.Writer == uid {
+ return true
+ }
+ f.Type = linux.F_WRLCK
+ f.PID = lock.WriterInfo.PID
+ f.Start = int64(start)
+ f.Len = int64(length)
+ return false
+ })
+ case WriteLock:
+ l.testRegion(r, func(lock Lock, start, length uint64) bool {
+ if lock.Writer == nil {
+ for k, v := range lock.Readers {
+ if k != uid {
+ // Stop at the first conflict detected.
+ f.Type = linux.F_RDLCK
+ f.PID = v.PID
+ f.Start = int64(start)
+ f.Len = int64(length)
+ return false
+ }
+ }
+ return true
+ }
+ if lock.Writer == uid {
+ return true
+ }
+ f.Type = linux.F_WRLCK
+ f.PID = lock.WriterInfo.PID
+ f.Start = int64(start)
+ f.Len = int64(length)
+ return false
+ })
+ default:
+ panic(fmt.Sprintf("TestRegion: invalid lock type %d", t))
+ }
+ return f
+}
+
+func (l *Locks) testRegion(r LockRange, check func(lock Lock, start, length uint64) bool) {
+ l.mu.Lock()
+ defer l.mu.Unlock()
+
+ seg := l.locks.LowerBoundSegment(r.Start)
+ for seg.Ok() && seg.Start() < r.End {
+ lock := seg.Value()
+ if !check(lock, seg.Start(), seg.End()-seg.Start()) {
+ // Stop at the first conflict detected.
+ return
+ }
+ seg = seg.NextSegment()
+ }
+}
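
[Editor's note] To illustrate the owner-aware API added above: the sketch below takes a whole-file read lock that records the owner's PID, then performs an F_GETLK-style query with TestRegion. The signatures come from this diff and the imports are the ones added to the BUILD file; the wrapping function, the uid values, and the PID are illustrative. VFS1 callers go through LockRegionVFS1 instead, which stores a zero PID.

package example

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/sentry/fs/lock"
)

func lockExample(ctx context.Context) {
	var locks lock.Locks
	var uidA, uidB lock.UniqueID = 1, 2

	// Owner PID 123 takes a whole-file read lock; a nil Blocker means non-blocking.
	if !locks.LockRegion(uidA, 123 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: lock.LockEOF}, nil) {
		return
	}

	// F_GETLK-style query: would a write lock by uidB on [0, 4096) conflict?
	flk := locks.TestRegion(ctx, uidB, lock.WriteLock, lock.LockRange{Start: 0, End: 4096})
	if flk.Type != linux.F_UNLCK {
		// The conflicting holder's PID comes from the per-lock OwnerInfo.
		fmt.Printf("conflict: lock type %d held by pid %d\n", flk.Type, flk.PID)
	}
}
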
diff --git a/pkg/sentry/fs/lock/lock_set_functions.go b/pkg/sentry/fs/lock/lock_set_functions.go
index 50a16e662..dcc17c0dc 100644
--- a/pkg/sentry/fs/lock/lock_set_functions.go
+++ b/pkg/sentry/fs/lock/lock_set_functions.go
@@ -40,7 +40,7 @@ func (lockSetFunctions) Merge(r1 LockRange, val1 Lock, r2 LockRange, val2 Lock)
return Lock{}, false
}
for k := range val1.Readers {
- if !val2.Readers[k] {
+ if _, ok := val2.Readers[k]; !ok {
return Lock{}, false
}
}
@@ -53,11 +53,12 @@ func (lockSetFunctions) Merge(r1 LockRange, val1 Lock, r2 LockRange, val2 Lock)
func (lockSetFunctions) Split(r LockRange, val Lock, split uint64) (Lock, Lock) {
// Copy the segment so that split segments don't contain map references
// to other segments.
- val0 := Lock{Readers: make(map[UniqueID]bool)}
+ val0 := Lock{Readers: make(map[UniqueID]OwnerInfo)}
for k, v := range val.Readers {
val0.Readers[k] = v
}
val0.Writer = val.Writer
+ val0.WriterInfo = val.WriterInfo
return val, val0
}
diff --git a/pkg/sentry/fs/lock/lock_test.go b/pkg/sentry/fs/lock/lock_test.go
index fad90984b..9878c04e1 100644
--- a/pkg/sentry/fs/lock/lock_test.go
+++ b/pkg/sentry/fs/lock/lock_test.go
@@ -30,12 +30,12 @@ func equals(e0, e1 []entry) bool {
}
for i := range e0 {
for k := range e0[i].Lock.Readers {
- if !e1[i].Lock.Readers[k] {
+ if _, ok := e1[i].Lock.Readers[k]; !ok {
return false
}
}
for k := range e1[i].Lock.Readers {
- if !e0[i].Lock.Readers[k] {
+ if _, ok := e0[i].Lock.Readers[k]; !ok {
return false
}
}
@@ -90,15 +90,15 @@ func TestCanLock(t *testing.T) {
// 0 1024 2048 3072 4096
l := fill([]entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}}},
LockRange: LockRange{1024, 2048},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true, 3: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 3: OwnerInfo{}}},
LockRange: LockRange{2048, 3072},
},
{
@@ -220,7 +220,7 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -266,7 +266,7 @@ func TestSetLock(t *testing.T) {
// 0 4096 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, 4096},
},
{
@@ -283,7 +283,7 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -302,7 +302,7 @@ func TestSetLock(t *testing.T) {
LockRange: LockRange{0, 4096},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -333,7 +333,7 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -351,7 +351,7 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -366,11 +366,11 @@ func TestSetLock(t *testing.T) {
// 0 4096 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{0, 4096},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -383,7 +383,7 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -398,15 +398,15 @@ func TestSetLock(t *testing.T) {
// 0 4096 8192 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, 4096},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{4096, 8192},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{8192, LockEOF},
},
},
@@ -419,7 +419,7 @@ func TestSetLock(t *testing.T) {
// 0 1024 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{1024, LockEOF},
},
},
@@ -434,7 +434,7 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -447,7 +447,7 @@ func TestSetLock(t *testing.T) {
// 0 4096
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, 4096},
},
},
@@ -467,11 +467,11 @@ func TestSetLock(t *testing.T) {
// 0 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{1024, LockEOF},
},
},
@@ -484,7 +484,7 @@ func TestSetLock(t *testing.T) {
// 0 1024 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{1024, LockEOF},
},
},
@@ -499,15 +499,15 @@ func TestSetLock(t *testing.T) {
// 0 1024 4096 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{1024, 4096},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -520,15 +520,15 @@ func TestSetLock(t *testing.T) {
// 0 1024 2048 4096 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{1024, 2048},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -543,7 +543,7 @@ func TestSetLock(t *testing.T) {
// 0 1024 4096 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
@@ -551,7 +551,7 @@ func TestSetLock(t *testing.T) {
LockRange: LockRange{1024, 4096},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -564,15 +564,15 @@ func TestSetLock(t *testing.T) {
// 0 1024 2048 4096 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{1024, 2048},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -587,7 +587,7 @@ func TestSetLock(t *testing.T) {
// 0 1024 3072 4096 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
@@ -595,7 +595,7 @@ func TestSetLock(t *testing.T) {
LockRange: LockRange{1024, 3072},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -608,11 +608,11 @@ func TestSetLock(t *testing.T) {
// 0 1024 2048
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{1024, 2048},
},
},
@@ -634,15 +634,15 @@ func TestSetLock(t *testing.T) {
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{1024, 2048},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{2048, 4096},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -676,7 +676,7 @@ func TestSetLock(t *testing.T) {
l := fill(test.before)
r := LockRange{Start: test.start, End: test.end}
- success := l.lock(test.uid, test.lockType, r)
+ success := l.lock(test.uid, 0 /* ownerPID */, test.lockType, r)
var got []entry
for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
got = append(got, entry{
@@ -739,7 +739,7 @@ func TestUnlock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -752,7 +752,7 @@ func TestUnlock(t *testing.T) {
// 0 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -765,7 +765,7 @@ func TestUnlock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -797,7 +797,7 @@ func TestUnlock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -810,7 +810,7 @@ func TestUnlock(t *testing.T) {
// 0 4096 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -849,7 +849,7 @@ func TestUnlock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -862,7 +862,7 @@ func TestUnlock(t *testing.T) {
// 0 4096
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}},
LockRange: LockRange{0, 4096},
},
},
@@ -901,7 +901,7 @@ func TestUnlock(t *testing.T) {
// 0 1024 4096 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
@@ -909,7 +909,7 @@ func TestUnlock(t *testing.T) {
LockRange: LockRange{1024, 4096},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -922,11 +922,11 @@ func TestUnlock(t *testing.T) {
// 0 1024 4096 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -939,7 +939,7 @@ func TestUnlock(t *testing.T) {
// 0 max uint64
before: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{0, LockEOF},
},
},
@@ -952,15 +952,15 @@ func TestUnlock(t *testing.T) {
// 0 1024 4096 max uint64
after: []entry{
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{1024, 4096},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}, 2: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -977,7 +977,7 @@ func TestUnlock(t *testing.T) {
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -994,7 +994,7 @@ func TestUnlock(t *testing.T) {
LockRange: LockRange{0, 8},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -1011,7 +1011,7 @@ func TestUnlock(t *testing.T) {
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{4096, LockEOF},
},
},
@@ -1028,11 +1028,11 @@ func TestUnlock(t *testing.T) {
LockRange: LockRange{0, 1024},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}}},
LockRange: LockRange{4096, 8192},
},
{
- Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}},
+ Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}},
LockRange: LockRange{8192, LockEOF},
},
},
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index e91fa26a4..b44117f40 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -194,16 +193,6 @@ func (mfd *masterFileDescription) Stat(ctx context.Context, opts vfs.StatOptions
return mfd.inode.Stat(ctx, fs, opts)
}
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (mfd *masterFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return mfd.Locks().LockPOSIX(ctx, &mfd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (mfd *masterFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return mfd.Locks().UnlockPOSIX(ctx, &mfd.vfsfd, uid, start, length, whence)
-}
-
// maybeEmitUnimplementedEvent emits unimplemented event if cmd is valid.
func maybeEmitUnimplementedEvent(ctx context.Context, cmd uint32) {
switch cmd {
diff --git a/pkg/sentry/fsimpl/devpts/replica.go b/pkg/sentry/fsimpl/devpts/replica.go
index 70c68cf0a..a0c5b5af5 100644
--- a/pkg/sentry/fsimpl/devpts/replica.go
+++ b/pkg/sentry/fsimpl/devpts/replica.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -189,13 +188,3 @@ func (rfd *replicaFileDescription) Stat(ctx context.Context, opts vfs.StatOption
fs := rfd.vfsfd.VirtualDentry().Mount().Filesystem()
return rfd.inode.Stat(ctx, fs, opts)
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (rfd *replicaFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return rfd.Locks().LockPOSIX(ctx, &rfd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (rfd *replicaFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return rfd.Locks().UnlockPOSIX(ctx, &rfd.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go
index 0ad79b381..512b70ede 100644
--- a/pkg/sentry/fsimpl/ext/directory.go
+++ b/pkg/sentry/fsimpl/ext/directory.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -311,13 +310,3 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in
fd.off = offset
return offset, nil
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *directoryFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *directoryFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go
index 4a5539b37..5ad9befcd 100644
--- a/pkg/sentry/fsimpl/ext/regular_file.go
+++ b/pkg/sentry/fsimpl/ext/regular_file.go
@@ -20,7 +20,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -154,13 +153,3 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt
// TODO(b/134676337): Implement mmap(2).
return syserror.ENODEV
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *regularFileFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *regularFileFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index 3b5927702..9da01cba3 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -90,6 +90,7 @@ type createSyntheticOpts struct {
// * d.isDir().
// * d does not already contain a child with the given name.
func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
+ now := d.fs.clock.Now().Nanoseconds()
child := &dentry{
refs: 1, // held by d
fs: d.fs,
@@ -98,6 +99,10 @@ func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
uid: uint32(opts.kuid),
gid: uint32(opts.kgid),
blockSize: usermem.PageSize, // arbitrary
+ atime: now,
+ mtime: now,
+ ctime: now,
+ btime: now,
readFD: -1,
writeFD: -1,
mmapFD: -1,
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 91d5dc174..8f95473b6 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -36,16 +36,26 @@ import (
// Sync implements vfs.FilesystemImpl.Sync.
func (fs *filesystem) Sync(ctx context.Context) error {
// Snapshot current syncable dentries and special file FDs.
+ fs.renameMu.RLock()
fs.syncMu.Lock()
ds := make([]*dentry, 0, len(fs.syncableDentries))
for d := range fs.syncableDentries {
+ // It's safe to use IncRef here even though fs.syncableDentries doesn't
+ // hold references since we hold fs.renameMu. Note that we can't use
+ // TryIncRef since cached dentries at zero references should still be
+ // synced.
d.IncRef()
ds = append(ds, d)
}
+ fs.renameMu.RUnlock()
sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs))
for sffd := range fs.specialFileFDs {
- sffd.vfsfd.IncRef()
- sffds = append(sffds, sffd)
+ // As above, fs.specialFileFDs doesn't hold references. However, unlike
+ // dentries, an FD that has reached zero references can't be
+ // resurrected, so we can use TryIncRef.
+ if sffd.vfsfd.TryIncRef() {
+ sffds = append(sffds, sffd)
+ }
}
fs.syncMu.Unlock()
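
[Editor's note] The comments above hinge on a property of the reference counters: IncRef can revive an object already at zero references (safe for cached dentries because renameMu blocks their destruction), while TryIncRef refuses once the count has reached zero, which is what released FDs need. A toy, self-contained illustration of that distinction; this is not the refsvfs2 implementation:

package main

import (
	"fmt"
	"sync/atomic"
)

// ref is a toy reference counter used only to illustrate the two loops above.
type ref struct{ n int64 }

// IncRef unconditionally bumps the count, even from zero.
func (r *ref) IncRef() { atomic.AddInt64(&r.n, 1) }

// TryIncRef bumps the count only if it has not already reached zero, so a
// destroyed object stays destroyed.
func (r *ref) TryIncRef() bool {
	for {
		n := atomic.LoadInt64(&r.n)
		if n == 0 {
			return false
		}
		if atomic.CompareAndSwapInt64(&r.n, n, n+1) {
			return true
		}
	}
}

func main() {
	dentry := &ref{} // cached at zero refs, but still needs to be synced
	dentry.IncRef()  // fine while the equivalent of renameMu is held

	fd := &ref{}                // an FD that has already been released
	fmt.Println(fd.TryIncRef()) // false: skip it instead of resurrecting it
}
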
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 3cdb1e659..98f7bc52f 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1944,22 +1944,22 @@ func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error {
}
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
-func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
+func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
fd.lockLogging.Do(func() {
log.Infof("File lock using gofer file handled internally.")
})
- return fd.LockFD.LockBSD(ctx, uid, t, block)
+ return fd.LockFD.LockBSD(ctx, uid, ownerPID, t, block)
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
fd.lockLogging.Do(func() {
log.Infof("Range lock using gofer file handled internally.")
})
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+ return fd.Locks().LockPOSIX(ctx, uid, ownerPID, t, r, block)
}
// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
+ return fd.Locks().UnlockPOSIX(ctx, uid, r)
}
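
[Editor's note] With start/length/whence gone from these methods, whence handling moves to callers, which build an fslock.LockRange (typically with lock.ComputeRange, shown earlier in this diff) before calling LockPOSIX/UnlockPOSIX. A hedged sketch of that conversion; the helper name and the whence-to-base mapping are illustrative (standard fcntl semantics), not code from this change:

package example

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/sentry/fs/lock"
	"gvisor.dev/gvisor/pkg/syserror"
)

// posixRange turns an fcntl-style request into the lock.LockRange that
// LockPOSIX/UnlockPOSIX now expect.
func posixRange(flk linux.Flock, curOff, fileSize int64) (lock.LockRange, error) {
	var base int64
	switch flk.Whence {
	case linux.SEEK_SET:
		base = 0
	case linux.SEEK_CUR:
		base = curOff
	case linux.SEEK_END:
		base = fileSize
	default:
		return lock.LockRange{}, syserror.EINVAL
	}
	return lock.ComputeRange(flk.Start, flk.Len, base)
}
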
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 36a3f6810..05f11fbd5 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -28,7 +28,6 @@ import (
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/log"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -810,13 +809,3 @@ func (f *fileDescription) EventUnregister(e *waiter.Entry) {
func (f *fileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
return fdnotifier.NonBlockingPoll(int32(f.inode.hostFD), mask)
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (f *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return f.Locks().LockPOSIX(ctx, &f.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (f *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return f.Locks().UnlockPOSIX(ctx, &f.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go
index 60acc367f..72aa535f8 100644
--- a/pkg/sentry/fsimpl/host/socket.go
+++ b/pkg/sentry/fsimpl/host/socket.go
@@ -201,7 +201,7 @@ func (c *ConnectedEndpoint) Passcred() bool {
}
// GetLocalAddress implements transport.ConnectedEndpoint.GetLocalAddress.
-func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
return tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, nil
}
diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go
index f5c596fec..0f9e20a84 100644
--- a/pkg/sentry/fsimpl/host/tty.go
+++ b/pkg/sentry/fsimpl/host/tty.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/unimpl"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -370,13 +369,3 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
_ = pg.SendSignal(kernel.SignalInfoPriv(sig))
return syserror.ERESTARTSYS
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (t *TTYFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, typ fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return t.Locks().LockPOSIX(ctx, &t.vfsfd, uid, typ, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (t *TTYFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return t.Locks().UnlockPOSIX(ctx, &t.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index 485504995..65054b0ea 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
@@ -136,13 +135,3 @@ func (fd *DynamicBytesFD) SetStat(context.Context, vfs.SetStatOptions) error {
// DynamicBytesFiles are immutable.
return syserror.EPERM
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *DynamicBytesFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *DynamicBytesFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index f8dae22f8..e55111af0 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -275,13 +274,3 @@ func (fd *GenericDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptio
func (fd *GenericDirectoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
return fd.DirectoryFileDescriptionDefaultImpl.Allocate(ctx, mode, offset, length)
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *GenericDirectoryFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *GenericDirectoryFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index eac578f25..8139bff76 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -371,6 +371,8 @@ type OrderedChildrenOptions struct {
// OrderedChildren may modify the tracked children. This applies to
// operations related to rename, unlink and rmdir. If an OrderedChildren is
// not writable, these operations all fail with EPERM.
+ //
+ // Note that writable users must implement the sticky bit (I_SVTX).
Writable bool
}
@@ -556,7 +558,6 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode)
return err
}
- // TODO(gvisor.dev/issue/3027): Check sticky bit before removing.
o.removeLocked(name)
return nil
}
@@ -603,8 +604,8 @@ func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, c
if err := o.checkExistingLocked(oldname, child); err != nil {
return err
}
+ o.removeLocked(oldname)
- // TODO(gvisor.dev/issue/3027): Check sticky bit before removing.
dst.replaceChildLocked(ctx, newname, child)
return nil
}
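
[Editor's note] The removed TODOs and the new comment shift sticky-bit enforcement to writable OrderedChildren users. A minimal, plain-Go sketch of the rule they now need to apply; the helper and its parameters are illustrative, not kernfs API:

// S_ISVTX: with the sticky bit set on the parent directory, only the entry's
// owner, the directory's owner, or a CAP_FOWNER-capable caller may unlink or
// rename the entry.
const stickyBit = 0o1000

func mayDelete(parentMode uint16, parentOwner, entryOwner, caller uint32, hasCapFowner bool) bool {
	if parentMode&stickyBit == 0 {
		return true // no sticky bit: normal write permission on the parent suffices
	}
	return caller == entryOwner || caller == parentOwner || hasCapFowner
}
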
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index 3492409b2..082fa6504 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -42,7 +42,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/refsvfs2"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -820,13 +819,3 @@ func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error {
d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
return nil
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 75be6129f..fdae163d1 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -518,16 +517,6 @@ func (fd *memFD) SetStat(context.Context, vfs.SetStatOptions) error {
// Release implements vfs.FileDescriptionImpl.Release.
func (fd *memFD) Release(context.Context) {}
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *memFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *memFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
-}
-
// mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps.
//
// +stateify savable
@@ -1110,13 +1099,3 @@ func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) err
func (fd *namespaceFD) Release(ctx context.Context) {
fd.inode.DecRef(ctx)
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *namespaceFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *namespaceFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go
index 7ee6227a9..d6f076cd6 100644
--- a/pkg/sentry/fsimpl/proc/tasks_test.go
+++ b/pkg/sentry/fsimpl/proc/tasks_test.go
@@ -393,7 +393,7 @@ func TestProcSelf(t *testing.T) {
t.Fatalf("CreateTask(): %v", err)
}
- collector := s.WithTemporaryContext(task).ListDirents(&vfs.PathOperation{
+ collector := s.WithTemporaryContext(task.AsyncContext()).ListDirents(&vfs.PathOperation{
Root: s.Root,
Start: s.Root,
Path: fspath.Parse("/proc/self/"),
@@ -491,11 +491,11 @@ func TestTree(t *testing.T) {
t.Fatalf("CreateTask(): %v", err)
}
// Add file to populate /proc/[pid]/fd and fdinfo directories.
- task.FDTable().NewFDVFS2(task, 0, file, kernel.FDFlags{})
+ task.FDTable().NewFDVFS2(task.AsyncContext(), 0, file, kernel.FDFlags{})
tasks = append(tasks, task)
}
- ctx := tasks[0]
+ ctx := tasks[0].AsyncContext()
fd, err := s.VFS.OpenAt(
ctx,
auth.CredentialsFromContext(s.Ctx),
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
index 146c7fdfe..4393cc13b 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
@@ -140,35 +140,35 @@ func TestLocks(t *testing.T) {
uid1 := 123
uid2 := 456
- if err := fd.Impl().LockBSD(ctx, uid1, lock.ReadLock, nil); err != nil {
+ if err := fd.Impl().LockBSD(ctx, uid1, 0 /* ownerPID */, lock.ReadLock, nil); err != nil {
t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
}
- if err := fd.Impl().LockBSD(ctx, uid2, lock.ReadLock, nil); err != nil {
+ if err := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, nil); err != nil {
t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
}
- if got, want := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, nil), syserror.ErrWouldBlock; got != want {
+ if got, want := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil), syserror.ErrWouldBlock; got != want {
t.Fatalf("fd.Impl().LockBSD failed: got = %v, want = %v", got, want)
}
if err := fd.Impl().UnlockBSD(ctx, uid1); err != nil {
t.Fatalf("fd.Impl().UnlockBSD failed: err = %v", err)
}
- if err := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, nil); err != nil {
+ if err := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil); err != nil {
t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
}
- if err := fd.Impl().LockPOSIX(ctx, uid1, lock.ReadLock, 0, 1, linux.SEEK_SET, nil); err != nil {
+ if err := fd.Impl().LockPOSIX(ctx, uid1, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil); err != nil {
t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
}
- if err := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, 1, 2, linux.SEEK_SET, nil); err != nil {
+ if err := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 1, End: 2}, nil); err != nil {
t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
}
- if err := fd.Impl().LockPOSIX(ctx, uid1, lock.WriteLock, 0, 1, linux.SEEK_SET, nil); err != nil {
+ if err := fd.Impl().LockPOSIX(ctx, uid1, 0 /* ownerPID */, lock.WriteLock, lock.LockRange{Start: 0, End: 1}, nil); err != nil {
t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
}
- if got, want := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, 0, 1, linux.SEEK_SET, nil), syserror.ErrWouldBlock; got != want {
+ if got, want := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil), syserror.ErrWouldBlock; got != want {
t.Fatalf("fd.Impl().LockPOSIX failed: got = %v, want = %v", got, want)
}
- if err := fd.Impl().UnlockPOSIX(ctx, uid1, 0, 1, linux.SEEK_SET); err != nil {
+ if err := fd.Impl().UnlockPOSIX(ctx, uid1, lock.LockRange{Start: 0, End: 1}); err != nil {
t.Fatalf("fd.Impl().UnlockPOSIX failed: err = %v", err)
}
}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 0c9c639d3..b32c54e20 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -36,7 +36,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
@@ -797,16 +796,6 @@ func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error {
return nil
}
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
-}
-
// Sync implements vfs.FileDescriptionImpl.Sync. It does nothing because all
// filesystem state is in-memory.
func (*fileDescription) Sync(context.Context) error {
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index a5171b5ad..8645078a0 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -660,7 +660,6 @@ func (d *dentry) readlink(ctx context.Context) (string, error) {
type fileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
- vfs.LockFD
// d is the corresponding dentry to the fileDescription.
d *dentry
@@ -1104,14 +1103,29 @@ func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, op
return 0, syserror.EROFS
}
+// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
+func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
+ return fd.lowerFD.LockBSD(ctx, ownerPID, t, block)
+}
+
+// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD.
+func (fd *fileDescription) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
+ return fd.lowerFD.UnlockBSD(ctx)
+}
+
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.lowerFD.LockPOSIX(ctx, uid, t, start, length, whence, block)
+func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
+ return fd.lowerFD.LockPOSIX(ctx, uid, ownerPID, t, r, block)
}
// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.lowerFD.UnlockPOSIX(ctx, uid, start, length, whence)
+func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
+ return fd.lowerFD.UnlockPOSIX(ctx, uid, r)
+}
+
+// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX.
+func (fd *fileDescription) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
+ return fd.lowerFD.TestPOSIX(ctx, uid, t, r)
}
// FileReadWriteSeeker is a helper struct to pass a vfs.FileDescription as
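
With vfs.LockFD dropped from verity's fileDescription, lock operations are now forwarded to the lower file descriptor, so verity and the underlying filesystem share one lock table instead of each keeping its own. A generic sketch of that delegation pattern with stand-in types, not the real vfs/fslock interfaces:

    package main

    import "fmt"

    // locker is a stand-in for the subset of the lock interface being forwarded.
    type locker interface {
    	Lock(owner string) error
    	Unlock(owner string) error
    }

    // lowerFile is the wrapped implementation that actually owns the lock state.
    type lowerFile struct{ locks map[string]bool }

    func (l *lowerFile) Lock(owner string) error   { l.locks[owner] = true; return nil }
    func (l *lowerFile) Unlock(owner string) error { delete(l.locks, owner); return nil }

    // verityFile forwards lock operations to the file it wraps rather than
    // keeping a separate lock table.
    type verityFile struct{ lower locker }

    func (f *verityFile) Lock(owner string) error   { return f.lower.Lock(owner) }
    func (f *verityFile) Unlock(owner string) error { return f.lower.Unlock(owner) }

    func main() {
    	lower := &lowerFile{locks: map[string]bool{}}
    	upper := &verityFile{lower: lower}
    	upper.Lock("fd-table-1")
    	fmt.Println(lower.locks) // map[fd-table-1:true]: both layers see one lock state
    	upper.Unlock("fd-table-1")
    	fmt.Println(lower.locks) // map[]
    }
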
diff --git a/pkg/sentry/fsimpl/verity/verity_test.go b/pkg/sentry/fsimpl/verity/verity_test.go
index 30d8b4355..798d6a9bd 100644
--- a/pkg/sentry/fsimpl/verity/verity_test.go
+++ b/pkg/sentry/fsimpl/verity/verity_test.go
@@ -66,7 +66,7 @@ func dentryFromFD(t *testing.T, fd *vfs.FileDescription) *dentry {
// newVerityRoot creates a new verity mount, and returns the root. The
// underlying file system is tmpfs. If the error is not nil, then cleanup
// should be called when the root is no longer needed.
-func newVerityRoot(t *testing.T, hashAlg HashAlgorithm) (*vfs.VirtualFilesystem, vfs.VirtualDentry, *kernel.Task, error) {
+func newVerityRoot(t *testing.T, hashAlg HashAlgorithm) (*vfs.VirtualFilesystem, vfs.VirtualDentry, context.Context, error) {
t.Helper()
k, err := testutil.Boot()
if err != nil {
@@ -119,7 +119,7 @@ func newVerityRoot(t *testing.T, hashAlg HashAlgorithm) (*vfs.VirtualFilesystem,
root.DecRef(ctx)
mntns.DecRef(ctx)
})
- return vfsObj, root, task, nil
+ return vfsObj, root, task.AsyncContext(), nil
}
// openVerityAt opens a verity file.
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 0ee60569c..8a5b11d40 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -240,7 +240,6 @@ go_library(
"//pkg/sentry/fs/lock",
"//pkg/sentry/fs/timerfd",
"//pkg/sentry/fsbridge",
- "//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/fsimpl/pipefs",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/fsimpl/timerfd",
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index 7aba31587..a6afabb1c 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -153,20 +153,12 @@ func (f *FDTable) drop(ctx context.Context, file *fs.File) {
// dropVFS2 drops the table reference.
func (f *FDTable) dropVFS2(ctx context.Context, file *vfs.FileDescription) {
- // Release any POSIX lock possibly held by the FDTable. Range {0, 0} means the
- // entire file.
- err := file.UnlockPOSIX(ctx, f, 0, 0, linux.SEEK_SET)
+ // Release any POSIX lock possibly held by the FDTable.
+ err := file.UnlockPOSIX(ctx, f, lock.LockRange{Start: 0, End: lock.LockEOF})
if err != nil && err != syserror.ENOLCK {
panic(fmt.Sprintf("UnlockPOSIX failed: %v", err))
}
- // Generate inotify events.
- ev := uint32(linux.IN_CLOSE_NOWRITE)
- if file.IsWritable() {
- ev = linux.IN_CLOSE_WRITE
- }
- file.Dentry().InotifyWithParent(ctx, ev, 0, vfs.PathEvent)
-
// Drop the table's reference.
file.DecRef(ctx)
}
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 303ae8056..ef4e934a1 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -593,8 +593,8 @@ func (k *Kernel) flushWritesToFiles(ctx context.Context) error {
// Wrap this error in ErrSaveRejection so that it will trigger a save
// error, rather than a panic. This also allows us to distinguish Fsync
// errors from state file errors in state.Save.
- return fs.ErrSaveRejection{
- Err: fmt.Errorf("%q was not sufficiently synced: %v", name, err),
+ return &fs.ErrSaveRejection{
+ Err: fmt.Errorf("%q was not sufficiently synced: %w", name, err),
}
}
return nil
diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD
index 2c32d017d..71daa9f4b 100644
--- a/pkg/sentry/kernel/pipe/BUILD
+++ b/pkg/sentry/kernel/pipe/BUILD
@@ -27,7 +27,6 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/fs/lock",
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserror",
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index d5a91730d..3b6336e94 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/arch"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
@@ -441,13 +440,3 @@ func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFr
}
return n, err
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *VFSPipeFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *VFSPipeFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go
index 9419f2e95..ecbe8f920 100644
--- a/pkg/sentry/kernel/task_block.go
+++ b/pkg/sentry/kernel/task_block.go
@@ -69,7 +69,7 @@ func (t *Task) BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time.
// syserror.ErrInterrupted if t is interrupted.
//
// Preconditions: The caller must be running on the task goroutine.
-func (t *Task) BlockWithDeadline(C chan struct{}, haveDeadline bool, deadline ktime.Time) error {
+func (t *Task) BlockWithDeadline(C <-chan struct{}, haveDeadline bool, deadline ktime.Time) error {
if !haveDeadline {
return t.block(C, nil)
}
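
Widening C to <-chan struct{} is backwards compatible: a bidirectional channel converts implicitly to receive-only, while channels that are only exposed for receiving (for example, notification channels handed out by waiter-style APIs) can now be passed without conversion. A standalone illustration:

    package main

    import (
    	"errors"
    	"fmt"
    	"time"
    )

    // wait takes a receive-only channel, so both bidirectional and
    // receive-only channels can be passed directly.
    func wait(c <-chan struct{}, timeout time.Duration) error {
    	select {
    	case <-c:
    		return nil
    	case <-time.After(timeout):
    		return errors.New("timed out")
    	}
    }

    func main() {
    	bidi := make(chan struct{}, 1)
    	bidi <- struct{}{}
    	fmt.Println(wait(bidi, time.Second)) // <nil>: existing callers still work

    	var recvOnly <-chan struct{} = make(chan struct{})
    	fmt.Println(wait(recvOnly, 10*time.Millisecond)) // timed out
    }
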
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index fdadb52c0..e9da99067 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -216,7 +216,7 @@ func (ns *PIDNamespace) TaskWithID(tid ThreadID) *Task {
return t
}
-// ThreadGroupWithID returns the thread group lead by the task with thread ID
+// ThreadGroupWithID returns the thread group led by the task with thread ID
// tid in PID namespace ns. If no task has that TID, or if the task with that
// TID is not a thread group leader, ThreadGroupWithID returns nil.
func (ns *PIDNamespace) ThreadGroupWithID(tid ThreadID) *ThreadGroup {
@@ -292,6 +292,11 @@ func (ns *PIDNamespace) UserNamespace() *auth.UserNamespace {
return ns.userns
}
+// Root returns the root PID namespace of ns.
+func (ns *PIDNamespace) Root() *PIDNamespace {
+ return ns.owner.Root
+}
+
// A threadGroupNode defines the relationship between a thread group and the
// rest of the system. Conceptually, threadGroupNode is data belonging to the
// owning TaskSet, as if TaskSet contained a field `nodes
@@ -485,3 +490,8 @@ func (t *Task) Parent() *Task {
func (t *Task) ThreadID() ThreadID {
return t.tg.pidns.IDOfTask(t)
}
+
+// TGIDInRoot returns t's TGID in the root PID namespace.
+func (t *Task) TGIDInRoot() ThreadID {
+ return t.tg.pidns.owner.Root.IDOfThreadGroup(t.tg)
+}
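
Root and TGIDInRoot let callers report identifiers, such as the owner PIDs now recorded with file locks elsewhere in this change, in the root PID namespace rather than in whatever namespace the task runs in. A toy model of the idea, not the real Task/PIDNamespace types:

    package main

    import "fmt"

    // namespace is a toy PID namespace: each one has its own numbering for
    // the same underlying thread group.
    type namespace struct{ ids map[string]int }

    type task struct {
    	group string
    	ns    *namespace // the namespace the task runs in
    }

    var root = &namespace{ids: map[string]int{}}

    // tgidInRoot reports the task's thread group ID as seen from the root
    // namespace, which is the stable, host-wide view.
    func (t *task) tgidInRoot() int { return root.ids[t.group] }

    func main() {
    	child := &namespace{ids: map[string]int{"tg-a": 2}}
    	root.ids["tg-a"] = 1234
    	t := &task{group: "tg-a", ns: child}
    	fmt.Println(t.ns.ids[t.group], t.tgidInRoot()) // 2 1234
    }
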
diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go
index 6efe5102b..73bfbea49 100644
--- a/pkg/sentry/mm/procfs.go
+++ b/pkg/sentry/mm/procfs.go
@@ -17,7 +17,6 @@ package mm
import (
"bytes"
"fmt"
- "strings"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
@@ -165,12 +164,12 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI
}
if s != "" {
// Per linux, we pad until the 74th character.
- if pad := 73 - lineLen; pad > 0 {
- b.WriteString(strings.Repeat(" ", pad))
+ for pad := 73 - lineLen; pad > 0; pad-- {
+ b.WriteByte(' ')
}
b.WriteString(s)
}
- b.WriteString("\n")
+ b.WriteByte('\n')
}
// ReadSmapsDataInto is called by fsimpl/proc.smapsData.Generate to
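
The padding change above trades a strings.Repeat call, which allocates a temporary string for every /proc/[pid]/maps line, for a byte-at-a-time loop into the buffer that is already being built. Both forms produce identical output; a standalone comparison:

    package main

    import (
    	"bytes"
    	"fmt"
    	"strings"
    )

    // padRepeat pads via strings.Repeat, allocating a temporary string.
    func padRepeat(b *bytes.Buffer, lineLen int) {
    	if pad := 73 - lineLen; pad > 0 {
    		b.WriteString(strings.Repeat(" ", pad))
    	}
    }

    // padLoop writes the same spaces directly into the buffer.
    func padLoop(b *bytes.Buffer, lineLen int) {
    	for pad := 73 - lineLen; pad > 0; pad-- {
    		b.WriteByte(' ')
    	}
    }

    func main() {
    	var a, b bytes.Buffer
    	padRepeat(&a, 60)
    	padLoop(&b, 60)
    	fmt.Println(a.Len() == b.Len(), b.Len()) // true 13
    }
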
diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go
index 675efdc7c..69e37330b 100644
--- a/pkg/sentry/mm/syscalls.go
+++ b/pkg/sentry/mm/syscalls.go
@@ -1055,18 +1055,11 @@ func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
mm.activeMu.Lock()
defer mm.activeMu.Unlock()
- // Linux's mm/madvise.c:madvise_dontneed() => mm/memory.c:zap_page_range()
- // is analogous to our mm.invalidateLocked(ar, true, true). We inline this
- // here, with the special case that we synchronously decommit
- // uniquely-owned (non-copy-on-write) pages for private anonymous vma,
- // which is the common case for MADV_DONTNEED. Invalidating these pmas, and
- // allowing them to be reallocated when touched again, increases pma
- // fragmentation, which may significantly reduce performance for
- // non-vectored I/O implementations. Also, decommitting synchronously
- // ensures that Decommit immediately reduces host memory usage.
+ // This is invalidateLocked(invalidatePrivate=true, invalidateShared=true),
+ // with the additional wrinkle that we must refuse to invalidate pmas under
+ // mlocked vmas.
var didUnmapAS bool
pseg := mm.pmas.LowerBoundSegment(ar.Start)
- mf := mm.mfp.MemoryFile()
for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
vma := vseg.ValuePtr()
if vma.mlockMode != memmap.MLockNone {
@@ -1081,20 +1074,8 @@ func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
}
}
for pseg.Ok() && pseg.Start() < vsegAR.End {
- pma := pseg.ValuePtr()
- if pma.private && !mm.isPMACopyOnWriteLocked(vseg, pseg) {
- psegAR := pseg.Range().Intersect(ar)
- if vsegAR.IsSupersetOf(psegAR) && vma.mappable == nil {
- if err := mf.Decommit(pseg.fileRangeOf(psegAR)); err == nil {
- pseg = pseg.NextSegment()
- continue
- }
- // If an error occurs, fall through to the general
- // invalidation case below.
- }
- }
pseg = mm.pmas.Isolate(pseg, vsegAR)
- pma = pseg.ValuePtr()
+ pma := pseg.ValuePtr()
if !didUnmapAS {
// Unmap all of ar, not just pseg.Range(), to minimize host
// syscalls. AddressSpace mappings must be removed before
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index aacd7ce70..17fb0a0d8 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -550,6 +550,12 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
// Wait for the syscall-enter stop.
sig := t.wait(stopped)
+ if sig == syscall.SIGSTOP {
+ // SIGSTOP was delivered to another thread in the same thread
+ // group, which initiated another group stop. Just ignore it.
+ continue
+ }
+
// Refresh all registers.
if err := t.getRegs(regs); err != nil {
panic(fmt.Sprintf("ptrace get regs failed: %v", err))
@@ -566,13 +572,11 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
// Is it a system call?
if sig == (syscallEvent | syscall.SIGTRAP) {
+ s.arm64SyscallWorkaround(t, regs)
+
// Ensure registers are sane.
updateSyscallRegs(regs)
return true
- } else if sig == syscall.SIGSTOP {
- // SIGSTOP was delivered to another thread in the same thread
- // group, which initiated another group stop. Just ignore it.
- continue
}
// Grab signal information.
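
The reordering above checks for a group-stop SIGSTOP before touching registers, and only afterwards tests for a syscall stop. With PTRACE_O_TRACESYSGOOD, syscall stops are reported as SIGTRAP with bit 0x80 ORed in, which is what the (syscallEvent | syscall.SIGTRAP) comparison matches. The sketch below assumes syscallEvent is that 0x80 bit, mirroring the ptrace convention rather than quoting this package's own constant.

    package main

    import (
    	"fmt"
    	"syscall"
    )

    // syscallEvent is assumed to be the PTRACE_O_TRACESYSGOOD marker bit.
    const syscallEvent syscall.Signal = 0x80

    // classify mirrors the branching in switchToApp: group stops are ignored,
    // syscall stops are handled, everything else is treated as a signal.
    func classify(sig syscall.Signal) string {
    	switch {
    	case sig == syscallEvent|syscall.SIGTRAP:
    		return "syscall stop"
    	case sig == syscall.SIGSTOP:
    		return "group stop (ignore and keep waiting)"
    	default:
    		return "other signal"
    	}
    }

    func main() {
    	fmt.Println(classify(syscallEvent | syscall.SIGTRAP))
    	fmt.Println(classify(syscall.SIGSTOP))
    }
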
diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go
index 020bbda79..04815282b 100644
--- a/pkg/sentry/platform/ptrace/subprocess_amd64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go
@@ -257,3 +257,6 @@ func probeSeccomp() bool {
}
}
}
+
+func (s *subprocess) arm64SyscallWorkaround(t *thread, regs *arch.Registers) {
+}
diff --git a/pkg/sentry/platform/ptrace/subprocess_arm64.go b/pkg/sentry/platform/ptrace/subprocess_arm64.go
index bd618fae8..416132967 100644
--- a/pkg/sentry/platform/ptrace/subprocess_arm64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_arm64.go
@@ -21,6 +21,7 @@ import (
"strings"
"syscall"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/seccomp"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -172,3 +173,38 @@ func appendArchSeccompRules(rules []seccomp.RuleSet, defaultAction linux.BPFActi
func probeSeccomp() bool {
return true
}
+
+func (s *subprocess) arm64SyscallWorkaround(t *thread, regs *arch.Registers) {
+ // On ARM64, when ptrace stops on a system call, it uses the x7
+ // register to indicate whether the stop was signalled from syscall
+ // entry or syscall exit. This means that we can neither read nor
+ // change the value of this register. More details are in the comment
+ // for tracehook_report_syscall in arch/arm64/kernel/ptrace.c.
+ //
+ // This happens only if we stop on a system call, so queue a signal,
+ // resume the stub thread, and catch the signal during signal handling.
+ t.NotifyInterrupt()
+ for {
+ if _, _, errno := syscall.RawSyscall6(
+ syscall.SYS_PTRACE,
+ unix.PTRACE_SYSEMU,
+ uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
+ panic(fmt.Sprintf("ptrace sysemu failed: %v", errno))
+ }
+
+ // Wait for the syscall-enter stop.
+ sig := t.wait(stopped)
+ if sig == syscall.SIGSTOP {
+ // SIGSTOP was delivered to another thread in the same thread
+ // group, which initiated another group stop. Just ignore it.
+ continue
+ }
+ if sig == (syscallEvent | syscall.SIGTRAP) {
+ t.dumpAndPanic("unexpected syscall event")
+ }
+ break
+ }
+ if err := t.getRegs(regs); err != nil {
+ panic(fmt.Sprintf("ptrace get regs failed: %v", err))
+ }
+}
diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD
index b6ebe29d6..a8e6f172b 100644
--- a/pkg/sentry/socket/hostinet/BUILD
+++ b/pkg/sentry/socket/hostinet/BUILD
@@ -28,7 +28,6 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/hostfd",
"//pkg/sentry/inet",
diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go
index 5b868216d..17f59ba1f 100644
--- a/pkg/sentry/socket/hostinet/socket.go
+++ b/pkg/sentry/socket/hostinet/socket.go
@@ -377,10 +377,8 @@ func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt []
switch level {
case linux.SOL_IP:
switch name {
- case linux.IP_TOS, linux.IP_RECVTOS, linux.IP_RECVORIGDSTADDR, linux.IP_RECVERR:
+ case linux.IP_TOS, linux.IP_RECVTOS, linux.IP_PKTINFO, linux.IP_RECVORIGDSTADDR, linux.IP_RECVERR:
optlen = sizeofInt32
- case linux.IP_PKTINFO:
- optlen = linux.SizeOfControlMessageIPPacketInfo
}
case linux.SOL_IPV6:
switch name {
diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go
index 9a2cac40b..f82c7c224 100644
--- a/pkg/sentry/socket/hostinet/socket_vfs2.go
+++ b/pkg/sentry/socket/hostinet/socket_vfs2.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/sentry/arch"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -144,16 +143,6 @@ func (s *socketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
return int64(n), err
}
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (s *socketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (s *socketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
-}
-
type socketProviderVFS2 struct {
family int
}
diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go
index 70c561cce..2f913787b 100644
--- a/pkg/sentry/socket/netfilter/ipv4.go
+++ b/pkg/sentry/socket/netfilter/ipv4.go
@@ -15,7 +15,6 @@
package netfilter
import (
- "bytes"
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -220,18 +219,6 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) {
return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) fields", len(iptip.Src), len(iptip.SrcMask))
}
- n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0)
- if n == -1 {
- n = len(iptip.OutputInterface)
- }
- ifname := string(iptip.OutputInterface[:n])
-
- n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0)
- if n == -1 {
- n = len(iptip.OutputInterfaceMask)
- }
- ifnameMask := string(iptip.OutputInterfaceMask[:n])
-
return stack.IPHeaderFilter{
Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
// A Protocol value of 0 indicates all protocols match.
@@ -242,8 +229,11 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) {
Src: tcpip.Address(iptip.Src[:]),
SrcMask: tcpip.Address(iptip.SrcMask[:]),
SrcInvert: iptip.InverseFlags&linux.IPT_INV_SRCIP != 0,
- OutputInterface: ifname,
- OutputInterfaceMask: ifnameMask,
+ InputInterface: string(trimNullBytes(iptip.InputInterface[:])),
+ InputInterfaceMask: string(trimNullBytes(iptip.InputInterfaceMask[:])),
+ InputInterfaceInvert: iptip.InverseFlags&linux.IPT_INV_VIA_IN != 0,
+ OutputInterface: string(trimNullBytes(iptip.OutputInterface[:])),
+ OutputInterfaceMask: string(trimNullBytes(iptip.OutputInterfaceMask[:])),
OutputInterfaceInvert: iptip.InverseFlags&linux.IPT_INV_VIA_OUT != 0,
}, nil
}
@@ -254,12 +244,12 @@ func containsUnsupportedFields4(iptip linux.IPTIP) bool {
// - Dst and DstMask
// - Src and SrcMask
// - The inverse destination IP check flag
+ // - InputInterface, InputInterfaceMask and its inverse.
// - OutputInterface, OutputInterfaceMask and its inverse.
- var emptyInterface = [linux.IFNAMSIZ]byte{}
+ const flagMask = 0
// Disable any supported inverse flags.
- inverseMask := uint8(linux.IPT_INV_DSTIP) | uint8(linux.IPT_INV_SRCIP) | uint8(linux.IPT_INV_VIA_OUT)
- return iptip.InputInterface != emptyInterface ||
- iptip.InputInterfaceMask != emptyInterface ||
- iptip.Flags != 0 ||
+ const inverseMask = linux.IPT_INV_DSTIP | linux.IPT_INV_SRCIP |
+ linux.IPT_INV_VIA_IN | linux.IPT_INV_VIA_OUT
+ return iptip.Flags&^flagMask != 0 ||
iptip.InverseFlags&^inverseMask != 0
}
diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go
index 5dbb604f0..263d9d3b5 100644
--- a/pkg/sentry/socket/netfilter/ipv6.go
+++ b/pkg/sentry/socket/netfilter/ipv6.go
@@ -15,7 +15,6 @@
package netfilter
import (
- "bytes"
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -223,18 +222,6 @@ func filterFromIP6TIP(iptip linux.IP6TIP) (stack.IPHeaderFilter, error) {
return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) fields", len(iptip.Src), len(iptip.SrcMask))
}
- n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0)
- if n == -1 {
- n = len(iptip.OutputInterface)
- }
- ifname := string(iptip.OutputInterface[:n])
-
- n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0)
- if n == -1 {
- n = len(iptip.OutputInterfaceMask)
- }
- ifnameMask := string(iptip.OutputInterfaceMask[:n])
-
return stack.IPHeaderFilter{
Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
// In ip6tables a flag controls whether to check the protocol.
@@ -245,8 +232,11 @@ func filterFromIP6TIP(iptip linux.IP6TIP) (stack.IPHeaderFilter, error) {
Src: tcpip.Address(iptip.Src[:]),
SrcMask: tcpip.Address(iptip.SrcMask[:]),
SrcInvert: iptip.InverseFlags&linux.IP6T_INV_SRCIP != 0,
- OutputInterface: ifname,
- OutputInterfaceMask: ifnameMask,
+ InputInterface: string(trimNullBytes(iptip.InputInterface[:])),
+ InputInterfaceMask: string(trimNullBytes(iptip.InputInterfaceMask[:])),
+ InputInterfaceInvert: iptip.InverseFlags&linux.IP6T_INV_VIA_IN != 0,
+ OutputInterface: string(trimNullBytes(iptip.OutputInterface[:])),
+ OutputInterfaceMask: string(trimNullBytes(iptip.OutputInterfaceMask[:])),
OutputInterfaceInvert: iptip.InverseFlags&linux.IP6T_INV_VIA_OUT != 0,
}, nil
}
@@ -257,14 +247,13 @@ func containsUnsupportedFields6(iptip linux.IP6TIP) bool {
// - Dst and DstMask
// - Src and SrcMask
// - The inverse destination IP check flag
+ // - InputInterface, InputInterfaceMask and its inverse.
// - OutputInterface, OutputInterfaceMask and its inverse.
- var emptyInterface = [linux.IFNAMSIZ]byte{}
- flagMask := uint8(linux.IP6T_F_PROTO)
+ const flagMask = linux.IP6T_F_PROTO
// Disable any supported inverse flags.
- inverseMask := uint8(linux.IP6T_INV_DSTIP) | uint8(linux.IP6T_INV_SRCIP) | uint8(linux.IP6T_INV_VIA_OUT)
- return iptip.InputInterface != emptyInterface ||
- iptip.InputInterfaceMask != emptyInterface ||
- iptip.Flags&^flagMask != 0 ||
+ const inverseMask = linux.IP6T_INV_DSTIP | linux.IP6T_INV_SRCIP |
+ linux.IP6T_INV_VIA_IN | linux.IP6T_INV_VIA_OUT
+ return iptip.Flags&^flagMask != 0 ||
iptip.InverseFlags&^inverseMask != 0 ||
iptip.TOS != 0
}
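
Both containsUnsupportedFields variants now reduce to mask checks: flags&^flagMask is non-zero exactly when a bit outside the supported set is present, and the same test applies to the inverse flags. A small standalone example of the &^ (AND NOT) idiom, using placeholder constants rather than the real IPT_*/IP6T_* values:

    package main

    import "fmt"

    const (
    	flagProto     = 0x1                  // supported flag (placeholder value)
    	invDstIP      = 0x2                  // supported inverse flag (placeholder)
    	invSrcIP      = 0x4                  // supported inverse flag (placeholder)
    	supportedMask = invDstIP | invSrcIP
    )

    // hasUnsupported reports whether any bit outside the supported masks is set.
    func hasUnsupported(flags, inverseFlags uint8) bool {
    	return flags&^flagProto != 0 || inverseFlags&^supportedMask != 0
    }

    func main() {
    	fmt.Println(hasUnsupported(flagProto, invDstIP)) // false: only supported bits
    	fmt.Println(hasUnsupported(0x8, 0))              // true: unknown flag bit set
    }
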
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index 26bd1abd4..7ae18b2a3 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -17,6 +17,7 @@
package netfilter
import (
+ "bytes"
"errors"
"fmt"
@@ -393,3 +394,11 @@ func TargetRevision(t *kernel.Task, revPtr usermem.Addr, netProto tcpip.NetworkP
rev.Revision = maxSupported
return rev, nil
}
+
+// trimNullBytes returns b truncated at the first NUL byte, or b unchanged if
+// no NUL is present.
+func trimNullBytes(b []byte) []byte {
+ n := bytes.IndexByte(b, 0)
+ if n == -1 {
+ n = len(b)
+ }
+ return b[:n]
+}
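
trimNullBytes centralizes the IFNAMSIZ handling that the ipv4 and ipv6 paths previously open-coded: interface names arrive from userspace as fixed-size, NUL-padded byte arrays, and only the bytes before the first NUL are meaningful. Reproduced below as a standalone program (IFNAMSIZ assumed to be 16, as on Linux):

    package main

    import (
    	"bytes"
    	"fmt"
    )

    // trimNullBytes returns b truncated at the first NUL byte, or b unchanged
    // if no NUL is present.
    func trimNullBytes(b []byte) []byte {
    	n := bytes.IndexByte(b, 0)
    	if n == -1 {
    		n = len(b)
    	}
    	return b[:n]
    }

    func main() {
    	var ifname [16]byte // IFNAMSIZ is 16 on Linux
    	copy(ifname[:], "eth0")
    	fmt.Printf("%q\n", string(trimNullBytes(ifname[:]))) // "eth0"
    }
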
diff --git a/pkg/sentry/socket/netfilter/owner_matcher.go b/pkg/sentry/socket/netfilter/owner_matcher.go
index 69d13745e..176fa6116 100644
--- a/pkg/sentry/socket/netfilter/owner_matcher.go
+++ b/pkg/sentry/socket/netfilter/owner_matcher.go
@@ -112,7 +112,7 @@ func (*OwnerMatcher) Name() string {
}
// Match implements Matcher.Match.
-func (om *OwnerMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) {
+func (om *OwnerMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, _, _ string) (bool, bool) {
// Support only for OUTPUT chain.
// TODO(gvisor.dev/issue/170): Need to support for POSTROUTING chain also.
if hook != stack.Output {
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
index 352c51390..2740697b3 100644
--- a/pkg/sentry/socket/netfilter/tcp_matcher.go
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -96,7 +96,7 @@ func (*TCPMatcher) Name() string {
}
// Match implements Matcher.Match.
-func (tm *TCPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) {
+func (tm *TCPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, _, _ string) (bool, bool) {
// TODO(gvisor.dev/issue/170): Proto checks should ultimately be moved
// into the stack.Check codepath as matchers are added.
switch pkt.NetworkProtocolNumber {
diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go
index c88d8268d..466d5395d 100644
--- a/pkg/sentry/socket/netfilter/udp_matcher.go
+++ b/pkg/sentry/socket/netfilter/udp_matcher.go
@@ -93,7 +93,7 @@ func (*UDPMatcher) Name() string {
}
// Match implements Matcher.Match.
-func (um *UDPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) {
+func (um *UDPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, _, _ string) (bool, bool) {
// TODO(gvisor.dev/issue/170): Proto checks should ultimately be moved
// into the stack.Check codepath as matchers are added.
switch pkt.NetworkProtocolNumber {
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD
index 1f926aa91..9313e1167 100644
--- a/pkg/sentry/socket/netlink/BUILD
+++ b/pkg/sentry/socket/netlink/BUILD
@@ -22,7 +22,6 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go
index 461d524e5..842036764 100644
--- a/pkg/sentry/socket/netlink/socket_vfs2.go
+++ b/pkg/sentry/socket/netlink/socket_vfs2.go
@@ -18,7 +18,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/unix"
@@ -149,13 +148,3 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
n, err := s.sendMsg(ctx, src, nil, 0, socket.ControlMessages{})
return int64(n), err.ToError()
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD
index 22abca120..915134b41 100644
--- a/pkg/sentry/socket/netstack/BUILD
+++ b/pkg/sentry/socket/netstack/BUILD
@@ -28,7 +28,6 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
@@ -42,7 +41,6 @@ go_library(
"//pkg/syserr",
"//pkg/syserror",
"//pkg/tcpip",
- "//pkg/tcpip/buffer",
"//pkg/tcpip/header",
"//pkg/tcpip/network/ipv4",
"//pkg/tcpip/network/ipv6",
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 22e128b96..94f03af48 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -19,7 +19,7 @@
// be used to expose certain endpoints to the sentry while leaving others out,
// for example, TCP endpoints and Unix-domain endpoints.
//
-// Lock ordering: netstack => mm: ioSequencePayload copies user memory inside
+// Lock ordering: netstack => mm: ioSequenceReadWriter copies user memory inside
// tcpip.Endpoint.Write(). Netstack is allowed to (and does) hold locks during
// this operation.
package netstack
@@ -55,7 +55,6 @@ import (
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
- "gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
@@ -194,7 +193,6 @@ var Metrics = tcpip.Stats{
RequestsReceivedUnknownTargetAddress: mustCreateMetric("/netstack/arp/requests_received_unknown_addr", "Number of ARP requests received with an unknown target address."),
OutgoingRequestInterfaceHasNoLocalAddressErrors: mustCreateMetric("/netstack/arp/outgoing_requests_iface_has_no_addr", "Number of failed attempts to send an ARP request with an interface that has no network address."),
OutgoingRequestBadLocalAddressErrors: mustCreateMetric("/netstack/arp/outgoing_requests_invalid_local_addr", "Number of failed attempts to send an ARP request with a provided local address that is invalid."),
- OutgoingRequestNetworkUnreachableErrors: mustCreateMetric("/netstack/arp/outgoing_requests_network_unreachable", "Number of failed attempts to send an ARP request with a network unreachable error."),
OutgoingRequestsDropped: mustCreateMetric("/netstack/arp/outgoing_requests_dropped", "Number of ARP requests which failed to write to a link-layer endpoint."),
OutgoingRequestsSent: mustCreateMetric("/netstack/arp/outgoing_requests_sent", "Number of ARP requests sent."),
RepliesReceived: mustCreateMetric("/netstack/arp/replies_received", "Number of ARP replies received."),
@@ -253,11 +251,11 @@ var errStackType = syserr.New("expected but did not receive a netstack.Stack", l
type commonEndpoint interface {
// GetLocalAddress implements tcpip.Endpoint.GetLocalAddress and
// transport.Endpoint.GetLocalAddress.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress and
// transport.Endpoint.GetRemoteAddress.
- GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetRemoteAddress() (tcpip.FullAddress, tcpip.Error)
// Readiness implements tcpip.Endpoint.Readiness and
// transport.Endpoint.Readiness.
@@ -265,19 +263,19 @@ type commonEndpoint interface {
// SetSockOpt implements tcpip.Endpoint.SetSockOpt and
// transport.Endpoint.SetSockOpt.
- SetSockOpt(tcpip.SettableSocketOption) *tcpip.Error
+ SetSockOpt(tcpip.SettableSocketOption) tcpip.Error
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt and
// transport.Endpoint.SetSockOptInt.
- SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error
+ SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error
// GetSockOpt implements tcpip.Endpoint.GetSockOpt and
// transport.Endpoint.GetSockOpt.
- GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error
+ GetSockOpt(tcpip.GettableSocketOption) tcpip.Error
// GetSockOptInt implements tcpip.Endpoint.GetSockOptInt and
// transport.Endpoint.GetSockOpt.
- GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error)
+ GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error)
// State returns a socket's lifecycle state. The returned value is
// protocol-specific and is primarily used for diagnostics.
@@ -285,7 +283,7 @@ type commonEndpoint interface {
// LastError implements tcpip.Endpoint.LastError and
// transport.Endpoint.LastError.
- LastError() *tcpip.Error
+ LastError() tcpip.Error
// SocketOptions implements tcpip.Endpoint.SocketOptions and
// transport.Endpoint.SocketOptions.
@@ -440,115 +438,58 @@ func (s *SocketOperations) WriteTo(ctx context.Context, _ *fs.File, dst io.Write
return int64(res.Count), nil
}
-// ioSequencePayload implements tcpip.Payload.
-//
-// t copies user memory bytes on demand based on the requested size.
-type ioSequencePayload struct {
- ctx context.Context
- src usermem.IOSequence
-}
-
-// FullPayload implements tcpip.Payloader.FullPayload
-func (i *ioSequencePayload) FullPayload() ([]byte, *tcpip.Error) {
- return i.Payload(int(i.src.NumBytes()))
-}
-
-// Payload implements tcpip.Payloader.Payload.
-func (i *ioSequencePayload) Payload(size int) ([]byte, *tcpip.Error) {
- if max := int(i.src.NumBytes()); size > max {
- size = max
- }
- v := buffer.NewView(size)
- if _, err := i.src.CopyIn(i.ctx, v); err != nil {
- // EOF can be returned only if src is a file and this means it
- // is in a splice syscall and the error has to be ignored.
- if err == io.EOF {
- return v, nil
- }
- return nil, tcpip.ErrBadAddress
- }
- return v, nil
-}
-
-// DropFirst drops the first n bytes from underlying src.
-func (i *ioSequencePayload) DropFirst(n int) {
- i.src = i.src.DropFirst(int(n))
-}
-
// Write implements fs.FileOperations.Write.
func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
- f := &ioSequencePayload{ctx: ctx, src: src}
- n, err := s.Endpoint.Write(f, tcpip.WriteOptions{})
- if err == tcpip.ErrWouldBlock {
+ r := src.Reader(ctx)
+ n, err := s.Endpoint.Write(r, tcpip.WriteOptions{})
+ if _, ok := err.(*tcpip.ErrWouldBlock); ok {
return 0, syserror.ErrWouldBlock
}
if err != nil {
return 0, syserr.TranslateNetstackError(err).ToError()
}
- if int64(n) < src.NumBytes() {
- return int64(n), syserror.ErrWouldBlock
+ if n < src.NumBytes() {
+ return n, syserror.ErrWouldBlock
}
- return int64(n), nil
+ return n, nil
}
-// readerPayload implements tcpip.Payloader.
-//
-// It allocates a view and reads from a reader on-demand, based on available
-// capacity in the endpoint.
-type readerPayload struct {
- ctx context.Context
- r io.Reader
- count int64
+var _ tcpip.Payloader = (*limitedPayloader)(nil)
+
+type limitedPayloader struct {
+ inner io.LimitedReader
err error
}
-// FullPayload implements tcpip.Payloader.FullPayload.
-func (r *readerPayload) FullPayload() ([]byte, *tcpip.Error) {
- return r.Payload(int(r.count))
+func (l *limitedPayloader) Read(p []byte) (int, error) {
+ n, err := l.inner.Read(p)
+ l.err = err
+ return n, err
}
-// Payload implements tcpip.Payloader.Payload.
-func (r *readerPayload) Payload(size int) ([]byte, *tcpip.Error) {
- if size > int(r.count) {
- size = int(r.count)
- }
- v := buffer.NewView(size)
- n, err := r.r.Read(v)
- if n > 0 {
- // We ignore the error here. It may re-occur on subsequent
- // reads, but for now we can enqueue some amount of data.
- r.count -= int64(n)
- return v[:n], nil
- }
- if err == syserror.ErrWouldBlock {
- return nil, tcpip.ErrWouldBlock
- } else if err != nil {
- r.err = err // Save for propation.
- return nil, tcpip.ErrBadAddress
- }
-
- // There is no data and no error. Return an error, which will propagate
- // r.err, which will be nil. This is the desired result: (0, nil).
- return nil, tcpip.ErrBadAddress
+func (l *limitedPayloader) Len() int {
+ return int(l.inner.N)
}
// ReadFrom implements fs.FileOperations.ReadFrom.
func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader, count int64) (int64, error) {
- f := &readerPayload{ctx: ctx, r: r, count: count}
- n, err := s.Endpoint.Write(f, tcpip.WriteOptions{
+ f := limitedPayloader{
+ inner: io.LimitedReader{
+ R: r,
+ N: count,
+ },
+ }
+ n, err := s.Endpoint.Write(&f, tcpip.WriteOptions{
// Reads may be destructive but should be very fast,
// so we can't release the lock while copying data.
Atomic: true,
})
- if err == tcpip.ErrWouldBlock {
- return n, syserror.ErrWouldBlock
- } else if err != nil {
- return int64(n), f.err // Propagate error.
+ if _, ok := err.(*tcpip.ErrBadBuffer); ok {
+ return n, f.err
}
-
- return int64(n), nil
+ return n, syserr.TranslateNetstackError(err).ToError()
}
// Readiness returns a mask of ready events for socket s.
@@ -592,7 +533,7 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool
if family == linux.AF_UNSPEC {
err := s.Endpoint.Disconnect()
- if err == tcpip.ErrNotSupported {
+ if _, ok := err.(*tcpip.ErrNotSupported); ok {
return syserr.ErrAddressFamilyNotSupported
}
return syserr.TranslateNetstackError(err)
@@ -614,15 +555,16 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool
s.EventRegister(&e, waiter.EventOut)
defer s.EventUnregister(&e)
- if err := s.Endpoint.Connect(addr); err != tcpip.ErrConnectStarted && err != tcpip.ErrAlreadyConnecting {
+ switch err := s.Endpoint.Connect(addr); err.(type) {
+ case *tcpip.ErrConnectStarted, *tcpip.ErrAlreadyConnecting:
+ case *tcpip.ErrNoPortAvailable:
if (s.family == unix.AF_INET || s.family == unix.AF_INET6) && s.skType == linux.SOCK_STREAM {
// TCP unlike UDP returns EADDRNOTAVAIL when it can't
// find an available local ephemeral port.
- if err == tcpip.ErrNoPortAvailable {
- return syserr.ErrAddressNotAvailable
- }
+ return syserr.ErrAddressNotAvailable
}
-
+ return syserr.TranslateNetstackError(err)
+ default:
return syserr.TranslateNetstackError(err)
}
@@ -680,16 +622,16 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
// Issue the bind request to the endpoint.
err := s.Endpoint.Bind(addr)
- if err == tcpip.ErrNoPortAvailable {
+ if _, ok := err.(*tcpip.ErrNoPortAvailable); ok {
// Bind always returns EADDRINUSE irrespective of if the specified port was
// already bound or if an ephemeral port was requested but none were
// available.
//
- // tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because
+ // *tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because
// UDP connect returns EAGAIN on ephemeral port exhaustion.
//
// TCP connect returns EADDRNOTAVAIL on ephemeral port exhaustion.
- err = tcpip.ErrPortInUse
+ err = &tcpip.ErrPortInUse{}
}
return syserr.TranslateNetstackError(err)
@@ -712,7 +654,8 @@ func (s *socketOpsCommon) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAdd
// Try to accept the connection again; if it fails, then wait until we
// get a notification.
for {
- if ep, wq, err := s.Endpoint.Accept(peerAddr); err != tcpip.ErrWouldBlock {
+ ep, wq, err := s.Endpoint.Accept(peerAddr)
+ if _, ok := err.(*tcpip.ErrWouldBlock); !ok {
return ep, wq, syserr.TranslateNetstackError(err)
}
@@ -731,7 +674,7 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int,
}
ep, wq, terr := s.Endpoint.Accept(peerAddr)
if terr != nil {
- if terr != tcpip.ErrWouldBlock || !blocking {
+ if _, ok := terr.(*tcpip.ErrWouldBlock); !ok || !blocking {
return 0, nil, 0, syserr.TranslateNetstackError(terr)
}
@@ -912,7 +855,7 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
return nil, syserr.ErrInvalidArgument
}
- size, err := ep.GetSockOptInt(tcpip.SendBufferSizeOption)
+ size, err := ep.SocketOptions().GetSendBufferSize()
if err != nil {
return nil, syserr.TranslateNetstackError(err)
}
@@ -1164,6 +1107,29 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name,
// TODO(b/64800844): Translate fields once they are added to
// tcpip.TCPInfoOption.
info := linux.TCPInfo{}
+ switch v.CcState {
+ case tcpip.RTORecovery:
+ info.CaState = linux.TCP_CA_Loss
+ case tcpip.FastRecovery, tcpip.SACKRecovery:
+ info.CaState = linux.TCP_CA_Recovery
+ case tcpip.Disorder:
+ info.CaState = linux.TCP_CA_Disorder
+ case tcpip.Open:
+ info.CaState = linux.TCP_CA_Open
+ }
+ info.RTO = uint32(v.RTO / time.Microsecond)
+ info.RTT = uint32(v.RTT / time.Microsecond)
+ info.RTTVar = uint32(v.RTTVar / time.Microsecond)
+ info.SndSsthresh = v.SndSsthresh
+ info.SndCwnd = v.SndCwnd
+
+ // In netstack reorderSeen is updated only when RACK is enabled.
+ // We only track whether reordering is seen, which is different
+ // from Linux, where reorderSeen is not specific to RACK and is
+ // incremented every time a reordering event is seen.
+ if v.ReorderSeen {
+ info.ReordSeen = 1
+ }
// Linux truncates the output binary to outLen.
buf := t.CopyScratchBuffer(info.SizeBytes())
@@ -1681,8 +1647,16 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
+ family, _, _ := s.Type()
+ // TODO(gvisor.dev/issue/5132): We currently do not support
+ // setting this option for unix sockets.
+ if family == linux.AF_UNIX {
+ return nil
+ }
+
v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.SendBufferSizeOption, int(v)))
+ ep.SocketOptions().SetSendBufferSize(int64(v), true)
+ return nil
case linux.SO_RCVBUF:
if len(optVal) < sizeOfInt32 {
@@ -1814,10 +1788,6 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
var v linux.Linger
binary.Unmarshal(optVal[:linux.SizeOfLinger], usermem.ByteOrder, &v)
- if v != (linux.Linger{}) {
- socket.SetSockOptEmitUnimplementedEvent(t, name)
- }
-
ep.SocketOptions().SetLinger(tcpip.LingerOption{
Enabled: v.OnOff != 0,
Timeout: time.Second * time.Duration(v.Linger),
@@ -2596,7 +2566,7 @@ func (s *socketOpsCommon) nonBlockingRead(ctx context.Context, dst usermem.IOSeq
defer s.readMu.Unlock()
res, err := s.Endpoint.Read(w, readOptions)
- if err == tcpip.ErrBadBuffer && dst.NumBytes() == 0 {
+ if _, ok := err.(*tcpip.ErrBadBuffer); ok && dst.NumBytes() == 0 {
err = nil
}
if err != nil {
@@ -2840,45 +2810,48 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b
EndOfRecord: flags&linux.MSG_EOR != 0,
}
- v := &ioSequencePayload{t, src}
- n, err := s.Endpoint.Write(v, opts)
- dontWait := flags&linux.MSG_DONTWAIT != 0
- if err == nil && (n >= v.src.NumBytes() || dontWait) {
- // Complete write.
- return int(n), nil
- }
- if err != nil && (err != tcpip.ErrWouldBlock || dontWait) {
- return int(n), syserr.TranslateNetstackError(err)
- }
-
- // We'll have to block. Register for notification and keep trying to
- // send all the data.
- e, ch := waiter.NewChannelEntry(nil)
- s.EventRegister(&e, waiter.EventOut)
- defer s.EventUnregister(&e)
-
- v.DropFirst(int(n))
- total := n
+ r := src.Reader(t)
+ var (
+ total int64
+ entry waiter.Entry
+ ch <-chan struct{}
+ )
for {
- n, err = s.Endpoint.Write(v, opts)
- v.DropFirst(int(n))
+ n, err := s.Endpoint.Write(r, opts)
total += n
-
- if err != nil && err != tcpip.ErrWouldBlock && total == 0 {
- return 0, syserr.TranslateNetstackError(err)
- }
-
- if err == nil && v.src.NumBytes() == 0 || err != nil && err != tcpip.ErrWouldBlock {
- return int(total), nil
- }
-
- if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
- if err == syserror.ETIMEDOUT {
- return int(total), syserr.ErrTryAgain
+ if flags&linux.MSG_DONTWAIT != 0 {
+ return int(total), syserr.TranslateNetstackError(err)
+ }
+ block := true
+ switch err.(type) {
+ case nil:
+ block = total != src.NumBytes()
+ case *tcpip.ErrWouldBlock:
+ default:
+ block = false
+ }
+ if block {
+ if ch == nil {
+ // We'll have to block. Register for notification and keep trying to
+ // send all the data.
+ entry, ch = waiter.NewChannelEntry(nil)
+ s.EventRegister(&entry, waiter.EventOut)
+ defer s.EventUnregister(&entry)
+ } else {
+ // Don't wait immediately after registration in case more data
+ // became available between when we last checked and when we set up
+ // the notification.
+ if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
+ if err == syserror.ETIMEDOUT {
+ return int(total), syserr.ErrTryAgain
+ }
+ // handleIOError will consume errors from t.Block if needed.
+ return int(total), syserr.FromError(err)
+ }
}
- // handleIOError will consume errors from t.Block if needed.
- return int(total), syserr.FromError(err)
+ continue
}
+ return int(total), syserr.TranslateNetstackError(err)
}
}
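
Most of the netstack changes follow from pkg/tcpip's error model moving from shared sentinel values (*tcpip.Error pointers compared with ==) to one concrete type per error implementing a tcpip.Error interface, so call sites now branch with type assertions or type switches. A self-contained sketch of the pattern with stand-in types, not the real pkg/tcpip definitions:

    package main

    import "fmt"

    // netError stands in for the tcpip.Error interface.
    type netError interface {
    	error
    	isNetError()
    }

    type errWouldBlock struct{}

    func (*errWouldBlock) Error() string { return "operation would block" }
    func (*errWouldBlock) isNetError()   {}

    type errConnectStarted struct{}

    func (*errConnectStarted) Error() string { return "connection attempt started" }
    func (*errConnectStarted) isNetError()   {}

    // handle branches on the concrete error type, as the rewritten Connect,
    // Bind, and Write paths above now do.
    func handle(err netError) string {
    	switch err.(type) {
    	case nil:
    		return "done"
    	case *errWouldBlock:
    		return "retry later"
    	case *errConnectStarted:
    		return "in progress"
    	default:
    		return "failed: " + err.Error()
    	}
    }

    func main() {
    	fmt.Println(handle(nil))
    	fmt.Println(handle(&errWouldBlock{}))
    	fmt.Println(handle(&errConnectStarted{}))
    }
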
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index 6f70b02fc..24922c400 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -20,7 +20,6 @@ import (
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
@@ -129,20 +128,20 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
return 0, syserror.EOPNOTSUPP
}
- f := &ioSequencePayload{ctx: ctx, src: src}
- n, err := s.Endpoint.Write(f, tcpip.WriteOptions{})
- if err == tcpip.ErrWouldBlock {
+ r := src.Reader(ctx)
+ n, err := s.Endpoint.Write(r, tcpip.WriteOptions{})
+ if _, ok := err.(*tcpip.ErrWouldBlock); ok {
return 0, syserror.ErrWouldBlock
}
if err != nil {
return 0, syserr.TranslateNetstackError(err).ToError()
}
- if int64(n) < src.NumBytes() {
- return int64(n), syserror.ErrWouldBlock
+ if n < src.NumBytes() {
+ return n, syserror.ErrWouldBlock
}
- return int64(n), nil
+ return n, nil
}
// Accept implements the linux syscall accept(2) for sockets backed by
@@ -155,7 +154,7 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block
}
ep, wq, terr := s.Endpoint.Accept(peerAddr)
if terr != nil {
- if terr != tcpip.ErrWouldBlock || !blocking {
+ if _, ok := terr.(*tcpip.ErrWouldBlock); !ok || !blocking {
return 0, nil, 0, syserr.TranslateNetstackError(terr)
}
@@ -262,13 +261,3 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by
return SetSockOpt(t, s, s.Endpoint, level, name, optVal)
}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
-}
diff --git a/pkg/sentry/socket/netstack/provider.go b/pkg/sentry/socket/netstack/provider.go
index c847ff1c7..2515dda80 100644
--- a/pkg/sentry/socket/netstack/provider.go
+++ b/pkg/sentry/socket/netstack/provider.go
@@ -118,7 +118,7 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*
// Create the endpoint.
var ep tcpip.Endpoint
- var e *tcpip.Error
+ var e tcpip.Error
wq := &waiter.Queue{}
if stype == linux.SOCK_RAW {
ep, e = eps.Stack.NewRawEndpoint(transProto, p.netProto, wq, associated)
diff --git a/pkg/sentry/socket/netstack/provider_vfs2.go b/pkg/sentry/socket/netstack/provider_vfs2.go
index 0af805246..ba1cc79e9 100644
--- a/pkg/sentry/socket/netstack/provider_vfs2.go
+++ b/pkg/sentry/socket/netstack/provider_vfs2.go
@@ -62,7 +62,7 @@ func (p *providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int
// Create the endpoint.
var ep tcpip.Endpoint
- var e *tcpip.Error
+ var e tcpip.Error
wq := &waiter.Queue{}
if stype == linux.SOCK_RAW {
ep, e = eps.Stack.NewRawEndpoint(transProto, p.netProto, wq, associated)
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index 9f7aca305..fc5b823b0 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -48,7 +48,7 @@ type ConnectingEndpoint interface {
Type() linux.SockType
// GetLocalAddress returns the bound path.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// Locker protects the following methods. While locked, only the holder of
// the lock can change the return value of the protected methods.
@@ -128,7 +128,7 @@ func newConnectioned(ctx context.Context, stype linux.SockType, uid UniqueIDProv
idGenerator: uid,
stype: stype,
}
- ep.ops.InitHandler(ep)
+ ep.ops.InitHandler(ep, nil, nil)
return ep
}
@@ -173,7 +173,7 @@ func NewExternal(ctx context.Context, stype linux.SockType, uid UniqueIDProvider
idGenerator: uid,
stype: stype,
}
- ep.ops.InitHandler(ep)
+ ep.ops.InitHandler(ep, nil, nil)
return ep
}
@@ -296,7 +296,7 @@ func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce Conn
idGenerator: e.idGenerator,
stype: e.stype,
}
- ne.ops.InitHandler(ne)
+ ne.ops.InitHandler(ne, nil, nil)
readQueue := &queue{ReaderQueue: ce.WaiterQueue(), WriterQueue: ne.Queue, limit: initialLimit}
readQueue.InitRefs()
diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go
index 0813ad87d..20fa8b874 100644
--- a/pkg/sentry/socket/unix/transport/connectionless.go
+++ b/pkg/sentry/socket/unix/transport/connectionless.go
@@ -44,7 +44,7 @@ func NewConnectionless(ctx context.Context) Endpoint {
q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: initialLimit}
q.InitRefs()
ep.receiver = &queueReceiver{readQueue: &q}
- ep.ops.InitHandler(ep)
+ ep.ops.InitHandler(ep, nil, nil)
return ep
}
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index 099a56281..70227bbd2 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -169,32 +169,32 @@ type Endpoint interface {
Type() linux.SockType
// GetLocalAddress returns the address to which the endpoint is bound.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// GetRemoteAddress returns the address to which the endpoint is
// connected.
- GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetRemoteAddress() (tcpip.FullAddress, tcpip.Error)
// SetSockOpt sets a socket option.
- SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error
+ SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error
// SetSockOptInt sets a socket option for simple cases when a value has
// the int type.
- SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error
+ SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error
// GetSockOpt gets a socket option.
- GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error
+ GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error
// GetSockOptInt gets a socket option for simple cases when a return
// value has the int type.
- GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error)
+ GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error)
// State returns the current state of the socket, as represented by Linux in
// procfs.
State() uint32
// LastError clears and returns the last error reported by the endpoint.
- LastError() *tcpip.Error
+ LastError() tcpip.Error
// SocketOptions returns the structure which contains all the socket
// level options.
@@ -580,7 +580,7 @@ type ConnectedEndpoint interface {
Passcred() bool
// GetLocalAddress implements Endpoint.GetLocalAddress.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// Send sends a single message. This method does not block.
//
@@ -640,7 +640,7 @@ type connectedEndpoint struct {
Passcred() bool
// GetLocalAddress implements Endpoint.GetLocalAddress.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// Type implements Endpoint.Type.
Type() linux.SockType
@@ -655,7 +655,7 @@ func (e *connectedEndpoint) Passcred() bool {
}
// GetLocalAddress implements ConnectedEndpoint.GetLocalAddress.
-func (e *connectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (e *connectedEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
return e.endpoint.GetLocalAddress()
}
@@ -836,13 +836,12 @@ func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMess
}
// SetSockOpt sets a socket option.
-func (e *baseEndpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
+func (e *baseEndpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error {
return nil
}
-func (e *baseEndpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
+func (e *baseEndpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
switch opt {
- case tcpip.SendBufferSizeOption:
case tcpip.ReceiveBufferSizeOption:
default:
log.Warningf("Unsupported socket option: %d", opt)
@@ -850,19 +849,40 @@ func (e *baseEndpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
return nil
}
-func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
+// IsUnixSocket implements tcpip.SocketOptionsHandler.IsUnixSocket.
+func (e *baseEndpoint) IsUnixSocket() bool {
+ return true
+}
+
+// GetSendBufferSize implements tcpip.SocketOptionsHandler.GetSendBufferSize.
+func (e *baseEndpoint) GetSendBufferSize() (int64, tcpip.Error) {
+ e.Lock()
+ defer e.Unlock()
+
+ if !e.Connected() {
+ return -1, &tcpip.ErrNotConnected{}
+ }
+
+ v := e.connected.SendMaxQueueSize()
+ if v < 0 {
+ return -1, &tcpip.ErrQueueSizeNotSupported{}
+ }
+ return v, nil
+}
+
+func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) {
switch opt {
case tcpip.ReceiveQueueSizeOption:
v := 0
e.Lock()
if !e.Connected() {
e.Unlock()
- return -1, tcpip.ErrNotConnected
+ return -1, &tcpip.ErrNotConnected{}
}
v = int(e.receiver.RecvQueuedSize())
e.Unlock()
if v < 0 {
- return -1, tcpip.ErrQueueSizeNotSupported
+ return -1, &tcpip.ErrQueueSizeNotSupported{}
}
return v, nil
@@ -870,25 +890,12 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
e.Lock()
if !e.Connected() {
e.Unlock()
- return -1, tcpip.ErrNotConnected
+ return -1, &tcpip.ErrNotConnected{}
}
v := e.connected.SendQueuedSize()
e.Unlock()
if v < 0 {
- return -1, tcpip.ErrQueueSizeNotSupported
- }
- return int(v), nil
-
- case tcpip.SendBufferSizeOption:
- e.Lock()
- if !e.Connected() {
- e.Unlock()
- return -1, tcpip.ErrNotConnected
- }
- v := e.connected.SendMaxQueueSize()
- e.Unlock()
- if v < 0 {
- return -1, tcpip.ErrQueueSizeNotSupported
+ return -1, &tcpip.ErrQueueSizeNotSupported{}
}
return int(v), nil
@@ -896,29 +903,29 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
e.Lock()
if e.receiver == nil {
e.Unlock()
- return -1, tcpip.ErrNotConnected
+ return -1, &tcpip.ErrNotConnected{}
}
v := e.receiver.RecvMaxQueueSize()
e.Unlock()
if v < 0 {
- return -1, tcpip.ErrQueueSizeNotSupported
+ return -1, &tcpip.ErrQueueSizeNotSupported{}
}
return int(v), nil
default:
log.Warningf("Unsupported socket option: %d", opt)
- return -1, tcpip.ErrUnknownProtocolOption
+ return -1, &tcpip.ErrUnknownProtocolOption{}
}
}
// GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (e *baseEndpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
+func (e *baseEndpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error {
log.Warningf("Unsupported socket option: %T", opt)
- return tcpip.ErrUnknownProtocolOption
+ return &tcpip.ErrUnknownProtocolOption{}
}
// LastError implements Endpoint.LastError.
-func (*baseEndpoint) LastError() *tcpip.Error {
+func (*baseEndpoint) LastError() tcpip.Error {
return nil
}
@@ -958,7 +965,7 @@ func (e *baseEndpoint) Shutdown(flags tcpip.ShutdownFlags) *syserr.Error {
}
// GetLocalAddress returns the bound path.
-func (e *baseEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (e *baseEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
e.Lock()
defer e.Unlock()
return tcpip.FullAddress{Addr: tcpip.Address(e.path)}, nil
@@ -966,14 +973,14 @@ func (e *baseEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
// GetRemoteAddress returns the local address of the connected endpoint (if
// available).
-func (e *baseEndpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (e *baseEndpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) {
e.Lock()
c := e.connected
e.Unlock()
if c != nil {
return c.GetLocalAddress()
}
- return tcpip.FullAddress{}, tcpip.ErrNotConnected
+ return tcpip.FullAddress{}, &tcpip.ErrNotConnected{}
}
// Release implements BoundEndpoint.Release.
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 6c4ec55b2..32e5d2304 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -496,6 +496,9 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b
return int(n), syserr.FromError(err)
}
+ // Only send SCM Rights once (see net/unix/af_unix.c:unix_stream_sendmsg).
+ w.Control.Rights = nil
+
// We'll have to block. Register for notification and keep trying to
// send all the data.
e, ch := waiter.NewChannelEntry(nil)
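
The w.Control.Rights = nil line implements the rule that ancillary file descriptors accompany only the first chunk of a stream write, matching Linux's unix_stream_sendmsg: if the blocking loop below retries after a partial write, the retries carry no rights. A minimal sketch of the rule with illustrative types:

    package main

    import "fmt"

    // controlMessages is a stand-in for the control-message struct carrying
    // passed file descriptors.
    type controlMessages struct {
    	Rights []int
    }

    // send writes each chunk in turn, attaching the rights only to the first.
    func send(chunks [][]byte, ctrl controlMessages) {
    	for i, chunk := range chunks {
    		fmt.Printf("write %d: %d bytes, fds=%v\n", i, len(chunk), ctrl.Rights)
    		ctrl.Rights = nil // subsequent writes carry no SCM rights
    	}
    }

    func main() {
    	send([][]byte{[]byte("hello "), []byte("world")}, controlMessages{Rights: []int{7}})
    }
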
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index 27f705bb2..a7d4d7f1f 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -20,7 +20,6 @@ import (
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/arch"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
@@ -331,16 +330,6 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by
return netstack.SetSockOpt(t, s, s.ep, level, name, optVal)
}
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
- return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
-}
-
// providerVFS2 is a unix domain socket provider for VFS2.
type providerVFS2 struct{}
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index a72df62f6..62d1e8f8b 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -503,8 +503,8 @@ var ARM64 = &kernel.SyscallTable{
72: syscalls.Supported("pselect", Pselect),
73: syscalls.Supported("ppoll", Ppoll),
74: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}),
- 75: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
- 76: syscalls.PartiallySupported("splice", Splice, "Stub implementation.", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 75: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 76: syscalls.Supported("splice", Splice),
77: syscalls.Supported("tee", Tee),
78: syscalls.Supported("readlinkat", Readlinkat),
79: syscalls.Supported("fstatat", Fstatat),
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index c33571f43..a6253626e 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -1014,12 +1014,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
if cmd == linux.F_SETLK {
// Non-blocking lock, provide a nil lock.Blocker.
- if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, nil) {
+ if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, nil) {
return 0, nil, syserror.EAGAIN
}
} else {
// Blocking lock, pass in the task to satisfy the lock.Blocker interface.
- if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, t) {
+ if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, t) {
return 0, nil, syserror.EINTR
}
}
@@ -1030,12 +1030,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
if cmd == linux.F_SETLK {
// Non-blocking lock, provide a nil lock.Blocker.
- if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, nil) {
+ if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, nil) {
return 0, nil, syserror.EAGAIN
}
} else {
// Blocking lock, pass in the task to satisfy the lock.Blocker interface.
- if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, t) {
+ if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, t) {
return 0, nil, syserror.EINTR
}
}
@@ -2167,24 +2167,24 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
case linux.LOCK_EX:
if nonblocking {
// Since we're nonblocking we pass a nil lock.Blocker implementation.
- if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, nil) {
+ if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, nil) {
return 0, nil, syserror.EWOULDBLOCK
}
} else {
// Because we're blocking we will pass the task to satisfy the lock.Blocker interface.
- if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, t) {
+ if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, t) {
return 0, nil, syserror.EINTR
}
}
case linux.LOCK_SH:
if nonblocking {
// Since we're nonblocking we pass a nil lock.Blocker implementation.
- if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, nil) {
+ if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, nil) {
return 0, nil, syserror.EWOULDBLOCK
}
} else {
// Because we're blocking we will pass the task to satisfy the lock.Blocker interface.
- if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, t) {
+ if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, t) {
return 0, nil, syserror.EINTR
}
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index 7dd9ef857..1a31898e8 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -123,6 +123,15 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
defer file.DecRef(t)
+ if file.StatusFlags()&linux.O_PATH != 0 {
+ switch cmd {
+ case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC, linux.F_GETFD, linux.F_SETFD, linux.F_GETFL:
+ // allowed
+ default:
+ return 0, nil, syserror.EBADF
+ }
+ }
+
switch cmd {
case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC:
minfd := args[2].Int()
@@ -205,8 +214,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
err := tmpfs.AddSeals(file, args[2].Uint())
return 0, nil, err
- case linux.F_SETLK, linux.F_SETLKW:
- return 0, nil, posixLock(t, args, file, cmd)
+ case linux.F_SETLK:
+ return 0, nil, posixLock(t, args, file, false /* blocking */)
+ case linux.F_SETLKW:
+ return 0, nil, posixLock(t, args, file, true /* blocking */)
+ case linux.F_GETLK:
+ return 0, nil, posixTestLock(t, args, file)
case linux.F_GETSIG:
a := file.AsyncHandler()
if a == nil {
@@ -292,7 +305,49 @@ func setAsyncOwner(t *kernel.Task, fd int, file *vfs.FileDescription, ownerType,
}
}
-func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, cmd int32) error {
+func posixTestLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription) error {
+ // Copy in the lock request.
+ flockAddr := args[2].Pointer()
+ var flock linux.Flock
+ if _, err := flock.CopyIn(t, flockAddr); err != nil {
+ return err
+ }
+ var typ lock.LockType
+ switch flock.Type {
+ case linux.F_RDLCK:
+ typ = lock.ReadLock
+ case linux.F_WRLCK:
+ typ = lock.WriteLock
+ default:
+ return syserror.EINVAL
+ }
+ r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence)
+ if err != nil {
+ return err
+ }
+
+ newFlock, err := file.TestPOSIX(t, t.FDTable(), typ, r)
+ if err != nil {
+ return err
+ }
+ newFlock.PID = translatePID(t.PIDNamespace().Root(), t.PIDNamespace(), newFlock.PID)
+ if _, err = newFlock.CopyOut(t, flockAddr); err != nil {
+ return err
+ }
+ return nil
+}
+
+// translatePID translates a pid from one namespace to another. Note that this
+// may race with task termination/creation, in which case the original task
+// corresponding to pid may no longer exist. This is used to implement the
+// F_GETLK fcntl, which is subject to the same race in Linux (i.e.,

+// there is no synchronization between retrieving the lock PID and translating
+// it). See fs/locks.c:posix_lock_to_flock.
+func translatePID(old, new *kernel.PIDNamespace, pid int32) int32 {
+ return int32(new.IDOfTask(old.TaskWithID(kernel.ThreadID(pid))))
+}
+
+func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, blocking bool) error {
// Copy in the lock request.
flockAddr := args[2].Pointer()
var flock linux.Flock
@@ -301,25 +356,30 @@ func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescrip
}
var blocker lock.Blocker
- if cmd == linux.F_SETLKW {
+ if blocking {
blocker = t
}
+ r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence)
+ if err != nil {
+ return err
+ }
+
switch flock.Type {
case linux.F_RDLCK:
if !file.IsReadable() {
return syserror.EBADF
}
- return file.LockPOSIX(t, t.FDTable(), lock.ReadLock, uint64(flock.Start), uint64(flock.Len), flock.Whence, blocker)
+ return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.ReadLock, r, blocker)
case linux.F_WRLCK:
if !file.IsWritable() {
return syserror.EBADF
}
- return file.LockPOSIX(t, t.FDTable(), lock.WriteLock, uint64(flock.Start), uint64(flock.Len), flock.Whence, blocker)
+ return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.WriteLock, r, blocker)
case linux.F_UNLCK:
- return file.UnlockPOSIX(t, t.FDTable(), uint64(flock.Start), uint64(flock.Len), flock.Whence)
+ return file.UnlockPOSIX(t, t.FDTable(), r)
default:
return syserror.EINVAL
@@ -344,6 +404,10 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
defer file.DecRef(t)
+ if file.StatusFlags()&linux.O_PATH != 0 {
+ return 0, nil, syserror.EBADF
+ }
+
// If the FD refers to a pipe or FIFO, return error.
if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe {
return 0, nil, syserror.ESPIPE
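The F_GETLK path added above copies in the caller's struct flock, computes the byte range, asks the file's lock set whether the probe lock could be acquired, and rewrites the conflicting owner's PID into the caller's PID namespace via translatePID. From an application's point of view these are the usual fcntl(2) semantics; a small probe using golang.org/x/sys/unix (the path is hypothetical, and this is just one way to exercise the new code path):

    package main

    import (
        "fmt"

        "golang.org/x/sys/unix"
    )

    func main() {
        fd, err := unix.Open("/tmp/lockfile", unix.O_RDWR, 0)
        if err != nil {
            panic(err)
        }
        defer unix.Close(fd)

        // Probe whether an exclusive lock over the whole file could be taken.
        lk := unix.Flock_t{Type: unix.F_WRLCK, Whence: unix.SEEK_SET, Start: 0, Len: 0}
        if err := unix.FcntlFlock(uintptr(fd), unix.F_GETLK, &lk); err != nil {
            panic(err)
        }
        if lk.Type == unix.F_UNLCK {
            fmt.Println("no conflicting lock")
        } else {
            // Pid is reported in the caller's PID namespace, as translatePID arranges.
            fmt.Printf("conflicts with a lock held by pid %d\n", lk.Pid)
        }
    }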
diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
index 20c264fef..c7c3fed57 100644
--- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go
+++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
@@ -32,6 +32,10 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
defer file.DecRef(t)
+ if file.StatusFlags()&linux.O_PATH != 0 {
+ return 0, nil, syserror.EBADF
+ }
+
// Handle ioctls that apply to all FDs.
switch args[1].Int() {
case linux.FIONCLEX:
diff --git a/pkg/sentry/syscalls/linux/vfs2/lock.go b/pkg/sentry/syscalls/linux/vfs2/lock.go
index b910b5a74..d1452a04d 100644
--- a/pkg/sentry/syscalls/linux/vfs2/lock.go
+++ b/pkg/sentry/syscalls/linux/vfs2/lock.go
@@ -44,11 +44,11 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
switch operation {
case linux.LOCK_EX:
- if err := file.LockBSD(t, lock.WriteLock, blocker); err != nil {
+ if err := file.LockBSD(t, int32(t.TGIDInRoot()), lock.WriteLock, blocker); err != nil {
return 0, nil, err
}
case linux.LOCK_SH:
- if err := file.LockBSD(t, lock.ReadLock, blocker); err != nil {
+ if err := file.LockBSD(t, int32(t.TGIDInRoot()), lock.ReadLock, blocker); err != nil {
return 0, nil, err
}
case linux.LOCK_UN:
diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go
index b77b29dcc..c7417840f 100644
--- a/pkg/sentry/syscalls/linux/vfs2/read_write.go
+++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go
@@ -93,17 +93,11 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
n, err := file.Read(t, dst, opts)
if err != syserror.ErrWouldBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
@@ -134,9 +128,6 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt
}
file.EventUnregister(&w)
- if total > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return total, err
}
@@ -257,17 +248,11 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
n, err := file.PRead(t, dst, offset, opts)
if err != syserror.ErrWouldBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
@@ -297,10 +282,6 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of
}
}
file.EventUnregister(&w)
-
- if total > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return total, err
}
@@ -363,17 +344,11 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
n, err := file.Write(t, src, opts)
if err != syserror.ErrWouldBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
return n, err
}
@@ -403,10 +378,6 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op
}
}
file.EventUnregister(&w)
-
- if total > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
return total, err
}
@@ -527,17 +498,11 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
n, err := file.PWrite(t, src, offset, opts)
if err != syserror.ErrWouldBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
@@ -567,10 +532,6 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o
}
}
file.EventUnregister(&w)
-
- if total > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return total, err
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go
index 1ee37e5a8..903169dc2 100644
--- a/pkg/sentry/syscalls/linux/vfs2/setstat.go
+++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go
@@ -220,7 +220,6 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
length := args[3].Int64()
file := t.GetFileVFS2(fd)
-
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -229,23 +228,18 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
if !file.IsWritable() {
return 0, nil, syserror.EBADF
}
-
if mode != 0 {
return 0, nil, syserror.ENOTSUP
}
-
if offset < 0 || length <= 0 {
return 0, nil, syserror.EINVAL
}
size := offset + length
-
if size < 0 {
return 0, nil, syserror.EFBIG
}
-
limit := limits.FromContext(t).Get(limits.FileSize).Cur
-
if uint64(size) >= limit {
t.SendSignal(&arch.SignalInfo{
Signo: int32(linux.SIGXFSZ),
@@ -254,12 +248,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, syserror.EFBIG
}
- if err := file.Allocate(t, mode, uint64(offset), uint64(length)); err != nil {
- return 0, nil, err
- }
-
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- return 0, nil, nil
+ return 0, nil, file.Allocate(t, mode, uint64(offset), uint64(length))
}
// Utime implements Linux syscall utime(2).
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index 8bb763a47..19e175203 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -170,13 +170,6 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
}
}
- if n != 0 {
- // On Linux, inotify behavior is not very consistent with splice(2). We try
- // our best to emulate Linux for very basic calls to splice, where for some
- // reason, events are generated for output files, but not input files.
- outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
-
// We can only pass a single file to handleIOError, so pick outFile arbitrarily.
// This is used only for debugging purposes.
return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "splice", outFile)
@@ -256,8 +249,6 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
}
if n != 0 {
- outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
-
// If a partial write is completed, the error is dropped. Log it here.
if err != nil && err != io.EOF && err != syserror.ErrWouldBlock {
log.Debugf("tee completed a partial write with error: %v", err)
@@ -449,9 +440,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
if total != 0 {
- inFile.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
-
if err != nil && err != io.EOF && err != syserror.ErrWouldBlock {
// If a partial write is completed, the error is dropped. Log it here.
log.Debugf("sendfile completed a partial write with error: %v", err)
diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go
index 6e9b599e2..1f8a5878c 100644
--- a/pkg/sentry/syscalls/linux/vfs2/sync.go
+++ b/pkg/sentry/syscalls/linux/vfs2/sync.go
@@ -36,6 +36,10 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
}
defer file.DecRef(t)
+ if file.StatusFlags()&linux.O_PATH != 0 {
+ return 0, nil, syserror.EBADF
+ }
+
return 0, nil, file.SyncFS(t)
}
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index a3868bf16..df4990854 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -83,6 +83,7 @@ go_library(
"mount.go",
"mount_namespace_refs.go",
"mount_unsafe.go",
+ "opath.go",
"options.go",
"pathname.go",
"permissions.go",
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 5321ac80a..f612a71b2 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -161,6 +161,13 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou
// DecRef decrements fd's reference count.
func (fd *FileDescription) DecRef(ctx context.Context) {
fd.FileDescriptionRefs.DecRef(func() {
+ // Generate inotify events.
+ ev := uint32(linux.IN_CLOSE_NOWRITE)
+ if fd.IsWritable() {
+ ev = linux.IN_CLOSE_WRITE
+ }
+ fd.Dentry().InotifyWithParent(ctx, ev, 0, PathEvent)
+
// Unregister fd from all epoll instances.
fd.epollMu.Lock()
epolls := fd.epolls
@@ -448,16 +455,19 @@ type FileDescriptionImpl interface {
RemoveXattr(ctx context.Context, name string) error
// LockBSD tries to acquire a BSD-style advisory file lock.
- LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error
+ LockBSD(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, block lock.Blocker) error
// UnlockBSD releases a BSD-style advisory file lock.
UnlockBSD(ctx context.Context, uid lock.UniqueID) error
// LockPOSIX tries to acquire a POSIX-style advisory file lock.
- LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, start, length uint64, whence int16, block lock.Blocker) error
+ LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block lock.Blocker) error
// UnlockPOSIX releases a POSIX-style advisory file lock.
- UnlockPOSIX(ctx context.Context, uid lock.UniqueID, start, length uint64, whence int16) error
+ UnlockPOSIX(ctx context.Context, uid lock.UniqueID, r lock.LockRange) error
+
+ // TestPOSIX returns information about whether the specified lock can be held, in the style of the F_GETLK fcntl.
+ TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error)
}
// Dirent holds the information contained in struct linux_dirent64.
@@ -556,7 +566,11 @@ func (fd *FileDescription) Allocate(ctx context.Context, mode, offset, length ui
if !fd.IsWritable() {
return syserror.EBADF
}
- return fd.impl.Allocate(ctx, mode, offset, length)
+ if err := fd.impl.Allocate(ctx, mode, offset, length); err != nil {
+ return err
+ }
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent)
+ return nil
}
// Readiness implements waiter.Waitable.Readiness.
@@ -592,6 +606,9 @@ func (fd *FileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of
}
start := fsmetric.StartReadWait()
n, err := fd.impl.PRead(ctx, dst, offset, opts)
+ if n > 0 {
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, PathEvent)
+ }
fsmetric.Reads.Increment()
fsmetric.FinishReadWait(fsmetric.ReadWait, start)
return n, err
@@ -604,6 +621,9 @@ func (fd *FileDescription) Read(ctx context.Context, dst usermem.IOSequence, opt
}
start := fsmetric.StartReadWait()
n, err := fd.impl.Read(ctx, dst, opts)
+ if n > 0 {
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, PathEvent)
+ }
fsmetric.Reads.Increment()
fsmetric.FinishReadWait(fsmetric.ReadWait, start)
return n, err
@@ -619,7 +639,11 @@ func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, o
if !fd.writable {
return 0, syserror.EBADF
}
- return fd.impl.PWrite(ctx, src, offset, opts)
+ n, err := fd.impl.PWrite(ctx, src, offset, opts)
+ if n > 0 {
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent)
+ }
+ return n, err
}
// Write is similar to PWrite, but does not specify an offset.
@@ -627,7 +651,11 @@ func (fd *FileDescription) Write(ctx context.Context, src usermem.IOSequence, op
if !fd.writable {
return 0, syserror.EBADF
}
- return fd.impl.Write(ctx, src, opts)
+ n, err := fd.impl.Write(ctx, src, opts)
+ if n > 0 {
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent)
+ }
+ return n, err
}
// IterDirents invokes cb on each entry in the directory represented by fd. If
@@ -791,9 +819,9 @@ func (fd *FileDescription) Msync(ctx context.Context, mr memmap.MappableRange) e
}
// LockBSD tries to acquire a BSD-style advisory file lock.
-func (fd *FileDescription) LockBSD(ctx context.Context, lockType lock.LockType, blocker lock.Blocker) error {
+func (fd *FileDescription) LockBSD(ctx context.Context, ownerPID int32, lockType lock.LockType, blocker lock.Blocker) error {
atomic.StoreUint32(&fd.usedLockBSD, 1)
- return fd.impl.LockBSD(ctx, fd, lockType, blocker)
+ return fd.impl.LockBSD(ctx, fd, ownerPID, lockType, blocker)
}
// UnlockBSD releases a BSD-style advisory file lock.
@@ -802,13 +830,45 @@ func (fd *FileDescription) UnlockBSD(ctx context.Context) error {
}
// LockPOSIX locks a POSIX-style file range lock.
-func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, start, end uint64, whence int16, block lock.Blocker) error {
- return fd.impl.LockPOSIX(ctx, uid, t, start, end, whence, block)
+func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block lock.Blocker) error {
+ return fd.impl.LockPOSIX(ctx, uid, ownerPID, t, r, block)
}
// UnlockPOSIX unlocks a POSIX-style file range lock.
-func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, start, end uint64, whence int16) error {
- return fd.impl.UnlockPOSIX(ctx, uid, start, end, whence)
+func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, r lock.LockRange) error {
+ return fd.impl.UnlockPOSIX(ctx, uid, r)
+}
+
+// TestPOSIX returns information about whether the specified lock can be held.
+func (fd *FileDescription) TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error) {
+ return fd.impl.TestPOSIX(ctx, uid, t, r)
+}
+
+// ComputeLockRange computes the range of a file lock based on the given values.
+func (fd *FileDescription) ComputeLockRange(ctx context.Context, start uint64, length uint64, whence int16) (lock.LockRange, error) {
+ var off int64
+ switch whence {
+ case linux.SEEK_SET:
+ off = 0
+ case linux.SEEK_CUR:
+ // Note that Linux does not hold any mutexes while retrieving the file
+ // offset, see fs/locks.c:flock_to_posix_lock and fs/locks.c:fcntl_setlk.
+ curOff, err := fd.Seek(ctx, 0, linux.SEEK_CUR)
+ if err != nil {
+ return lock.LockRange{}, err
+ }
+ off = curOff
+ case linux.SEEK_END:
+ stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_SIZE})
+ if err != nil {
+ return lock.LockRange{}, err
+ }
+ off = int64(stat.Size)
+ default:
+ return lock.LockRange{}, syserror.EINVAL
+ }
+
+ return lock.ComputeRange(int64(start), int64(length), off)
}
// A FileAsync sends signals to its owner when w is ready for IO. This is only
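With these changes the FileDescription lock API takes a precomputed lock.LockRange plus the owner PID, and the SEEK_SET/SEEK_CUR/SEEK_END handling is exposed separately as ComputeLockRange. A sketch of the call sequence from a hypothetical syscall helper; the request values are invented, while the arguments mirror the posixLock code earlier in this diff:

    package sketch

    import (
        "gvisor.dev/gvisor/pkg/abi/linux"
        fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
        "gvisor.dev/gvisor/pkg/sentry/kernel"
        "gvisor.dev/gvisor/pkg/sentry/vfs"
    )

    // lockNext4K write-locks 4 KiB starting at the current file offset, the way
    // fcntl(F_SETLKW) with l_whence = SEEK_CUR would.
    func lockNext4K(t *kernel.Task, fd *vfs.FileDescription) error {
        r, err := fd.ComputeLockRange(t, 0, 4096, linux.SEEK_CUR)
        if err != nil {
            return err
        }
        // POSIX locks are owned per-process, so the FDTable is the UniqueID; the
        // owner PID recorded for F_GETLK is the thread group ID in the root
        // namespace. Passing t as the Blocker makes the request blocking.
        return fd.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), fslock.WriteLock, r, t)
    }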
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 48ca9de44..eb7d2fd3b 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -419,8 +419,8 @@ func (fd *LockFD) Locks() *FileLocks {
}
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
-func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
- return fd.locks.LockBSD(uid, t, block)
+func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
+ return fd.locks.LockBSD(ctx, uid, ownerPID, t, block)
}
// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD.
@@ -429,6 +429,21 @@ func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
return nil
}
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
+ return fd.locks.LockPOSIX(ctx, uid, ownerPID, t, r, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
+ return fd.locks.UnlockPOSIX(ctx, uid, r)
+}
+
+// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX.
+func (fd *LockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
+ return fd.locks.TestPOSIX(ctx, uid, t, r)
+}
+
// NoLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface
// returning ENOLCK.
//
@@ -436,7 +451,7 @@ func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
type NoLockFD struct{}
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
-func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
+func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
return syserror.ENOLCK
}
@@ -446,11 +461,16 @@ func (NoLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
return syserror.ENOLCK
}
// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
return syserror.ENOLCK
}
+
+// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX.
+func (NoLockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
+ return linux.Flock{}, syserror.ENOLCK
+}
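With LockFD now providing default LockPOSIX/UnlockPOSIX/TestPOSIX implementations (and NoLockFD returning ENOLCK for all of them), a filesystem's file description type only needs to embed one of the two and wire up the shared FileLocks. A sketch of the embedding pattern; the type and constructor are invented, and the Init call is an assumption modeled on how the existing fsimpl users attach their inode's lock state:

    package sketch

    import "gvisor.dev/gvisor/pkg/sentry/vfs"

    // exampleFD gets BSD and POSIX advisory locking, including the new F_GETLK
    // support, by embedding vfs.LockFD.
    type exampleFD struct {
        vfsfd vfs.FileDescription
        vfs.FileDescriptionDefaultImpl
        vfs.LockFD
    }

    // newExampleFD associates the FD with the file's shared lock state, which is
    // typically stored on the dentry or inode so all FDs see the same locks.
    func newExampleFD(locks *vfs.FileLocks) *exampleFD {
        fd := &exampleFD{}
        fd.LockFD.Init(locks)
        return fd
    }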
diff --git a/pkg/sentry/vfs/lock.go b/pkg/sentry/vfs/lock.go
index 1ff202f2a..cbe4d8c2d 100644
--- a/pkg/sentry/vfs/lock.go
+++ b/pkg/sentry/vfs/lock.go
@@ -39,8 +39,8 @@ type FileLocks struct {
}
// LockBSD tries to acquire a BSD-style lock on the entire file.
-func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
- if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) {
+func (fl *FileLocks) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerID int32, t fslock.LockType, block fslock.Blocker) error {
+ if fl.bsd.LockRegion(uid, ownerID, t, fslock.LockRange{0, fslock.LockEOF}, block) {
return nil
}
@@ -61,12 +61,8 @@ func (fl *FileLocks) UnlockBSD(uid fslock.UniqueID) {
}
// LockPOSIX tries to acquire a POSIX-style lock on a file region.
-func (fl *FileLocks) LockPOSIX(ctx context.Context, fd *FileDescription, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
- rng, err := computeRange(ctx, fd, start, length, whence)
- if err != nil {
- return err
- }
- if fl.posix.LockRegion(uid, t, rng, block) {
+func (fl *FileLocks) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
+ if fl.posix.LockRegion(uid, ownerPID, t, r, block) {
return nil
}
@@ -82,37 +78,12 @@ func (fl *FileLocks) LockPOSIX(ctx context.Context, fd *FileDescription, uid fsl
//
// This operation is always successful, even if there did not exist a lock on
// the requested region held by uid in the first place.
-func (fl *FileLocks) UnlockPOSIX(ctx context.Context, fd *FileDescription, uid fslock.UniqueID, start, length uint64, whence int16) error {
- rng, err := computeRange(ctx, fd, start, length, whence)
- if err != nil {
- return err
- }
- fl.posix.UnlockRegion(uid, rng)
+func (fl *FileLocks) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
+ fl.posix.UnlockRegion(uid, r)
return nil
}
-func computeRange(ctx context.Context, fd *FileDescription, start uint64, length uint64, whence int16) (fslock.LockRange, error) {
- var off int64
- switch whence {
- case linux.SEEK_SET:
- off = 0
- case linux.SEEK_CUR:
- // Note that Linux does not hold any mutexes while retrieving the file
- // offset, see fs/locks.c:flock_to_posix_lock and fs/locks.c:fcntl_setlk.
- curOff, err := fd.Seek(ctx, 0, linux.SEEK_CUR)
- if err != nil {
- return fslock.LockRange{}, err
- }
- off = curOff
- case linux.SEEK_END:
- stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_SIZE})
- if err != nil {
- return fslock.LockRange{}, err
- }
- off = int64(stat.Size)
- default:
- return fslock.LockRange{}, syserror.EINVAL
- }
-
- return fslock.ComputeRange(int64(start), int64(length), off)
+// TestPOSIX returns information about whether the specified lock can be held, in the style of the F_GETLK fcntl.
+func (fl *FileLocks) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
+ return fl.posix.TestRegion(ctx, uid, t, r), nil
}
diff --git a/pkg/sentry/vfs/opath.go b/pkg/sentry/vfs/opath.go
new file mode 100644
index 000000000..39fbac987
--- /dev/null
+++ b/pkg/sentry/vfs/opath.go
@@ -0,0 +1,139 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// opathFD implements vfs.FileDescriptionImpl for a file description opened with O_PATH.
+//
+// +stateify savable
+type opathFD struct {
+ vfsfd FileDescription
+ FileDescriptionDefaultImpl
+ NoLockFD
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *opathFD) Release(context.Context) {
+ // noop
+}
+
+// Allocate implements vfs.FileDescriptionImpl.Allocate.
+func (fd *opathFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ return syserror.EBADF
+}
+
+// PRead implements vfs.FileDescriptionImpl.PRead.
+func (fd *opathFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// Read implements vfs.FileDescriptionImpl.Read.
+func (fd *opathFD) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// PWrite implements vfs.FileDescriptionImpl.PWrite.
+func (fd *opathFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// Write implements vfs.FileDescriptionImpl.Write.
+func (fd *opathFD) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
+func (fd *opathFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ return 0, syserror.EBADF
+}
+
+// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
+func (fd *opathFD) IterDirents(ctx context.Context, cb IterDirentsCallback) error {
+ return syserror.EBADF
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek.
+func (fd *opathFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+func (fd *opathFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+ return syserror.EBADF
+}
+
+// ListXattr implements vfs.FileDescriptionImpl.ListXattr.
+func (fd *opathFD) ListXattr(ctx context.Context, size uint64) ([]string, error) {
+ return nil, syserror.EBADF
+}
+
+// GetXattr implements vfs.FileDescriptionImpl.GetXattr.
+func (fd *opathFD) GetXattr(ctx context.Context, opts GetXattrOptions) (string, error) {
+ return "", syserror.EBADF
+}
+
+// SetXattr implements vfs.FileDescriptionImpl.SetXattr.
+func (fd *opathFD) SetXattr(ctx context.Context, opts SetXattrOptions) error {
+ return syserror.EBADF
+}
+
+// RemoveXattr implements vfs.FileDescriptionImpl.RemoveXattr.
+func (fd *opathFD) RemoveXattr(ctx context.Context, name string) error {
+ return syserror.EBADF
+}
+
+// Sync implements vfs.FileDescriptionImpl.Sync.
+func (fd *opathFD) Sync(ctx context.Context) error {
+ return syserror.EBADF
+}
+
+// SetStat implements vfs.FileDescriptionImpl.SetStat.
+func (fd *opathFD) SetStat(ctx context.Context, opts SetStatOptions) error {
+ return syserror.EBADF
+}
+
+// Stat implements vfs.FileDescriptionImpl.Stat.
+func (fd *opathFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) {
+ vfsObj := fd.vfsfd.vd.mount.vfs
+ rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
+ Root: fd.vfsfd.vd,
+ Start: fd.vfsfd.vd,
+ })
+ stat, err := fd.vfsfd.vd.mount.fs.impl.StatAt(ctx, rp, opts)
+ vfsObj.putResolvingPath(ctx, rp)
+ return stat, err
+}
+
+// StatFS returns metadata for the filesystem containing the file represented
+// by fd.
+func (fd *opathFD) StatFS(ctx context.Context) (linux.Statfs, error) {
+ vfsObj := fd.vfsfd.vd.mount.vfs
+ rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
+ Root: fd.vfsfd.vd,
+ Start: fd.vfsfd.vd,
+ })
+ statfs, err := fd.vfsfd.vd.mount.fs.impl.StatFSAt(ctx, rp)
+ vfsObj.putResolvingPath(ctx, rp)
+ return statfs, err
+}
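The new opathFD implements the O_PATH semantics: the descriptor identifies a file (Stat and StatFS re-resolve the path, and the fcntl allow list above permits dup and flag queries), while every data operation returns EBADF, and the vfs2 ioctl, fadvise64, and syncfs handlers add matching EBADF checks. A small userspace-style illustration using golang.org/x/sys/unix (the path is hypothetical):

    package main

    import (
        "fmt"

        "golang.org/x/sys/unix"
    )

    func main() {
        fd, err := unix.Open("/etc/hostname", unix.O_PATH, 0)
        if err != nil {
            panic(err)
        }
        defer unix.Close(fd)

        var st unix.Stat_t
        fmt.Println(unix.Fstat(fd, &st)) // <nil>: served by the new opathFD.Stat

        buf := make([]byte, 16)
        _, err = unix.Read(fd, buf)
        fmt.Println(err) // EBADF: opathFD rejects all I/O

        flags, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0)
        fmt.Println(flags, err) // allowed: F_GETFL is on the O_PATH allow list
    }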
diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go
index bc79e5ecc..c9907843c 100644
--- a/pkg/sentry/vfs/options.go
+++ b/pkg/sentry/vfs/options.go
@@ -129,7 +129,7 @@ type OpenOptions struct {
//
// FilesystemImpls are responsible for implementing the following flags:
// O_RDONLY, O_WRONLY, O_RDWR, O_APPEND, O_CREAT, O_DIRECT, O_DSYNC,
- // O_EXCL, O_NOATIME, O_NOCTTY, O_NONBLOCK, O_PATH, O_SYNC, O_TMPFILE, and
+ // O_EXCL, O_NOATIME, O_NOCTTY, O_NONBLOCK, O_SYNC, O_TMPFILE, and
// O_TRUNC. VFS is responsible for handling O_DIRECTORY, O_LARGEFILE, and
// O_NOFOLLOW. VFS users are responsible for handling O_CLOEXEC, since file
// descriptors are mostly outside the scope of VFS.
diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index 6fd1bb0b2..0aff2dd92 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -425,6 +425,18 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential
rp.mustBeDir = true
rp.mustBeDirOrig = true
}
+ if opts.Flags&linux.O_PATH != 0 {
+ vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{})
+ if err != nil {
+ return nil, err
+ }
+ fd := &opathFD{}
+ if err := fd.vfsfd.Init(fd, opts.Flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
+ vd.DecRef(ctx)
+ return &fd.vfsfd, err
+ }
for {
fd, err := rp.mount.fs.impl.OpenAt(ctx, rp, *opts)
if err == nil {