author     Fabricio Voznika <fvoznika@google.com>  2019-03-05 23:39:14 -0800
committer  Shentubot <shentubot@google.com>        2019-03-05 23:40:18 -0800
commit     0b76887147820a809beaa497ede8dc4f7b7b120a (patch)
tree       a89be091add2e39163cefe84bf3614a6338f3cd9 /pkg
parent     fcba4e8f040ab4b40e04b9315d718d7e5aa44635 (diff)
Priority-inheritance futex implementation

It is implemented without the priority-inheritance part, given that gVisor
defers scheduling decisions to the Go runtime and doesn't have control over it.

PiperOrigin-RevId: 236989545
Change-Id: I714c8ca0798743ecf3167b14ffeb5cd834302560
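With PI futexes, the kernel is entered only on contention: an uncontended lock is a single compare-and-swap in user space from 0 (unlocked) to the caller's TID. A minimal Go sketch of that fast path, for orientation only (tryFastLock is a hypothetical helper, not part of this change):

    // tryFastLock attempts the uncontended FUTEX_LOCK_PI fast path: CAS the
    // futex word from 0 (unlocked) to the caller's TID. If the CAS fails,
    // the caller falls back to the futex(FUTEX_LOCK_PI) syscall, which is
    // what this change implements on the sentry side.
    func tryFastLock(word *uint32, tid uint32) bool {
        return atomic.CompareAndSwapUint32(word, 0, tid)
    }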
Diffstat (limited to 'pkg')
-rw-r--r--  pkg/abi/linux/futex.go                            6
-rw-r--r--  pkg/sentry/kernel/futex/BUILD                     2
-rw-r--r--  pkg/sentry/kernel/futex/futex.go                215
-rw-r--r--  pkg/sentry/kernel/futex/futex_test.go             4
-rw-r--r--  pkg/sentry/kernel/task_futex.go                   7
-rw-r--r--  pkg/sentry/mm/io.go                              45
-rw-r--r--  pkg/sentry/platform/platform.go                  10
-rw-r--r--  pkg/sentry/platform/safecopy/BUILD                4
-rw-r--r--  pkg/sentry/platform/safecopy/atomic_amd64.s      28
-rw-r--r--  pkg/sentry/platform/safecopy/atomic_arm64.s      28
-rw-r--r--  pkg/sentry/platform/safecopy/safecopy.go          4
-rw-r--r--  pkg/sentry/platform/safecopy/safecopy_unsafe.go  20
-rw-r--r--  pkg/sentry/platform/safecopy/sighandler_amd64.s   9
-rw-r--r--  pkg/sentry/platform/safecopy/sighandler_arm64.s  11
-rw-r--r--  pkg/sentry/safemem/block_unsafe.go               10
-rw-r--r--  pkg/sentry/syscalls/linux/sys_futex.go           68
-rw-r--r--  pkg/sentry/usermem/bytes_io_unsafe.go             8
-rw-r--r--  pkg/sentry/usermem/usermem.go                     7
-rw-r--r--  pkg/syserror/syserror.go                          1
19 files changed, 461 insertions(+), 26 deletions(-)
diff --git a/pkg/abi/linux/futex.go b/pkg/abi/linux/futex.go
index 5dff01fba..afdf4123b 100644
--- a/pkg/abi/linux/futex.go
+++ b/pkg/abi/linux/futex.go
@@ -54,3 +54,9 @@ const (
// FUTEX_TID_MASK is the TID portion of a PI futex word.
const FUTEX_TID_MASK = 0x3fffffff
+
+// Constants used for priority-inheritance futexes.
+const (
+ FUTEX_WAITERS = 0x80000000
+ FUTEX_OWNER_DIED = 0x40000000
+)
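A PI futex word packs the owner's TID and two status bits. A hedged sketch of decoding one with these constants (decodePI is an illustrative helper, not part of the package):

    // decodePI splits a PI futex word into its parts. The low 30 bits hold
    // the owner's TID (0 means unlocked); the top two bits carry state.
    func decodePI(word uint32) (tid uint32, hasWaiters, ownerDied bool) {
        tid = word & linux.FUTEX_TID_MASK
        hasWaiters = word&linux.FUTEX_WAITERS != 0   // tasks are blocked on this futex
        ownerDied = word&linux.FUTEX_OWNER_DIED != 0 // robust-list owner exit (unused in gVisor)
        return tid, hasWaiters, ownerDied
    }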
diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD
index 91feeb5ed..b6af5b20b 100644
--- a/pkg/sentry/kernel/futex/BUILD
+++ b/pkg/sentry/kernel/futex/BUILD
@@ -37,6 +37,8 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/log",
+ "//pkg/sentry/context",
"//pkg/sentry/memmap",
"//pkg/sentry/usermem",
"//pkg/syserror",
diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go
index b3e628fd4..cd7d51621 100644
--- a/pkg/sentry/kernel/futex/futex.go
+++ b/pkg/sentry/kernel/futex/futex.go
@@ -95,12 +95,15 @@ func (k *Key) matches(k2 *Key) bool {
// Target abstracts memory accesses and keys.
type Target interface {
- // SwapUint32 gives access to usermem.SwapUint32.
+ // SwapUint32 gives access to usermem.IO.SwapUint32.
SwapUint32(addr usermem.Addr, new uint32) (uint32, error)
- // CompareAndSwap gives access to usermem.CompareAndSwapUint32.
+ // CompareAndSwap gives access to usermem.IO.CompareAndSwapUint32.
CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error)
+ // LoadUint32 gives access to usermem.IO.LoadUint32.
+ LoadUint32(addr usermem.Addr) (uint32, error)
+
// GetSharedKey returns a Key with kind KindSharedPrivate or
// KindSharedMappable corresponding to the memory mapped at address addr.
//
@@ -112,11 +115,11 @@ type Target interface {
// check performs a basic equality check on the given address.
func check(t Target, addr usermem.Addr, val uint32) error {
- prev, err := t.CompareAndSwapUint32(addr, val, val)
+ cur, err := t.LoadUint32(addr)
if err != nil {
return err
}
- if prev != val {
+ if cur != val {
return syserror.EAGAIN
}
return nil
@@ -140,11 +143,14 @@ func atomicOp(t Target, addr usermem.Addr, opIn uint32) (bool, error) {
)
if opType == linux.FUTEX_OP_SET {
oldVal, err = t.SwapUint32(addr, opArg)
+ if err != nil {
+ return false, err
+ }
} else {
for {
- oldVal, err = t.CompareAndSwapUint32(addr, 0, 0)
+ oldVal, err = t.LoadUint32(addr)
if err != nil {
- break
+ return false, err
}
var newVal uint32
switch opType {
@@ -161,7 +167,7 @@ func atomicOp(t Target, addr usermem.Addr, opIn uint32) (bool, error) {
}
prev, err := t.CompareAndSwapUint32(addr, oldVal, newVal)
if err != nil {
- break
+ return false, err
}
if prev == oldVal {
break // Success.
@@ -222,6 +228,9 @@ type Waiter struct {
// The bitmask we're waiting on.
// This is used in the case of a FUTEX_WAKE_BITSET.
bitmask uint32
+
+ // tid is the thread ID for the waiter in case this is a PI mutex.
+ tid uint32
}
// NewWaiter returns a new unqueued Waiter.
@@ -262,23 +271,28 @@ func (b *bucket) wakeLocked(key *Key, bitmask uint32, n int) int {
// Remove from the bucket and wake the waiter.
woke := w
w = w.Next() // Next iteration.
- b.waiters.Remove(woke)
- woke.C <- struct{}{}
-
- // NOTE: The above channel write establishes a write barrier according
- // to the memory model, so nothing may be ordered around it. Since
- // we've dequeued woke and will never touch it again, we can safely
- // store nil to woke.bucket here and allow the WaitComplete() to
- // short-circuit grabbing the bucket lock. If they somehow miss the
- // store, we are still holding the lock, so we can know that they won't
- // dequeue woke, assume it's free and have the below operation
- // afterwards.
- woke.bucket.Store(nil)
+ b.wakeWaiterLocked(woke)
done++
}
return done
}
+func (b *bucket) wakeWaiterLocked(w *Waiter) {
+ // Remove from the bucket and wake the waiter.
+ b.waiters.Remove(w)
+ w.C <- struct{}{}
+
+ // NOTE: The above channel write establishes a write barrier according
+ // to the memory model, so nothing may be ordered around it. Since
+ // we've dequeued w and will never touch it again, we can safely
+ // store nil to w.bucket here and allow the WaitComplete() to
+ // short-circuit grabbing the bucket lock. If they somehow miss the
+ // store, we are still holding the lock, so we can know that they won't
+ // dequeue w, assume it's free and have the below operation
+ // afterwards.
+ w.bucket.Store(nil)
+}
+
// requeueLocked takes n waiters from the bucket and moves them to naddr on the
// bucket "to".
//
@@ -596,7 +610,7 @@ func (m *Manager) WaitComplete(w *Waiter) {
continue
}
- // Remove w from b.
+ // Remove waiter from bucket.
b.waiters.Remove(w)
w.bucket.Store(nil)
b.mu.Unlock()
@@ -606,3 +620,164 @@ func (m *Manager) WaitComplete(w *Waiter) {
// Release references held by the waiter.
w.key.release()
}
+
+// LockPI attempts to lock the futex following the priority-inheritance futex
+// rules. The lock is acquired only when 'addr' points to 0. The TID of the
+// calling task is written to 'addr' to indicate that the futex is owned. It
+// returns true if the futex was successfully acquired.
+//
+// FUTEX_OWNER_DIED is only set by Linux when robust lists are in use (see
+// exit_robust_list()). Since we don't support robust lists, it is never set
+// here, although the code below handles it.
+func (m *Manager) LockPI(w *Waiter, t Target, addr usermem.Addr, tid uint32, private, try bool) (bool, error) {
+ k, err := getKey(t, addr, private)
+ if err != nil {
+ return false, err
+ }
+ // Ownership of k is transferred to w below.
+
+ // Prepare the Waiter before taking the bucket lock.
+ select {
+ case <-w.C:
+ default:
+ }
+ w.key = k
+ w.tid = tid
+
+ b := m.lockBucket(&k)
+ // Hot function: avoid defers.
+
+ success, err := m.lockPILocked(w, t, addr, tid, b, try)
+ if err != nil {
+ w.key.release()
+ b.mu.Unlock()
+ return false, err
+ }
+ if success || try {
+ // Release the key reference if this isn't going to be a wait.
+ w.key.release()
+ }
+ b.mu.Unlock()
+ return success, nil
+}
+
+func (m *Manager) lockPILocked(w *Waiter, t Target, addr usermem.Addr, tid uint32, b *bucket, try bool) (bool, error) {
+ for {
+ cur, err := t.LoadUint32(addr)
+ if err != nil {
+ return false, err
+ }
+ if (cur & linux.FUTEX_TID_MASK) == tid {
+ return false, syserror.EDEADLK
+ }
+
+ if (cur & linux.FUTEX_TID_MASK) == 0 {
+ // No owner and no waiters, try to acquire the futex.
+
+ // Set TID and preserve owner died status.
+ val := tid
+ val |= cur & linux.FUTEX_OWNER_DIED
+ prev, err := t.CompareAndSwapUint32(addr, cur, val)
+ if err != nil {
+ return false, err
+ }
+ if prev != cur {
+ // CAS failed, retry...
+ // Linux reacquires the bucket lock on retries, which will re-lookup the
+ // mapping at the futex address. However, retrying while holding the
+ // lock is more efficient and reduces the chance of another conflict.
+ continue
+ }
+ // Futex acquired.
+ return true, nil
+ }
+
+ // Futex is already owned, prepare to wait.
+
+ if try {
+ // Caller doesn't want to wait.
+ return false, nil
+ }
+
+ // Set waiters bit if not set yet.
+ if cur&linux.FUTEX_WAITERS == 0 {
+ prev, err := t.CompareAndSwapUint32(addr, cur, cur|linux.FUTEX_WAITERS)
+ if err != nil {
+ return false, err
+ }
+ if prev != cur {
+ // CAS failed, retry...
+ continue
+ }
+ }
+
+ // Add the waiter to the bucket.
+ b.waiters.PushBack(w)
+ w.bucket.Store(b)
+ return false, nil
+ }
+}
+
+// UnlockPI unlocks the futex following the priority-inheritance futex
+// rules. The address provided must contain the caller's TID. If there are
+// waiters, the TID of the next waiter (FIFO) is written to the given address,
+// and that waiter is woken up. If there are no waiters, 0 is written to the
+// address.
+func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool) error {
+ k, err := getKey(t, addr, private)
+ if err != nil {
+ return err
+ }
+ b := m.lockBucket(&k)
+
+ err = m.unlockPILocked(t, addr, tid, b)
+
+ k.release()
+ b.mu.Unlock()
+ return err
+}
+
+func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket) error {
+ cur, err := t.LoadUint32(addr)
+ if err != nil {
+ return err
+ }
+
+ if (cur & linux.FUTEX_TID_MASK) != tid {
+ return syserror.EPERM
+ }
+
+ if b.waiters.Empty() {
+ // It's safe to set 0 because there are no waiters, no new owner, and the
+ // executing task is the current owner (no owner died bit).
+ prev, err := t.CompareAndSwapUint32(addr, cur, 0)
+ if err != nil {
+ return err
+ }
+ if prev != cur {
+ // Let user mode handle CAS races. This is different from lock, which
+ // retries when the CAS fails.
+ return syserror.EAGAIN
+ }
+ return nil
+ }
+
+ next := b.waiters.Front()
+
+ // Set the next owner's TID and the waiters bit if there are more waiters.
+ // The owner-died bit, if set, is cleared because a new owner is taking over.
+ val := next.tid
+ if next.Next() != nil {
+ val |= linux.FUTEX_WAITERS
+ }
+
+ prev, err := t.CompareAndSwapUint32(addr, cur, val)
+ if err != nil {
+ return err
+ }
+ if prev != cur {
+ return syserror.EINVAL
+ }
+
+ b.wakeWaiterLocked(next)
+ return nil
+}
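Taken together, LockPI and UnlockPI give the sentry the full slow-path protocol. A minimal sketch of how a caller drives them, mirroring the flow in sys_futex.go below (m is a *futex.Manager, t a futex.Target, w a Waiter from NewWaiter; error handling elided):

    locked, err := m.LockPI(w, t, addr, tid, true /* private */, false /* try */)
    if err != nil { /* fault or EDEADLK */ }
    if !locked {
        <-w.C              // woken by UnlockPI; our TID is now in the futex word
        m.WaitComplete(w)
    }
    // ... critical section ...
    err = m.UnlockPI(t, addr, tid, true /* private */)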
diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go
index a7ab9f229..9d44ee8e5 100644
--- a/pkg/sentry/kernel/futex/futex_test.go
+++ b/pkg/sentry/kernel/futex/futex_test.go
@@ -49,6 +49,10 @@ func (t testData) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint
return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t[addr]))), nil
}
+func (t testData) LoadUint32(addr usermem.Addr) (uint32, error) {
+ return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t[addr]))), nil
+}
+
func (t testData) GetSharedKey(addr usermem.Addr) (Key, error) {
return Key{
Kind: KindSharedMappable,
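A test for the new path can follow the shape of the existing ones in this file; a hedged sketch (newTestData is assumed to be the file's usual constructor for a byte-backed Target):

    func TestLockPIUncontended(t *testing.T) {
        m := NewManager()
        d := newTestData(4) // assumed helper: a 4-byte futex word, initially 0
        w := NewWaiter()
        locked, err := m.LockPI(w, d, 0, 123, false, false)
        if err != nil || !locked {
            t.Fatalf("LockPI got (%v, %v), want (true, nil)", locked, err)
        }
        if val, _ := d.LoadUint32(0); val&linux.FUTEX_TID_MASK != 123 {
            t.Errorf("futex word = %#x, want TID 123", val)
        }
        if err := m.UnlockPI(d, 0, 123, false); err != nil {
            t.Errorf("UnlockPI failed: %v", err)
        }
    }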
diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go
index 921f7bdbc..351cf47d7 100644
--- a/pkg/sentry/kernel/task_futex.go
+++ b/pkg/sentry/kernel/task_futex.go
@@ -41,6 +41,13 @@ func (t *Task) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32,
})
}
+// LoadUint32 implements futex.Target.LoadUint32.
+func (t *Task) LoadUint32(addr usermem.Addr) (uint32, error) {
+ return t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{
+ AddressSpaceActive: true,
+ })
+}
+
// GetSharedKey implements futex.Target.GetSharedKey.
func (t *Task) GetSharedKey(addr usermem.Addr) (futex.Key, error) {
return t.MemoryManager().GetSharedFutexKey(t, addr)
diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go
index 6600ddd78..e0cebef84 100644
--- a/pkg/sentry/mm/io.go
+++ b/pkg/sentry/mm/io.go
@@ -346,6 +346,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new
if err != nil {
return 0, translateIOError(ctx, err)
}
+ // Return the number of bytes read.
return 4, nil
})
return old, err
@@ -388,11 +389,55 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem.
if err != nil {
return 0, translateIOError(ctx, err)
}
+ // Return the number of bytes read.
return 4, nil
})
return prev, err
}
+// LoadUint32 implements usermem.IO.LoadUint32.
+func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts usermem.IOOpts) (uint32, error) {
+ ar, ok := mm.CheckIORange(addr, 4)
+ if !ok {
+ return 0, syserror.EFAULT
+ }
+
+ // Do AddressSpace IO if applicable.
+ if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
+ for {
+ val, err := mm.as.LoadUint32(addr)
+ if err == nil {
+ return val, nil
+ }
+ if f, ok := err.(platform.SegmentationFault); ok {
+ if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Read); err != nil {
+ return 0, err
+ }
+ continue
+ }
+ return 0, translateIOError(ctx, err)
+ }
+ }
+
+ // Go through internal mappings.
+ var val uint32
+ _, err := mm.withInternalMappings(ctx, ar, usermem.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
+ if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
+ // Atomicity is unachievable across mappings.
+ return 0, syserror.EFAULT
+ }
+ im := ims.Head()
+ var err error
+ val, err = safemem.LoadUint32(im)
+ if err != nil {
+ return 0, translateIOError(ctx, err)
+ }
+ // Return the number of bytes read.
+ return 4, nil
+ })
+ return val, err
+}
+
// handleASIOFault handles a page fault at address addr for an AddressSpaceIO
// operation spanning ioar.
//
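Task code reaches this through the futex.Target plumbing added in task_futex.go above; a hedged example of the sentry-side call (t is a *kernel.Task running on its own address space):

    // Atomically read a 4-byte futex word from the task's memory. With
    // AddressSpaceActive set, the fast AddressSpaceIO path is used when the
    // platform supports it; otherwise the internal-mappings fallback runs.
    val, err := t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{
        AddressSpaceActive: true,
    })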
diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go
index f16588e6e..a9e76bd45 100644
--- a/pkg/sentry/platform/platform.go
+++ b/pkg/sentry/platform/platform.go
@@ -254,6 +254,11 @@ type AddressSpaceIO interface {
//
// Preconditions: addr must be aligned to a 4-byte boundary.
CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error)
+
+ // LoadUint32 atomically loads the uint32 value at addr and returns it.
+ //
+ // Preconditions: addr must be aligned to a 4-byte boundary.
+ LoadUint32(addr usermem.Addr) (uint32, error)
}
// NoAddressSpaceIO implements AddressSpaceIO methods by panicking.
@@ -284,6 +289,11 @@ func (NoAddressSpaceIO) CompareAndSwapUint32(addr usermem.Addr, old, new uint32)
panic("This platform does not support AddressSpaceIO")
}
+// LoadUint32 implements AddressSpaceIO.LoadUint32.
+func (NoAddressSpaceIO) LoadUint32(addr usermem.Addr) (uint32, error) {
+ panic("This platform does not support AddressSpaceIO")
+}
+
// SegmentationFault is an error returned by AddressSpaceIO methods when IO
// fails due to access of an unmapped page, or a mapped page with insufficient
// permissions.
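Platforms that don't support direct address-space IO can embed NoAddressSpaceIO to satisfy the interface; a minimal sketch (myAddressSpace is illustrative):

    // myAddressSpace opts out of AddressSpaceIO. Callers are expected to
    // check Platform.SupportsAddressSpaceIO() first, since the embedded
    // methods panic by design when reached.
    type myAddressSpace struct {
        platform.NoAddressSpaceIO
        // ... platform-specific state ...
    }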
diff --git a/pkg/sentry/platform/safecopy/BUILD b/pkg/sentry/platform/safecopy/BUILD
index 05a6a61ae..d97a40297 100644
--- a/pkg/sentry/platform/safecopy/BUILD
+++ b/pkg/sentry/platform/safecopy/BUILD
@@ -18,9 +18,7 @@ go_library(
],
importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy",
visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/syserror",
- ],
+ deps = ["//pkg/syserror"],
)
go_test(
diff --git a/pkg/sentry/platform/safecopy/atomic_amd64.s b/pkg/sentry/platform/safecopy/atomic_amd64.s
index 873ffa046..f90b4bfd1 100644
--- a/pkg/sentry/platform/safecopy/atomic_amd64.s
+++ b/pkg/sentry/platform/safecopy/atomic_amd64.s
@@ -106,3 +106,31 @@ TEXT ·compareAndSwapUint32(SB), NOSPLIT, $0-24
CMPXCHGL DX, 0(DI)
MOVL AX, prev+16(FP)
RET
+
+// handleLoadUint32Fault returns the value stored in DI. Control is transferred
+// to it when loadUint32 below receives SIGSEGV or SIGBUS, with the signal
+// number stored in DI.
+//
+// It must have the same frame configuration as loadUint32 so that it can undo
+// any potential call frame set up by the assembler.
+TEXT handleLoadUint32Fault(SB), NOSPLIT, $0-16
+ MOVL DI, sig+12(FP)
+ RET
+
+// loadUint32 atomically loads *addr and returns it. If a SIGSEGV or SIGBUS
+// signal is received, the value returned is unspecified, and sig is the number
+// of the signal that was received.
+//
+// Preconditions: addr must be aligned to a 4-byte boundary.
+//
+//func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32)
+TEXT ·loadUint32(SB), NOSPLIT, $0-16
+ // Store 0 as the returned signal number. If we run to completion,
+ // this is the value the caller will see; if a signal is received,
+ // handleLoadUint32Fault will store a different value in this address.
+ MOVL $0, sig+12(FP)
+
+ MOVQ addr+0(FP), AX
+ MOVL (AX), BX
+ MOVL BX, val+8(FP)
+ RET
diff --git a/pkg/sentry/platform/safecopy/atomic_arm64.s b/pkg/sentry/platform/safecopy/atomic_arm64.s
index 554a5c1e1..d58ed71f7 100644
--- a/pkg/sentry/platform/safecopy/atomic_arm64.s
+++ b/pkg/sentry/platform/safecopy/atomic_arm64.s
@@ -96,3 +96,31 @@ again:
done:
MOVW R3, prev+16(FP)
RET
+
+// handleLoadUint32Fault returns the value stored in R1. Control is transferred
+// to it when loadUint32 below receives SIGSEGV or SIGBUS, with the signal
+// number stored in R1.
+//
+// It must have the same frame configuration as loadUint32 so that it can undo
+// any potential call frame set up by the assembler.
+TEXT handleLoadUint32Fault(SB), NOSPLIT, $0-16
+ MOVW R1, sig+12(FP)
+ RET
+
+// loadUint32 atomically loads *addr and returns it. If a SIGSEGV or SIGBUS
+// signal is received, the value returned is unspecified, and sig is the number
+// of the signal that was received.
+//
+// Preconditions: addr must be aligned to a 4-byte boundary.
+//
+//func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32)
+TEXT ·loadUint32(SB), NOSPLIT, $0-16
+ // Store 0 as the returned signal number. If we run to completion,
+ // this is the value the caller will see; if a signal is received,
+ // handleLoadUint32Fault will store a different value in this address.
+ MOVW $0, sig+12(FP)
+
+ MOVD addr+0(FP), R0
+ LDARW (R0), R1
+ MOVW R1, val+8(FP)
+ RET
diff --git a/pkg/sentry/platform/safecopy/safecopy.go b/pkg/sentry/platform/safecopy/safecopy.go
index c60f73103..69c66a3b7 100644
--- a/pkg/sentry/platform/safecopy/safecopy.go
+++ b/pkg/sentry/platform/safecopy/safecopy.go
@@ -75,6 +75,8 @@ var (
swapUint64End uintptr
compareAndSwapUint32Begin uintptr
compareAndSwapUint32End uintptr
+ loadUint32Begin uintptr
+ loadUint32End uintptr
// savedSigSegVHandler is a pointer to the SIGSEGV handler that was
// configured before we replaced it with our own. We still call into it
@@ -119,6 +121,8 @@ func initializeAddresses() {
swapUint64End = FindEndAddress(swapUint64Begin)
compareAndSwapUint32Begin = reflect.ValueOf(compareAndSwapUint32).Pointer()
compareAndSwapUint32End = FindEndAddress(compareAndSwapUint32Begin)
+ loadUint32Begin = reflect.ValueOf(loadUint32).Pointer()
+ loadUint32End = FindEndAddress(loadUint32Begin)
}
func init() {
diff --git a/pkg/sentry/platform/safecopy/safecopy_unsafe.go b/pkg/sentry/platform/safecopy/safecopy_unsafe.go
index e78a6714e..f84527484 100644
--- a/pkg/sentry/platform/safecopy/safecopy_unsafe.go
+++ b/pkg/sentry/platform/safecopy/safecopy_unsafe.go
@@ -79,6 +79,14 @@ func swapUint64(ptr unsafe.Pointer, new uint64) (old uint64, sig int32)
//go:noescape
func compareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (prev uint32, sig int32)
+// loadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It
+// may fail with SIGSEGV or SIGBUS if either is received while reading from ptr.
+//
+// Preconditions: ptr must be aligned to a 4-byte boundary.
+//
+//go:noescape
+func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32)
+
// CopyIn copies len(dst) bytes from src to dst. It returns the number of bytes
// copied and an error if SIGSEGV or SIGBUS is received while reading from src.
func CopyIn(dst []byte, src unsafe.Pointer) (int, error) {
@@ -260,6 +268,18 @@ func CompareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (uint32, error) {
return prev, errorFromFaultSignal(ptr, sig)
}
+// LoadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It
+// may fail with SIGSEGV or SIGBUS if either is received while reading from ptr.
+//
+// Preconditions: ptr must be aligned to a 4-byte boundary.
+func LoadUint32(ptr unsafe.Pointer) (uint32, error) {
+ if addr := uintptr(ptr); addr&3 != 0 {
+ return 0, AlignmentError{addr, 4}
+ }
+ val, sig := loadUint32(ptr)
+ return val, errorFromFaultSignal(ptr, sig)
+}
+
func errorFromFaultSignal(addr unsafe.Pointer, sig int32) error {
switch sig {
case 0:
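A hedged example of the exported wrapper in use; a fault while reading surfaces as an error instead of killing the sentry (userAddr is an illustrative user-space address):

    val, err := safecopy.LoadUint32(unsafe.Pointer(uintptr(userAddr)))
    if err != nil {
        // A safecopy.SegvError or safecopy.BusError means the page was
        // unmapped or inaccessible; callers translate this into fault
        // handling (e.g. EFAULT or demand paging).
    }
    _ = val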
diff --git a/pkg/sentry/platform/safecopy/sighandler_amd64.s b/pkg/sentry/platform/safecopy/sighandler_amd64.s
index 06614f1b4..db7701a29 100644
--- a/pkg/sentry/platform/safecopy/sighandler_amd64.s
+++ b/pkg/sentry/platform/safecopy/sighandler_amd64.s
@@ -101,6 +101,15 @@ not_swapuint64:
JMP handle_fault
not_casuint32:
+ CMPQ CX, ·loadUint32Begin(SB)
+ JB not_loaduint32
+ CMPQ CX, ·loadUint32End(SB)
+ JAE not_loaduint32
+
+ LEAQ handleLoadUint32Fault(SB), CX
+ JMP handle_fault
+
+not_loaduint32:
original_handler:
// Jump to the previous signal handler, which is likely the golang one.
XORQ CX, CX
diff --git a/pkg/sentry/platform/safecopy/sighandler_arm64.s b/pkg/sentry/platform/safecopy/sighandler_arm64.s
index 5e8e193e7..cdfca8207 100644
--- a/pkg/sentry/platform/safecopy/sighandler_arm64.s
+++ b/pkg/sentry/platform/safecopy/sighandler_arm64.s
@@ -110,6 +110,17 @@ not_swapuint64:
B handle_fault
not_casuint32:
+ MOVD ·loadUint32Begin(SB), R8
+ CMP R8, R7
+ BLO not_loaduint32
+ MOVD ·loadUint32End(SB), R8
+ CMP R8, R7
+ BHS not_loaduint32
+
+ MOVD $handleLoadUint32Fault(SB), R7
+ B handle_fault
+
+not_loaduint32:
original_handler:
// Jump to the previous signal handler, which is likely the golang one.
MOVD ·savedSigBusHandler(SB), R7
diff --git a/pkg/sentry/safemem/block_unsafe.go b/pkg/sentry/safemem/block_unsafe.go
index e91ff66ae..c3a9780d2 100644
--- a/pkg/sentry/safemem/block_unsafe.go
+++ b/pkg/sentry/safemem/block_unsafe.go
@@ -267,3 +267,13 @@ func CompareAndSwapUint32(b Block, old, new uint32) (uint32, error) {
}
return safecopy.CompareAndSwapUint32(b.start, old, new)
}
+
+// LoadUint32 invokes safecopy.LoadUint32 on the first 4 bytes of b.
+//
+// Preconditions: b.Len() >= 4.
+func LoadUint32(b Block) (uint32, error) {
+ if b.length < 4 {
+ panic(fmt.Sprintf("insufficient length: %d", b.length))
+ }
+ return safecopy.LoadUint32(b.start)
+}
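For reference, a Block over ordinary (non-faulting) memory can be built with the package's existing BlockFromSafeSlice, so the new loader is easy to exercise; a small sketch:

    buf := make([]byte, 4)
    b := safemem.BlockFromSafeSlice(buf)
    val, err := safemem.LoadUint32(b) // atomic load of buf's first 4 bytes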
diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go
index 7a1d396ec..f0c89cba4 100644
--- a/pkg/sentry/syscalls/linux/sys_futex.go
+++ b/pkg/sentry/syscalls/linux/sys_futex.go
@@ -124,6 +124,46 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add
return 0, kernel.ERESTART_RESTARTBLOCK
}
+func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.Addr, private bool) error {
+ w := t.FutexWaiter()
+ locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, false)
+ if err != nil {
+ return err
+ }
+ if locked {
+ // Futex acquired, we're done!
+ return nil
+ }
+
+ if forever {
+ err = t.Block(w.C)
+ } else {
+ notifier, tchan := ktime.NewChannelNotifier()
+ timer := ktime.NewTimer(t.Kernel().RealtimeClock(), notifier)
+ timer.Swap(ktime.Setting{
+ Enabled: true,
+ Next: ktime.FromTimespec(ts),
+ })
+ err = t.BlockWithTimer(w.C, tchan)
+ timer.Destroy()
+ }
+
+ t.Futex().WaitComplete(w)
+ return syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+}
+
+func tryLockPI(t *kernel.Task, addr usermem.Addr, private bool) error {
+ w := t.FutexWaiter()
+ locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, true)
+ if err != nil {
+ return err
+ }
+ if !locked {
+ return syserror.EWOULDBLOCK
+ }
+ return nil
+}
+
// Futex implements linux syscall futex(2).
// It provides a method for a program to wait for a value at a given address to
// change, and a method to wake up anyone waiting on a particular address.
@@ -144,7 +184,7 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
switch cmd {
case linux.FUTEX_WAIT, linux.FUTEX_WAIT_BITSET:
// WAIT{_BITSET} waits forever if the timeout isn't passed.
- forever := timeout == 0
+ forever := (timeout == 0)
var timespec linux.Timespec
if !forever {
@@ -205,8 +245,30 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
n, err := t.Futex().WakeOp(t, addr, naddr, private, val, nreq, op)
return uintptr(n), nil, err
- case linux.FUTEX_LOCK_PI, linux.FUTEX_UNLOCK_PI, linux.FUTEX_TRYLOCK_PI, linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI:
- // We don't support any priority inversion futexes.
+ case linux.FUTEX_LOCK_PI:
+ forever := (timeout == 0)
+
+ var timespec linux.Timespec
+ if !forever {
+ var err error
+ timespec, err = copyTimespecIn(t, timeout)
+ if err != nil {
+ return 0, nil, err
+ }
+ }
+ err := futexLockPI(t, timespec, forever, addr, private)
+ return 0, nil, err
+
+ case linux.FUTEX_TRYLOCK_PI:
+ err := tryLockPI(t, addr, private)
+ return 0, nil, err
+
+ case linux.FUTEX_UNLOCK_PI:
+ err := t.Futex().UnlockPI(t, addr, uint32(t.ThreadID()), private)
+ return 0, nil, err
+
+ case linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI:
+ t.Kernel().EmitUnimplementedEvent(t)
return 0, nil, syserror.ENOSYS
default:
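From a guest program's perspective, the newly supported commands are reached through the raw futex(2) syscall once the user-space fast path fails; a hedged Go sketch using golang.org/x/sys/unix constants (uaddr is the guest-side futex word):

    // Contended lock: ask the kernel to queue us on the PI futex. A zero
    // timeout argument blocks forever; this command returned ENOSYS before
    // this change.
    _, _, errno := unix.Syscall6(unix.SYS_FUTEX,
        uintptr(unsafe.Pointer(uaddr)),
        uintptr(unix.FUTEX_LOCK_PI|unix.FUTEX_PRIVATE_FLAG),
        0, 0, 0, 0)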
diff --git a/pkg/sentry/usermem/bytes_io_unsafe.go b/pkg/sentry/usermem/bytes_io_unsafe.go
index 8bdf3a508..7add8bc82 100644
--- a/pkg/sentry/usermem/bytes_io_unsafe.go
+++ b/pkg/sentry/usermem/bytes_io_unsafe.go
@@ -37,3 +37,11 @@ func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr Addr, old, new
}
return atomicbitops.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)])), old, new), nil
}
+
+// LoadUint32 implements IO.LoadUint32.
+func (b *BytesIO) LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) {
+ if _, err := b.rangeCheck(addr, 4); err != nil {
+ return 0, err
+ }
+ return atomic.LoadUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)]))), nil
+}
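BytesIO backs the IO interface with a plain byte slice, which keeps unit tests hermetic; a hedged usage sketch (ctx is any sentry context, e.g. from contexttest in tests):

    b := usermem.NewBytesIO(make([]byte, 8))
    val, err := b.LoadUint32(ctx, 4, usermem.IOOpts{}) // atomic read at offset 4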
diff --git a/pkg/sentry/usermem/usermem.go b/pkg/sentry/usermem/usermem.go
index 75ac4d22d..c3c9c153b 100644
--- a/pkg/sentry/usermem/usermem.go
+++ b/pkg/sentry/usermem/usermem.go
@@ -103,6 +103,13 @@ type IO interface {
// any following locks in the lock order. addr must be aligned to a 4-byte
// boundary.
CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error)
+
+ // LoadUint32 atomically loads the uint32 value at addr and returns it.
+ //
+ // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
+ // any following locks in the lock order. addr must be aligned to a 4-byte
+ // boundary.
+ LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error)
}
// IOOpts contains options applicable to all IO methods.
diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go
index 4228707f4..5558cccff 100644
--- a/pkg/syserror/syserror.go
+++ b/pkg/syserror/syserror.go
@@ -33,6 +33,7 @@ var (
ECHILD = error(syscall.ECHILD)
ECONNREFUSED = error(syscall.ECONNREFUSED)
ECONNRESET = error(syscall.ECONNRESET)
+ EDEADLK = error(syscall.EDEADLK)
EEXIST = error(syscall.EEXIST)
EFAULT = error(syscall.EFAULT)
EFBIG = error(syscall.EFBIG)