summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/syscalls/linux
diff options
context:
space:
mode:
authorJamie Liu <jamieliu@google.com>2018-10-08 10:19:27 -0700
committerShentubot <shentubot@google.com>2018-10-08 10:20:38 -0700
commite9e8be661328661b5527f1643727b9a13bbeab48 (patch)
treefb6ab9a8e995d56a73b00c237cb6225799f6417c /pkg/sentry/syscalls/linux
parent4a00ea557c6e60cdd131b2a9866aa3b0bcb9cb2c (diff)
Implement shared futexes.
- Shared futex objects on shared mappings are represented by Mappable + offset, analogous to Linux's use of inode + offset. Add type futex.Key, and change the futex.Manager bucket API to use futex.Keys instead of addresses. - Extend the futex.Checker interface to be able to return Keys for memory mappings. It returns Keys rather than just mappings because whether the address or the target of the mapping is used in the Key depends on whether the mapping is MAP_SHARED or MAP_PRIVATE; this matters because using mapping target for a futex on a MAP_PRIVATE mapping causes it to stop working across COW-breaking. - futex.Manager.WaitComplete depends on atomic updates to futex.Waiter.addr to determine when it has locked the right bucket, which is much less straightforward for struct futex.Waiter.key. Switch to an atomically-accessed futex.Waiter.bucket pointer. - futex.Manager.Wake now needs to take a futex.Checker to resolve addresses for shared futexes. CLONE_CHILD_CLEARTID requires the exit path to perform a shared futex wakeup (Linux: kernel/fork.c:mm_release() => sys_futex(tsk->clear_child_tid, FUTEX_WAKE, ...)). This is a problem because futexChecker is in the syscalls/linux package. Move it to kernel. PiperOrigin-RevId: 216207039 Change-Id: I708d68e2d1f47e526d9afd95e7fed410c84afccf
Diffstat (limited to 'pkg/sentry/syscalls/linux')
-rw-r--r--pkg/sentry/syscalls/linux/sys_futex.go135
1 files changed, 16 insertions, 119 deletions
diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go
index 1a0e1f5fb..d35dcecbe 100644
--- a/pkg/sentry/syscalls/linux/sys_futex.go
+++ b/pkg/sentry/syscalls/linux/sys_futex.go
@@ -21,115 +21,9 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
)
-// futexChecker is a futex.Checker that uses a Task's MemoryManager.
-type futexChecker struct {
- t *kernel.Task
-}
-
-// Check checks if the address contains the given value, and returns
-// syserror.EAGAIN if it doesn't. See Checker interface in futex package
-// for more information.
-func (f futexChecker) Check(addr uintptr, val uint32) error {
- in := f.t.CopyScratchBuffer(4)
- _, err := f.t.CopyInBytes(usermem.Addr(addr), in)
- if err != nil {
- return err
- }
- nval := usermem.ByteOrder.Uint32(in)
- if val != nval {
- return syserror.EAGAIN
- }
- return nil
-}
-
-func (f futexChecker) atomicOp(addr uintptr, op func(uint32) uint32) (uint32, error) {
- in := f.t.CopyScratchBuffer(4)
- _, err := f.t.CopyInBytes(usermem.Addr(addr), in)
- if err != nil {
- return 0, err
- }
- o := usermem.ByteOrder.Uint32(in)
- mm := f.t.MemoryManager()
- for {
- n := op(o)
- r, err := mm.CompareAndSwapUint32(f.t, usermem.Addr(addr), o, n, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- if err != nil {
- return 0, err
- }
-
- if r == o {
- return o, nil
- }
- o = r
- }
-}
-
-// Op performs an operation on addr and returns a result based on the operation.
-func (f futexChecker) Op(addr uintptr, opIn uint32) (bool, error) {
- op := (opIn >> 28) & 0xf
- cmp := (opIn >> 24) & 0xf
- opArg := (opIn >> 12) & 0xfff
- cmpArg := opIn & 0xfff
-
- if op&linux.FUTEX_OP_OPARG_SHIFT != 0 {
- opArg = 1 << opArg
- op &^= linux.FUTEX_OP_OPARG_SHIFT // clear flag
- }
-
- var oldVal uint32
- var err error
- switch op {
- case linux.FUTEX_OP_SET:
- oldVal, err = f.t.MemoryManager().SwapUint32(f.t, usermem.Addr(addr), opArg, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- case linux.FUTEX_OP_ADD:
- oldVal, err = f.atomicOp(addr, func(a uint32) uint32 {
- return a + opArg
- })
- case linux.FUTEX_OP_OR:
- oldVal, err = f.atomicOp(addr, func(a uint32) uint32 {
- return a | opArg
- })
- case linux.FUTEX_OP_ANDN:
- oldVal, err = f.atomicOp(addr, func(a uint32) uint32 {
- return a & ^opArg
- })
- case linux.FUTEX_OP_XOR:
- oldVal, err = f.atomicOp(addr, func(a uint32) uint32 {
- return a ^ opArg
- })
- default:
- return false, syserror.ENOSYS
- }
- if err != nil {
- return false, err
- }
-
- switch cmp {
- case linux.FUTEX_OP_CMP_EQ:
- return oldVal == cmpArg, nil
- case linux.FUTEX_OP_CMP_NE:
- return oldVal != cmpArg, nil
- case linux.FUTEX_OP_CMP_LT:
- return oldVal < cmpArg, nil
- case linux.FUTEX_OP_CMP_LE:
- return oldVal <= cmpArg, nil
- case linux.FUTEX_OP_CMP_GT:
- return oldVal > cmpArg, nil
- case linux.FUTEX_OP_CMP_GE:
- return oldVal >= cmpArg, nil
- default:
- return false, syserror.ENOSYS
- }
-}
-
// futexWaitRestartBlock encapsulates the state required to restart futex(2)
// via restart_syscall(2).
//
@@ -140,13 +34,14 @@ type futexWaitRestartBlock struct {
// addr stored as uint64 since uintptr is not save-able.
addr uint64
- val uint32
- mask uint32
+ private bool
+ val uint32
+ mask uint32
}
// Restart implements kernel.SyscallRestartBlock.Restart.
func (f *futexWaitRestartBlock) Restart(t *kernel.Task) (uintptr, error) {
- return futexWaitDuration(t, f.duration, false, uintptr(f.addr), f.val, f.mask)
+ return futexWaitDuration(t, f.duration, false, uintptr(f.addr), f.private, f.val, f.mask)
}
// futexWaitAbsolute performs a FUTEX_WAIT_BITSET, blocking until the wait is
@@ -156,9 +51,9 @@ func (f *futexWaitRestartBlock) Restart(t *kernel.Task) (uintptr, error) {
//
// If blocking is interrupted, the syscall is restarted with the original
// arguments.
-func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, forever bool, addr uintptr, val, mask uint32) (uintptr, error) {
+func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, forever bool, addr uintptr, private bool, val, mask uint32) (uintptr, error) {
w := t.FutexWaiter()
- err := t.Futex().WaitPrepare(w, futexChecker{t}, addr, val, mask)
+ err := t.Futex().WaitPrepare(w, t.FutexChecker(), addr, private, val, mask)
if err != nil {
return 0, err
}
@@ -192,9 +87,9 @@ func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, fo
// syscall. If forever is true, the syscall is restarted with the original
// arguments. If forever is false, duration is a relative timeout and the
// syscall is restarted with the remaining timeout.
-func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, addr uintptr, val, mask uint32) (uintptr, error) {
+func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, addr uintptr, private bool, val, mask uint32) (uintptr, error) {
w := t.FutexWaiter()
- err := t.Futex().WaitPrepare(w, futexChecker{t}, addr, val, mask)
+ err := t.Futex().WaitPrepare(w, t.FutexChecker(), addr, private, val, mask)
if err != nil {
return 0, err
}
@@ -222,6 +117,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add
t.SetSyscallRestartBlock(&futexWaitRestartBlock{
duration: remaining,
addr: uint64(addr),
+ private: private,
val: val,
mask: mask,
})
@@ -243,6 +139,7 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
addr := uintptr(uaddr)
naddr := uintptr(uaddr2)
cmd := futexOp &^ (linux.FUTEX_PRIVATE_FLAG | linux.FUTEX_CLOCK_REALTIME)
+ private := (futexOp & linux.FUTEX_PRIVATE_FLAG) != 0
clockRealtime := (futexOp & linux.FUTEX_CLOCK_REALTIME) == linux.FUTEX_CLOCK_REALTIME
mask := uint32(val3)
@@ -268,7 +165,7 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
if !forever {
timeoutDur = time.Duration(timespec.ToNsecCapped()) * time.Nanosecond
}
- n, err := futexWaitDuration(t, timeoutDur, forever, addr, uint32(val), mask)
+ n, err := futexWaitDuration(t, timeoutDur, forever, addr, private, uint32(val), mask)
return n, nil, err
case linux.FUTEX_WAIT_BITSET:
@@ -277,7 +174,7 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
if mask == 0 {
return 0, nil, syserror.EINVAL
}
- n, err := futexWaitAbsolute(t, clockRealtime, timespec, forever, addr, uint32(val), mask)
+ n, err := futexWaitAbsolute(t, clockRealtime, timespec, forever, addr, private, uint32(val), mask)
return n, nil, err
default:
panic("unreachable")
@@ -291,23 +188,23 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
if mask == 0 {
return 0, nil, syserror.EINVAL
}
- n, err := t.Futex().Wake(addr, mask, val)
+ n, err := t.Futex().Wake(t.FutexChecker(), addr, private, mask, val)
return uintptr(n), nil, err
case linux.FUTEX_REQUEUE:
- n, err := t.Futex().Requeue(addr, naddr, val, nreq)
+ n, err := t.Futex().Requeue(t.FutexChecker(), addr, naddr, private, val, nreq)
return uintptr(n), nil, err
case linux.FUTEX_CMP_REQUEUE:
// 'val3' contains the value to be checked at 'addr' and
// 'val' is the number of waiters that should be woken up.
nval := uint32(val3)
- n, err := t.Futex().RequeueCmp(futexChecker{t}, addr, nval, naddr, val, nreq)
+ n, err := t.Futex().RequeueCmp(t.FutexChecker(), addr, naddr, private, nval, val, nreq)
return uintptr(n), nil, err
case linux.FUTEX_WAKE_OP:
op := uint32(val3)
- n, err := t.Futex().WakeOp(futexChecker{t}, addr, naddr, val, nreq, op)
+ n, err := t.Futex().WakeOp(t.FutexChecker(), addr, naddr, private, val, nreq, op)
return uintptr(n), nil, err
case linux.FUTEX_LOCK_PI, linux.FUTEX_UNLOCK_PI, linux.FUTEX_TRYLOCK_PI, linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI: