summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/syscalls/linux/sys_futex.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/syscalls/linux/sys_futex.go')
-rw-r--r--pkg/sentry/syscalls/linux/sys_futex.go278
1 files changed, 278 insertions, 0 deletions
diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go
new file mode 100644
index 000000000..7cef4b50c
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/sys_futex.go
@@ -0,0 +1,278 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "time"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// futexWaitRestartBlock encapsulates the state required to restart futex(2)
+// via restart_syscall(2).
+//
+// +stateify savable
+type futexWaitRestartBlock struct {
+ duration time.Duration
+
+ // addr stored as uint64 since uintptr is not save-able.
+ addr uint64
+ private bool
+ val uint32
+ mask uint32
+}
+
+// Restart implements kernel.SyscallRestartBlock.Restart.
+func (f *futexWaitRestartBlock) Restart(t *kernel.Task) (uintptr, error) {
+ return futexWaitDuration(t, f.duration, false, usermem.Addr(f.addr), f.private, f.val, f.mask)
+}
+
+// futexWaitAbsolute performs a FUTEX_WAIT_BITSET, blocking until the wait is
+// complete.
+//
+// The wait blocks forever if forever is true, otherwise it blocks until ts.
+//
+// If blocking is interrupted, the syscall is restarted with the original
+// arguments.
+func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, forever bool, addr usermem.Addr, private bool, val, mask uint32) (uintptr, error) {
+ w := t.FutexWaiter()
+ err := t.Futex().WaitPrepare(w, t, addr, private, val, mask)
+ if err != nil {
+ return 0, err
+ }
+
+ if forever {
+ err = t.Block(w.C)
+ } else if clockRealtime {
+ notifier, tchan := ktime.NewChannelNotifier()
+ timer := ktime.NewTimer(t.Kernel().RealtimeClock(), notifier)
+ timer.Swap(ktime.Setting{
+ Enabled: true,
+ Next: ktime.FromTimespec(ts),
+ })
+ err = t.BlockWithTimer(w.C, tchan)
+ timer.Destroy()
+ } else {
+ err = t.BlockWithDeadline(w.C, true, ktime.FromTimespec(ts))
+ }
+
+ t.Futex().WaitComplete(w)
+ return 0, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+}
+
+// futexWaitDuration performs a FUTEX_WAIT, blocking until the wait is
+// complete.
+//
+// The wait blocks forever if forever is true, otherwise is blocks for
+// duration.
+//
+// If blocking is interrupted, forever determines how to restart the
+// syscall. If forever is true, the syscall is restarted with the original
+// arguments. If forever is false, duration is a relative timeout and the
+// syscall is restarted with the remaining timeout.
+func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, addr usermem.Addr, private bool, val, mask uint32) (uintptr, error) {
+ w := t.FutexWaiter()
+ err := t.Futex().WaitPrepare(w, t, addr, private, val, mask)
+ if err != nil {
+ return 0, err
+ }
+
+ remaining, err := t.BlockWithTimeout(w.C, !forever, duration)
+ t.Futex().WaitComplete(w)
+ if err == nil {
+ return 0, nil
+ }
+
+ // The wait was unsuccessful for some reason other than interruption. Simply
+ // forward the error.
+ if err != syserror.ErrInterrupted {
+ return 0, err
+ }
+
+ // The wait was interrupted and we need to restart. Decide how.
+
+ // The wait duration was absolute, restart with the original arguments.
+ if forever {
+ return 0, kernel.ERESTARTSYS
+ }
+
+ // The wait duration was relative, restart with the remaining duration.
+ t.SetSyscallRestartBlock(&futexWaitRestartBlock{
+ duration: remaining,
+ addr: uint64(addr),
+ private: private,
+ val: val,
+ mask: mask,
+ })
+ return 0, kernel.ERESTART_RESTARTBLOCK
+}
+
+func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.Addr, private bool) error {
+ w := t.FutexWaiter()
+ locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, false)
+ if err != nil {
+ return err
+ }
+ if locked {
+ // Futex acquired, we're done!
+ return nil
+ }
+
+ if forever {
+ err = t.Block(w.C)
+ } else {
+ notifier, tchan := ktime.NewChannelNotifier()
+ timer := ktime.NewTimer(t.Kernel().RealtimeClock(), notifier)
+ timer.Swap(ktime.Setting{
+ Enabled: true,
+ Next: ktime.FromTimespec(ts),
+ })
+ err = t.BlockWithTimer(w.C, tchan)
+ timer.Destroy()
+ }
+
+ t.Futex().WaitComplete(w)
+ return syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+}
+
+func tryLockPI(t *kernel.Task, addr usermem.Addr, private bool) error {
+ w := t.FutexWaiter()
+ locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, true)
+ if err != nil {
+ return err
+ }
+ if !locked {
+ return syserror.EWOULDBLOCK
+ }
+ return nil
+}
+
+// Futex implements linux syscall futex(2).
+// It provides a method for a program to wait for a value at a given address to
+// change, and a method to wake up anyone waiting on a particular address.
+func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ addr := args[0].Pointer()
+ futexOp := args[1].Int()
+ val := int(args[2].Int())
+ nreq := int(args[3].Int())
+ timeout := args[3].Pointer()
+ naddr := args[4].Pointer()
+ val3 := args[5].Int()
+
+ cmd := futexOp &^ (linux.FUTEX_PRIVATE_FLAG | linux.FUTEX_CLOCK_REALTIME)
+ private := (futexOp & linux.FUTEX_PRIVATE_FLAG) != 0
+ clockRealtime := (futexOp & linux.FUTEX_CLOCK_REALTIME) == linux.FUTEX_CLOCK_REALTIME
+ mask := uint32(val3)
+
+ switch cmd {
+ case linux.FUTEX_WAIT, linux.FUTEX_WAIT_BITSET:
+ // WAIT{_BITSET} wait forever if the timeout isn't passed.
+ forever := (timeout == 0)
+
+ var timespec linux.Timespec
+ if !forever {
+ var err error
+ timespec, err = copyTimespecIn(t, timeout)
+ if err != nil {
+ return 0, nil, err
+ }
+ }
+
+ switch cmd {
+ case linux.FUTEX_WAIT:
+ // WAIT uses a relative timeout.
+ mask = ^uint32(0)
+ var timeoutDur time.Duration
+ if !forever {
+ timeoutDur = time.Duration(timespec.ToNsecCapped()) * time.Nanosecond
+ }
+ n, err := futexWaitDuration(t, timeoutDur, forever, addr, private, uint32(val), mask)
+ return n, nil, err
+
+ case linux.FUTEX_WAIT_BITSET:
+ // WAIT_BITSET uses an absolute timeout which is either
+ // CLOCK_MONOTONIC or CLOCK_REALTIME.
+ if mask == 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ n, err := futexWaitAbsolute(t, clockRealtime, timespec, forever, addr, private, uint32(val), mask)
+ return n, nil, err
+ default:
+ panic("unreachable")
+ }
+
+ case linux.FUTEX_WAKE:
+ mask = ^uint32(0)
+ fallthrough
+
+ case linux.FUTEX_WAKE_BITSET:
+ if mask == 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ n, err := t.Futex().Wake(t, addr, private, mask, val)
+ return uintptr(n), nil, err
+
+ case linux.FUTEX_REQUEUE:
+ n, err := t.Futex().Requeue(t, addr, naddr, private, val, nreq)
+ return uintptr(n), nil, err
+
+ case linux.FUTEX_CMP_REQUEUE:
+ // 'val3' contains the value to be checked at 'addr' and
+ // 'val' is the number of waiters that should be woken up.
+ nval := uint32(val3)
+ n, err := t.Futex().RequeueCmp(t, addr, naddr, private, nval, val, nreq)
+ return uintptr(n), nil, err
+
+ case linux.FUTEX_WAKE_OP:
+ op := uint32(val3)
+ n, err := t.Futex().WakeOp(t, addr, naddr, private, val, nreq, op)
+ return uintptr(n), nil, err
+
+ case linux.FUTEX_LOCK_PI:
+ forever := (timeout == 0)
+
+ var timespec linux.Timespec
+ if !forever {
+ var err error
+ timespec, err = copyTimespecIn(t, timeout)
+ if err != nil {
+ return 0, nil, err
+ }
+ }
+ err := futexLockPI(t, timespec, forever, addr, private)
+ return 0, nil, err
+
+ case linux.FUTEX_TRYLOCK_PI:
+ err := tryLockPI(t, addr, private)
+ return 0, nil, err
+
+ case linux.FUTEX_UNLOCK_PI:
+ err := t.Futex().UnlockPI(t, addr, uint32(t.ThreadID()), private)
+ return 0, nil, err
+
+ case linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI:
+ t.Kernel().EmitUnimplementedEvent(t)
+ return 0, nil, syserror.ENOSYS
+
+ default:
+ // We don't even know about this command.
+ return 0, nil, syserror.ENOSYS
+ }
+}