diff options
Diffstat (limited to 'pkg/sentry/syscalls/linux/sys_futex.go')
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_futex.go | 319 |
1 files changed, 319 insertions, 0 deletions
diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go new file mode 100644 index 000000000..57762d058 --- /dev/null +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -0,0 +1,319 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "time" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// futexChecker is a futex.Checker that uses a Task's MemoryManager. +type futexChecker struct { + t *kernel.Task +} + +// Check checks if the address contains the given value, and returns +// syserror.EAGAIN if it doesn't. See Checker interface in futex package +// for more information. +func (f futexChecker) Check(addr uintptr, val uint32) error { + in := f.t.CopyScratchBuffer(4) + _, err := f.t.CopyInBytes(usermem.Addr(addr), in) + if err != nil { + return err + } + nval := usermem.ByteOrder.Uint32(in) + if val != nval { + return syserror.EAGAIN + } + return nil +} + +func (f futexChecker) atomicOp(addr uintptr, op func(uint32) uint32) (uint32, error) { + in := f.t.CopyScratchBuffer(4) + _, err := f.t.CopyInBytes(usermem.Addr(addr), in) + if err != nil { + return 0, err + } + o := usermem.ByteOrder.Uint32(in) + mm := f.t.MemoryManager() + for { + n := op(o) + r, err := mm.CompareAndSwapUint32(f.t, usermem.Addr(addr), o, n, usermem.IOOpts{ + AddressSpaceActive: true, + }) + if err != nil { + return 0, err + } + + if r == o { + return o, nil + } + o = r + } +} + +// Op performs an operation on addr and returns a result based on the operation. +func (f futexChecker) Op(addr uintptr, opIn uint32) (bool, error) { + op := (opIn >> 28) & 0xf + cmp := (opIn >> 24) & 0xf + opArg := (opIn >> 12) & 0xfff + cmpArg := opIn & 0xfff + + if op&linux.FUTEX_OP_OPARG_SHIFT != 0 { + opArg = 1 << opArg + op &^= linux.FUTEX_OP_OPARG_SHIFT // clear flag + } + + var oldVal uint32 + var err error + switch op { + case linux.FUTEX_OP_SET: + oldVal, err = f.t.MemoryManager().SwapUint32(f.t, usermem.Addr(addr), opArg, usermem.IOOpts{ + AddressSpaceActive: true, + }) + case linux.FUTEX_OP_ADD: + oldVal, err = f.atomicOp(addr, func(a uint32) uint32 { + return a + opArg + }) + case linux.FUTEX_OP_OR: + oldVal, err = f.atomicOp(addr, func(a uint32) uint32 { + return a | opArg + }) + case linux.FUTEX_OP_ANDN: + oldVal, err = f.atomicOp(addr, func(a uint32) uint32 { + return a & ^opArg + }) + case linux.FUTEX_OP_XOR: + oldVal, err = f.atomicOp(addr, func(a uint32) uint32 { + return a ^ opArg + }) + default: + return false, syserror.ENOSYS + } + if err != nil { + return false, err + } + + switch cmp { + case linux.FUTEX_OP_CMP_EQ: + return oldVal == cmpArg, nil + case linux.FUTEX_OP_CMP_NE: + return oldVal != cmpArg, nil + case linux.FUTEX_OP_CMP_LT: + return oldVal < cmpArg, nil + case linux.FUTEX_OP_CMP_LE: + return oldVal <= cmpArg, nil + case linux.FUTEX_OP_CMP_GT: + return oldVal > cmpArg, nil + case linux.FUTEX_OP_CMP_GE: + return oldVal >= cmpArg, nil + default: + return false, syserror.ENOSYS + } +} + +// futexWaitRestartBlock encapsulates the state required to restart futex(2) +// via restart_syscall(2). +type futexWaitRestartBlock struct { + duration time.Duration + + // addr stored as uint64 since uintptr is not save-able. + addr uint64 + + val uint32 + mask uint32 +} + +// Restart implements kernel.SyscallRestartBlock.Restart. +func (f *futexWaitRestartBlock) Restart(t *kernel.Task) (uintptr, error) { + return futexWaitDuration(t, f.duration, false, uintptr(f.addr), f.val, f.mask) +} + +// futexWaitAbsolute performs a FUTEX_WAIT_BITSET, blocking until the wait is +// complete. +// +// The wait blocks forever if forever is true, otherwise it blocks until ts. +// +// If blocking is interrupted, the syscall is restarted with the original +// arguments. +func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, forever bool, addr uintptr, val, mask uint32) (uintptr, error) { + w := t.FutexWaiter() + err := t.Futex().WaitPrepare(w, futexChecker{t}, addr, val, mask) + if err != nil { + return 0, err + } + + if forever { + err = t.Block(w.C) + } else if clockRealtime { + notifier, tchan := ktime.NewChannelNotifier() + timer := ktime.NewTimer(t.Kernel().RealtimeClock(), notifier) + timer.Swap(ktime.Setting{ + Enabled: true, + Next: ktime.FromTimespec(ts), + }) + err = t.BlockWithTimer(w.C, tchan) + timer.Destroy() + } else { + err = t.BlockWithDeadline(w.C, true, ktime.FromTimespec(ts)) + } + + t.Futex().WaitComplete(w) + return 0, syserror.ConvertIntr(err, kernel.ERESTARTSYS) +} + +// futexWaitDuration performs a FUTEX_WAIT, blocking until the wait is +// complete. +// +// The wait blocks forever if forever is true, otherwise is blocks for +// duration. +// +// If blocking is interrupted, forever determines how to restart the +// syscall. If forever is true, the syscall is restarted with the original +// arguments. If forever is false, duration is a relative timeout and the +// syscall is restarted with the remaining timeout. +func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, addr uintptr, val, mask uint32) (uintptr, error) { + w := t.FutexWaiter() + err := t.Futex().WaitPrepare(w, futexChecker{t}, addr, val, mask) + if err != nil { + return 0, err + } + + remaining, err := t.BlockWithTimeout(w.C, !forever, duration) + t.Futex().WaitComplete(w) + if err == nil { + return 0, nil + } + + // The wait was unsuccessful for some reason other than interruption. Simply + // forward the error. + if err != syserror.ErrInterrupted { + return 0, err + } + + // The wait was interrupted and we need to restart. Decide how. + + // The wait duration was absolute, restart with the original arguments. + if forever { + return 0, kernel.ERESTARTSYS + } + + // The wait duration was relative, restart with the remaining duration. + t.SetSyscallRestartBlock(&futexWaitRestartBlock{ + duration: remaining, + addr: uint64(addr), + val: val, + mask: mask, + }) + return 0, kernel.ERESTART_RESTARTBLOCK +} + +// Futex implements linux syscall futex(2). +// It provides a method for a program to wait for a value at a given address to +// change, and a method to wake up anyone waiting on a particular address. +func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + uaddr := args[0].Pointer() + futexOp := args[1].Int() + val := int(args[2].Int()) + nreq := int(args[3].Int()) + timeout := args[3].Pointer() + uaddr2 := args[4].Pointer() + val3 := args[5].Int() + + addr := uintptr(uaddr) + naddr := uintptr(uaddr2) + cmd := futexOp &^ (linux.FUTEX_PRIVATE_FLAG | linux.FUTEX_CLOCK_REALTIME) + clockRealtime := (futexOp & linux.FUTEX_CLOCK_REALTIME) == linux.FUTEX_CLOCK_REALTIME + mask := uint32(val3) + + switch cmd { + case linux.FUTEX_WAIT, linux.FUTEX_WAIT_BITSET: + // WAIT{_BITSET} wait forever if the timeout isn't passed. + forever := timeout == 0 + + var timespec linux.Timespec + if !forever { + var err error + timespec, err = copyTimespecIn(t, timeout) + if err != nil { + return 0, nil, err + } + } + + switch cmd { + case linux.FUTEX_WAIT: + // WAIT uses a relative timeout. + mask = ^uint32(0) + var timeoutDur time.Duration + if !forever { + timeoutDur = time.Duration(timespec.ToNsecCapped()) * time.Nanosecond + } + n, err := futexWaitDuration(t, timeoutDur, forever, addr, uint32(val), mask) + return n, nil, err + + case linux.FUTEX_WAIT_BITSET: + // WAIT_BITSET uses an absolute timeout which is either + // CLOCK_MONOTONIC or CLOCK_REALTIME. + if mask == 0 { + return 0, nil, syserror.EINVAL + } + n, err := futexWaitAbsolute(t, clockRealtime, timespec, forever, addr, uint32(val), mask) + return n, nil, err + default: + panic("unreachable") + } + + case linux.FUTEX_WAKE: + mask = ^uint32(0) + fallthrough + + case linux.FUTEX_WAKE_BITSET: + if mask == 0 { + return 0, nil, syserror.EINVAL + } + n, err := t.Futex().Wake(addr, mask, val) + return uintptr(n), nil, err + + case linux.FUTEX_REQUEUE: + n, err := t.Futex().Requeue(addr, naddr, val, nreq) + return uintptr(n), nil, err + + case linux.FUTEX_CMP_REQUEUE: + // 'val3' contains the value to be checked at 'addr' and + // 'val' is the number of waiters that should be woken up. + nval := uint32(val3) + n, err := t.Futex().RequeueCmp(futexChecker{t}, addr, nval, naddr, val, nreq) + return uintptr(n), nil, err + + case linux.FUTEX_WAKE_OP: + op := uint32(val3) + n, err := t.Futex().WakeOp(futexChecker{t}, addr, naddr, val, nreq, op) + return uintptr(n), nil, err + + case linux.FUTEX_LOCK_PI, linux.FUTEX_UNLOCK_PI, linux.FUTEX_TRYLOCK_PI, linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI: + // We don't support any priority inversion futexes. + return 0, nil, syserror.ENOSYS + + default: + // We don't even know about this command. + return 0, nil, syserror.ENOSYS + } +} |