summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJamie Liu <jamieliu@google.com>2018-08-23 16:31:25 -0700
committerShentubot <shentubot@google.com>2018-08-23 16:32:36 -0700
commit64403265a04aa0c8be3ebb652a09f6e2d7a84ca7 (patch)
tree8191f06fca712de5588cd418a70707e9df0f2c25
parente855e9cebc45f5fd7a9583f476c8965fc395a15e (diff)
Implement POSIX per-process interval timers.
PiperOrigin-RevId: 210021612 Change-Id: If7c161e6fd08cf17942bfb6bc5a8d2c4e271c61e
-rw-r--r--pkg/abi/linux/signal.go20
-rw-r--r--pkg/abi/linux/time.go4
-rw-r--r--pkg/sentry/arch/signal_amd64.go30
-rw-r--r--pkg/sentry/kernel/BUILD2
-rw-r--r--pkg/sentry/kernel/kernel.go19
-rw-r--r--pkg/sentry/kernel/pending_signals.go17
-rw-r--r--pkg/sentry/kernel/pending_signals_state.go21
-rw-r--r--pkg/sentry/kernel/posixtimer.go306
-rw-r--r--pkg/sentry/kernel/ptrace.go2
-rw-r--r--pkg/sentry/kernel/task_exec.go16
-rw-r--r--pkg/sentry/kernel/task_exit.go2
-rw-r--r--pkg/sentry/kernel/task_signals.go12
-rw-r--r--pkg/sentry/kernel/thread_group.go27
-rw-r--r--pkg/sentry/kernel/time/time.go23
-rw-r--r--pkg/sentry/syscalls/linux/linux64.go10
-rw-r--r--pkg/sentry/syscalls/linux/sys_timer.go85
-rw-r--r--pkg/sentry/syscalls/linux/sys_timerfd.go33
17 files changed, 579 insertions, 50 deletions
diff --git a/pkg/abi/linux/signal.go b/pkg/abi/linux/signal.go
index fed2a159f..b2c7230c4 100644
--- a/pkg/abi/linux/signal.go
+++ b/pkg/abi/linux/signal.go
@@ -209,3 +209,23 @@ const (
// POLL_HUP indicates that a device disconnected.
POLL_HUP = SI_POLL | 6
)
+
+// Sigevent represents struct sigevent, as passed to timer_create(2).
+type Sigevent struct {
+ Value uint64 // union sigval {int, void*}
+ Signo int32 // signal number to deliver (sigev_signo)
+ Notify int32 // one of the SIGEV_* constants (sigev_notify)
+
+ // struct sigevent here contains 48-byte union _sigev_un. However, only
+ // member _tid is significant to the kernel.
+ Tid int32 // target thread ID; consulted when Notify is SIGEV_THREAD_ID
+ UnRemainder [44]byte // remaining 44 bytes of _sigev_un
+}
+
+// Possible values for Sigevent.Notify, aka struct sigevent::sigev_notify.
+const (
+ SIGEV_SIGNAL = 0
+ SIGEV_NONE = 1
+ SIGEV_THREAD = 2
+ SIGEV_THREAD_ID = 4
+)
diff --git a/pkg/abi/linux/time.go b/pkg/abi/linux/time.go
index 9109a2848..4569f4208 100644
--- a/pkg/abi/linux/time.go
+++ b/pkg/abi/linux/time.go
@@ -222,3 +222,7 @@ type Tms struct {
CUTime ClockT
CSTime ClockT
}
+
+// TimerID represents type timer_t, which identifies a POSIX per-process
+// interval timer.
+type TimerID int32
diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go
index e81717e8b..9ca4c8ed1 100644
--- a/pkg/sentry/arch/signal_amd64.go
+++ b/pkg/sentry/arch/signal_amd64.go
@@ -176,6 +176,36 @@ func (s *SignalInfo) SetUid(val int32) {
usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val))
}
+// Sigval reads the sigval field (bytes 8-16 of Fields), which both si_int and
+// si_ptr alias.
+func (s *SignalInfo) Sigval() uint64 {
+	raw := s.Fields[8:16]
+	return usermem.ByteOrder.Uint64(raw)
+}
+
+// SetSigval overwrites the sigval field with val.
+func (s *SignalInfo) SetSigval(val uint64) {
+	raw := s.Fields[8:16]
+	usermem.ByteOrder.PutUint64(raw, val)
+}
+
+// TimerID reads the si_timerid field (bytes 0-4 of Fields).
+func (s *SignalInfo) TimerID() linux.TimerID {
+	raw := usermem.ByteOrder.Uint32(s.Fields[0:4])
+	return linux.TimerID(raw)
+}
+
+// SetTimerID overwrites the si_timerid field with val.
+func (s *SignalInfo) SetTimerID(val linux.TimerID) {
+	raw := s.Fields[0:4]
+	usermem.ByteOrder.PutUint32(raw, uint32(val))
+}
+
+// Overrun reads the si_overrun field (bytes 4-8 of Fields).
+func (s *SignalInfo) Overrun() int32 {
+	raw := usermem.ByteOrder.Uint32(s.Fields[4:8])
+	return int32(raw)
+}
+
+// SetOverrun overwrites the si_overrun field with val.
+func (s *SignalInfo) SetOverrun(val int32) {
+	raw := s.Fields[4:8]
+	usermem.ByteOrder.PutUint32(raw, uint32(val))
+}
+
// Addr returns the si_addr field.
func (s *SignalInfo) Addr() uint64 {
return usermem.ByteOrder.Uint64(s.Fields[0:8])
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 69a3fbc45..a7b847e94 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -71,6 +71,7 @@ go_library(
"pending_signals.go",
"pending_signals_list.go",
"pending_signals_state.go",
+ "posixtimer.go",
"process_group_list.go",
"ptrace.go",
"rseq.go",
@@ -114,7 +115,6 @@ go_library(
importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel",
imports = [
"gvisor.googlesource.com/gvisor/pkg/bpf",
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch",
"gvisor.googlesource.com/gvisor/pkg/tcpip",
],
visibility = ["//:sandbox"],
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index cb43fdcdc..33cd727c6 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -19,9 +19,11 @@
// Lock order (outermost locks must be taken first):
//
// Kernel.extMu
-// TaskSet.mu
-// SignalHandlers.mu
-// Task.mu
+// ThreadGroup.timerMu
+// ktime.Timer.mu (for IntervalTimer)
+// TaskSet.mu
+// SignalHandlers.mu
+// Task.mu
//
// Locking SignalHandlers.mu in multiple SignalHandlers requires locking
// TaskSet.mu exclusively first. Locking Task.mu in multiple Tasks at the same
@@ -706,8 +708,12 @@ func (k *Kernel) pauseTimeLocked() {
if t == t.tg.leader {
t.tg.tm.pause()
}
- // This means we'll iterate FDMaps shared by multiple tasks repeatedly,
- // but ktime.Timer.Pause is idempotent so this is harmless.
+ // This means we'll iterate ThreadGroups and FDMaps shared by multiple
+ // tasks repeatedly, but ktime.Timer.Pause is idempotent so this is
+ // harmless.
+ for _, it := range t.tg.timers {
+ it.PauseTimer()
+ }
if fdm := t.tr.FDMap; fdm != nil {
for _, desc := range fdm.files {
if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok {
@@ -735,6 +741,9 @@ func (k *Kernel) resumeTimeLocked() {
if t == t.tg.leader {
t.tg.tm.resume()
}
+ for _, it := range t.tg.timers {
+ it.ResumeTimer()
+ }
if fdm := t.tr.FDMap; fdm != nil {
for _, desc := range fdm.files {
if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok {
diff --git a/pkg/sentry/kernel/pending_signals.go b/pkg/sentry/kernel/pending_signals.go
index 06be5a7e1..bb5db0309 100644
--- a/pkg/sentry/kernel/pending_signals.go
+++ b/pkg/sentry/kernel/pending_signals.go
@@ -46,7 +46,7 @@ type pendingSignals struct {
// Note that signals is zero-indexed, but signal 1 is the first valid
// signal, so signals[0] contains signals with signo 1 etc. This offset is
// usually handled by using Signal.index().
- signals [linux.SignalMaximum]pendingSignalQueue `state:".([]*arch.SignalInfo)"`
+ signals [linux.SignalMaximum]pendingSignalQueue `state:".([]savedPendingSignal)"`
// Bit i of pendingSet is set iff there is at least one signal with signo
// i+1 pending.
@@ -66,13 +66,16 @@ type pendingSignal struct {
// pendingSignalEntry links into a pendingSignalList.
pendingSignalEntry
*arch.SignalInfo
+
+ // If timer is not nil, it is the IntervalTimer which sent this signal.
+ timer *IntervalTimer
}
// enqueue enqueues the given signal. enqueue returns true on success and false
// on failure (if the given signal's queue is full).
//
// Preconditions: info represents a valid signal.
-func (p *pendingSignals) enqueue(info *arch.SignalInfo) bool {
+func (p *pendingSignals) enqueue(info *arch.SignalInfo, timer *IntervalTimer) bool {
sig := linux.Signal(info.Signo)
q := &p.signals[sig.Index()]
if sig.IsStandard() {
@@ -82,7 +85,7 @@ func (p *pendingSignals) enqueue(info *arch.SignalInfo) bool {
} else if q.length >= rtSignalCap {
return false
}
- q.pendingSignalList.PushBack(&pendingSignal{SignalInfo: info})
+ q.pendingSignalList.PushBack(&pendingSignal{SignalInfo: info, timer: timer})
q.length++
p.pendingSet |= linux.SignalSetOf(sig)
return true
@@ -119,12 +122,20 @@ func (p *pendingSignals) dequeueSpecific(sig linux.Signal) *arch.SignalInfo {
if q.length == 0 {
p.pendingSet &^= linux.SignalSetOf(sig)
}
+ if ps.timer != nil {
+ ps.timer.updateDequeuedSignalLocked(ps.SignalInfo)
+ }
return ps.SignalInfo
}
// discardSpecific causes all pending signals with number sig to be discarded.
func (p *pendingSignals) discardSpecific(sig linux.Signal) {
q := &p.signals[sig.Index()]
+ for ps := q.pendingSignalList.Front(); ps != nil; ps = ps.Next() {
+ if ps.timer != nil {
+ ps.timer.signalRejectedLocked()
+ }
+ }
q.pendingSignalList.Reset()
q.length = 0
p.pendingSet &^= linux.SignalSetOf(sig)
diff --git a/pkg/sentry/kernel/pending_signals_state.go b/pkg/sentry/kernel/pending_signals_state.go
index af61f6e8e..6d90ed033 100644
--- a/pkg/sentry/kernel/pending_signals_state.go
+++ b/pkg/sentry/kernel/pending_signals_state.go
@@ -18,20 +18,29 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
)
+// +stateify savable
+type savedPendingSignal struct {
+ si *arch.SignalInfo
+ timer *IntervalTimer
+}
+
// saveSignals is invoked by stateify.
-func (p *pendingSignals) saveSignals() []*arch.SignalInfo {
- var pending []*arch.SignalInfo
+func (p *pendingSignals) saveSignals() []savedPendingSignal {
+ var pending []savedPendingSignal
for _, q := range p.signals {
for ps := q.pendingSignalList.Front(); ps != nil; ps = ps.Next() {
- pending = append(pending, ps.SignalInfo)
+ pending = append(pending, savedPendingSignal{
+ si: ps.SignalInfo,
+ timer: ps.timer,
+ })
}
}
return pending
}
// loadSignals is invoked by stateify.
-func (p *pendingSignals) loadSignals(pending []*arch.SignalInfo) {
- for _, si := range pending {
- p.enqueue(si)
+func (p *pendingSignals) loadSignals(pending []savedPendingSignal) {
+ for _, sps := range pending {
+ p.enqueue(sps.si, sps.timer)
}
}
diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go
new file mode 100644
index 000000000..0ab958529
--- /dev/null
+++ b/pkg/sentry/kernel/posixtimer.go
@@ -0,0 +1,306 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "math"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// IntervalTimer represents a POSIX interval timer as described by
+// timer_create(2).
+//
+// +stateify savable
+type IntervalTimer struct {
+ timer *ktime.Timer // underlying timer; set to nil by DestroyTimer
+
+ // If target is not nil, it receives signo from timer expirations. If group
+ // is true, these signals are thread-group-directed. These fields are
+ // immutable.
+ target *Task
+ signo linux.Signal
+ id linux.TimerID // copied into si_timerid of signals sent by Notify
+ sigval uint64 // copied into the sigval of signals sent by Notify
+ group bool
+
+ // If sigpending is true, a signal to target is already queued, and timer
+ // expirations should increment overrunCur instead of sending another
+ // signal. sigpending is protected by target's signal mutex. (If target is
+ // nil, the timer will never send signals, so sigpending will be unused.)
+ sigpending bool
+
+ // If sigorphan is true, timer's setting has been changed since sigpending
+ // last became true, such that overruns should no longer be counted in the
+ // pending signal's si_overrun. sigorphan is protected by target's signal
+ // mutex.
+ sigorphan bool
+
+ // overrunCur is the number of overruns that have occurred since the last
+ // time a signal was sent. overrunCur is protected by target's signal
+ // mutex.
+ overrunCur uint64
+
+ // Consider the last signal sent by this timer that has been dequeued.
+ // overrunLast is the number of overruns that occurred between when this
+ // signal was sent and when it was dequeued. Equivalently, overrunLast was
+ // the value of overrunCur when this signal was dequeued. overrunLast is
+ // protected by target's signal mutex.
+ overrunLast uint64
+}
+
+// DestroyTimer releases its resources by destroying the underlying Timer.
+func (it *IntervalTimer) DestroyTimer() {
+ it.timer.Destroy()
+ it.timerSettingChanged() // orphan any pending signal and reset overrun counts
+ // A destroyed IntervalTimer is still potentially reachable via a
+ // pendingSignal; nil out timer so that it won't be saved.
+ it.timer = nil
+}
+
+// timerSettingChanged records that the timer's setting was replaced (or the
+// timer destroyed): any signal that is still pending becomes orphaned and both
+// overrun counters restart from zero. It is a no-op for timers without a
+// signal target.
+//
+// It acquires the TaskSet mutex (for reading) and then the target's signal
+// mutex, so neither may be held by the caller.
+func (it *IntervalTimer) timerSettingChanged() {
+	target := it.target
+	if target == nil {
+		return
+	}
+	tg := target.tg
+	tg.pidns.owner.mu.RLock()
+	defer tg.pidns.owner.mu.RUnlock()
+	tg.signalHandlers.mu.Lock()
+	defer tg.signalHandlers.mu.Unlock()
+
+	it.sigorphan = true
+	it.overrunCur, it.overrunLast = 0, 0
+}
+
+// PauseTimer pauses the associated Timer; Kernel.pauseTimeLocked calls it for each of a thread group's timers.
+func (it *IntervalTimer) PauseTimer() {
+ it.timer.Pause()
+}
+
+// ResumeTimer resumes the associated Timer; Kernel.resumeTimeLocked calls it for each of a thread group's timers.
+func (it *IntervalTimer) ResumeTimer() {
+ it.timer.Resume()
+}
+
+// updateDequeuedSignalLocked is called when a signal sent by this timer is
+// dequeued. It clears sigpending and, unless the signal has been orphaned by a
+// setting change, moves the current overrun count into overrunLast and stores
+// its saturated value in si's si_overrun field.
+//
+// Preconditions: it.target's signal mutex must be locked.
+func (it *IntervalTimer) updateDequeuedSignalLocked(si *arch.SignalInfo) {
+	it.sigpending = false
+	if !it.sigorphan {
+		it.overrunLast, it.overrunCur = it.overrunCur, 0
+		si.SetOverrun(saturateI32FromU64(it.overrunLast))
+	}
+}
+
+// signalRejectedLocked is called when a signal from this timer was not
+// delivered (discarded or failed to queue). It clears sigpending and, unless
+// the signal has been orphaned by a setting change, counts the lost signal as
+// one more overrun.
+//
+// Preconditions: it.target's signal mutex must be locked.
+func (it *IntervalTimer) signalRejectedLocked() {
+	it.sigpending = false
+	if !it.sigorphan {
+		it.overrunCur++
+	}
+}
+
+// Notify implements ktime.TimerListener.Notify; exp is presumably the number of expirations being reported.
+func (it *IntervalTimer) Notify(exp uint64) {
+ if it.target == nil { // no signal target, so there is nothing to send
+ return
+ }
+
+ it.target.tg.pidns.owner.mu.RLock()
+ defer it.target.tg.pidns.owner.mu.RUnlock()
+ it.target.tg.signalHandlers.mu.Lock()
+ defer it.target.tg.signalHandlers.mu.Unlock()
+
+ if it.sigpending {
+ it.overrunCur += exp // a signal is already queued; count these as overruns
+ return
+ }
+
+ // sigpending must be set before sendSignalTimerLocked() so that it can be
+ // unset if the signal is discarded (in which case sendSignalTimerLocked()
+ // will return nil).
+ it.sigpending = true
+ it.sigorphan = false
+ it.overrunCur += exp - 1 // the signal itself accounts for one expiration
+ si := &arch.SignalInfo{
+ Signo: int32(it.signo),
+ Code: arch.SignalInfoTimer,
+ }
+ si.SetTimerID(it.id)
+ si.SetSigval(it.sigval)
+ // si_overrun is set when the signal is dequeued.
+ if err := it.target.sendSignalTimerLocked(si, it.group, it); err != nil {
+ it.signalRejectedLocked() // clears sigpending and counts the unsent signal as an overrun
+ }
+}
+
+// Destroy implements ktime.TimerListener.Destroy. It does nothing; users of
+// Timer should call DestroyTimer instead.
+func (it *IntervalTimer) Destroy() {
+}
+
+// IntervalTimerCreate implements timer_create(2); a nil sigev selects the default SIGALRM notification.
+func (t *Task) IntervalTimerCreate(c ktime.Clock, sigev *linux.Sigevent) (linux.TimerID, error) {
+ t.tg.timerMu.Lock()
+ defer t.tg.timerMu.Unlock()
+
+ // Allocate a timer ID.
+ var id linux.TimerID
+ end := t.tg.nextTimerID // search start; reaching it again means every ID is in use
+ for {
+ id = t.tg.nextTimerID
+ _, ok := t.tg.timers[id]
+ t.tg.nextTimerID++
+ if t.tg.nextTimerID < 0 { // wrapped past the largest TimerID
+ t.tg.nextTimerID = 0
+ }
+ if !ok {
+ break
+ }
+ if t.tg.nextTimerID == end {
+ return 0, syserror.EAGAIN
+ }
+ }
+
+ // "The implementation of the default case where evp [sic] is NULL is
+ // handled inside glibc, which invokes the underlying system call with a
+ // suitably populated sigevent structure." - timer_create(2). This is
+ // misleading; the timer_create syscall also handles a NULL sevp as
+ // described by the man page
+ // (kernel/time/posix-timers.c:sys_timer_create(), do_timer_create()). This
+ // must be handled here instead of the syscall wrapper since sigval is the
+ // timer ID, which isn't available until we allocate it in this function.
+ if sigev == nil {
+ sigev = &linux.Sigevent{
+ Signo: int32(linux.SIGALRM),
+ Notify: linux.SIGEV_SIGNAL,
+ Value: uint64(id),
+ }
+ }
+
+ // Construct the timer.
+ it := &IntervalTimer{
+ id: id,
+ sigval: sigev.Value,
+ }
+ switch sigev.Notify {
+ case linux.SIGEV_NONE:
+ // leave it.target = nil
+ case linux.SIGEV_SIGNAL, linux.SIGEV_THREAD:
+ // POSIX SIGEV_THREAD semantics are implemented in userspace by libc;
+ // to the kernel, SIGEV_THREAD and SIGEV_SIGNAL are equivalent. (See
+ // Linux's kernel/time/posix-timers.c:good_sigevent().)
+ it.target = t.tg.leader
+ it.group = true
+ case linux.SIGEV_THREAD_ID:
+ t.tg.pidns.owner.mu.RLock()
+ target, ok := t.tg.pidns.tasks[ThreadID(sigev.Tid)]
+ t.tg.pidns.owner.mu.RUnlock()
+ if !ok || target.tg != t.tg { // the thread must exist and belong to the caller's thread group
+ return 0, syserror.EINVAL
+ }
+ it.target = target
+ default:
+ return 0, syserror.EINVAL
+ }
+ if sigev.Notify != linux.SIGEV_NONE {
+ it.signo = linux.Signal(sigev.Signo)
+ if !it.signo.IsValid() {
+ return 0, syserror.EINVAL
+ }
+ }
+ it.timer = ktime.NewTimer(c, it) // it is the timer's listener; see Notify
+
+ t.tg.timers[id] = it
+ return id, nil
+}
+
+// IntervalTimerDelete implements timer_delete(2): it unregisters the timer
+// identified by id from t's thread group and destroys it. EINVAL is returned
+// if no such timer exists.
+func (t *Task) IntervalTimerDelete(id linux.TimerID) error {
+	tg := t.tg
+	tg.timerMu.Lock()
+	defer tg.timerMu.Unlock()
+
+	ivt := tg.timers[id]
+	if ivt == nil {
+		return syserror.EINVAL
+	}
+	delete(tg.timers, id)
+	ivt.DestroyTimer()
+	return nil
+}
+
+// IntervalTimerSettime implements timer_settime(2). It installs the setting
+// described by its (absolute if abs is true, otherwise relative to the timer's
+// clock) on timer id and returns the setting that was replaced. EINVAL is
+// returned if t's thread group has no timer with that id.
+func (t *Task) IntervalTimerSettime(id linux.TimerID, its linux.Itimerspec, abs bool) (linux.Itimerspec, error) {
+	tg := t.tg
+	tg.timerMu.Lock()
+	defer tg.timerMu.Unlock()
+
+	ivt := tg.timers[id]
+	if ivt == nil {
+		return linux.Itimerspec{}, syserror.EINVAL
+	}
+
+	setting, err := ktime.SettingFromItimerspec(its, abs, ivt.timer.Clock())
+	if err != nil {
+		return linux.Itimerspec{}, err
+	}
+	// timerSettingChanged is passed to SwapAnd alongside the new setting so
+	// that overrun accounting is reset as part of the swap.
+	now, prev := ivt.timer.SwapAnd(setting, ivt.timerSettingChanged)
+	return ktime.ItimerspecFromSetting(now, prev), nil
+}
+
+// IntervalTimerGettime implements timer_gettime(2), returning the current
+// setting of timer id as an Itimerspec. EINVAL is returned if t's thread
+// group has no timer with that id.
+func (t *Task) IntervalTimerGettime(id linux.TimerID) (linux.Itimerspec, error) {
+	tg := t.tg
+	tg.timerMu.Lock()
+	defer tg.timerMu.Unlock()
+
+	ivt := tg.timers[id]
+	if ivt == nil {
+		return linux.Itimerspec{}, syserror.EINVAL
+	}
+
+	now, setting := ivt.timer.Get()
+	return ktime.ItimerspecFromSetting(now, setting), nil
+}
+
+// IntervalTimerGetoverrun implements timer_getoverrun(2), returning the
+// saturated overrun count recorded when timer id's last signal was dequeued.
+// EINVAL is returned if t's thread group has no timer with that id.
+//
+// Preconditions: The caller must be running on the task goroutine.
+func (t *Task) IntervalTimerGetoverrun(id linux.TimerID) (int32, error) {
+	tg := t.tg
+	tg.timerMu.Lock()
+	defer tg.timerMu.Unlock()
+
+	ivt := tg.timers[id]
+	if ivt == nil {
+		return 0, syserror.EINVAL
+	}
+
+	// timer_create(2) guarantees that ivt.target is either nil (so
+	// ivt.overrunLast is immutably 0) or a task in tg. Since t is executing
+	// timer_getoverrun(2), tg can't be completing execve, so
+	// tg.signalHandlers can't be changing and its mutex may be taken without
+	// holding the TaskSet mutex.
+	tg.signalHandlers.mu.Lock()
+	defer tg.signalHandlers.mu.Unlock()
+
+	// This is consistent with Linux after 78c9c4dfbf8c ("posix-timers:
+	// Sanitize overrun handling").
+	overruns := saturateI32FromU64(ivt.overrunLast)
+	return overruns, nil
+}
+
+// saturateI32FromU64 converts x to int32, clamping values larger than
+// math.MaxInt32 to math.MaxInt32.
+func saturateI32FromU64(x uint64) int32 {
+	if x <= math.MaxInt32 {
+		return int32(x)
+	}
+	return math.MaxInt32
+}
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index e9e69004d..1a0d1876d 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -627,7 +627,7 @@ func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions
// running, so we don't have to.
child.pendingSignals.enqueue(&arch.SignalInfo{
Signo: int32(linux.SIGSTOP),
- })
+ }, nil)
child.tg.signalHandlers.mu.Unlock()
}
}
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index 385299b24..bb3d0bd02 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -143,6 +143,22 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
oldTID = tracer.tg.pidns.tids[t]
}
t.promoteLocked()
+ // "POSIX timers are not preserved (timer_create(2))." - execve(2). Handle
+ // this first since POSIX timers are protected by the signal mutex, which
+ // we're about to change. Note that we have to stop and destroy timers
+ // without holding any mutexes to avoid circular lock ordering.
+ var its []*IntervalTimer
+ t.tg.signalHandlers.mu.Lock()
+ for _, it := range t.tg.timers {
+ its = append(its, it)
+ }
+ t.tg.timers = make(map[linux.TimerID]*IntervalTimer)
+ t.tg.signalHandlers.mu.Unlock()
+ t.tg.pidns.owner.mu.Unlock()
+ for _, it := range its {
+ it.DestroyTimer()
+ }
+ t.tg.pidns.owner.mu.Lock()
// "During an execve(2), the dispositions of handled signals are reset to
// the default; the dispositions of ignored signals are left unchanged. ...
// [The] signal mask is preserved across execve(2). ... [The] pending
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index b16844e91..b37fcf4c1 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -129,7 +129,7 @@ func (t *Task) killLocked() {
// enqueueing an actual siginfo, such that
// kernel/signal.c:collect_signal() initializes si_code to SI_USER.
Code: arch.SignalInfoUser,
- })
+ }, nil)
t.interrupt()
}
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index 62ec530be..4a66bce6b 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -396,6 +396,10 @@ func (tg *ThreadGroup) SendTimerSignal(info *arch.SignalInfo, includeSys bool) e
}
func (t *Task) sendSignalLocked(info *arch.SignalInfo, group bool) error {
+ return t.sendSignalTimerLocked(info, group, nil)
+}
+
+func (t *Task) sendSignalTimerLocked(info *arch.SignalInfo, group bool, timer *IntervalTimer) error {
if t.exitState == TaskExitDead {
return syserror.ESRCH
}
@@ -429,6 +433,9 @@ func (t *Task) sendSignalLocked(info *arch.SignalInfo, group bool) error {
ignored := computeAction(sig, t.tg.signalHandlers.actions[sig]) == SignalActionIgnore
if linux.SignalSetOf(sig)&t.tr.SignalMask == 0 && ignored && !t.hasTracer() {
t.Debugf("Discarding ignored signal %d", sig)
+ if timer != nil {
+ timer.signalRejectedLocked()
+ }
return nil
}
@@ -436,11 +443,14 @@ func (t *Task) sendSignalLocked(info *arch.SignalInfo, group bool) error {
if group {
q = &t.tg.pendingSignals
}
- if !q.enqueue(info) {
+ if !q.enqueue(info, timer) {
if sig.IsRealtime() {
return syserror.EAGAIN
}
t.Debugf("Discarding duplicate signal %d", sig)
+ if timer != nil {
+ timer.signalRejectedLocked()
+ }
return nil
}
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 441b8a822..13dce08ce 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -155,6 +155,14 @@ type ThreadGroup struct {
// tm contains process timers. TimerManager fields are immutable.
tm TimerManager
+ // timers is the thread group's POSIX interval timers. nextTimerID is the
+ // TimerID at which allocation should begin searching for an unused ID.
+ //
+ // timers and nextTimerID are protected by timerMu.
+ timerMu sync.Mutex `state:"nosave"`
+ timers map[linux.TimerID]*IntervalTimer
+ nextTimerID linux.TimerID
+
// exitedCPUStats is the CPU usage for all exited tasks in the thread
// group. exitedCPUStats is protected by the TaskSet mutex.
exitedCPUStats usage.CPUStats
@@ -218,6 +226,7 @@ func NewThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminationSignal linu
limits: limits,
}
tg.tm = newTimerManager(tg, monotonicClock)
+ tg.timers = make(map[linux.TimerID]*IntervalTimer)
tg.rscr.Store(&RSEQCriticalRegion{})
return tg
}
@@ -252,9 +261,23 @@ func (tg *ThreadGroup) Limits() *limits.LimitSet {
// release releases the thread group's resources.
func (tg *ThreadGroup) release() {
- // This must be done without holding the TaskSet mutex since thread group
- // timers call SendSignal with Timer.mu locked.
+ // These must be done without holding the TaskSet or signal mutexes since
+ // timers send signals with Timer.mu locked.
+
tg.tm.destroy()
+
+ var its []*IntervalTimer
+ tg.pidns.owner.mu.Lock()
+ tg.signalHandlers.mu.Lock()
+ for _, it := range tg.timers {
+ its = append(its, it)
+ }
+ tg.timers = make(map[linux.TimerID]*IntervalTimer) // nil maps can't be saved
+ tg.signalHandlers.mu.Unlock()
+ tg.pidns.owner.mu.Unlock()
+ for _, it := range its {
+ it.DestroyTimer()
+ }
}
// forEachChildThreadGroupLocked iterates over all child ThreadGroups.
diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go
index 6eadd2878..1f6fed007 100644
--- a/pkg/sentry/kernel/time/time.go
+++ b/pkg/sentry/kernel/time/time.go
@@ -320,8 +320,8 @@ func SettingFromSpec(value time.Duration, interval time.Duration, c Clock) (Sett
}, nil
}
-// SettingFromAbsSpec converts a (value, interval) pair to a Setting based on a
-// reading from c. value is interpreted as an absolute time.
+// SettingFromAbsSpec converts a (value, interval) pair to a Setting. value is
+// interpreted as an absolute time.
func SettingFromAbsSpec(value Time, interval time.Duration) (Setting, error) {
if value.Before(ZeroTime) {
return Setting{}, syserror.EINVAL
@@ -336,6 +336,16 @@ func SettingFromAbsSpec(value Time, interval time.Duration) (Setting, error) {
}, nil
}
+// SettingFromItimerspec builds a Setting from a linux.Itimerspec. When abs is
+// true its.Value is taken as an absolute time; otherwise it is taken as an
+// offset from c.Now().
+func SettingFromItimerspec(its linux.Itimerspec, abs bool, c Clock) (Setting, error) {
+	interval := its.Interval.ToDuration()
+	if !abs {
+		return SettingFromSpec(its.Value.ToDuration(), interval, c)
+	}
+	return SettingFromAbsSpec(FromTimespec(its.Value), interval)
+}
+
// SpecFromSetting converts a timestamp and a Setting to a (relative value,
// interval) pair, as used by most Linux syscalls that return a struct
// itimerval or struct itimerspec.
@@ -346,6 +356,15 @@ func SpecFromSetting(now Time, s Setting) (value, period time.Duration) {
return s.Next.Sub(now), s.Period
}
+// ItimerspecFromSetting renders the Setting s, as observed at time now, as a
+// linux.Itimerspec (relative value plus interval).
+func ItimerspecFromSetting(now Time, s Setting) linux.Itimerspec {
+	value, interval := SpecFromSetting(now, s)
+	var its linux.Itimerspec
+	its.Interval = linux.DurationToTimespec(interval)
+	its.Value = linux.DurationToTimespec(value)
+	return its
+}
+
// advancedTo returns an updated Setting and a number of expirations after
// the associated Clock indicates a time of now.
//
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index c102af101..4465549ad 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -266,11 +266,11 @@ var AMD64 = &kernel.SyscallTable{
219: RestartSyscall,
// 220: Semtimedop, TODO
221: Fadvise64,
- // 222: TimerCreate, TODO
- // 223: TimerSettime, TODO
- // 224: TimerGettime, TODO
- // 225: TimerGetoverrun, TODO
- // 226: TimerDelete, TODO
+ 222: TimerCreate,
+ 223: TimerSettime,
+ 224: TimerGettime,
+ 225: TimerGetoverrun,
+ 226: TimerDelete,
227: ClockSettime,
228: ClockGettime,
229: ClockGetres,
diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go
index 4ed077626..aaed75c81 100644
--- a/pkg/sentry/syscalls/linux/sys_timer.go
+++ b/pkg/sentry/syscalls/linux/sys_timer.go
@@ -166,3 +166,88 @@ func Alarm(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return uintptr(sec), nil, nil
}
+
+// TimerCreate implements linux syscall timer_create(2).
+//
+// args[0] is the clock ID, args[1] is an optional pointer to a struct
+// sigevent (NULL selects the default notification), and args[2] is the
+// address that receives the new timer's ID. On success the syscall result is
+// 0; the timer ID is reported only through args[2].
+func TimerCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	clockID := args[0].Int()
+	sevp := args[1].Pointer()
+	timerIDp := args[2].Pointer()
+
+	c, err := getClock(t, clockID)
+	if err != nil {
+		return 0, nil, err
+	}
+
+	var sev *linux.Sigevent
+	if sevp != 0 {
+		sev = &linux.Sigevent{}
+		if _, err = t.CopyIn(sevp, sev); err != nil {
+			return 0, nil, err
+		}
+	}
+
+	id, err := t.IntervalTimerCreate(c, sev)
+	if err != nil {
+		return 0, nil, err
+	}
+
+	if _, err := t.CopyOut(timerIDp, &id); err != nil {
+		// Userspace never learned the ID, so delete the timer rather than
+		// leaking it.
+		t.IntervalTimerDelete(id)
+		return 0, nil, err
+	}
+
+	// "On success, timer_create() returns 0, and the ID of the new timer is
+	// placed in *timerid." - timer_create(2). Returning the ID instead would
+	// yield a nonzero result for every timer other than ID 0.
+	return 0, nil, nil
+}
+
+// TimerSettime implements linux syscall timer_settime(2).
+//
+// args[0] is the timer ID, args[1] the flags (TIMER_ABSTIME selects an
+// absolute expiration), args[2] the address of the new itimerspec, and args[3]
+// an optional address that receives the previous setting.
+func TimerSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	id := linux.TimerID(args[0].Value)
+	abs := args[1].Int()&linux.TIMER_ABSTIME != 0
+	newAddr := args[2].Pointer()
+	oldAddr := args[3].Pointer()
+
+	var its linux.Itimerspec
+	if _, err := t.CopyIn(newAddr, &its); err != nil {
+		return 0, nil, err
+	}
+	prev, err := t.IntervalTimerSettime(id, its, abs)
+	if err != nil {
+		return 0, nil, err
+	}
+	if oldAddr == 0 {
+		// The caller did not ask for the previous setting.
+		return 0, nil, nil
+	}
+	if _, err := t.CopyOut(oldAddr, &prev); err != nil {
+		return 0, nil, err
+	}
+	return 0, nil, nil
+}
+
+// TimerGettime implements linux syscall timer_gettime(2).
+//
+// args[0] is the timer ID and args[1] is the address that receives the
+// timer's current itimerspec.
+func TimerGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	id := linux.TimerID(args[0].Value)
+	outAddr := args[1].Pointer()
+
+	cur, err := t.IntervalTimerGettime(id)
+	if err != nil {
+		return 0, nil, err
+	}
+	if _, err := t.CopyOut(outAddr, &cur); err != nil {
+		return 0, nil, err
+	}
+	return 0, nil, nil
+}
+
+// TimerGetoverrun implements linux syscall timer_getoverrun(2).
+//
+// args[0] is the timer ID; the syscall result is the timer's overrun count.
+func TimerGetoverrun(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	overruns, err := t.IntervalTimerGetoverrun(linux.TimerID(args[0].Value))
+	if err != nil {
+		return 0, nil, err
+	}
+	return uintptr(overruns), nil, nil
+}
+
+// TimerDelete implements linux syscall timer_delete(2).
+//
+// args[0] is the ID of the timer to delete.
+func TimerDelete(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	if err := t.IntervalTimerDelete(linux.TimerID(args[0].Value)); err != nil {
+		return 0, nil, err
+	}
+	return 0, nil, nil
+}
diff --git a/pkg/sentry/syscalls/linux/sys_timerfd.go b/pkg/sentry/syscalls/linux/sys_timerfd.go
index cb81d42b9..92c6a3d60 100644
--- a/pkg/sentry/syscalls/linux/sys_timerfd.go
+++ b/pkg/sentry/syscalls/linux/sys_timerfd.go
@@ -85,28 +85,18 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
if _, err := t.CopyIn(newValAddr, &newVal); err != nil {
return 0, nil, err
}
- var s ktime.Setting
- var err error
- if flags&linux.TFD_TIMER_ABSTIME != 0 {
- s, err = ktime.SettingFromAbsSpec(ktime.FromTimespec(newVal.Value),
- newVal.Interval.ToDuration())
- } else {
- s, err = ktime.SettingFromSpec(newVal.Value.ToDuration(),
- newVal.Interval.ToDuration(), tf.Clock())
- }
+ newS, err := ktime.SettingFromItimerspec(newVal, flags&linux.TFD_TIMER_ABSTIME != 0, tf.Clock())
if err != nil {
return 0, nil, err
}
- valueNS, intervalNS := ktime.SpecFromSetting(tf.SetTime(s))
- if oldValAddr == 0 {
- return 0, nil, nil
- }
- oldVal := linux.Itimerspec{
- Interval: linux.DurationToTimespec(intervalNS),
- Value: linux.DurationToTimespec(valueNS),
+ tm, oldS := tf.SetTime(newS)
+ if oldValAddr != 0 {
+ oldVal := ktime.ItimerspecFromSetting(tm, oldS)
+ if _, err := t.CopyOut(oldValAddr, &oldVal); err != nil {
+ return 0, nil, err
+ }
}
- _, err = t.CopyOut(oldValAddr, &oldVal)
- return 0, nil, err
+ return 0, nil, nil
}
// TimerfdGettime implements Linux syscall timerfd_gettime(2).
@@ -125,11 +115,8 @@ func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
return 0, nil, syserror.EINVAL
}
- valueNS, intervalNS := ktime.SpecFromSetting(tf.GetTime())
- curVal := linux.Itimerspec{
- Interval: linux.DurationToTimespec(intervalNS),
- Value: linux.DurationToTimespec(valueNS),
- }
+ tm, s := tf.GetTime()
+ curVal := ktime.ItimerspecFromSetting(tm, s)
_, err := t.CopyOut(curValAddr, &curVal)
return 0, nil, err
}