diff options
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/arch/signal_amd64.go | 30 | ||||
-rw-r--r-- | pkg/sentry/kernel/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 19 | ||||
-rw-r--r-- | pkg/sentry/kernel/pending_signals.go | 17 | ||||
-rw-r--r-- | pkg/sentry/kernel/pending_signals_state.go | 21 | ||||
-rw-r--r-- | pkg/sentry/kernel/posixtimer.go | 306 | ||||
-rw-r--r-- | pkg/sentry/kernel/ptrace.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_exec.go | 16 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_exit.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_signals.go | 12 | ||||
-rw-r--r-- | pkg/sentry/kernel/thread_group.go | 27 | ||||
-rw-r--r-- | pkg/sentry/kernel/time/time.go | 23 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/linux64.go | 10 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_timer.go | 85 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_timerfd.go | 33 |
15 files changed, 555 insertions, 50 deletions
diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index e81717e8b..9ca4c8ed1 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -176,6 +176,36 @@ func (s *SignalInfo) SetUid(val int32) { usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) } +// Sigval returns the sigval field, which is aliased to both si_int and si_ptr. +func (s *SignalInfo) Sigval() uint64 { + return usermem.ByteOrder.Uint64(s.Fields[8:16]) +} + +// SetSigval mutates the sigval field. +func (s *SignalInfo) SetSigval(val uint64) { + usermem.ByteOrder.PutUint64(s.Fields[8:16], val) +} + +// TimerID returns the si_timerid field. +func (s *SignalInfo) TimerID() linux.TimerID { + return linux.TimerID(usermem.ByteOrder.Uint32(s.Fields[0:4])) +} + +// SetTimerID sets the si_timerid field. +func (s *SignalInfo) SetTimerID(val linux.TimerID) { + usermem.ByteOrder.PutUint32(s.Fields[0:4], uint32(val)) +} + +// Overrun returns the si_overrun field. +func (s *SignalInfo) Overrun() int32 { + return int32(usermem.ByteOrder.Uint32(s.Fields[4:8])) +} + +// SetOverrun sets the si_overrun field. +func (s *SignalInfo) SetOverrun(val int32) { + usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) +} + // Addr returns the si_addr field. 
func (s *SignalInfo) Addr() uint64 { return usermem.ByteOrder.Uint64(s.Fields[0:8]) diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 69a3fbc45..a7b847e94 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -71,6 +71,7 @@ go_library( "pending_signals.go", "pending_signals_list.go", "pending_signals_state.go", + "posixtimer.go", "process_group_list.go", "ptrace.go", "rseq.go", @@ -114,7 +115,6 @@ go_library( importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel", imports = [ "gvisor.googlesource.com/gvisor/pkg/bpf", - "gvisor.googlesource.com/gvisor/pkg/sentry/arch", "gvisor.googlesource.com/gvisor/pkg/tcpip", ], visibility = ["//:sandbox"], diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index cb43fdcdc..33cd727c6 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -19,9 +19,11 @@ // Lock order (outermost locks must be taken first): // // Kernel.extMu -// TaskSet.mu -// SignalHandlers.mu -// Task.mu +// ThreadGroup.timerMu +// ktime.Timer.mu (for IntervalTimer) +// TaskSet.mu +// SignalHandlers.mu +// Task.mu // // Locking SignalHandlers.mu in multiple SignalHandlers requires locking // TaskSet.mu exclusively first. Locking Task.mu in multiple Tasks at the same @@ -706,8 +708,12 @@ func (k *Kernel) pauseTimeLocked() { if t == t.tg.leader { t.tg.tm.pause() } - // This means we'll iterate FDMaps shared by multiple tasks repeatedly, - // but ktime.Timer.Pause is idempotent so this is harmless. + // This means we'll iterate ThreadGroups and FDMaps shared by multiple + // tasks repeatedly, but ktime.Timer.Pause is idempotent so this is + // harmless. 
+ for _, it := range t.tg.timers { + it.PauseTimer() + } if fdm := t.tr.FDMap; fdm != nil { for _, desc := range fdm.files { if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok { @@ -735,6 +741,9 @@ func (k *Kernel) resumeTimeLocked() { if t == t.tg.leader { t.tg.tm.resume() } + for _, it := range t.tg.timers { + it.ResumeTimer() + } if fdm := t.tr.FDMap; fdm != nil { for _, desc := range fdm.files { if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok { diff --git a/pkg/sentry/kernel/pending_signals.go b/pkg/sentry/kernel/pending_signals.go index 06be5a7e1..bb5db0309 100644 --- a/pkg/sentry/kernel/pending_signals.go +++ b/pkg/sentry/kernel/pending_signals.go @@ -46,7 +46,7 @@ type pendingSignals struct { // Note that signals is zero-indexed, but signal 1 is the first valid // signal, so signals[0] contains signals with signo 1 etc. This offset is // usually handled by using Signal.index(). - signals [linux.SignalMaximum]pendingSignalQueue `state:".([]*arch.SignalInfo)"` + signals [linux.SignalMaximum]pendingSignalQueue `state:".([]savedPendingSignal)"` // Bit i of pendingSet is set iff there is at least one signal with signo // i+1 pending. @@ -66,13 +66,16 @@ type pendingSignal struct { // pendingSignalEntry links into a pendingSignalList. pendingSignalEntry *arch.SignalInfo + + // If timer is not nil, it is the IntervalTimer which sent this signal. + timer *IntervalTimer } // enqueue enqueues the given signal. enqueue returns true on success and false // on failure (if the given signal's queue is full). // // Preconditions: info represents a valid signal. 
-func (p *pendingSignals) enqueue(info *arch.SignalInfo) bool { +func (p *pendingSignals) enqueue(info *arch.SignalInfo, timer *IntervalTimer) bool { sig := linux.Signal(info.Signo) q := &p.signals[sig.Index()] if sig.IsStandard() { @@ -82,7 +85,7 @@ func (p *pendingSignals) enqueue(info *arch.SignalInfo) bool { } else if q.length >= rtSignalCap { return false } - q.pendingSignalList.PushBack(&pendingSignal{SignalInfo: info}) + q.pendingSignalList.PushBack(&pendingSignal{SignalInfo: info, timer: timer}) q.length++ p.pendingSet |= linux.SignalSetOf(sig) return true @@ -119,12 +122,20 @@ func (p *pendingSignals) dequeueSpecific(sig linux.Signal) *arch.SignalInfo { if q.length == 0 { p.pendingSet &^= linux.SignalSetOf(sig) } + if ps.timer != nil { + ps.timer.updateDequeuedSignalLocked(ps.SignalInfo) + } return ps.SignalInfo } // discardSpecific causes all pending signals with number sig to be discarded. func (p *pendingSignals) discardSpecific(sig linux.Signal) { q := &p.signals[sig.Index()] + for ps := q.pendingSignalList.Front(); ps != nil; ps = ps.Next() { + if ps.timer != nil { + ps.timer.signalRejectedLocked() + } + } q.pendingSignalList.Reset() q.length = 0 p.pendingSet &^= linux.SignalSetOf(sig) diff --git a/pkg/sentry/kernel/pending_signals_state.go b/pkg/sentry/kernel/pending_signals_state.go index af61f6e8e..6d90ed033 100644 --- a/pkg/sentry/kernel/pending_signals_state.go +++ b/pkg/sentry/kernel/pending_signals_state.go @@ -18,20 +18,29 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/arch" ) +// +stateify savable +type savedPendingSignal struct { + si *arch.SignalInfo + timer *IntervalTimer +} + // saveSignals is invoked by stateify. 
-func (p *pendingSignals) saveSignals() []*arch.SignalInfo { - var pending []*arch.SignalInfo +func (p *pendingSignals) saveSignals() []savedPendingSignal { + var pending []savedPendingSignal for _, q := range p.signals { for ps := q.pendingSignalList.Front(); ps != nil; ps = ps.Next() { - pending = append(pending, ps.SignalInfo) + pending = append(pending, savedPendingSignal{ + si: ps.SignalInfo, + timer: ps.timer, + }) } } return pending } // loadSignals is invoked by stateify. -func (p *pendingSignals) loadSignals(pending []*arch.SignalInfo) { - for _, si := range pending { - p.enqueue(si) +func (p *pendingSignals) loadSignals(pending []savedPendingSignal) { + for _, sps := range pending { + p.enqueue(sps.si, sps.timer) } } diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go new file mode 100644 index 000000000..0ab958529 --- /dev/null +++ b/pkg/sentry/kernel/posixtimer.go @@ -0,0 +1,306 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import ( + "math" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// IntervalTimer represents a POSIX interval timer as described by +// timer_create(2). 
+// // +stateify savable +type IntervalTimer struct { + timer *ktime.Timer + + // If target is not nil, it receives signo from timer expirations. If group + // is true, these signals are thread-group-directed. These fields are + // immutable. + target *Task + signo linux.Signal + id linux.TimerID + sigval uint64 + group bool + + // If sigpending is true, a signal to target is already queued, and timer + // expirations should increment overrunCur instead of sending another + // signal. sigpending is protected by target's signal mutex. (If target is + // nil, the timer will never send signals, so sigpending will be unused.) + sigpending bool + + // If sigorphan is true, timer's setting has been changed since sigpending + // last became true, such that overruns should no longer be counted in the + // pending signal's si_overrun. sigorphan is protected by target's signal + // mutex. + sigorphan bool + + // overrunCur is the number of overruns that have occurred since the last + // time a signal was sent. overrunCur is protected by target's signal + // mutex. + overrunCur uint64 + + // Consider the last signal sent by this timer that has been dequeued. + // overrunLast is the number of overruns that occurred between when this + // signal was sent and when it was dequeued. Equivalently, overrunLast was + // the value of overrunCur when this signal was dequeued. overrunLast is + // protected by target's signal mutex. + overrunLast uint64 +} + +// DestroyTimer releases its resources. +func (it *IntervalTimer) DestroyTimer() { + it.timer.Destroy() + it.timerSettingChanged() + // A destroyed IntervalTimer is still potentially reachable via a + // pendingSignal; nil out timer so that it won't be saved. 
+ it.timer = nil +} + +func (it *IntervalTimer) timerSettingChanged() { + if it.target == nil { + return + } + it.target.tg.pidns.owner.mu.RLock() + defer it.target.tg.pidns.owner.mu.RUnlock() + it.target.tg.signalHandlers.mu.Lock() + defer it.target.tg.signalHandlers.mu.Unlock() + it.sigorphan = true + it.overrunCur = 0 + it.overrunLast = 0 +} + +// PauseTimer pauses the associated Timer. +func (it *IntervalTimer) PauseTimer() { + it.timer.Pause() +} + +// ResumeTimer resumes the associated Timer. +func (it *IntervalTimer) ResumeTimer() { + it.timer.Resume() +} + +// Preconditions: it.target's signal mutex must be locked. +func (it *IntervalTimer) updateDequeuedSignalLocked(si *arch.SignalInfo) { + it.sigpending = false + if it.sigorphan { + return + } + it.overrunLast = it.overrunCur + it.overrunCur = 0 + si.SetOverrun(saturateI32FromU64(it.overrunLast)) +} + +// Preconditions: it.target's signal mutex must be locked. +func (it *IntervalTimer) signalRejectedLocked() { + it.sigpending = false + if it.sigorphan { + return + } + it.overrunCur++ +} + +// Notify implements ktime.TimerListener.Notify. +func (it *IntervalTimer) Notify(exp uint64) { + if it.target == nil { + return + } + + it.target.tg.pidns.owner.mu.RLock() + defer it.target.tg.pidns.owner.mu.RUnlock() + it.target.tg.signalHandlers.mu.Lock() + defer it.target.tg.signalHandlers.mu.Unlock() + + if it.sigpending { + it.overrunCur += exp + return + } + + // sigpending must be set before sendSignalTimerLocked() so that it can be + // unset if the signal is discarded (in which case sendSignalTimerLocked() + // will return nil). + it.sigpending = true + it.sigorphan = false + it.overrunCur += exp - 1 + si := &arch.SignalInfo{ + Signo: int32(it.signo), + Code: arch.SignalInfoTimer, + } + si.SetTimerID(it.id) + si.SetSigval(it.sigval) + // si_overrun is set when the signal is dequeued. 
+ if err := it.target.sendSignalTimerLocked(si, it.group, it); err != nil { + it.signalRejectedLocked() + } +} + +// Destroy implements ktime.TimerListener.Destroy. Users of Timer should call +// DestroyTimer instead. +func (it *IntervalTimer) Destroy() { +} + +// IntervalTimerCreate implements timer_create(2). +func (t *Task) IntervalTimerCreate(c ktime.Clock, sigev *linux.Sigevent) (linux.TimerID, error) { + t.tg.timerMu.Lock() + defer t.tg.timerMu.Unlock() + + // Allocate a timer ID. + var id linux.TimerID + end := t.tg.nextTimerID + for { + id = t.tg.nextTimerID + _, ok := t.tg.timers[id] + t.tg.nextTimerID++ + if t.tg.nextTimerID < 0 { + t.tg.nextTimerID = 0 + } + if !ok { + break + } + if t.tg.nextTimerID == end { + return 0, syserror.EAGAIN + } + } + + // "The implementation of the default case where evp [sic] is NULL is + // handled inside glibc, which invokes the underlying system call with a + // suitably populated sigevent structure." - timer_create(2). This is + // misleading; the timer_create syscall also handles a NULL sevp as + // described by the man page + // (kernel/time/posix-timers.c:sys_timer_create(), do_timer_create()). This + // must be handled here instead of the syscall wrapper since sigval is the + // timer ID, which isn't available until we allocate it in this function. + if sigev == nil { + sigev = &linux.Sigevent{ + Signo: int32(linux.SIGALRM), + Notify: linux.SIGEV_SIGNAL, + Value: uint64(id), + } + } + + // Construct the timer. + it := &IntervalTimer{ + id: id, + sigval: sigev.Value, + } + switch sigev.Notify { + case linux.SIGEV_NONE: + // leave it.target = nil + case linux.SIGEV_SIGNAL, linux.SIGEV_THREAD: + // POSIX SIGEV_THREAD semantics are implemented in userspace by libc; + // to the kernel, SIGEV_THREAD and SIGEV_SIGNAL are equivalent. (See + // Linux's kernel/time/posix-timers.c:good_sigevent().) 
+ it.target = t.tg.leader + it.group = true + case linux.SIGEV_THREAD_ID: + t.tg.pidns.owner.mu.RLock() + target, ok := t.tg.pidns.tasks[ThreadID(sigev.Tid)] + t.tg.pidns.owner.mu.RUnlock() + if !ok || target.tg != t.tg { + return 0, syserror.EINVAL + } + it.target = target + default: + return 0, syserror.EINVAL + } + if sigev.Notify != linux.SIGEV_NONE { + it.signo = linux.Signal(sigev.Signo) + if !it.signo.IsValid() { + return 0, syserror.EINVAL + } + } + it.timer = ktime.NewTimer(c, it) + + t.tg.timers[id] = it + return id, nil +} + +// IntervalTimerDelete implements timer_delete(2). +func (t *Task) IntervalTimerDelete(id linux.TimerID) error { + t.tg.timerMu.Lock() + defer t.tg.timerMu.Unlock() + it := t.tg.timers[id] + if it == nil { + return syserror.EINVAL + } + delete(t.tg.timers, id) + it.DestroyTimer() + return nil +} + +// IntervalTimerSettime implements timer_settime(2). +func (t *Task) IntervalTimerSettime(id linux.TimerID, its linux.Itimerspec, abs bool) (linux.Itimerspec, error) { + t.tg.timerMu.Lock() + defer t.tg.timerMu.Unlock() + it := t.tg.timers[id] + if it == nil { + return linux.Itimerspec{}, syserror.EINVAL + } + + newS, err := ktime.SettingFromItimerspec(its, abs, it.timer.Clock()) + if err != nil { + return linux.Itimerspec{}, err + } + tm, oldS := it.timer.SwapAnd(newS, it.timerSettingChanged) + its = ktime.ItimerspecFromSetting(tm, oldS) + return its, nil +} + +// IntervalTimerGettime implements timer_gettime(2). +func (t *Task) IntervalTimerGettime(id linux.TimerID) (linux.Itimerspec, error) { + t.tg.timerMu.Lock() + defer t.tg.timerMu.Unlock() + it := t.tg.timers[id] + if it == nil { + return linux.Itimerspec{}, syserror.EINVAL + } + + tm, s := it.timer.Get() + its := ktime.ItimerspecFromSetting(tm, s) + return its, nil +} + +// IntervalTimerGetoverrun implements timer_getoverrun(2). +// +// Preconditions: The caller must be running on the task goroutine. 
+func (t *Task) IntervalTimerGetoverrun(id linux.TimerID) (int32, error) { + t.tg.timerMu.Lock() + defer t.tg.timerMu.Unlock() + it := t.tg.timers[id] + if it == nil { + return 0, syserror.EINVAL + } + // By timer_create(2) invariant, either it.target == nil (in which case + // it.overrunLast is immutably 0) or t.tg == it.target.tg; and the fact + // that t is executing timer_getoverrun(2) means that t.tg can't be + // completing execve, so t.tg.signalHandlers can't be changing, allowing us + // to lock t.tg.signalHandlers.mu without holding the TaskSet mutex. + t.tg.signalHandlers.mu.Lock() + defer t.tg.signalHandlers.mu.Unlock() + // This is consistent with Linux after 78c9c4dfbf8c ("posix-timers: + // Sanitize overrun handling"). + return saturateI32FromU64(it.overrunLast), nil +} + +func saturateI32FromU64(x uint64) int32 { + if x > math.MaxInt32 { + return math.MaxInt32 + } + return int32(x) +} diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index e9e69004d..1a0d1876d 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -627,7 +627,7 @@ func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions // running, so we don't have to. child.pendingSignals.enqueue(&arch.SignalInfo{ Signo: int32(linux.SIGSTOP), - }) + }, nil) child.tg.signalHandlers.mu.Unlock() } } diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go index 385299b24..bb3d0bd02 100644 --- a/pkg/sentry/kernel/task_exec.go +++ b/pkg/sentry/kernel/task_exec.go @@ -143,6 +143,22 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState { oldTID = tracer.tg.pidns.tids[t] } t.promoteLocked() + // "POSIX timers are not preserved (timer_create(2))." - execve(2). Handle + // this first since POSIX timers are protected by the signal mutex, which + // we're about to change. Note that we have to stop and destroy timers + // without holding any mutexes to avoid circular lock ordering. 
+ var its []*IntervalTimer + t.tg.signalHandlers.mu.Lock() + for _, it := range t.tg.timers { + its = append(its, it) + } + t.tg.timers = make(map[linux.TimerID]*IntervalTimer) + t.tg.signalHandlers.mu.Unlock() + t.tg.pidns.owner.mu.Unlock() + for _, it := range its { + it.DestroyTimer() + } + t.tg.pidns.owner.mu.Lock() // "During an execve(2), the dispositions of handled signals are reset to // the default; the dispositions of ignored signals are left unchanged. ... // [The] signal mask is preserved across execve(2). ... [The] pending diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index b16844e91..b37fcf4c1 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -129,7 +129,7 @@ func (t *Task) killLocked() { // enqueueing an actual siginfo, such that // kernel/signal.c:collect_signal() initializes si_code to SI_USER. Code: arch.SignalInfoUser, - }) + }, nil) t.interrupt() } diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index 62ec530be..4a66bce6b 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -396,6 +396,10 @@ func (tg *ThreadGroup) SendTimerSignal(info *arch.SignalInfo, includeSys bool) e } func (t *Task) sendSignalLocked(info *arch.SignalInfo, group bool) error { + return t.sendSignalTimerLocked(info, group, nil) +} + +func (t *Task) sendSignalTimerLocked(info *arch.SignalInfo, group bool, timer *IntervalTimer) error { if t.exitState == TaskExitDead { return syserror.ESRCH } @@ -429,6 +433,9 @@ func (t *Task) sendSignalLocked(info *arch.SignalInfo, group bool) error { ignored := computeAction(sig, t.tg.signalHandlers.actions[sig]) == SignalActionIgnore if linux.SignalSetOf(sig)&t.tr.SignalMask == 0 && ignored && !t.hasTracer() { t.Debugf("Discarding ignored signal %d", sig) + if timer != nil { + timer.signalRejectedLocked() + } return nil } @@ -436,11 +443,14 @@ func (t *Task) sendSignalLocked(info *arch.SignalInfo, group bool) 
error { if group { q = &t.tg.pendingSignals } - if !q.enqueue(info) { + if !q.enqueue(info, timer) { if sig.IsRealtime() { return syserror.EAGAIN } t.Debugf("Discarding duplicate signal %d", sig) + if timer != nil { + timer.signalRejectedLocked() + } return nil } diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index 441b8a822..13dce08ce 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -155,6 +155,14 @@ type ThreadGroup struct { // tm contains process timers. TimerManager fields are immutable. tm TimerManager + // timers is the thread group's POSIX interval timers. nextTimerID is the + // TimerID at which allocation should begin searching for an unused ID. + // + // timers and nextTimerID are protected by timerMu. + timerMu sync.Mutex `state:"nosave"` + timers map[linux.TimerID]*IntervalTimer + nextTimerID linux.TimerID + // exitedCPUStats is the CPU usage for all exited tasks in the thread // group. exitedCPUStats is protected by the TaskSet mutex. exitedCPUStats usage.CPUStats @@ -218,6 +226,7 @@ func NewThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminationSignal linu limits: limits, } tg.tm = newTimerManager(tg, monotonicClock) + tg.timers = make(map[linux.TimerID]*IntervalTimer) tg.rscr.Store(&RSEQCriticalRegion{}) return tg } @@ -252,9 +261,23 @@ func (tg *ThreadGroup) Limits() *limits.LimitSet { // release releases the thread group's resources. func (tg *ThreadGroup) release() { - // This must be done without holding the TaskSet mutex since thread group - // timers call SendSignal with Timer.mu locked. + // These must be done without holding the TaskSet or signal mutexes since + // timers send signals with Timer.mu locked. 
+ tg.tm.destroy() + + var its []*IntervalTimer + tg.pidns.owner.mu.Lock() + tg.signalHandlers.mu.Lock() + for _, it := range tg.timers { + its = append(its, it) + } + tg.timers = make(map[linux.TimerID]*IntervalTimer) // nil maps can't be saved + tg.signalHandlers.mu.Unlock() + tg.pidns.owner.mu.Unlock() + for _, it := range its { + it.DestroyTimer() + } } // forEachChildThreadGroupLocked indicates over all child ThreadGroups. diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go index 6eadd2878..1f6fed007 100644 --- a/pkg/sentry/kernel/time/time.go +++ b/pkg/sentry/kernel/time/time.go @@ -320,8 +320,8 @@ func SettingFromSpec(value time.Duration, interval time.Duration, c Clock) (Sett }, nil } -// SettingFromAbsSpec converts a (value, interval) pair to a Setting based on a -// reading from c. value is interpreted as an absolute time. +// SettingFromAbsSpec converts a (value, interval) pair to a Setting. value is +// interpreted as an absolute time. func SettingFromAbsSpec(value Time, interval time.Duration) (Setting, error) { if value.Before(ZeroTime) { return Setting{}, syserror.EINVAL @@ -336,6 +336,16 @@ func SettingFromAbsSpec(value Time, interval time.Duration) (Setting, error) { }, nil } +// SettingFromItimerspec converts a linux.Itimerspec to a Setting. If abs is +// true, its.Value is interpreted as an absolute time. Otherwise, it is +// interpreted as a time relative to c.Now(). +func SettingFromItimerspec(its linux.Itimerspec, abs bool, c Clock) (Setting, error) { + if abs { + return SettingFromAbsSpec(FromTimespec(its.Value), its.Interval.ToDuration()) + } + return SettingFromSpec(its.Value.ToDuration(), its.Interval.ToDuration(), c) +} + // SpecFromSetting converts a timestamp and a Setting to a (relative value, // interval) pair, as used by most Linux syscalls that return a struct // itimerval or struct itimerspec. 
@@ -346,6 +356,15 @@ func SpecFromSetting(now Time, s Setting) (value, period time.Duration) { return s.Next.Sub(now), s.Period } +// ItimerspecFromSetting converts a Setting to a linux.Itimerspec. +func ItimerspecFromSetting(now Time, s Setting) linux.Itimerspec { + val, iv := SpecFromSetting(now, s) + return linux.Itimerspec{ + Interval: linux.DurationToTimespec(iv), + Value: linux.DurationToTimespec(val), + } +} + // advancedTo returns an updated Setting and a number of expirations after // the associated Clock indicates a time of now. // diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index c102af101..4465549ad 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -266,11 +266,11 @@ var AMD64 = &kernel.SyscallTable{ 219: RestartSyscall, // 220: Semtimedop, TODO 221: Fadvise64, - // 222: TimerCreate, TODO - // 223: TimerSettime, TODO - // 224: TimerGettime, TODO - // 225: TimerGetoverrun, TODO - // 226: TimerDelete, TODO + 222: TimerCreate, + 223: TimerSettime, + 224: TimerGettime, + 225: TimerGetoverrun, + 226: TimerDelete, 227: ClockSettime, 228: ClockGettime, 229: ClockGetres, diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go index 4ed077626..aaed75c81 100644 --- a/pkg/sentry/syscalls/linux/sys_timer.go +++ b/pkg/sentry/syscalls/linux/sys_timer.go @@ -166,3 +166,88 @@ func Alarm(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return uintptr(sec), nil, nil } + +// TimerCreate implements linux syscall timer_create(2). 
+func TimerCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	clockID := args[0].Int()
+	sevp := args[1].Pointer()
+	timerIDp := args[2].Pointer()
+
+	c, err := getClock(t, clockID)
+	if err != nil {
+		return 0, nil, err
+	}
+
+	// A NULL sevp is passed through as a nil *Sigevent; IntervalTimerCreate
+	// substitutes the default notification (SIGALRM, sigval = timer ID),
+	// since the ID is not known until it allocates one.
+	var sev *linux.Sigevent
+	if sevp != 0 {
+		sev = &linux.Sigevent{}
+		if _, err = t.CopyIn(sevp, sev); err != nil {
+			return 0, nil, err
+		}
+	}
+
+	id, err := t.IntervalTimerCreate(c, sev)
+	if err != nil {
+		return 0, nil, err
+	}
+
+	if _, err := t.CopyOut(timerIDp, &id); err != nil {
+		// Userspace never learned the ID, so it could not delete the timer
+		// itself; remove it again rather than leaking it.
+		t.IntervalTimerDelete(id)
+		return 0, nil, err
+	}
+
+	// timer_create(2) returns 0 on success. The new timer's ID is reported
+	// only through *timerid (written above), never as the syscall's return
+	// value.
+	return 0, nil, nil
+}
+
+// TimerSettime implements linux syscall timer_settime(2).
+func TimerSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	timerID := linux.TimerID(args[0].Value)
+	flags := args[1].Int()
+	newValAddr := args[2].Pointer()
+	oldValAddr := args[3].Pointer()
+
+	var newVal linux.Itimerspec
+	if _, err := t.CopyIn(newValAddr, &newVal); err != nil {
+		return 0, nil, err
+	}
+	// TIMER_ABSTIME selects absolute interpretation of newVal.Value.
+	oldVal, err := t.IntervalTimerSettime(timerID, newVal, flags&linux.TIMER_ABSTIME != 0)
+	if err != nil {
+		return 0, nil, err
+	}
+	// The previous setting is only reported if the caller asked for it.
+	if oldValAddr != 0 {
+		if _, err := t.CopyOut(oldValAddr, &oldVal); err != nil {
+			return 0, nil, err
+		}
+	}
+	return 0, nil, nil
+}
+
+// TimerGettime implements linux syscall timer_gettime(2).
+func TimerGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	timerID := linux.TimerID(args[0].Value)
+	curValAddr := args[1].Pointer()
+
+	curVal, err := t.IntervalTimerGettime(timerID)
+	if err != nil {
+		return 0, nil, err
+	}
+	// The syscall result is 0; any CopyOut failure is returned as the error.
+	_, err = t.CopyOut(curValAddr, &curVal)
+	return 0, nil, err
+}
+
+// TimerGetoverrun implements linux syscall timer_getoverrun(2).
+func TimerGetoverrun(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + timerID := linux.TimerID(args[0].Value) + + o, err := t.IntervalTimerGetoverrun(timerID) + if err != nil { + return 0, nil, err + } + return uintptr(o), nil, nil +} + +// TimerDelete implements linux syscall timer_delete(2). +func TimerDelete(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + timerID := linux.TimerID(args[0].Value) + return 0, nil, t.IntervalTimerDelete(timerID) +} diff --git a/pkg/sentry/syscalls/linux/sys_timerfd.go b/pkg/sentry/syscalls/linux/sys_timerfd.go index cb81d42b9..92c6a3d60 100644 --- a/pkg/sentry/syscalls/linux/sys_timerfd.go +++ b/pkg/sentry/syscalls/linux/sys_timerfd.go @@ -85,28 +85,18 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne if _, err := t.CopyIn(newValAddr, &newVal); err != nil { return 0, nil, err } - var s ktime.Setting - var err error - if flags&linux.TFD_TIMER_ABSTIME != 0 { - s, err = ktime.SettingFromAbsSpec(ktime.FromTimespec(newVal.Value), - newVal.Interval.ToDuration()) - } else { - s, err = ktime.SettingFromSpec(newVal.Value.ToDuration(), - newVal.Interval.ToDuration(), tf.Clock()) - } + newS, err := ktime.SettingFromItimerspec(newVal, flags&linux.TFD_TIMER_ABSTIME != 0, tf.Clock()) if err != nil { return 0, nil, err } - valueNS, intervalNS := ktime.SpecFromSetting(tf.SetTime(s)) - if oldValAddr == 0 { - return 0, nil, nil - } - oldVal := linux.Itimerspec{ - Interval: linux.DurationToTimespec(intervalNS), - Value: linux.DurationToTimespec(valueNS), + tm, oldS := tf.SetTime(newS) + if oldValAddr != 0 { + oldVal := ktime.ItimerspecFromSetting(tm, oldS) + if _, err := t.CopyOut(oldValAddr, &oldVal); err != nil { + return 0, nil, err + } } - _, err = t.CopyOut(oldValAddr, &oldVal) - return 0, nil, err + return 0, nil, nil } // TimerfdGettime implements Linux syscall timerfd_gettime(2). 
@@ -125,11 +115,8 @@ func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne return 0, nil, syserror.EINVAL } - valueNS, intervalNS := ktime.SpecFromSetting(tf.GetTime()) - curVal := linux.Itimerspec{ - Interval: linux.DurationToTimespec(intervalNS), - Value: linux.DurationToTimespec(valueNS), - } + tm, s := tf.GetTime() + curVal := ktime.ItimerspecFromSetting(tm, s) _, err := t.CopyOut(curValAddr, &curVal) return 0, nil, err } |