author | Googler <noreply@google.com> | 2018-04-27 10:37:02 -0700
---|---|---
committer | Adin Scannell <ascannell@google.com> | 2018-04-28 01:44:26 -0400
commit | d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch) |
tree | 54f95eef73aee6bacbfc736fffc631be2605ed53 /pkg/sentry/kernel/timer.go |
parent | f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff) |
Check in gVisor.
PiperOrigin-RevId: 194583126
Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'pkg/sentry/kernel/timer.go')
-rw-r--r-- | pkg/sentry/kernel/timer.go | 282 |
1 file changed, 282 insertions, 0 deletions
diff --git a/pkg/sentry/kernel/timer.go b/pkg/sentry/kernel/timer.go
new file mode 100644
index 000000000..03a3310be
--- /dev/null
+++ b/pkg/sentry/kernel/timer.go
@@ -0,0 +1,282 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+	"fmt"
+	"time"
+
+	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
+	ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
+	sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time"
+)
+
+// timekeeperClock is a ktime.Clock that reads time from a
+// kernel.Timekeeper-managed clock.
+type timekeeperClock struct {
+	tk *Timekeeper
+	c  sentrytime.ClockID
+
+	// Implements ktime.Clock.WallTimeUntil.
+	ktime.WallRateClock `state:"nosave"`
+
+	// Implements waiter.Waitable. (We have no ability to detect
+	// discontinuities from external changes to CLOCK_REALTIME.)
+	ktime.NoClockEvents `state:"nosave"`
+}
+
+// Now implements ktime.Clock.Now.
+func (tc *timekeeperClock) Now() ktime.Time {
+	now, err := tc.tk.GetTime(tc.c)
+	if err != nil {
+		panic(fmt.Sprintf("timekeeperClock(ClockID=%v).Now: %v", tc.c, err))
+	}
+	return ktime.FromNanoseconds(now)
+}
+
+// tgClock is a ktime.Clock that measures the time a thread group has spent
+// executing.
+type tgClock struct {
+	tg *ThreadGroup
+
+	// If includeSys is true, the tgClock includes both time spent executing
+	// application code and time spent in the sentry. Otherwise, the tgClock
+	// includes only time spent executing application code.
+	includeSys bool
+
+	// Implements waiter.Waitable.
+	ktime.ClockEventsQueue `state:"nosave"`
+}
+
+// UserCPUClock returns a ktime.Clock that measures the time that a thread
+// group has spent executing.
+func (tg *ThreadGroup) UserCPUClock() ktime.Clock {
+	return tg.tm.virtClock
+}
+
+// CPUClock returns a ktime.Clock that measures the time that a thread group
+// has spent executing, including sentry time.
+func (tg *ThreadGroup) CPUClock() ktime.Clock {
+	return tg.tm.profClock
+}
+
+// Now implements ktime.Clock.Now.
+func (tgc *tgClock) Now() ktime.Time {
+	stats := tgc.tg.CPUStats()
+	if tgc.includeSys {
+		return ktime.FromNanoseconds((stats.UserTime + stats.SysTime).Nanoseconds())
+	}
+	return ktime.FromNanoseconds(stats.UserTime.Nanoseconds())
+}
+
+// WallTimeUntil implements ktime.Clock.WallTimeUntil.
+func (tgc *tgClock) WallTimeUntil(t, now ktime.Time) time.Duration {
+	// The assumption here is that the time spent in this process (whether
+	// virtual or prof) should not exceed wall time * active tasks, since
+	// Task.exitThreadGroup stops accounting as it transitions to
+	// TaskExitInitiated.
+	tgc.tg.pidns.owner.mu.RLock()
+	n := tgc.tg.activeTasks
+	tgc.tg.pidns.owner.mu.RUnlock()
+	if n == 0 {
+		if t.Before(now) {
+			return 0
+		}
+		// The timer tick raced with thread group exit, after which no more
+		// tasks can enter the thread group. So tgc.Now() will never advance
+		// again. Return a large delay; the timer should be stopped long
+		// before it comes again anyway.
+		return time.Hour
+	}
+	// This is a lower bound on the amount of time that can elapse before an
+	// associated timer expires, so returning this value tends to result in a
+	// sequence of closely-spaced ticks just before timer expiry. To avoid
+	// this, round up to the nearest ClockTick; CPU usage measurements are
+	// limited to this resolution anyway.
+	remaining := time.Duration(int64(t.Sub(now))/int64(n)) * time.Nanosecond
+	return ((remaining + (linux.ClockTick - time.Nanosecond)) / linux.ClockTick) * linux.ClockTick
+}
+
+// taskClock is a ktime.Clock that measures the time that a task has spent
+// executing.
+type taskClock struct {
+	t *Task
+
+	// If includeSys is true, the taskClock includes both time spent executing
+	// application code and time spent in the sentry. Otherwise, the taskClock
+	// includes only time spent executing application code.
+	includeSys bool
+
+	// Implements waiter.Waitable. TimeUntil wouldn't change its estimation
+	// based on either of the clock events, so there's no event to be
+	// notified for.
+	ktime.NoClockEvents `state:"nosave"`
+
+	// Implements ktime.Clock.WallTimeUntil.
+	//
+	// As an upper bound, a task's clock cannot advance faster than CPU
+	// time. It would have to execute at a rate of more than 1 task-second
+	// per 1 CPU-second, which isn't possible.
+	ktime.WallRateClock `state:"nosave"`
+}
+
+// UserCPUClock returns a clock measuring the CPU time the task has spent
+// executing application code.
+func (t *Task) UserCPUClock() ktime.Clock {
+	return &taskClock{t: t, includeSys: false}
+}
+
+// CPUClock returns a clock measuring the CPU time the task has spent executing
+// application and "kernel" code.
+func (t *Task) CPUClock() ktime.Clock {
+	return &taskClock{t: t, includeSys: true}
+}
+
+// Now implements ktime.Clock.Now.
+func (tc *taskClock) Now() ktime.Time {
+	stats := tc.t.CPUStats()
+	if tc.includeSys {
+		return ktime.FromNanoseconds((stats.UserTime + stats.SysTime).Nanoseconds())
+	}
+	return ktime.FromNanoseconds(stats.UserTime.Nanoseconds())
+}
+
+// signalNotifier is a ktime.TimerListener that sends signals to a ThreadGroup.
+type signalNotifier struct {
+	tg         *ThreadGroup
+	signal     linux.Signal
+	realTimer  bool
+	includeSys bool
+}
+
+// Notify implements ktime.TimerListener.Notify.
+func (s *signalNotifier) Notify(exp uint64) {
+	// Since all signals sent using a signalNotifier are standard (not
+	// real-time) signals, we can ignore the number of expirations and send
+	// only a single signal.
+	if s.realTimer {
+		// The real timer signal is sent to the thread group leader; see
+		// Linux's kernel/time/itimer.c:it_real_fn.
+		s.tg.SendSignal(sigPriv(s.signal))
+	} else {
+		s.tg.SendTimerSignal(sigPriv(s.signal), s.includeSys)
+	}
+}
+
+// Destroy implements ktime.TimerListener.Destroy.
+func (s *signalNotifier) Destroy() {}
+
+// TimerManager is a collection of supported process CPU timers.
+type TimerManager struct {
+	// Clocks used to drive thread group execution time timers.
+	virtClock *tgClock
+	profClock *tgClock
+
+	RealTimer      *ktime.Timer
+	VirtualTimer   *ktime.Timer
+	ProfTimer      *ktime.Timer
+	SoftLimitTimer *ktime.Timer
+	HardLimitTimer *ktime.Timer
+}
+
+// newTimerManager returns a new instance of TimerManager.
+func newTimerManager(tg *ThreadGroup, monotonicClock ktime.Clock) TimerManager {
+	virtClock := &tgClock{tg: tg, includeSys: false}
+	profClock := &tgClock{tg: tg, includeSys: true}
+	tm := TimerManager{
+		virtClock: virtClock,
+		profClock: profClock,
+		RealTimer: ktime.NewTimer(monotonicClock, &signalNotifier{
+			tg:         tg,
+			signal:     linux.SIGALRM,
+			realTimer:  true,
+			includeSys: false,
+		}),
+		VirtualTimer: ktime.NewTimer(virtClock, &signalNotifier{
+			tg:         tg,
+			signal:     linux.SIGVTALRM,
+			realTimer:  false,
+			includeSys: false,
+		}),
+		ProfTimer: ktime.NewTimer(profClock, &signalNotifier{
+			tg:         tg,
+			signal:     linux.SIGPROF,
+			realTimer:  false,
+			includeSys: true,
+		}),
+		SoftLimitTimer: ktime.NewTimer(profClock, &signalNotifier{
+			tg:         tg,
+			signal:     linux.SIGXCPU,
+			realTimer:  false,
+			includeSys: true,
+		}),
+		HardLimitTimer: ktime.NewTimer(profClock, &signalNotifier{
+			tg:         tg,
+			signal:     linux.SIGKILL,
+			realTimer:  false,
+			includeSys: true,
+		}),
+	}
+	tm.applyCPULimits(tg.Limits().Get(limits.CPU))
+	return tm
+}
+
+// Save saves this TimerManager.
+
+// destroy destroys all timers.
+func (tm *TimerManager) destroy() {
+	tm.RealTimer.Destroy()
+	tm.VirtualTimer.Destroy()
+	tm.ProfTimer.Destroy()
+	tm.SoftLimitTimer.Destroy()
+	tm.HardLimitTimer.Destroy()
+}
+
+func (tm *TimerManager) applyCPULimits(l limits.Limit) {
+	tm.SoftLimitTimer.Swap(ktime.Setting{
+		Enabled: l.Cur != limits.Infinity,
+		Next:    ktime.FromNanoseconds((time.Duration(l.Cur) * time.Second).Nanoseconds()),
+		Period:  time.Second,
+	})
+	tm.HardLimitTimer.Swap(ktime.Setting{
+		Enabled: l.Max != limits.Infinity,
+		Next:    ktime.FromNanoseconds((time.Duration(l.Max) * time.Second).Nanoseconds()),
+	})
+}
+
+// kick is called when the number of threads in the thread group associated
+// with tm increases.
+func (tm *TimerManager) kick() {
+	tm.virtClock.Notify(ktime.ClockEventRateIncrease)
+	tm.profClock.Notify(ktime.ClockEventRateIncrease)
+}
+
+// pause pauses the timers and stops timer signal delivery.
+func (tm *TimerManager) pause() {
+	tm.RealTimer.Pause()
+	tm.VirtualTimer.Pause()
+	tm.ProfTimer.Pause()
+	tm.SoftLimitTimer.Pause()
+	tm.HardLimitTimer.Pause()
+}
+
+// resume resumes the timers and restarts timer signal delivery.
+func (tm *TimerManager) resume() {
+	tm.RealTimer.Resume()
+	tm.VirtualTimer.Resume()
+	tm.ProfTimer.Resume()
+	tm.SoftLimitTimer.Resume()
+	tm.HardLimitTimer.Resume()
+}
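
A note on the rounding at the end of tgClock.WallTimeUntil: dividing the remaining CPU budget by the number of active tasks gives a lower bound on the wall time until expiry, and rounding that bound up to a whole ClockTick avoids a burst of closely spaced short ticks just before the timer fires. The standalone sketch below reproduces only that arithmetic; the 10ms clockTick constant is an assumption (Linux's common CONFIG_HZ=100), standing in for the real linux.ClockTick import.

package main

import (
	"fmt"
	"time"
)

// clockTick is an assumed stand-in for linux.ClockTick: 10ms, i.e. a
// CONFIG_HZ=100 kernel. The real constant lives in pkg/abi/linux.
const clockTick = 10 * time.Millisecond

// wallTimeUntil mirrors the tgClock.WallTimeUntil arithmetic: divide the
// remaining CPU time by the number of active tasks (a lower bound on wall
// time), then round up to the next whole clock tick.
func wallTimeUntil(remainingCPU time.Duration, activeTasks int64) time.Duration {
	remaining := remainingCPU / time.Duration(activeTasks)
	return ((remaining + (clockTick - time.Nanosecond)) / clockTick) * clockTick
}

func main() {
	// 25ms of CPU budget across 2 active tasks: the raw lower bound is
	// 12.5ms of wall time, which rounds up to two full ticks.
	fmt.Println(wallTimeUntil(25*time.Millisecond, 2)) // prints 20ms
}

Since CPU usage is only accounted at ClockTick resolution anyway, the rounding costs no precision; it just spaces the wakeups out.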
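signalNotifier.Notify can discard the expiration count because every signal it sends (SIGALRM, SIGVTALRM, SIGPROF, SIGXCPU, SIGKILL) is a standard, non-real-time signal: a pending standard signal absorbs later instances, so delivering one is equivalent to delivering many. A toy model of the listener contract (these types are illustrative, not the real ktime.TimerListener or ThreadGroup APIs) makes the coalescing visible:

package main

import "fmt"

// listener is a toy version of the ktime.TimerListener contract: the timer
// reports how many expirations elapsed since the last notification.
type listener interface {
	Notify(exp uint64)
	Destroy()
}

// signalNotifier collapses any number of expirations into a single
// standard-signal delivery, as the sentry version does.
type signalNotifier struct{ signal string }

func (s *signalNotifier) Notify(exp uint64) {
	fmt.Printf("deliver %s once (collapsing %d expirations)\n", s.signal, exp)
}

func (s *signalNotifier) Destroy() {}

func main() {
	var l listener = &signalNotifier{signal: "SIGVTALRM"}
	l.Notify(3) // three missed ticks still yield one SIGVTALRM
}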
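Finally, applyCPULimits maps the two halves of RLIMIT_CPU onto prof-clock timers with different shapes: the soft limit re-arms every CPU-second so SIGXCPU keeps arriving until the process reacts, while the hard limit is a one-shot SIGKILL (its Setting has no period). A simplified stand-in shows the mapping; the setting struct and the all-ones infinity sentinel are assumptions for illustration, where the sentry uses ktime.Setting and limits.Infinity:

package main

import (
	"fmt"
	"time"
)

// infinity is an assumed sentinel for an unset limit (limits.Infinity in
// the sentry).
const infinity = ^uint64(0)

// setting is a simplified stand-in for ktime.Setting: next is the expiry
// point on the timer's clock, period the re-arm interval (zero = one-shot).
type setting struct {
	enabled bool
	next    time.Duration
	period  time.Duration
}

// cpuLimitSettings mirrors applyCPULimits. Both timers run on the prof
// clock, so "time" here is CPU seconds consumed, not wall seconds.
func cpuLimitSettings(cur, max uint64) (soft, hard setting) {
	soft = setting{
		enabled: cur != infinity,
		next:    time.Duration(cur) * time.Second,
		period:  time.Second, // SIGXCPU repeats each CPU-second past the soft limit
	}
	hard = setting{
		enabled: max != infinity, // one-shot SIGKILL at the hard limit
		next:    time.Duration(max) * time.Second,
	}
	return soft, hard
}

func main() {
	soft, hard := cpuLimitSettings(2, 5)
	fmt.Printf("soft (SIGXCPU): %+v\nhard (SIGKILL): %+v\n", soft, hard)
}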