summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/kernel/time/time.go
blob: 6eadd2878b09188c0913c1a55adc29aaa91a096a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package time defines the Timer type, which provides a periodic timer that
// works by sampling a user-provided clock.
package time

import (
	"fmt"
	"math"
	"sync"
	"time"

	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
	"gvisor.googlesource.com/gvisor/pkg/syserror"
	"gvisor.googlesource.com/gvisor/pkg/waiter"
)

// Events that may be generated by a Clock.
const (
	// ClockEventSet occurs when a Clock undergoes a discontinuous change.
	ClockEventSet waiter.EventMask = 1 << iota

	// ClockEventRateIncrease occurs when the rate at which a Clock advances
	// increases significantly, such that values returned by previous calls to
	// Clock.WallTimeUntil may be too large.
	ClockEventRateIncrease
)

// Time represents an instant in time with nanosecond precision.
//
// Time may represent time with respect to any clock and may not have any
// meaning in the real world.
//
// +stateify savable
type Time struct {
	ns int64
}

var (
	// MinTime is the zero time instant, the lowest possible time that can
	// be represented by Time.
	MinTime = Time{ns: math.MinInt64}

	// MaxTime is the highest possible time that can be represented by
	// Time.
	MaxTime = Time{ns: math.MaxInt64}

	// ZeroTime represents the zero time in an unspecified Clock's domain.
	ZeroTime = Time{ns: 0}
)

const (
	// MinDuration is the minimum duration representable by time.Duration.
	MinDuration = time.Duration(math.MinInt64)

	// MaxDuration is the maximum duration representable by time.Duration.
	MaxDuration = time.Duration(math.MaxInt64)
)

// FromNanoseconds returns a Time representing the point ns nanoseconds after
// an unspecified Clock's zero time.
func FromNanoseconds(ns int64) Time {
	return Time{ns}
}

// FromSeconds returns a Time representing the point s seconds after an
// unspecified Clock's zero time.
func FromSeconds(s int64) Time {
	if s > math.MaxInt64/time.Second.Nanoseconds() {
		return MaxTime
	}
	return Time{s * 1e9}
}

// FromUnix converts from Unix seconds and nanoseconds to Time, assuming a real
// time Unix clock domain.
func FromUnix(s int64, ns int64) Time {
	if s > math.MaxInt64/time.Second.Nanoseconds() {
		return MaxTime
	}
	t := s * 1e9
	if t > math.MaxInt64-ns {
		return MaxTime
	}
	return Time{t + ns}
}

// FromTimespec converts from Linux Timespec to Time.
func FromTimespec(ts linux.Timespec) Time {
	return Time{ts.ToNsecCapped()}
}

// FromTimeval converts a Linux Timeval to Time.
func FromTimeval(tv linux.Timeval) Time {
	return Time{tv.ToNsecCapped()}
}

// Nanoseconds returns nanoseconds elapsed since the zero time in t's Clock
// domain. If t represents walltime, this is nanoseconds since the Unix epoch.
func (t Time) Nanoseconds() int64 {
	return t.ns
}

// Seconds returns seconds elapsed since the zero time in t's Clock domain. If
// t represents walltime, this is seconds since Unix epoch.
func (t Time) Seconds() int64 {
	return t.Nanoseconds() / time.Second.Nanoseconds()
}

// Timespec converts Time to a Linux timespec.
func (t Time) Timespec() linux.Timespec {
	return linux.NsecToTimespec(t.Nanoseconds())
}

// Unix returns the (seconds, nanoseconds) representation of t such that
// seconds*1e9 + nanoseconds = t.
func (t Time) Unix() (s int64, ns int64) {
	s = t.ns / 1e9
	ns = t.ns % 1e9
	return
}

// TimeT converts Time to a Linux time_t.
func (t Time) TimeT() linux.TimeT {
	return linux.NsecToTimeT(t.Nanoseconds())
}

// Timeval converts Time to a Linux timeval.
func (t Time) Timeval() linux.Timeval {
	return linux.NsecToTimeval(t.Nanoseconds())
}

// Add adds the duration of d to t.
func (t Time) Add(d time.Duration) Time {
	if t.ns > 0 && d.Nanoseconds() > math.MaxInt64-int64(t.ns) {
		return MaxTime
	}
	if t.ns < 0 && d.Nanoseconds() < math.MinInt64-int64(t.ns) {
		return MinTime
	}
	return Time{int64(t.ns) + d.Nanoseconds()}
}

// AddTime adds the duration of u to t.
func (t Time) AddTime(u Time) Time {
	return t.Add(time.Duration(u.ns))
}

// Equal reports whether the two times represent the same instant in time.
func (t Time) Equal(u Time) bool {
	return t.ns == u.ns
}

// Before reports whether the instant t is before the instant u.
func (t Time) Before(u Time) bool {
	return t.ns < u.ns
}

// After reports whether the instant t is after the instant u.
func (t Time) After(u Time) bool {
	return t.ns > u.ns
}

// Sub returns the duration of t - u.
//
// N.B. This measure may not make sense for every Time returned by ktime.Clock.
// Callers who need wall time duration can use ktime.Clock.WallTimeUntil to
// estimate that wall time.
func (t Time) Sub(u Time) time.Duration {
	dur := time.Duration(int64(t.ns)-int64(u.ns)) * time.Nanosecond
	switch {
	case u.Add(dur).Equal(t):
		return dur
	case t.Before(u):
		return MinDuration
	default:
		return MaxDuration
	}
}

// IsMin returns whether t represents the lowest possible time instant.
func (t Time) IsMin() bool {
	return t == MinTime
}

// IsZero returns whether t represents the zero time instant in t's Clock domain.
func (t Time) IsZero() bool {
	return t == ZeroTime
}

// String returns the time represented in nanoseconds as a string.
func (t Time) String() string {
	return fmt.Sprintf("%dns", t.Nanoseconds())
}

// A Clock is an abstract time source.
type Clock interface {
	// Now returns the current time in nanoseconds according to the Clock.
	Now() Time

	// WallTimeUntil returns the estimated wall time until Now will return a
	// value greater than or equal to t, given that a recent call to Now
	// returned now. If t has already passed, WallTimeUntil may return 0 or a
	// negative value.
	//
	// WallTimeUntil must be abstract to support Clocks that do not represent
	// wall time (e.g. thread group execution timers). Clocks that represent
	// wall times may embed the WallRateClock type to obtain an appropriate
	// trivial implementation of WallTimeUntil.
	//
	// WallTimeUntil is used to determine when associated Timers should next
	// check for expirations. Returning too small a value may result in
	// spurious Timer goroutine wakeups, while returning too large a value may
	// result in late expirations. Implementations should usually err on the
	// side of underestimating.
	WallTimeUntil(t, now Time) time.Duration

	// Waitable methods may be used to subscribe to Clock events. Waiters will
	// not be preserved by Save and must be re-established during restore.
	//
	// Since Clock events are transient, implementations of
	// waiter.Waitable.Readiness should return 0.
	waiter.Waitable
}

// WallRateClock implements Clock.WallTimeUntil for Clocks that elapse at the
// same rate as wall time.
type WallRateClock struct{}

// WallTimeUntil implements Clock.WallTimeUntil.
func (WallRateClock) WallTimeUntil(t, now Time) time.Duration {
	return t.Sub(now)
}

// NoClockEvents implements waiter.Waitable for Clocks that do not generate
// events.
type NoClockEvents struct{}

// Readiness implements waiter.Waitable.Readiness.
func (NoClockEvents) Readiness(mask waiter.EventMask) waiter.EventMask {
	return 0
}

// EventRegister implements waiter.Waitable.EventRegister.
func (NoClockEvents) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
}

// EventUnregister implements waiter.Waitable.EventUnregister.
func (NoClockEvents) EventUnregister(e *waiter.Entry) {
}

// ClockEventsQueue implements waiter.Waitable by wrapping waiter.Queue and
// defining waiter.Waitable.Readiness as required by Clock.
type ClockEventsQueue struct {
	waiter.Queue
}

// Readiness implements waiter.Waitable.Readiness.
func (ClockEventsQueue) Readiness(mask waiter.EventMask) waiter.EventMask {
	return 0
}

// A TimerListener receives expirations from a Timer.
type TimerListener interface {
	// Notify is called when its associated Timer expires. exp is the number of
	// expirations.
	//
	// Notify is called with the associated Timer's mutex locked, so Notify
	// must not take any locks that precede Timer.mu in lock order.
	//
	// Preconditions: exp > 0.
	Notify(exp uint64)

	// Destroy is called when the timer is destroyed.
	Destroy()
}

// Setting contains user-controlled mutable Timer properties.
//
// +stateify savable
type Setting struct {
	// Enabled is true if the timer is running.
	Enabled bool

	// Next is the time in nanoseconds of the next expiration.
	Next Time

	// Period is the time in nanoseconds between expirations. If Period is
	// zero, the timer will not automatically restart after expiring.
	//
	// Invariant: Period >= 0.
	Period time.Duration
}

// SettingFromSpec converts a (value, interval) pair to a Setting based on a
// reading from c. value is interpreted as a time relative to c.Now().
func SettingFromSpec(value time.Duration, interval time.Duration, c Clock) (Setting, error) {
	if value < 0 {
		return Setting{}, syserror.EINVAL
	}
	if value == 0 {
		return Setting{Period: interval}, nil
	}
	return Setting{
		Enabled: true,
		Next:    c.Now().Add(value),
		Period:  interval,
	}, nil
}

// SettingFromAbsSpec converts a (value, interval) pair to a Setting based on a
// reading from c. value is interpreted as an absolute time.
func SettingFromAbsSpec(value Time, interval time.Duration) (Setting, error) {
	if value.Before(ZeroTime) {
		return Setting{}, syserror.EINVAL
	}
	if value.IsZero() {
		return Setting{Period: interval}, nil
	}
	return Setting{
		Enabled: true,
		Next:    value,
		Period:  interval,
	}, nil
}

// SpecFromSetting converts a timestamp and a Setting to a (relative value,
// interval) pair, as used by most Linux syscalls that return a struct
// itimerval or struct itimerspec.
func SpecFromSetting(now Time, s Setting) (value, period time.Duration) {
	if !s.Enabled {
		return 0, s.Period
	}
	return s.Next.Sub(now), s.Period
}

// advancedTo returns an updated Setting and a number of expirations after
// the associated Clock indicates a time of now.
//
// Settings may be created by successive calls to advancedTo with decreasing
// values of now (i.e. time may appear to go backward). Supporting this is
// required to support non-monotonic clocks, as well as allowing
// Timer.clock.Now() to be called without holding Timer.mu.
func (s Setting) advancedTo(now Time) (Setting, uint64) {
	if !s.Enabled {
		return s, 0
	}
	if s.Next.After(now) {
		return s, 0
	}
	if s.Period == 0 {
		s.Enabled = false
		return s, 1
	}
	exp := 1 + uint64(now.Sub(s.Next).Nanoseconds())/uint64(s.Period)
	s.Next = s.Next.Add(time.Duration(uint64(s.Period) * exp))
	return s, exp
}

// Timer is an optionally-periodic timer driven by sampling a user-specified
// Clock. Timer's semantics support the requirements of Linux's interval timers
// (setitimer(2), timer_create(2), timerfd_create(2)).
//
// Timers should be created using NewTimer and must be cleaned up by calling
// Timer.Destroy when no longer used.
//
// +stateify savable
type Timer struct {
	// clock is the time source. clock is immutable.
	clock Clock

	// listener is notified of expirations. listener is immutable.
	listener TimerListener

	// mu protects the following mutable fields.
	mu sync.Mutex `state:"nosave"`

	// setting is the timer setting. setting is protected by mu.
	setting Setting

	// paused is true if the Timer is paused. paused is protected by mu.
	paused bool

	// kicker is used to wake the Timer goroutine. The kicker pointer is
	// immutable, but its state is protected by mu.
	kicker *time.Timer `state:"nosave"`

	// entry is registered with clock.EventRegister. entry is immutable.
	//
	// Per comment in Clock, entry must be re-registered after restore; per
	// comment in Timer.Load, this is done in Timer.Resume.
	entry waiter.Entry `state:"nosave"`

	// events is the channel that will be notified whenever entry receives an
	// event. It is also closed by Timer.Destroy to instruct the Timer
	// goroutine to exit.
	events chan struct{} `state:"nosave"`
}

// timerTickEvents are Clock events that require the Timer goroutine to Tick
// prematurely.
const timerTickEvents = ClockEventSet | ClockEventRateIncrease

// NewTimer returns a new Timer that will obtain time from clock and send
// expirations to listener. The Timer is initially stopped and has no first
// expiration or period configured.
func NewTimer(clock Clock, listener TimerListener) *Timer {
	t := &Timer{
		clock:    clock,
		listener: listener,
	}
	t.init()
	return t
}

// After waits for the duration to elapse according to clock and then sends a
// notification on the returned channel. The timer is started immediately and
// will fire exactly once. The second return value is the start time used with
// the duration.
//
// Callers must call Timer.Destroy.
func After(clock Clock, duration time.Duration) (*Timer, Time, <-chan struct{}) {
	notifier, tchan := NewChannelNotifier()
	t := NewTimer(clock, notifier)
	now := clock.Now()

	t.Swap(Setting{
		Enabled: true,
		Period:  0,
		Next:    now.Add(duration),
	})
	return t, now, tchan
}

// init initializes Timer state that is not preserved across save/restore. If
// init has already been called, calling it again is a no-op.
//
// Preconditions: t.mu must be locked, or the caller must have exclusive access
// to t.
func (t *Timer) init() {
	if t.kicker != nil {
		return
	}
	// If t.kicker is nil, the Timer goroutine can't be running, so we can't
	// race with it.
	t.kicker = time.NewTimer(0)
	t.entry, t.events = waiter.NewChannelEntry(nil)
	t.clock.EventRegister(&t.entry, timerTickEvents)
	go t.runGoroutine() // S/R-SAFE: synchronized by t.mu
}

// Destroy releases resources owned by the Timer. A Destroyed Timer must not be
// used again; in particular, a Destroyed Timer should not be Saved.
func (t *Timer) Destroy() {
	// Stop the Timer, ensuring that the Timer goroutine will not call
	// t.kicker.Reset, before calling t.kicker.Stop.
	t.mu.Lock()
	t.setting.Enabled = false
	t.mu.Unlock()
	t.kicker.Stop()
	// Unregister t.entry, ensuring that the Clock will not send to t.events,
	// before closing t.events to instruct the Timer goroutine to exit.
	t.clock.EventUnregister(&t.entry)
	close(t.events)
	t.listener.Destroy()
}

func (t *Timer) runGoroutine() {
	for {
		select {
		case <-t.kicker.C:
		case _, ok := <-t.events:
			if !ok {
				// Channel closed by Destroy.
				return
			}
		}
		t.Tick()
	}
}

// Tick requests that the Timer immediately check for expirations and
// re-evaluate when it should next check for expirations.
func (t *Timer) Tick() {
	now := t.clock.Now()
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.paused {
		return
	}
	s, exp := t.setting.advancedTo(now)
	t.setting = s
	if exp > 0 {
		t.listener.Notify(exp)
	}
	t.resetKickerLocked(now)
}

// Pause pauses the Timer, ensuring that it does not generate any further
// expirations until Resume is called. If the Timer is already paused, Pause
// has no effect.
func (t *Timer) Pause() {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.paused = true
	// t.kicker may be nil if we were restored but never resumed.
	if t.kicker != nil {
		t.kicker.Stop()
	}
}

// Resume ends the effect of Pause. If the Timer is not paused, Resume has no
// effect.
func (t *Timer) Resume() {
	t.mu.Lock()
	defer t.mu.Unlock()
	if !t.paused {
		return
	}
	t.paused = false

	// Lazily initialize the Timer. We can't call Timer.init until Timer.Resume
	// because save/restore will restore Timers before
	// kernel.Timekeeper.SetClocks() has been called, so if t.clock is backed
	// by a kernel.Timekeeper then the Timer goroutine will panic if it calls
	// t.clock.Now().
	t.init()

	// Kick the Timer goroutine in case it was already initialized, but the
	// Timer goroutine was sleeping.
	t.kicker.Reset(0)
}

// Get returns a snapshot of the Timer's current Setting and the time
// (according to the Timer's Clock) at which the snapshot was taken.
//
// Preconditions: The Timer must not be paused (since its Setting cannot
// be advanced to the current time while it is paused.)
func (t *Timer) Get() (Time, Setting) {
	now := t.clock.Now()
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.paused {
		panic(fmt.Sprintf("Timer.Get called on paused Timer %p", t))
	}
	s, exp := t.setting.advancedTo(now)
	t.setting = s
	if exp > 0 {
		t.listener.Notify(exp)
	}
	t.resetKickerLocked(now)
	return now, s
}

// Swap atomically changes the Timer's Setting and returns the Timer's previous
// Setting and the time (according to the Timer's Clock) at which the snapshot
// was taken. Setting s.Enabled to true starts the Timer, while setting
// s.Enabled to false stops it.
//
// Preconditions: The Timer must not be paused.
func (t *Timer) Swap(s Setting) (Time, Setting) {
	return t.SwapAnd(s, nil)
}

// SwapAnd atomically changes the Timer's Setting, calls f if it is not nil,
// and returns the Timer's previous Setting and the time (according to the
// Timer's Clock) at which the Setting was changed. Setting s.Enabled to true
// starts the timer, while setting s.Enabled to false stops it.
//
// Preconditions: The Timer must not be paused. f cannot call any Timer methods
// since it is called with the Timer mutex locked.
func (t *Timer) SwapAnd(s Setting, f func()) (Time, Setting) {
	now := t.clock.Now()
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.paused {
		panic(fmt.Sprintf("Timer.SwapAnd called on paused Timer %p", t))
	}
	oldS, oldExp := t.setting.advancedTo(now)
	if oldExp > 0 {
		t.listener.Notify(oldExp)
	}
	if f != nil {
		f()
	}
	newS, newExp := s.advancedTo(now)
	t.setting = newS
	if newExp > 0 {
		t.listener.Notify(newExp)
	}
	t.resetKickerLocked(now)
	return now, oldS
}

// Preconditions: t.mu must be locked.
func (t *Timer) resetKickerLocked(now Time) {
	if t.setting.Enabled {
		// Clock.WallTimeUntil may return a negative value. This is fine;
		// time.when treats negative Durations as 0.
		t.kicker.Reset(t.clock.WallTimeUntil(t.setting.Next, now))
	}
	// We don't call t.kicker.Stop if !t.setting.Enabled because in most cases
	// resetKickerLocked will be called from the Timer goroutine itself, in
	// which case t.kicker has already fired and t.kicker.Stop will be an
	// expensive no-op (time.Timer.Stop => time.stopTimer => runtime.stopTimer
	// => runtime.deltimer).
}

// Clock returns the Clock used by t.
func (t *Timer) Clock() Clock {
	return t.clock
}

// ChannelNotifier is a TimerListener that sends a message on an empty struct
// channel.
//
// ChannelNotifier cannot be saved or loaded.
type ChannelNotifier struct {
	// tchan must be a buffered channel.
	tchan chan struct{}
}

// NewChannelNotifier creates a new channel notifier.
//
// If the notifier is used with a timer, Timer.Destroy will close the channel
// returned here.
func NewChannelNotifier() (TimerListener, <-chan struct{}) {
	tchan := make(chan struct{}, 1)
	return &ChannelNotifier{tchan}, tchan
}

// Notify implements ktime.TimerListener.Notify.
func (c *ChannelNotifier) Notify(uint64) {
	select {
	case c.tchan <- struct{}{}:
	default:
	}
}

// Destroy implements ktime.TimerListener.Destroy and will close the channel.
func (c *ChannelNotifier) Destroy() {
	close(c.tchan)
}