Merge 216da0b7 (automated)

author: gVisor bot <gvisor-bot@google.com> 2019-06-02 06:44:55 +0000
committer: gVisor bot <gvisor-bot@google.com> 2019-06-02 06:44:55 +0000
commit: ceb0d792f328d1fc0692197d8856a43c3936a571 (patch)
tree: 83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/sentry/time
parent: deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff)
parent: 216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff)
13 files changed, 1126 insertions, 0 deletions
diff --git a/pkg/sentry/time/arith_arm64.go b/pkg/sentry/time/arith_arm64.go
new file mode 100644
index 000000000..b94740c2a
--- /dev/null
+++ b/pkg/sentry/time/arith_arm64.go
@@ -0,0 +1,70 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file provides a generic Go implementation of uint128 divided by uint64.
+
+// The code is derived from Go's generic math/big.divWW_g
+// (src/math/big/arith.go), but is only used on ARM64.
+
+package time
+
+import "math/bits"
+
+type word uint // word is the operand type of nlz and divWW.
+
+const (
+	_W  = bits.UintSize // word size in bits
+	_W2 = _W / 2        // half word size in bits
+	_B2 = 1 << _W2      // half digit base
+	_M2 = _B2 - 1       // half digit mask
+)
+
+// nlz returns the number of leading zero bits in x via bits.LeadingZeros.
+func nlz(x word) uint {
+	zeros := bits.LeadingZeros(uint(x))
+	return uint(zeros)
+}
+
+// divWW divides the two-word value u1<<_W + u0 by v: q = (u1<<_W + u0 - r)/v.
+// Adapted from Warren, Hacker's Delight, p. 152.
+func divWW(u1, u0, v word) (q, r word) {
+	if u1 >= v { // quotient would not fit in one word (always true when v == 0)
+		return 1<<_W - 1, 1<<_W - 1
+	}
+
+	s := nlz(v) // normalize: shift so that v's most significant bit is set
+	v <<= s
+
+	vn1 := v >> _W2 // high half of the normalized divisor
+	vn0 := v & _M2  // low half of the normalized divisor
+	un32 := u1<<s | u0>>(_W-s)
+	un10 := u0 << s
+	un1 := un10 >> _W2
+	un0 := un10 & _M2
+	q1 := un32 / vn1 // estimate of the high half of the quotient
+	rhat := un32 - q1*vn1
+
+	for q1 >= _B2 || q1*vn0 > _B2*rhat+un1 { // estimate too large: correct it downwards
+		q1--
+		rhat += vn1
+
+		if rhat >= _B2 {
+			break
+		}
+	}
+
+	un21 := un32*_B2 + un1 - q1*v // dividend left after removing q1*v
+	q0 := un21 / vn1
+	rhat = un21 - q0*vn1
+
+	for q0 >= _B2 || q0*vn0 > _B2*rhat+un0 {
+		q0--
+		rhat += vn1
+		if rhat >= _B2 {
+			break
+		}
+	}
+
+	return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s // undo the normalization shift on the remainder
+}
diff --git a/pkg/sentry/time/calibrated_clock.go b/pkg/sentry/time/calibrated_clock.go
new file mode 100644
index 000000000..c27e391c9
--- /dev/null
+++ b/pkg/sentry/time/calibrated_clock.go
@@ -0,0 +1,269 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package time provides a calibrated clock synchronized to a system reference
+// clock.
+package time
+
+import (
+	"sync"
+	"time"
+
+	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/pkg/metric"
+	"gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// fallbackMetric tracks failed updates. It is not sync, as it is not critical
+// that all occurrences are captured and CalibratedClock may fall back many
+// times.
+var fallbackMetric = metric.MustCreateNewUint64Metric("/time/fallback", false /* sync */, "Incremented when a clock falls back to system calls due to a failed update")
+
+// CalibratedClock implements a clock that tracks a reference clock.
+//
+// Users should call Update at regular intervals of around ApproxUpdateInterval
+// to ensure that the clock does not drift significantly from the reference
+// clock.
+type CalibratedClock struct {
+	// mu protects the fields below.
+	// TODO(mpratt): consider a sequence counter for read locking.
+	mu sync.RWMutex
+
+	// ref samples the reference clock that this clock is calibrated
+	// against.
+	ref *sampler
+
+	// ready indicates that the fields below are ready for use calculating
+	// time.
+	ready bool
+
+	// params are the current timekeeping parameters.
+	params Parameters
+
+	// errorNS is the estimated clock error in nanoseconds.
+	errorNS ReferenceNS
+}
+
+// NewCalibratedClock returns a new CalibratedClock that tracks clock c.
+func NewCalibratedClock(c ClockID) *CalibratedClock {
+	clock := new(CalibratedClock)
+	clock.ref = newSampler(c)
+	return clock
+}
+
+// Debugf logs at debug level, prefixing the message with the clock ID.
+func (c *CalibratedClock) Debugf(format string, v ...interface{}) {
+	if !log.IsLogging(log.Debug) {
+		return
+	}
+	args := append([]interface{}{c.ref.clockID}, v...)
+	log.Debugf("CalibratedClock(%v): "+format, args...)
+}
+
+// Infof logs at info level, prefixing the message with the clock ID.
+func (c *CalibratedClock) Infof(format string, v ...interface{}) {
+	if !log.IsLogging(log.Info) {
+		return
+	}
+	args := append([]interface{}{c.ref.clockID}, v...)
+	log.Infof("CalibratedClock(%v): "+format, args...)
+}
+
+// Warningf logs at warning level, prefixing the message with the clock ID.
+func (c *CalibratedClock) Warningf(format string, v ...interface{}) {
+	if !log.IsLogging(log.Warning) {
+		return
+	}
+	args := append([]interface{}{c.ref.clockID}, v...)
+	log.Warningf("CalibratedClock(%v): "+format, args...)
+}
+
+// reset forces the clock to restart the calibration process, logging the
+// passed message. It takes c.mu for writing, so c.mu must not already be held.
+func (c *CalibratedClock) reset(str string, v ...interface{}) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.resetLocked(str, v...)
+}
+
+// resetLocked is equivalent to reset with c.mu already held for writing.
+func (c *CalibratedClock) resetLocked(str string, v ...interface{}) {
+	c.Warningf(str+" Resetting clock; time may jump.", v...)
+	c.ready = false // GetTime falls back to syscalls while not ready.
+	c.ref.Reset()   // Discard collected samples so calibration starts over.
+	fallbackMetric.Increment()
+}
+
+// updateParams installs new timekeeping parameters derived from actual.
+//
+// actual holds the freshly estimated parameters. On every update after the
+// first, the stored parameters are an error-adjusted variant of actual rather
+// than actual itself.
+//
+// Preconditions: c.mu must be held for writing.
+func (c *CalibratedClock) updateParams(actual Parameters) {
+	if !c.ready {
+		// First calibration: no previous parameters exist to
+		// correct against, so adopt the estimate as is.
+		c.params, c.ready = actual, true
+		c.Infof("ready")
+		return
+	}
+
+	// Blend the previous parameters into the new estimate so that the
+	// reported time does not jump.
+	adjusted, clockErr, err := errorAdjust(c.params, actual, actual.BaseCycles)
+	if err != nil {
+		// Something is very wrong. Reset and try again from the
+		// beginning.
+		c.resetLocked("Unable to update params: %v.", err)
+		return
+	}
+	logErrorAdjustment(c.ref.clockID, clockErr, c.params, adjusted)
+
+	if clockErr.Magnitude() >= MaxClockError {
+		// Error this extreme should never occur; start calibration
+		// over instead of trying to correct it.
+		//
+		// N.B. logErrorAdjustment will have already logged the error
+		// at warning level.
+		//
+		// TODO(mpratt): We could allow Realtime clock jumps here.
+		c.resetLocked("Extreme clock error.")
+		return
+	}
+
+	// Commit the adjusted parameters together with the measured error.
+	c.params = adjusted
+	c.errorNS = clockErr
+}
+
+// Update runs the update step of the clock, updating its synchronization with
+// the reference clock.
+//
+// Update returns the new timekeeping parameters and true if the clock is
+// calibrated once the step completes, and false if it is not calibrated yet
+// or had to be reset. Update should be called regularly to prevent the clock
+// from getting significantly out of sync from the reference clock.
+//
+// The returned timekeeping parameters are invalidated on the next call to Update.
+func (c *CalibratedClock) Update() (Parameters, bool) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if err := c.ref.Sample(); err != nil {
+		c.resetLocked("Unable to update calibrated clock: %v.", err)
+		return Parameters{}, false
+	}
+
+	oldest, newest, ok := c.ref.Range()
+	if !ok {
+		// Not ready yet.
+		return Parameters{}, false
+	}
+
+	minCount := uint64(newest.before - oldest.after)
+	maxCount := uint64(newest.after - oldest.before)
+	refInterval := uint64(newest.ref - oldest.ref)
+
+	// freq hz = count / (interval ns) * (nsPerS ns) / (1 s)
+	nsPerS := uint64(time.Second.Nanoseconds())
+
+	minHz, ok := muldiv64(minCount, nsPerS, refInterval)
+	if !ok {
+		c.resetLocked("Unable to update calibrated clock: (%v - %v) * %v / %v overflows.", newest.before, oldest.after, nsPerS, refInterval)
+		return Parameters{}, false
+	}
+
+	maxHz, ok := muldiv64(maxCount, nsPerS, refInterval)
+	if !ok {
+		c.resetLocked("Unable to update calibrated clock: (%v - %v) * %v / %v overflows.", newest.after, oldest.before, nsPerS, refInterval)
+		return Parameters{}, false
+	}
+
+	c.updateParams(Parameters{
+		Frequency:  (minHz + maxHz) / 2,
+		BaseRef:    newest.ref,
+		BaseCycles: newest.after,
+	})
+	// updateParams resets the clock on failure, so report ok only if still ready.
+	return c.params, c.ready
+}
+
+// GetTime returns the current time based on the clock calibration, falling
+// back to a syscall if the clock is not calibrated.
+func (c *CalibratedClock) GetTime() (int64, error) {
+	c.mu.RLock()
+	if c.ready {
+		now := c.ref.Cycles()
+		params := c.params
+		if v, ok := params.ComputeTime(now); ok {
+			c.mu.RUnlock()
+			return v, nil
+		}
+
+		// Something is seriously wrong with the clock. Resetting
+		// writes fields guarded by c.mu, so swap the read lock for
+		// the write lock taken by reset, then retry with syscalls.
+		c.mu.RUnlock()
+		c.reset("Time computation overflowed. params = %+v, now = %v.", params, now)
+		c.mu.RLock()
+	}
+
+	// Fallback to a syscall.
+	now, err := c.ref.Syscall()
+	c.mu.RUnlock()
+	return int64(now), err
+}
+
+// CalibratedClocks contains calibrated monotonic and realtime clocks.
+//
+// TODO(mpratt): We know that Linux runs the monotonic and realtime clocks at
+// the same rate, so rather than tracking both individually, we could do one
+// calibration for both clocks.
+type CalibratedClocks struct {
+	// monotonic is the clock tracking the system monotonic clock.
+	monotonic *CalibratedClock
+
+	// realtime is the clock tracking the system realtime clock.
+	realtime *CalibratedClock
+}
+
+// NewCalibratedClocks returns a CalibratedClocks with new monotonic and realtime clocks.
+func NewCalibratedClocks() *CalibratedClocks {
+	clocks := new(CalibratedClocks)
+	clocks.monotonic = NewCalibratedClock(Monotonic)
+	clocks.realtime = NewCalibratedClock(Realtime)
+	return clocks
+}
+
+// Update implements Clocks.Update.
+func (c *CalibratedClocks) Update() (Parameters, bool, Parameters, bool) {
+	// The monotonic clock is updated first, then the realtime clock.
+	monoParams, monoOK := c.monotonic.Update()
+	realParams, realOK := c.realtime.Update()
+	return monoParams, monoOK, realParams, realOK
+}
+
+// GetTime implements Clocks.GetTime.
+func (c *CalibratedClocks) GetTime(id ClockID) (int64, error) {
+	if id == Monotonic {
+		return c.monotonic.GetTime()
+	}
+	if id == Realtime {
+		return c.realtime.GetTime()
+	}
+	// Any other clock ID is unsupported.
+	return 0, syserror.EINVAL
+}
diff --git a/pkg/sentry/time/clock_id.go b/pkg/sentry/time/clock_id.go
new file mode 100644
index 000000000..724f59dd9
--- /dev/null
+++ b/pkg/sentry/time/clock_id.go
@@ -0,0 +1,40 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"strconv"
+)
+
+// ClockID is a Linux clock identifier.
+type ClockID int32
+
+// These are the supported Linux clock identifiers.
+const (
+	Realtime ClockID = iota // 0
+	Monotonic               // 1
+)
+
+// String implements fmt.Stringer.String.
+func (c ClockID) String() string {
+	if c == Realtime {
+		return "Realtime"
+	}
+	if c == Monotonic {
+		return "Monotonic"
+	}
+	// Unknown IDs are rendered as their decimal value.
+	return strconv.Itoa(int(c))
+}
diff --git a/pkg/sentry/time/clocks.go b/pkg/sentry/time/clocks.go
new file mode 100644
index 000000000..837e86094
--- /dev/null
+++ b/pkg/sentry/time/clocks.go
@@ -0,0 +1,31 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+// Clocks represents a clock source that contains both a monotonic and realtime
+// clock.
+type Clocks interface {
+	// Update performs an update step, keeping the clocks in sync with the
+	// reference host clocks, and returning the new timekeeping parameters.
+	// Each Ok result says whether the parameters returned before it are valid.
+	// Update should be called at approximately ApproxUpdateInterval.
+	Update() (monotonicParams Parameters, monotonicOk bool, realtimeParam Parameters, realtimeOk bool)
+
+	// GetTime returns the current time in nanoseconds for the given clock.
+	//
+	// Clocks implementations must support at least Monotonic and
+	// Realtime.
+	GetTime(c ClockID) (int64, error)
+}
diff --git a/pkg/sentry/time/muldiv_amd64.s b/pkg/sentry/time/muldiv_amd64.s
new file mode 100644
index 000000000..028c6684e
--- /dev/null
+++ b/pkg/sentry/time/muldiv_amd64.s
@@ -0,0 +1,44 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "textflag.h"
+
+// Documentation is available in parameters.go.
+//
+// func muldiv64(value, multiplier, divisor uint64) (uint64, bool)
+TEXT ·muldiv64(SB),NOSPLIT,$0-33
+	MOVQ value+0(FP), AX
+	MOVQ multiplier+8(FP), BX
+	MOVQ divisor+16(FP), CX
+
+	// Multiply AX*BX and store result in DX:AX.
+	MULQ BX
+
+	// If divisor <= (value*multiplier) / 2^64, then the division will overflow.
+	// The operands are unsigned, so use the unsigned branch (JLS), not JLE.
+	// (value*multiplier) / 2^64 is DX:AX >> 64, or simply DX.
+	CMPQ CX, DX
+	JLS overflow
+
+	// Divide DX:AX by CX.
+	DIVQ CX
+
+	MOVQ AX, result+24(FP)
+	MOVB $1, ok+32(FP)
+	RET
+
+overflow:
+	MOVQ $0, result+24(FP)
+	MOVB $0, ok+32(FP)
+	RET
diff --git a/pkg/sentry/time/muldiv_arm64.s b/pkg/sentry/time/muldiv_arm64.s
new file mode 100644
index 000000000..5ad57a8a3
--- /dev/null
+++ b/pkg/sentry/time/muldiv_arm64.s
@@ -0,0 +1,44 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "textflag.h"
+
+// Documentation is available in parameters.go.
+//
+// func muldiv64(value, multiplier, divisor uint64) (uint64, bool)
+TEXT ·muldiv64(SB),NOSPLIT,$40-33
+    MOVD    value+0(FP), R0
+    MOVD    multiplier+8(FP), R1
+    MOVD    divisor+16(FP), R2
+
+    UMULH   R0, R1, R3 // R3 = high 64 bits of value*multiplier
+    MUL     R0, R1, R4 // R4 = low 64 bits of value*multiplier
+
+    CMP     R2, R3 // compare the high word (R3) with the divisor (R2)
+    BHS     overflow // unsigned R3 >= R2: the quotient would not fit in 64 bits
+
+    MOVD    R3, 8(RSP) // divWW argument u1 (high word)
+    MOVD    R4, 16(RSP) // divWW argument u0 (low word)
+    MOVD    R2, 24(RSP) // divWW argument v (divisor)
+    CALL    ·divWW(SB)
+    MOVD    32(RSP), R0 // first divWW result (quotient q); the remainder is unused
+    MOVD    R0, result+24(FP)
+    MOVD    $1, R0
+    MOVB    R0, ok+32(FP)
+    RET
+
+overflow:
+    MOVD    ZR, result+24(FP)
+    MOVB    ZR, ok+32(FP)
+    RET
diff --git a/pkg/sentry/time/parameters.go b/pkg/sentry/time/parameters.go
new file mode 100644
index 000000000..63cf7c4a3
--- /dev/null
+++ b/pkg/sentry/time/parameters.go
@@ -0,0 +1,239 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"fmt"
+	"time"
+
+	"gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+const (
+	// ApproxUpdateInterval is the approximate interval that parameters
+	// should be updated at.
+	//
+	// Error correction assumes that the next update will occur after this
+	// much time.
+	//
+	// If an update occurs before ApproxUpdateInterval passes, it has no
+	// adverse effect on error correction behavior.
+	//
+	// If an update occurs after ApproxUpdateInterval passes, the clock
+	// will overshoot its error correction target and begin accumulating
+	// error in the other direction.
+	//
+	// If updates occur after more than 2*ApproxUpdateInterval passes, the
+	// clock becomes unstable, accumulating more error than it had
+	// originally. Repeated updates after more than 2*ApproxUpdateInterval
+	// will cause unbounded increases in error.
+	//
+	// These statements assume that the host clock does not change. Actual
+	// error will depend upon host clock changes.
+	//
+	// TODO(b/68779214): make error correction more robust to delayed
+	// updates.
+	ApproxUpdateInterval = 1 * time.Second
+
+	// MaxClockError is the maximum amount of error that the clocks will
+	// try to correct.
+	//
+	// This limit:
+	//
+	//  * Puts a limit on cases of otherwise unbounded increases in error.
+	//
+	//  * Avoids unreasonably large frequency adjustments required to
+	//    correct large errors over a single update interval.
+	MaxClockError = ReferenceNS(ApproxUpdateInterval) / 4 // 250ms for the 1s ApproxUpdateInterval
+)
+
+// Parameters are the timekeeping parameters needed to compute the current
+// time: BaseRef + (nowCycles - BaseCycles) * 10^9 / Frequency nanoseconds.
+type Parameters struct {
+	// BaseCycles was the TSC counter value when the time was BaseRef.
+	BaseCycles TSCValue
+
+	// BaseRef is the reference clock time in nanoseconds corresponding to
+	// BaseCycles.
+	BaseRef ReferenceNS
+
+	// Frequency is the frequency of the cycle clock in Hertz.
+	Frequency uint64
+}
+
+// muldiv64 multiplies two 64-bit numbers, then divides the result by another
+// 64-bit number. It has no Go body; see the muldiv_*.s assembly files.
+//
+// It requires that the result fit in 64 bits, but doesn't require that
+// intermediate values do; in particular, the result of the multiplication may
+// require 128 bits.
+//
+// It returns !ok if divisor is zero or the result does not fit in 64 bits.
+func muldiv64(value, multiplier, divisor uint64) (uint64, bool)
+
+// ComputeTime converts the TSC reading nowCycles into a time in nanoseconds.
+//
+// time = ref + (now - base) / f
+func (p Parameters) ComputeTime(nowCycles TSCValue) (int64, bool) {
+	elapsed := nowCycles - p.BaseCycles
+	if elapsed < 0 {
+		// Clamp so a reading from before the base maps to BaseRef.
+		log.Warningf("now cycles %v < base cycles %v", nowCycles, p.BaseCycles)
+		elapsed = 0
+	}
+
+	// Overflow "won't ever happen". With elapsed at its maximum value
+	// (2^63 - 1), the quotient only exceeds 64 bits when
+	// frequency <= ((2^63 - 1) * 10^9) / 2^64 = 500MHz, and a TSC
+	// running at 2GHz needs well over a century to count that many
+	// cycles.
+	nsPerSec := uint64(time.Second.Nanoseconds())
+	elapsedNS, ok := muldiv64(uint64(elapsed), nsPerSec, p.Frequency)
+	return int64(uint64(p.BaseRef) + elapsedNS), ok
+}
+
+// errorAdjust returns a new Parameters struct "adjusted" that satisfies:
+//
+// 1. adjusted.ComputeTime(now) = prevParams.ComputeTime(now)
+//   * i.e., the current time does not jump.
+//
+// 2. adjusted.ComputeTime(TSC at next update) = newParams.ComputeTime(TSC at next update)
+//   * i.e., Any error between prevParams and newParams will be corrected over
+//     the course of the next update period.
+//
+// errorAdjust also returns the current clock error.
+//
+// Preconditions:
+// * newParams.BaseCycles >= prevParams.BaseCycles; i.e., TSC must not go
+//   backwards.
+// * newParams.BaseCycles <= now; i.e., the new parameters must be computed at
+//   or before now.
+func errorAdjust(prevParams Parameters, newParams Parameters, now TSCValue) (Parameters, ReferenceNS, error) {
+	if newParams.BaseCycles < prevParams.BaseCycles {
+		// Oh dear! Something is very wrong.
+		return Parameters{}, 0, fmt.Errorf("TSC went backwards in updated clock params: %v < %v", newParams.BaseCycles, prevParams.BaseCycles)
+	}
+	if newParams.BaseCycles > now {
+		return Parameters{}, 0, fmt.Errorf("parameters contain base cycles later than now: %v > %v", newParams.BaseCycles, now)
+	}
+
+	intervalNS := int64(ApproxUpdateInterval.Nanoseconds())
+	nsPerSec := uint64(time.Second.Nanoseconds())
+
+	// Current time as computed by prevParams.
+	oldNowNS, ok := prevParams.ComputeTime(now)
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("old now time computation overflowed. params = %+v, now = %v", prevParams, now)
+	}
+
+	// We expect the update ticker to run based on this clock (i.e., it has
+	// been using prevParams and will use the returned adjusted
+	// parameters). Hence it will decide to fire intervalNS from the
+	// current (oldNowNS) "now".
+	nextNS := oldNowNS + intervalNS
+
+	if nextNS <= int64(newParams.BaseRef) {
+		// The next update time already passed before the new
+		// parameters were created! We definitely can't correct the
+		// error by then.
+		return Parameters{}, 0, fmt.Errorf("unable to correct error in single period. oldNowNS = %v, nextNS = %v, p = %v", oldNowNS, nextNS, newParams)
+	}
+
+	// For what TSC value next will newParams.ComputeTime(next) = nextNS?
+	//
+	// Solve ComputeTime for next:
+	//
+	// next = newParams.Frequency * (nextNS - newParams.BaseRef) / nsPerSec + newParams.BaseCycles
+	c, ok := muldiv64(newParams.Frequency, uint64(nextNS-int64(newParams.BaseRef)), nsPerSec)
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("%v * (%v - %v) / %v overflows", newParams.Frequency, nextNS, newParams.BaseRef, nsPerSec)
+	}
+
+	cycles := TSCValue(c)
+	next := cycles + newParams.BaseCycles
+
+	if next <= now {
+		// The next update time already passed now with the new
+		// parameters! We can't correct the error in a single period.
+		return Parameters{}, 0, fmt.Errorf("unable to correct error in single period. oldNowNS = %v, nextNS = %v, now = %v, next = %v", oldNowNS, nextNS, now, next)
+	}
+
+	// We want to solve for parameters that satisfy:
+	//
+	// adjusted.ComputeTime(now) = oldNowNS
+	//
+	// adjusted.ComputeTime(next) = nextNS
+	//
+	// i.e., the current time does not change, but by the time we reach
+	// next we reach the same time as newParams.
+
+	// We choose to keep BaseCycles fixed.
+	adjusted := Parameters{
+		BaseCycles: newParams.BaseCycles,
+	}
+
+	// We want a slope such that time goes from oldNowNS to nextNS when
+	// we reach next.
+	//
+	// In other words, cycles should increase by next - now in the next
+	// interval.
+
+	cycles = next - now
+	ns := intervalNS
+
+	// adjusted.Frequency = cycles / ns, scaled by nsPerSec to give Hertz.
+	adjusted.Frequency, ok = muldiv64(uint64(cycles), nsPerSec, uint64(ns))
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("(%v - %v) * %v / %v overflows", next, now, nsPerSec, ns)
+	}
+
+	// Now choose a base reference such that the current time remains the
+	// same. Note that this is just ComputeTime, solving for BaseRef:
+	//
+	// oldNowNS = BaseRef + (now - BaseCycles) / Frequency
+	// BaseRef = oldNowNS - (now - BaseCycles) / Frequency
+	diffNS, ok := muldiv64(uint64(now-adjusted.BaseCycles), nsPerSec, adjusted.Frequency)
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("(%v - %v) * %v / %v overflows", now, adjusted.BaseCycles, nsPerSec, adjusted.Frequency)
+	}
+
+	adjusted.BaseRef = ReferenceNS(oldNowNS - int64(diffNS))
+
+	// The error is the difference between the current time and what the
+	// new parameters say the current time should be.
+	newNowNS, ok := newParams.ComputeTime(now)
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("new now time computation overflowed. params = %+v, now = %v", newParams, now)
+	}
+
+	errorNS := ReferenceNS(oldNowNS - newNowNS)
+
+	return adjusted, errorNS, nil
+}
+
+// logErrorAdjustment logs the clock error and the frequency adjustment made
+// to correct it. Larger errors are logged at a more severe level: above 1ms
+// at warning, above 10us at info, and otherwise at debug.
+func logErrorAdjustment(clock ClockID, errorNS ReferenceNS, orig, adjusted Parameters) {
+	magnitudeNS := int64(errorNS.Magnitude())
+	logf := log.Debugf
+	switch {
+	case magnitudeNS > time.Millisecond.Nanoseconds():
+		logf = log.Warningf
+	case magnitudeNS > 10*time.Microsecond.Nanoseconds():
+		logf = log.Infof
+	}
+	logf("Clock(%v): error: %v ns, adjusted frequency from %v Hz to %v Hz", clock, errorNS, orig.Frequency, adjusted.Frequency)
+}
diff --git a/pkg/sentry/time/sampler.go b/pkg/sentry/time/sampler.go
new file mode 100644
index 000000000..2140a99b7
--- /dev/null
+++ b/pkg/sentry/time/sampler.go
@@ -0,0 +1,225 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"errors"
+
+	"gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+const (
+	// defaultOverheadCycles is the default estimated syscall overhead in TSC
+	// cycles. It is further refined as syscalls are made.
+	defaultOverheadCycles = 1 * 1000
+
+	// maxOverheadCycles is the maximum allowed syscall overhead in TSC cycles.
+	maxOverheadCycles = 100 * defaultOverheadCycles
+
+	// maxSampleLoops is the maximum number of times to try to get a clock sample
+	// under the expected overhead.
+	maxSampleLoops = 5
+
+	// maxSamples is the maximum number of samples to collect.
+	maxSamples = 10
+)
+
+// errOverheadTooHigh is returned from sampler.Sample if the syscall
+// overhead is too high.
+var errOverheadTooHigh = errors.New("time syscall overhead exceeds maximum")
+
+// TSCValue is a value from the TSC.
+type TSCValue int64
+
+// Rdtsc reads the TSC. It is declared without a Go body here.
+//
+// Intel SDM, Vol 3, Ch 17.15:
+// "The RDTSC instruction reads the time-stamp counter and is guaranteed to
+// return a monotonically increasing unique value whenever executed, except for
+// a 64-bit counter wraparound. Intel guarantees that the time-stamp counter
+// will not wraparound within 10 years after being reset."
+//
+// We use int64, so we have 5 years before wrap-around.
+func Rdtsc() TSCValue
+
+// ReferenceNS are nanoseconds in the reference clock domain.
+// int64 gives us ~290 years before this overflows.
+type ReferenceNS int64
+
+// Magnitude returns the absolute value of r.
+func (r ReferenceNS) Magnitude() ReferenceNS {
+	if r >= 0 {
+		return r
+	}
+	return -r
+}
+
+// cycleClock is a TSC-based cycle clock.
+type cycleClock interface {
+	// Cycles returns a count value from the TSC.
+	Cycles() TSCValue
+}
+
+// tscCycleClock is a cycleClock that uses the real TSC.
+type tscCycleClock struct{}
+
+// Cycles implements cycleClock.Cycles.
+func (tscCycleClock) Cycles() TSCValue {
+	return Rdtsc()
+}
+
+// sample contains a sample from the reference clock, with TSC values from
+// before and after the reference clock value was captured.
+type sample struct {
+	before TSCValue    // TSC value read just before the reference clock
+	after  TSCValue    // TSC value read just after the reference clock
+	ref    ReferenceNS // reference clock time, in nanoseconds
+}
+
+// Overhead returns the sample overhead in TSC cycles.
+func (s *sample) Overhead() TSCValue {
+	return s.after - s.before
+}
+
+// referenceClocks collects individual samples from a reference clock ID and
+// TSC.
+type referenceClocks interface {
+	cycleClock
+
+	// Sample returns a single sample from the reference clock ID.
+	Sample(c ClockID) (sample, error)
+}
+
+// sampler collects samples from a reference system clock, minimizing
+// the overhead in each sample.
+type sampler struct {
+	// clockID is the reference clock ID (e.g., CLOCK_MONOTONIC).
+	clockID ClockID
+
+	// clocks provides raw samples.
+	clocks referenceClocks
+
+	// overhead is the estimated sample overhead in TSC cycles.
+	overhead TSCValue
+
+	// samples holds the latest samples collected, oldest first.
+	samples []sample
+}
+
+// newSampler creates a sampler for clock c with the default overhead estimate.
+func newSampler(c ClockID) *sampler {
+	s := new(sampler)
+	s.clockID = c
+	s.clocks = syscallTSCReferenceClocks{}
+	s.overhead = defaultOverheadCycles
+	return s
+}
+
+// Reset drops all collected samples and restores the default overhead estimate.
+func (s *sampler) Reset() {
+	s.samples = []sample{}
+	s.overhead = defaultOverheadCycles
+}
+
+// lowOverheadSample returns a reference clock sample whose overhead is within s.overhead, raising s.overhead as needed.
+func (s *sampler) lowOverheadSample() (sample, error) {
+	for {
+		for i := 0; i < maxSampleLoops; i++ { // a bounded number of attempts per overhead level
+			samp, err := s.clocks.Sample(s.clockID)
+			if err != nil {
+				return sample{}, err
+			}
+
+			if samp.before > samp.after {
+				log.Warningf("TSC went backwards: %v > %v", samp.before, samp.after)
+				continue // discard the sample; it still uses up an attempt
+			}
+
+			if samp.Overhead() <= s.overhead {
+				return samp, nil
+			}
+		}
+
+		// Couldn't get a sample with the current overhead. Increase it.
+		newOverhead := 2 * s.overhead
+		if newOverhead > maxOverheadCycles {
+			// We'll give it one more shot with the max overhead.
+
+			if s.overhead == maxOverheadCycles { // the maximum was already tried
+				return sample{}, errOverheadTooHigh
+			}
+
+			newOverhead = maxOverheadCycles
+		}
+
+		s.overhead = newOverhead
+		log.Debugf("Time: Adjusting syscall overhead up to %v", s.overhead)
+	}
+}
+
+// Sample collects a reference clock sample.
+func (s *sampler) Sample() error {
+	sample, err := s.lowOverheadSample()
+	if err != nil {
+		return err
+	}
+
+	s.samples = append(s.samples, sample)
+	if len(s.samples) > maxSamples {
+		s.samples = s.samples[1:]
+	}
+
+	// If the 4 most recent samples all have an overhead less than half the
+	// expected overhead, adjust downwards.
+	if len(s.samples) < 4 {
+		return nil
+	}
+
+	for _, sample := range s.samples[len(s.samples)-4:] {
+		if sample.Overhead() > s.overhead/2 {
+			return nil
+		}
+	}
+
+	s.overhead -= s.overhead / 8
+	log.Debugf("Time: Adjusting syscall overhead down to %v", s.overhead)
+
+	return nil
+}
+
+// Syscall returns the current raw reference time without storing TSC
+// samples.
+func (s *sampler) Syscall() (ReferenceNS, error) {
+	samp, err := s.clocks.Sample(s.clockID)
+	if err != nil {
+		return 0, err
+	}
+	// Only the reference time is used; the TSC readings are discarded.
+	return samp.ref, nil
+}
+
+// Cycles returns a raw TSC value.
+func (s *sampler) Cycles() TSCValue {
+	return s.clocks.Cycles()
+}
+
+// Range returns the widest range of clock samples available: the oldest and
+// the newest sample. It reports false until at least two samples exist.
+func (s *sampler) Range() (sample, sample, bool) {
+	if n := len(s.samples); n >= 2 {
+		return s.samples[0], s.samples[n-1], true
+	}
+	return sample{}, sample{}, false
+}
diff --git a/pkg/sentry/time/sampler_unsafe.go b/pkg/sentry/time/sampler_unsafe.go
new file mode 100644
index 000000000..e76180217
--- /dev/null
+++ b/pkg/sentry/time/sampler_unsafe.go
@@ -0,0 +1,56 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// syscallTSCReferenceClocks is the standard referenceClocks, collecting
+// samples using CLOCK_GETTIME and RDTSC.
+type syscallTSCReferenceClocks struct {
+	tscCycleClock // embedded; NOTE(review): presumably supplies the Cycles method — confirm
+}
+
+// Sample implements sampler.Sample. It reads Rdtsc, then clock c via a raw syscall, then Rdtsc again.
+func (syscallTSCReferenceClocks) Sample(c ClockID) (sample, error) {
+	var s sample
+
+	s.before = Rdtsc() // counter value taken immediately before the syscall
+
+	// Don't call clockGettime to avoid a call which may call morestack.
+	var ts syscall.Timespec
+	_, _, e := syscall.RawSyscall(syscall.SYS_CLOCK_GETTIME, uintptr(c), uintptr(unsafe.Pointer(&ts)), 0)
+	if e != 0 {
+		return sample{}, e // nonzero errno: return it with an empty sample
+	}
+
+	s.after = Rdtsc() // counter value taken immediately after the syscall
+	s.ref = ReferenceNS(ts.Nano())
+
+	return s, nil
+}
+
+// clockGettime issues a raw clock_gettime syscall for clock c and returns
+// the resulting time in nanoseconds, or the errno on failure.
+func clockGettime(c ClockID) (ReferenceNS, error) {
+	var now syscall.Timespec
+	if _, _, errno := syscall.RawSyscall(syscall.SYS_CLOCK_GETTIME, uintptr(c), uintptr(unsafe.Pointer(&now)), 0); errno != 0 {
+		return 0, errno
+	}
+
+	return ReferenceNS(now.Nano()), nil
+}
diff --git a/pkg/sentry/time/seqatomic_parameters.go b/pkg/sentry/time/seqatomic_parameters.go
new file mode 100755
index 000000000..ecbea4d94
--- /dev/null
+++ b/pkg/sentry/time/seqatomic_parameters.go
@@ -0,0 +1,55 @@
+package time
+
+import (
+	"reflect"
+	"strings"
+	"unsafe"
+
+	"fmt"
+	"gvisor.googlesource.com/gvisor/third_party/gvsync"
+)
+
+// SeqAtomicLoadParameters returns a copy of *ptr, ensuring that the read does
+// not race with any writer critical sections in sc.
+func SeqAtomicLoadParameters(sc *gvsync.SeqCount, ptr *Parameters) Parameters {
+	// This function doesn't use SeqAtomicTryLoadParameters because doing so is
+	// measurably, significantly (~20%) slower; Go is awful at inlining.
+	var val Parameters
+	for {
+		epoch := sc.BeginRead()
+		if gvsync.RaceEnabled {
+			// NOTE(review): presumably Memmove keeps the race detector from flagging this copy — confirm.
+			gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val))
+		} else {
+			// Plain copy; it is only returned once ReadOk below accepts epoch.
+			val = *ptr
+		}
+		if sc.ReadOk(epoch) {
+			break
+		}
+	}
+	return val
+}
+
+// SeqAtomicTryLoadParameters returns a copy of *ptr while in a reader
+// critical section in sc initiated by a call to sc.BeginRead() that returned
+// epoch. If the read would race with a writer critical section,
+// SeqAtomicTryLoadParameters returns (unspecified, false).
+func SeqAtomicTryLoadParameters(sc *gvsync.SeqCount, epoch gvsync.SeqCountEpoch, ptr *Parameters) (Parameters, bool) {
+	var val Parameters
+	if gvsync.RaceEnabled {
+		gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val))
+	} else {
+		val = *ptr
+	}
+	return val, sc.ReadOk(epoch)
+}
+
+// initParameters panics if gvsync.PointersInType reports any pointers in the Parameters type.
+func initParameters() {
+	var zero Parameters
+	typ := reflect.TypeOf(zero)
+	if ptrs := gvsync.PointersInType(typ, typ.Name()); len(ptrs) != 0 {
+		panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, typ.Name(), typ, strings.Join(ptrs, "\n")))
+	}
+}
diff --git a/pkg/sentry/time/time_state_autogen.go b/pkg/sentry/time/time_state_autogen.go
new file mode 100755
index 000000000..ea614b056
--- /dev/null
+++ b/pkg/sentry/time/time_state_autogen.go
@@ -0,0 +1,4 @@
+// automatically generated by stateify.
+
+package time
+
diff --git a/pkg/sentry/time/tsc_amd64.s b/pkg/sentry/time/tsc_amd64.s
new file mode 100644
index 000000000..6a8eed664
--- /dev/null
+++ b/pkg/sentry/time/tsc_amd64.s
@@ -0,0 +1,27 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "textflag.h"
+
+TEXT ·Rdtsc(SB),NOSPLIT,$0-8
+	// N.B. We need LFENCE on Intel, AMD is more complicated.
+	// Modern AMD CPUs with modern kernels make LFENCE behave like it does
+	// on Intel with MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT. MFENCE is
+	// otherwise needed on AMD.
+	LFENCE
+	RDTSC			// counter: high 32 bits in DX, low 32 bits in AX
+	SHLQ	$32, DX		// move the high half into bits 32-63
+	ADDQ	DX, AX		// AX = high<<32 + low
+	MOVQ	AX, ret+0(FP)	// store the 64-bit result in the return slot
+	RET
diff --git a/pkg/sentry/time/tsc_arm64.s b/pkg/sentry/time/tsc_arm64.s
new file mode 100644
index 000000000..da9fa4112
--- /dev/null
+++ b/pkg/sentry/time/tsc_arm64.s
@@ -0,0 +1,22 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "textflag.h"
+
+TEXT ·Rdtsc(SB),NOSPLIT,$0-8
+	// Get the virtual counter (CNTVCT_EL0); the MRS below is hand-encoded as a raw WORD.
+	ISB	$15		// instruction barrier before reading the counter
+	WORD	$0xd53be040     //MRS	CNTVCT_EL0, R0
+	MOVD	R0, ret+0(FP)	// store the counter value in the return slot
+	RET
author	gVisor bot <gvisor-bot@google.com>	2019-06-02 06:44:55 +0000
committer	gVisor bot <gvisor-bot@google.com>	2019-06-02 06:44:55 +0000
commit	ceb0d792f328d1fc0692197d8856a43c3936a571 (patch)
tree	83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/sentry/time
parent	deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff)
parent	216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff)