diff options
Diffstat (limited to 'pkg/sentry/time')
-rw-r--r-- | pkg/sentry/time/arith_arm64.go | 70 | ||||
-rw-r--r-- | pkg/sentry/time/calibrated_clock.go | 269 | ||||
-rw-r--r-- | pkg/sentry/time/clock_id.go | 40 | ||||
-rw-r--r-- | pkg/sentry/time/clocks.go | 31 | ||||
-rw-r--r-- | pkg/sentry/time/muldiv_amd64.s | 44 | ||||
-rw-r--r-- | pkg/sentry/time/muldiv_arm64.s | 44 | ||||
-rw-r--r-- | pkg/sentry/time/parameters.go | 239 | ||||
-rw-r--r-- | pkg/sentry/time/sampler.go | 225 | ||||
-rw-r--r-- | pkg/sentry/time/sampler_unsafe.go | 56 | ||||
-rwxr-xr-x | pkg/sentry/time/seqatomic_parameters.go | 55 | ||||
-rwxr-xr-x | pkg/sentry/time/time_state_autogen.go | 4 | ||||
-rw-r--r-- | pkg/sentry/time/tsc_amd64.s | 27 | ||||
-rw-r--r-- | pkg/sentry/time/tsc_arm64.s | 22 |
13 files changed, 1126 insertions, 0 deletions
diff --git a/pkg/sentry/time/arith_arm64.go b/pkg/sentry/time/arith_arm64.go new file mode 100644 index 000000000..b94740c2a --- /dev/null +++ b/pkg/sentry/time/arith_arm64.go @@ -0,0 +1,70 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file provides a generic Go implementation of uint128 divided by uint64. + +// The code is derived from Go's generic math/big.divWW_g +// (src/math/big/arith.go), but is only used on ARM64. + +package time + +import "math/bits" + +type word uint + +const ( + _W = bits.UintSize // word size in bits + _W2 = _W / 2 // half word size in bits + _B2 = 1 << _W2 // half digit base + _M2 = _B2 - 1 // half digit mask +) + +// nlz returns the number of leading zeros in x. +// Wraps bits.LeadingZeros call for convenience. +func nlz(x word) uint { + return uint(bits.LeadingZeros(uint(x))) +} + +// q = (u1<<_W + u0 - r)/y +// Adapted from Warren, Hacker's Delight, p. 152. +func divWW(u1, u0, v word) (q, r word) { + if u1 >= v { + return 1<<_W - 1, 1<<_W - 1 + } + + s := nlz(v) + v <<= s + + vn1 := v >> _W2 + vn0 := v & _M2 + un32 := u1<<s | u0>>(_W-s) + un10 := u0 << s + un1 := un10 >> _W2 + un0 := un10 & _M2 + q1 := un32 / vn1 + rhat := un32 - q1*vn1 + + for q1 >= _B2 || q1*vn0 > _B2*rhat+un1 { + q1-- + rhat += vn1 + + if rhat >= _B2 { + break + } + } + + un21 := un32*_B2 + un1 - q1*v + q0 := un21 / vn1 + rhat = un21 - q0*vn1 + + for q0 >= _B2 || q0*vn0 > _B2*rhat+un0 { + q0-- + rhat += vn1 + if rhat >= _B2 { + break + } + } + + return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s +} diff --git a/pkg/sentry/time/calibrated_clock.go b/pkg/sentry/time/calibrated_clock.go new file mode 100644 index 000000000..c27e391c9 --- /dev/null +++ b/pkg/sentry/time/calibrated_clock.go @@ -0,0 +1,269 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package time provides a calibrated clock synchronized to a system reference +// clock. +package time + +import ( + "sync" + "time" + + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/metric" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// fallbackMetric tracks failed updates. It is not sync, as it is not critical +// that all occurrences are captured and CalibratedClock may fallback many +// times. +var fallbackMetric = metric.MustCreateNewUint64Metric("/time/fallback", false /* sync */, "Incremented when a clock falls back to system calls due to a failed update") + +// CalibratedClock implements a clock that tracks a reference clock. +// +// Users should call Update at regular intervals of around approxUpdateInterval +// to ensure that the clock does not drift significantly from the reference +// clock. +type CalibratedClock struct { + // mu protects the fields below. + // TODO(mpratt): consider a sequence counter for read locking. + mu sync.RWMutex + + // ref sample the reference clock that this clock is calibrated + // against. + ref *sampler + + // ready indicates that the fields below are ready for use calculating + // time. + ready bool + + // params are the current timekeeping parameters. + params Parameters + + // errorNS is the estimated clock error in nanoseconds. + errorNS ReferenceNS +} + +// NewCalibratedClock creates a CalibratedClock that tracks the given ClockID. +func NewCalibratedClock(c ClockID) *CalibratedClock { + return &CalibratedClock{ + ref: newSampler(c), + } +} + +// Debugf logs at debug level. +func (c *CalibratedClock) Debugf(format string, v ...interface{}) { + if log.IsLogging(log.Debug) { + args := []interface{}{c.ref.clockID} + args = append(args, v...) + log.Debugf("CalibratedClock(%v): "+format, args...) + } +} + +// Infof logs at debug level. +func (c *CalibratedClock) Infof(format string, v ...interface{}) { + if log.IsLogging(log.Info) { + args := []interface{}{c.ref.clockID} + args = append(args, v...) + log.Infof("CalibratedClock(%v): "+format, args...) + } +} + +// Warningf logs at debug level. +func (c *CalibratedClock) Warningf(format string, v ...interface{}) { + if log.IsLogging(log.Warning) { + args := []interface{}{c.ref.clockID} + args = append(args, v...) + log.Warningf("CalibratedClock(%v): "+format, args...) + } +} + +// reset forces the clock to restart the calibration process, logging the +// passed message. +func (c *CalibratedClock) reset(str string, v ...interface{}) { + c.mu.Lock() + defer c.mu.Unlock() + c.resetLocked(str, v...) +} + +// resetLocked is equivalent to reset with c.mu already held for writing. +func (c *CalibratedClock) resetLocked(str string, v ...interface{}) { + c.Warningf(str+" Resetting clock; time may jump.", v...) + c.ready = false + c.ref.Reset() + fallbackMetric.Increment() +} + +// updateParams updates the timekeeping parameters based on the passed +// parameters. +// +// actual is the actual estimated timekeeping parameters. The stored parameters +// may need to be adjusted slightly from these values to compensate for error. +// +// Preconditions: c.mu must be held for writing. +func (c *CalibratedClock) updateParams(actual Parameters) { + if !c.ready { + // At initial calibration there is nothing to correct. + c.params = actual + c.ready = true + + c.Infof("ready") + + return + } + + // Otherwise, adjust the params to correct for errors. + newParams, errorNS, err := errorAdjust(c.params, actual, actual.BaseCycles) + if err != nil { + // Something is very wrong. Reset and try again from the + // beginning. + c.resetLocked("Unable to update params: %v.", err) + return + } + logErrorAdjustment(c.ref.clockID, errorNS, c.params, newParams) + + if errorNS.Magnitude() >= MaxClockError { + // We should never get such extreme error, something is very + // wrong. Reset everything and start again. + // + // N.B. logErrorAdjustment will have already logged the error + // at warning level. + // + // TODO(mpratt): We could allow Realtime clock jumps here. + c.resetLocked("Extreme clock error.") + return + } + + c.params = newParams + c.errorNS = errorNS +} + +// Update runs the update step of the clock, updating its synchronization with +// the reference clock. +// +// Update returns timekeeping and true with the new timekeeping parameters if +// the clock is calibrated. Update should be called regularly to prevent the +// clock from getting significantly out of sync from the reference clock. +// +// The returned timekeeping parameters are invalidated on the next call to +// Update. +func (c *CalibratedClock) Update() (Parameters, bool) { + c.mu.Lock() + defer c.mu.Unlock() + + if err := c.ref.Sample(); err != nil { + c.resetLocked("Unable to update calibrated clock: %v.", err) + return Parameters{}, false + } + + oldest, newest, ok := c.ref.Range() + if !ok { + // Not ready yet. + return Parameters{}, false + } + + minCount := uint64(newest.before - oldest.after) + maxCount := uint64(newest.after - oldest.before) + refInterval := uint64(newest.ref - oldest.ref) + + // freq hz = count / (interval ns) * (nsPerS ns) / (1 s) + nsPerS := uint64(time.Second.Nanoseconds()) + + minHz, ok := muldiv64(minCount, nsPerS, refInterval) + if !ok { + c.resetLocked("Unable to update calibrated clock: (%v - %v) * %v / %v overflows.", newest.before, oldest.after, nsPerS, refInterval) + return Parameters{}, false + } + + maxHz, ok := muldiv64(maxCount, nsPerS, refInterval) + if !ok { + c.resetLocked("Unable to update calibrated clock: (%v - %v) * %v / %v overflows.", newest.after, oldest.before, nsPerS, refInterval) + return Parameters{}, false + } + + c.updateParams(Parameters{ + Frequency: (minHz + maxHz) / 2, + BaseRef: newest.ref, + BaseCycles: newest.after, + }) + + return c.params, true +} + +// GetTime returns the current time based on the clock calibration. +func (c *CalibratedClock) GetTime() (int64, error) { + c.mu.RLock() + + if !c.ready { + // Fallback to a syscall. + now, err := c.ref.Syscall() + c.mu.RUnlock() + return int64(now), err + } + + now := c.ref.Cycles() + v, ok := c.params.ComputeTime(now) + if !ok { + // Something is seriously wrong with the clock. Try + // again with syscalls. + c.resetLocked("Time computation overflowed. params = %+v, now = %v.", c.params, now) + now, err := c.ref.Syscall() + c.mu.RUnlock() + return int64(now), err + } + + c.mu.RUnlock() + return v, nil +} + +// CalibratedClocks contains calibrated monotonic and realtime clocks. +// +// TODO(mpratt): We know that Linux runs the monotonic and realtime clocks at +// the same rate, so rather than tracking both individually, we could do one +// calibration for both clocks. +type CalibratedClocks struct { + // monotonic is the clock tracking the system monotonic clock. + monotonic *CalibratedClock + + // realtime is the realtime equivalent of monotonic. + realtime *CalibratedClock +} + +// NewCalibratedClocks creates a CalibratedClocks. +func NewCalibratedClocks() *CalibratedClocks { + return &CalibratedClocks{ + monotonic: NewCalibratedClock(Monotonic), + realtime: NewCalibratedClock(Realtime), + } +} + +// Update implements Clocks.Update. +func (c *CalibratedClocks) Update() (Parameters, bool, Parameters, bool) { + monotonicParams, monotonicOk := c.monotonic.Update() + realtimeParams, realtimeOk := c.realtime.Update() + + return monotonicParams, monotonicOk, realtimeParams, realtimeOk +} + +// GetTime implements Clocks.GetTime. +func (c *CalibratedClocks) GetTime(id ClockID) (int64, error) { + switch id { + case Monotonic: + return c.monotonic.GetTime() + case Realtime: + return c.realtime.GetTime() + default: + return 0, syserror.EINVAL + } +} diff --git a/pkg/sentry/time/clock_id.go b/pkg/sentry/time/clock_id.go new file mode 100644 index 000000000..724f59dd9 --- /dev/null +++ b/pkg/sentry/time/clock_id.go @@ -0,0 +1,40 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "strconv" +) + +// ClockID is a Linux clock identifier. +type ClockID int32 + +// These are the supported Linux clock identifiers. +const ( + Realtime ClockID = iota + Monotonic +) + +// String implements fmt.Stringer.String. +func (c ClockID) String() string { + switch c { + case Realtime: + return "Realtime" + case Monotonic: + return "Monotonic" + default: + return strconv.Itoa(int(c)) + } +} diff --git a/pkg/sentry/time/clocks.go b/pkg/sentry/time/clocks.go new file mode 100644 index 000000000..837e86094 --- /dev/null +++ b/pkg/sentry/time/clocks.go @@ -0,0 +1,31 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +// Clocks represents a clock source that contains both a monotonic and realtime +// clock. +type Clocks interface { + // Update performs an update step, keeping the clocks in sync with the + // reference host clocks, and returning the new timekeeping parameters. + // + // Update should be called at approximately ApproxUpdateInterval. + Update() (monotonicParams Parameters, monotonicOk bool, realtimeParam Parameters, realtimeOk bool) + + // GetTime returns the current time in nanoseconds for the given clock. + // + // Clocks implementations must support at least Monotonic and + // Realtime. + GetTime(c ClockID) (int64, error) +} diff --git a/pkg/sentry/time/muldiv_amd64.s b/pkg/sentry/time/muldiv_amd64.s new file mode 100644 index 000000000..028c6684e --- /dev/null +++ b/pkg/sentry/time/muldiv_amd64.s @@ -0,0 +1,44 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +// Documentation is available in parameters.go. +// +// func muldiv64(value, multiplier, divisor uint64) (uint64, bool) +TEXT ·muldiv64(SB),NOSPLIT,$0-33 + MOVQ value+0(FP), AX + MOVQ multiplier+8(FP), BX + MOVQ divisor+16(FP), CX + + // Multiply AX*BX and store result in DX:AX. + MULQ BX + + // If divisor <= (value*multiplier) / 2^64, then the division will overflow. + // + // (value*multiplier) / 2^64 is DX:AX >> 64, or simply DX. + CMPQ CX, DX + JLE overflow + + // Divide DX:AX by CX. + DIVQ CX + + MOVQ AX, result+24(FP) + MOVB $1, ok+32(FP) + RET + +overflow: + MOVQ $0, result+24(FP) + MOVB $0, ok+32(FP) + RET diff --git a/pkg/sentry/time/muldiv_arm64.s b/pkg/sentry/time/muldiv_arm64.s new file mode 100644 index 000000000..5ad57a8a3 --- /dev/null +++ b/pkg/sentry/time/muldiv_arm64.s @@ -0,0 +1,44 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +// Documentation is available in parameters.go. +// +// func muldiv64(value, multiplier, divisor uint64) (uint64, bool) +TEXT ·muldiv64(SB),NOSPLIT,$40-33 + MOVD value+0(FP), R0 + MOVD multiplier+8(FP), R1 + MOVD divisor+16(FP), R2 + + UMULH R0, R1, R3 + MUL R0, R1, R4 + + CMP R2, R3 + BHS overflow + + MOVD R3, 8(RSP) + MOVD R4, 16(RSP) + MOVD R2, 24(RSP) + CALL ·divWW(SB) + MOVD 32(RSP), R0 + MOVD R0, result+24(FP) + MOVD $1, R0 + MOVB R0, ok+32(FP) + RET + +overflow: + MOVD ZR, result+24(FP) + MOVB ZR, ok+32(FP) + RET diff --git a/pkg/sentry/time/parameters.go b/pkg/sentry/time/parameters.go new file mode 100644 index 000000000..63cf7c4a3 --- /dev/null +++ b/pkg/sentry/time/parameters.go @@ -0,0 +1,239 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "fmt" + "time" + + "gvisor.googlesource.com/gvisor/pkg/log" +) + +const ( + // ApproxUpdateInterval is the approximate interval that parameters + // should be updated at. + // + // Error correction assumes that the next update will occur after this + // much time. + // + // If an update occurs before ApproxUpdateInterval passes, it has no + // adverse effect on error correction behavior. + // + // If an update occurs after ApproxUpdateInterval passes, the clock + // will overshoot its error correction target and begin accumulating + // error in the other direction. + // + // If updates occur after more than 2*ApproxUpdateInterval passes, the + // clock becomes unstable, accumulating more error than it had + // originally. Repeated updates after more than 2*ApproxUpdateInterval + // will cause unbounded increases in error. + // + // These statements assume that the host clock does not change. Actual + // error will depend upon host clock changes. + // + // TODO(b/68779214): make error correction more robust to delayed + // updates. + ApproxUpdateInterval = 1 * time.Second + + // MaxClockError is the maximum amount of error that the clocks will + // try to correct. + // + // This limit: + // + // * Puts a limit on cases of otherwise unbounded increases in error. + // + // * Avoids unreasonably large frequency adjustments required to + // correct large errors over a single update interval. + MaxClockError = ReferenceNS(ApproxUpdateInterval) / 4 +) + +// Parameters are the timekeeping parameters needed to compute the current +// time. +type Parameters struct { + // BaseCycles was the TSC counter value when the time was BaseRef. + BaseCycles TSCValue + + // BaseRef is the reference clock time in nanoseconds corresponding to + // BaseCycles. + BaseRef ReferenceNS + + // Frequency is the frequency of the cycle clock in Hertz. + Frequency uint64 +} + +// muldiv64 multiplies two 64-bit numbers, then divides the result by another +// 64-bit number. +// +// It requires that the result fit in 64 bits, but doesn't require that +// intermediate values do; in particular, the result of the multiplication may +// require 128 bits. +// +// It returns !ok if divisor is zero or the result does not fit in 64 bits. +func muldiv64(value, multiplier, divisor uint64) (uint64, bool) + +// ComputeTime calculates the current time from a "now" TSC value. +// +// time = ref + (now - base) / f +func (p Parameters) ComputeTime(nowCycles TSCValue) (int64, bool) { + diffCycles := nowCycles - p.BaseCycles + if diffCycles < 0 { + log.Warningf("now cycles %v < base cycles %v", nowCycles, p.BaseCycles) + diffCycles = 0 + } + + // Overflow "won't ever happen". If diffCycles is the max value + // (2^63 - 1), then to overflow, + // + // frequency <= ((2^63 - 1) * 10^9) / 2^64 = 500Mhz + // + // A TSC running at 2GHz takes 201 years to reach 2^63-1. 805 years at + // 500MHz. + diffNS, ok := muldiv64(uint64(diffCycles), uint64(time.Second.Nanoseconds()), p.Frequency) + return int64(uint64(p.BaseRef) + diffNS), ok +} + +// errorAdjust returns a new Parameters struct "adjusted" that satisfies: +// +// 1. adjusted.ComputeTime(now) = prevParams.ComputeTime(now) +// * i.e., the current time does not jump. +// +// 2. adjusted.ComputeTime(TSC at next update) = newParams.ComputeTime(TSC at next update) +// * i.e., Any error between prevParams and newParams will be corrected over +// the course of the next update period. +// +// errorAdjust also returns the current clock error. +// +// Preconditions: +// * newParams.BaseCycles >= prevParams.BaseCycles; i.e., TSC must not go +// backwards. +// * newParams.BaseCycles <= now; i.e., the new parameters be computed at or +// before now. +func errorAdjust(prevParams Parameters, newParams Parameters, now TSCValue) (Parameters, ReferenceNS, error) { + if newParams.BaseCycles < prevParams.BaseCycles { + // Oh dear! Something is very wrong. + return Parameters{}, 0, fmt.Errorf("TSC went backwards in updated clock params: %v < %v", newParams.BaseCycles, prevParams.BaseCycles) + } + if newParams.BaseCycles > now { + return Parameters{}, 0, fmt.Errorf("parameters contain base cycles later than now: %v > %v", newParams.BaseCycles, now) + } + + intervalNS := int64(ApproxUpdateInterval.Nanoseconds()) + nsPerSec := uint64(time.Second.Nanoseconds()) + + // Current time as computed by prevParams. + oldNowNS, ok := prevParams.ComputeTime(now) + if !ok { + return Parameters{}, 0, fmt.Errorf("old now time computation overflowed. params = %+v, now = %v", prevParams, now) + } + + // We expect the update ticker to run based on this clock (i.e., it has + // been using prevParams and will use the returned adjusted + // parameters). Hence it will decide to fire intervalNS from the + // current (oldNowNS) "now". + nextNS := oldNowNS + intervalNS + + if nextNS <= int64(newParams.BaseRef) { + // The next update time already passed before the new + // parameters were created! We definitely can't correct the + // error by then. + return Parameters{}, 0, fmt.Errorf("unable to correct error in single period. oldNowNS = %v, nextNS = %v, p = %v", oldNowNS, nextNS, newParams) + } + + // For what TSC value next will newParams.ComputeTime(next) = nextNS? + // + // Solve ComputeTime for next: + // + // next = newParams.Frequency * (nextNS - newParams.BaseRef) + newParams.BaseCycles + c, ok := muldiv64(newParams.Frequency, uint64(nextNS-int64(newParams.BaseRef)), nsPerSec) + if !ok { + return Parameters{}, 0, fmt.Errorf("%v * (%v - %v) / %v overflows", newParams.Frequency, nextNS, newParams.BaseRef, nsPerSec) + } + + cycles := TSCValue(c) + next := cycles + newParams.BaseCycles + + if next <= now { + // The next update time already passed now with the new + // parameters! We can't correct the error in a single period. + return Parameters{}, 0, fmt.Errorf("unable to correct error in single period. oldNowNS = %v, nextNS = %v, now = %v, next = %v", oldNowNS, nextNS, now, next) + } + + // We want to solve for parameters that satisfy: + // + // adjusted.ComputeTime(now) = oldNowNS + // + // adjusted.ComputeTime(next) = nextNS + // + // i.e., the current time does not change, but by the time we reach + // next we reach the same time as newParams. + + // We choose to keep BaseCycles fixed. + adjusted := Parameters{ + BaseCycles: newParams.BaseCycles, + } + + // We want a slope such that time goes from oldNowNS to nextNS when + // we reach next. + // + // In other words, cycles should increase by next - now in the next + // interval. + + cycles = next - now + ns := intervalNS + + // adjusted.Frequency = cycles / ns + adjusted.Frequency, ok = muldiv64(uint64(cycles), nsPerSec, uint64(ns)) + if !ok { + return Parameters{}, 0, fmt.Errorf("(%v - %v) * %v / %v overflows", next, now, nsPerSec, ns) + } + + // Now choose a base reference such that the current time remains the + // same. Note that this is just ComputeTime, solving for BaseRef: + // + // oldNowNS = BaseRef + (now - BaseCycles) / Frequency + // BaseRef = oldNowNS - (now - BaseCycles) / Frequency + diffNS, ok := muldiv64(uint64(now-adjusted.BaseCycles), nsPerSec, adjusted.Frequency) + if !ok { + return Parameters{}, 0, fmt.Errorf("(%v - %v) * %v / %v overflows", now, adjusted.BaseCycles, nsPerSec, adjusted.Frequency) + } + + adjusted.BaseRef = ReferenceNS(oldNowNS - int64(diffNS)) + + // The error is the difference between the current time and what the + // new parameters say the current time should be. + newNowNS, ok := newParams.ComputeTime(now) + if !ok { + return Parameters{}, 0, fmt.Errorf("new now time computation overflowed. params = %+v, now = %v", newParams, now) + } + + errorNS := ReferenceNS(oldNowNS - newNowNS) + + return adjusted, errorNS, nil +} + +// logErrorAdjustment logs the clock error and associated error correction +// frequency adjustment. +// +// The log level is determined by the error severity. +func logErrorAdjustment(clock ClockID, errorNS ReferenceNS, orig, adjusted Parameters) { + fn := log.Debugf + if int64(errorNS.Magnitude()) > time.Millisecond.Nanoseconds() { + fn = log.Warningf + } else if int64(errorNS.Magnitude()) > 10*time.Microsecond.Nanoseconds() { + fn = log.Infof + } + + fn("Clock(%v): error: %v ns, adjusted frequency from %v Hz to %v Hz", clock, errorNS, orig.Frequency, adjusted.Frequency) +} diff --git a/pkg/sentry/time/sampler.go b/pkg/sentry/time/sampler.go new file mode 100644 index 000000000..2140a99b7 --- /dev/null +++ b/pkg/sentry/time/sampler.go @@ -0,0 +1,225 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "errors" + + "gvisor.googlesource.com/gvisor/pkg/log" +) + +const ( + // defaultOverheadTSC is the default estimated syscall overhead in TSC cycles. + // It is further refined as syscalls are made. + defaultOverheadCycles = 1 * 1000 + + // maxOverheadCycles is the maximum allowed syscall overhead in TSC cycles. + maxOverheadCycles = 100 * defaultOverheadCycles + + // maxSampleLoops is the maximum number of times to try to get a clock sample + // under the expected overhead. + maxSampleLoops = 5 + + // maxSamples is the maximum number of samples to collect. + maxSamples = 10 +) + +// errOverheadTooHigh is returned from sampler.Sample if the syscall +// overhead is too high. +var errOverheadTooHigh = errors.New("time syscall overhead exceeds maximum") + +// TSCValue is a value from the TSC. +type TSCValue int64 + +// Rdtsc reads the TSC. +// +// Intel SDM, Vol 3, Ch 17.15: +// "The RDTSC instruction reads the time-stamp counter and is guaranteed to +// return a monotonically increasing unique value whenever executed, except for +// a 64-bit counter wraparound. Intel guarantees that the time-stamp counter +// will not wraparound within 10 years after being reset." +// +// We use int64, so we have 5 years before wrap-around. +func Rdtsc() TSCValue + +// ReferenceNS are nanoseconds in the reference clock domain. +// int64 gives us ~290 years before this overflows. +type ReferenceNS int64 + +// Magnitude returns the absolute value of r. +func (r ReferenceNS) Magnitude() ReferenceNS { + if r < 0 { + return -r + } + return r +} + +// cycleClock is a TSC-based cycle clock. +type cycleClock interface { + // Cycles returns a count value from the TSC. + Cycles() TSCValue +} + +// tscCycleClock is a cycleClock that uses the real TSC. +type tscCycleClock struct{} + +// Cycles implements cycleClock.Cycles. +func (tscCycleClock) Cycles() TSCValue { + return Rdtsc() +} + +// sample contains a sample from the reference clock, with TSC values from +// before and after the reference clock value was captured. +type sample struct { + before TSCValue + after TSCValue + ref ReferenceNS +} + +// Overhead returns the sample overhead in TSC cycles. +func (s *sample) Overhead() TSCValue { + return s.after - s.before +} + +// referenceClocks collects individual samples from a reference clock ID and +// TSC. +type referenceClocks interface { + cycleClock + + // Sample returns a single sample from the reference clock ID. + Sample(c ClockID) (sample, error) +} + +// sampler collects samples from a reference system clock, minimizing +// the overhead in each sample. +type sampler struct { + // clockID is the reference clock ID (e.g., CLOCK_MONOTONIC). + clockID ClockID + + // clocks provides raw samples. + clocks referenceClocks + + // overhead is the estimated sample overhead in TSC cycles. + overhead TSCValue + + // samples is a ring buffer of the latest samples collected. + samples []sample +} + +// newSampler creates a sampler for clockID. +func newSampler(c ClockID) *sampler { + return &sampler{ + clockID: c, + clocks: syscallTSCReferenceClocks{}, + overhead: defaultOverheadCycles, + } +} + +// Reset discards previously collected clock samples. +func (s *sampler) Reset() { + s.overhead = defaultOverheadCycles + s.samples = []sample{} +} + +// lowOverheadSample returns a reference clock sample with minimized syscall overhead. +func (s *sampler) lowOverheadSample() (sample, error) { + for { + for i := 0; i < maxSampleLoops; i++ { + samp, err := s.clocks.Sample(s.clockID) + if err != nil { + return sample{}, err + } + + if samp.before > samp.after { + log.Warningf("TSC went backwards: %v > %v", samp.before, samp.after) + continue + } + + if samp.Overhead() <= s.overhead { + return samp, nil + } + } + + // Couldn't get a sample with the current overhead. Increase it. + newOverhead := 2 * s.overhead + if newOverhead > maxOverheadCycles { + // We'll give it one more shot with the max overhead. + + if s.overhead == maxOverheadCycles { + return sample{}, errOverheadTooHigh + } + + newOverhead = maxOverheadCycles + } + + s.overhead = newOverhead + log.Debugf("Time: Adjusting syscall overhead up to %v", s.overhead) + } +} + +// Sample collects a reference clock sample. +func (s *sampler) Sample() error { + sample, err := s.lowOverheadSample() + if err != nil { + return err + } + + s.samples = append(s.samples, sample) + if len(s.samples) > maxSamples { + s.samples = s.samples[1:] + } + + // If the 4 most recent samples all have an overhead less than half the + // expected overhead, adjust downwards. + if len(s.samples) < 4 { + return nil + } + + for _, sample := range s.samples[len(s.samples)-4:] { + if sample.Overhead() > s.overhead/2 { + return nil + } + } + + s.overhead -= s.overhead / 8 + log.Debugf("Time: Adjusting syscall overhead down to %v", s.overhead) + + return nil +} + +// Syscall returns the current raw reference time without storing TSC +// samples. +func (s *sampler) Syscall() (ReferenceNS, error) { + sample, err := s.clocks.Sample(s.clockID) + if err != nil { + return 0, err + } + + return sample.ref, nil +} + +// Cycles returns a raw TSC value. +func (s *sampler) Cycles() TSCValue { + return s.clocks.Cycles() +} + +// Range returns the widest range of clock samples available. +func (s *sampler) Range() (sample, sample, bool) { + if len(s.samples) < 2 { + return sample{}, sample{}, false + } + + return s.samples[0], s.samples[len(s.samples)-1], true +} diff --git a/pkg/sentry/time/sampler_unsafe.go b/pkg/sentry/time/sampler_unsafe.go new file mode 100644 index 000000000..e76180217 --- /dev/null +++ b/pkg/sentry/time/sampler_unsafe.go @@ -0,0 +1,56 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "syscall" + "unsafe" +) + +// syscallTSCReferenceClocks is the standard referenceClocks, collecting +// samples using CLOCK_GETTIME and RDTSC. +type syscallTSCReferenceClocks struct { + tscCycleClock +} + +// Sample implements sampler.Sample. +func (syscallTSCReferenceClocks) Sample(c ClockID) (sample, error) { + var s sample + + s.before = Rdtsc() + + // Don't call clockGettime to avoid a call which may call morestack. + var ts syscall.Timespec + _, _, e := syscall.RawSyscall(syscall.SYS_CLOCK_GETTIME, uintptr(c), uintptr(unsafe.Pointer(&ts)), 0) + if e != 0 { + return sample{}, e + } + + s.after = Rdtsc() + s.ref = ReferenceNS(ts.Nano()) + + return s, nil +} + +// clockGettime calls SYS_CLOCK_GETTIME, returning time in nanoseconds. +func clockGettime(c ClockID) (ReferenceNS, error) { + var ts syscall.Timespec + _, _, e := syscall.RawSyscall(syscall.SYS_CLOCK_GETTIME, uintptr(c), uintptr(unsafe.Pointer(&ts)), 0) + if e != 0 { + return 0, e + } + + return ReferenceNS(ts.Nano()), nil +} diff --git a/pkg/sentry/time/seqatomic_parameters.go b/pkg/sentry/time/seqatomic_parameters.go new file mode 100755 index 000000000..ecbea4d94 --- /dev/null +++ b/pkg/sentry/time/seqatomic_parameters.go @@ -0,0 +1,55 @@ +package time + +import ( + "reflect" + "strings" + "unsafe" + + "fmt" + "gvisor.googlesource.com/gvisor/third_party/gvsync" +) + +// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race +// with any writer critical sections in sc. +func SeqAtomicLoadParameters(sc *gvsync.SeqCount, ptr *Parameters) Parameters { + // This function doesn't use SeqAtomicTryLoad because doing so is + // measurably, significantly (~20%) slower; Go is awful at inlining. + var val Parameters + for { + epoch := sc.BeginRead() + if gvsync.RaceEnabled { + + gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + + val = *ptr + } + if sc.ReadOk(epoch) { + break + } + } + return val +} + +// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section +// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read +// would race with a writer critical section, SeqAtomicTryLoad returns +// (unspecified, false). +func SeqAtomicTryLoadParameters(sc *gvsync.SeqCount, epoch gvsync.SeqCountEpoch, ptr *Parameters) (Parameters, bool) { + var val Parameters + if gvsync.RaceEnabled { + gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + val = *ptr + } + return val, sc.ReadOk(epoch) +} + +func initParameters() { + var val Parameters + typ := reflect.TypeOf(val) + name := typ.Name() + if ptrs := gvsync.PointersInType(typ, name); len(ptrs) != 0 { + panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n"))) + } +} diff --git a/pkg/sentry/time/time_state_autogen.go b/pkg/sentry/time/time_state_autogen.go new file mode 100755 index 000000000..ea614b056 --- /dev/null +++ b/pkg/sentry/time/time_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package time + diff --git a/pkg/sentry/time/tsc_amd64.s b/pkg/sentry/time/tsc_amd64.s new file mode 100644 index 000000000..6a8eed664 --- /dev/null +++ b/pkg/sentry/time/tsc_amd64.s @@ -0,0 +1,27 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +TEXT ·Rdtsc(SB),NOSPLIT,$0-8 + // N.B. We need LFENCE on Intel, AMD is more complicated. + // Modern AMD CPUs with modern kernels make LFENCE behave like it does + // on Intel with MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT. MFENCE is + // otherwise needed on AMD. + LFENCE + RDTSC + SHLQ $32, DX + ADDQ DX, AX + MOVQ AX, ret+0(FP) + RET diff --git a/pkg/sentry/time/tsc_arm64.s b/pkg/sentry/time/tsc_arm64.s new file mode 100644 index 000000000..da9fa4112 --- /dev/null +++ b/pkg/sentry/time/tsc_arm64.s @@ -0,0 +1,22 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +TEXT ·Rdtsc(SB),NOSPLIT,$0-8 + // Get the virtual counter. + ISB $15 + WORD $0xd53be040 //MRS CNTVCT_EL0, R0 + MOVD R0, ret+0(FP) + RET |