diff options
Diffstat (limited to 'pkg/sentry/time')
-rw-r--r-- | pkg/sentry/time/BUILD | 48 | ||||
-rw-r--r-- | pkg/sentry/time/calibrated_clock.go | 269 | ||||
-rw-r--r-- | pkg/sentry/time/calibrated_clock_test.go | 186 | ||||
-rw-r--r-- | pkg/sentry/time/clock_id.go | 40 | ||||
-rw-r--r-- | pkg/sentry/time/clocks.go | 31 | ||||
-rw-r--r-- | pkg/sentry/time/muldiv_amd64.s | 44 | ||||
-rw-r--r-- | pkg/sentry/time/parameters.go | 239 | ||||
-rw-r--r-- | pkg/sentry/time/parameters_test.go | 486 | ||||
-rw-r--r-- | pkg/sentry/time/sampler.go | 225 | ||||
-rw-r--r-- | pkg/sentry/time/sampler_test.go | 183 | ||||
-rw-r--r-- | pkg/sentry/time/sampler_unsafe.go | 56 | ||||
-rw-r--r-- | pkg/sentry/time/tsc_amd64.s | 27 |
12 files changed, 1834 insertions, 0 deletions
diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD new file mode 100644 index 000000000..cbcd699d5 --- /dev/null +++ b/pkg/sentry/time/BUILD @@ -0,0 +1,48 @@ +package(licenses = ["notice"]) # Apache 2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +go_template_instance( + name = "seqatomic_parameters", + out = "seqatomic_parameters.go", + package = "time", + suffix = "Parameters", + template = "//pkg/sync:generic_seqatomic", + types = { + "Value": "Parameters", + }, +) + +go_library( + name = "time", + srcs = [ + "calibrated_clock.go", + "clock_id.go", + "clocks.go", + "muldiv_amd64.s", + "parameters.go", + "sampler.go", + "sampler_unsafe.go", + "seqatomic_parameters.go", + "tsc_amd64.s", + ], + importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/time", + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/log", + "//pkg/metric", + "//pkg/sync", + "//pkg/syserror", + ], +) + +go_test( + name = "time_test", + srcs = [ + "calibrated_clock_test.go", + "parameters_test.go", + "sampler_test.go", + ], + embed = [":time"], +) diff --git a/pkg/sentry/time/calibrated_clock.go b/pkg/sentry/time/calibrated_clock.go new file mode 100644 index 000000000..cbb95e2d7 --- /dev/null +++ b/pkg/sentry/time/calibrated_clock.go @@ -0,0 +1,269 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package time provides a calibrated clock synchronized to a system reference +// clock. +package time + +import ( + "sync" + "time" + + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/metric" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// fallbackMetric tracks failed updates. It is not sync, as it is not critical +// that all occurrences are captured and CalibratedClock may fallback many +// times. +var fallbackMetric = metric.MustCreateNewUint64Metric("/time/fallback", false /* sync */, "Incremented when a clock falls back to system calls due to a failed update") + +// CalibratedClock implements a clock that tracks a reference clock. +// +// Users should call Update at regular intervals of around approxUpdateInterval +// to ensure that the clock does not drift significantly from the reference +// clock. +type CalibratedClock struct { + // mu protects the fields below. + // TODO: consider a sequence counter for read locking. + mu sync.RWMutex + + // ref sample the reference clock that this clock is calibrated + // against. + ref *sampler + + // ready indicates that the fields below are ready for use calculating + // time. + ready bool + + // params are the current timekeeping parameters. + params Parameters + + // errorNS is the estimated clock error in nanoseconds. + errorNS ReferenceNS +} + +// NewCalibratedClock creates a CalibratedClock that tracks the given ClockID. +func NewCalibratedClock(c ClockID) *CalibratedClock { + return &CalibratedClock{ + ref: newSampler(c), + } +} + +// Debugf logs at debug level. +func (c *CalibratedClock) Debugf(format string, v ...interface{}) { + if log.IsLogging(log.Debug) { + args := []interface{}{c.ref.clockID} + args = append(args, v...) + log.Debugf("CalibratedClock(%v): "+format, args...) + } +} + +// Infof logs at debug level. +func (c *CalibratedClock) Infof(format string, v ...interface{}) { + if log.IsLogging(log.Info) { + args := []interface{}{c.ref.clockID} + args = append(args, v...) + log.Infof("CalibratedClock(%v): "+format, args...) + } +} + +// Warningf logs at debug level. +func (c *CalibratedClock) Warningf(format string, v ...interface{}) { + if log.IsLogging(log.Warning) { + args := []interface{}{c.ref.clockID} + args = append(args, v...) + log.Warningf("CalibratedClock(%v): "+format, args...) + } +} + +// reset forces the clock to restart the calibration process, logging the +// passed message. +func (c *CalibratedClock) reset(str string, v ...interface{}) { + c.mu.Lock() + defer c.mu.Unlock() + c.resetLocked(str, v...) +} + +// resetLocked is equivalent to reset with c.mu already held for writing. +func (c *CalibratedClock) resetLocked(str string, v ...interface{}) { + c.Warningf(str+" Resetting clock; time may jump.", v...) + c.ready = false + c.ref.Reset() + fallbackMetric.Increment() +} + +// updateParams updates the timekeeping parameters based on the passed +// parameters. +// +// actual is the actual estimated timekeeping parameters. The stored parameters +// may need to be adjusted slightly from these values to compensate for error. +// +// Preconditions: c.mu must be held for writing. +func (c *CalibratedClock) updateParams(actual Parameters) { + if !c.ready { + // At initial calibration there is nothing to correct. + c.params = actual + c.ready = true + + c.Infof("ready") + + return + } + + // Otherwise, adjust the params to correct for errors. + newParams, errorNS, err := errorAdjust(c.params, actual, actual.BaseCycles) + if err != nil { + // Something is very wrong. Reset and try again from the + // beginning. + c.resetLocked("Unable to update params: %v.", err) + return + } + logErrorAdjustment(c.ref.clockID, errorNS, c.params, newParams) + + if errorNS.Magnitude() >= MaxClockError { + // We should never get such extreme error, something is very + // wrong. Reset everything and start again. + // + // N.B. logErrorAdjustment will have already logged the error + // at warning level. + // + // TODO: We could allow Realtime clock jumps here. + c.resetLocked("Extreme clock error.") + return + } + + c.params = newParams + c.errorNS = errorNS +} + +// Update runs the update step of the clock, updating its synchronization with +// the reference clock. +// +// Update returns timekeeping and true with the new timekeeping parameters if +// the clock is calibrated. Update should be called regularly to prevent the +// clock from getting significantly out of sync from the reference clock. +// +// The returned timekeeping parameters are invalidated on the next call to +// Update. +func (c *CalibratedClock) Update() (Parameters, bool) { + c.mu.Lock() + defer c.mu.Unlock() + + if err := c.ref.Sample(); err != nil { + c.resetLocked("Unable to update calibrated clock: %v.", err) + return Parameters{}, false + } + + oldest, newest, ok := c.ref.Range() + if !ok { + // Not ready yet. + return Parameters{}, false + } + + minCount := uint64(newest.before - oldest.after) + maxCount := uint64(newest.after - oldest.before) + refInterval := uint64(newest.ref - oldest.ref) + + // freq hz = count / (interval ns) * (nsPerS ns) / (1 s) + nsPerS := uint64(time.Second.Nanoseconds()) + + minHz, ok := muldiv64(minCount, nsPerS, refInterval) + if !ok { + c.resetLocked("Unable to update calibrated clock: (%v - %v) * %v / %v overflows.", newest.before, oldest.after, nsPerS, refInterval) + return Parameters{}, false + } + + maxHz, ok := muldiv64(maxCount, nsPerS, refInterval) + if !ok { + c.resetLocked("Unable to update calibrated clock: (%v - %v) * %v / %v overflows.", newest.after, oldest.before, nsPerS, refInterval) + return Parameters{}, false + } + + c.updateParams(Parameters{ + Frequency: (minHz + maxHz) / 2, + BaseRef: newest.ref, + BaseCycles: newest.after, + }) + + return c.params, true +} + +// GetTime returns the current time based on the clock calibration. +func (c *CalibratedClock) GetTime() (int64, error) { + c.mu.RLock() + + if !c.ready { + // Fallback to a syscall. + now, err := c.ref.Syscall() + c.mu.RUnlock() + return int64(now), err + } + + now := c.ref.Cycles() + v, ok := c.params.ComputeTime(now) + if !ok { + // Something is seriously wrong with the clock. Try + // again with syscalls. + c.resetLocked("Time computation overflowed. params = %+v, now = %v.", c.params, now) + now, err := c.ref.Syscall() + c.mu.RUnlock() + return int64(now), err + } + + c.mu.RUnlock() + return v, nil +} + +// CalibratedClocks contains calibrated monotonic and realtime clocks. +// +// TODO: We know that Linux runs the monotonic and realtime clocks at +// the same rate, so rather than tracking both individually, we could do one +// calibration for both clocks. +type CalibratedClocks struct { + // monotonic is the clock tracking the system monotonic clock. + monotonic *CalibratedClock + + // realtime is the realtime equivalent of monotonic. + realtime *CalibratedClock +} + +// NewCalibratedClocks creates a CalibratedClocks. +func NewCalibratedClocks() *CalibratedClocks { + return &CalibratedClocks{ + monotonic: NewCalibratedClock(Monotonic), + realtime: NewCalibratedClock(Realtime), + } +} + +// Update implements Clocks.Update. +func (c *CalibratedClocks) Update() (Parameters, bool, Parameters, bool) { + monotonicParams, monotonicOk := c.monotonic.Update() + realtimeParams, realtimeOk := c.realtime.Update() + + return monotonicParams, monotonicOk, realtimeParams, realtimeOk +} + +// GetTime implements Clocks.GetTime. +func (c *CalibratedClocks) GetTime(id ClockID) (int64, error) { + switch id { + case Monotonic: + return c.monotonic.GetTime() + case Realtime: + return c.realtime.GetTime() + default: + return 0, syserror.EINVAL + } +} diff --git a/pkg/sentry/time/calibrated_clock_test.go b/pkg/sentry/time/calibrated_clock_test.go new file mode 100644 index 000000000..8b6dd5592 --- /dev/null +++ b/pkg/sentry/time/calibrated_clock_test.go @@ -0,0 +1,186 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "testing" + "time" +) + +// newTestCalibratedClock returns a CalibratedClock that collects samples from +// the given sample list and cycle counts from the given cycle list. +func newTestCalibratedClock(samples []sample, cycles []TSCValue) *CalibratedClock { + return &CalibratedClock{ + ref: newTestSampler(samples, cycles), + } +} + +func TestConstantFrequency(t *testing.T) { + // Perfectly constant frequency. + samples := []sample{ + {before: 100000, after: 100000 + defaultOverheadCycles, ref: 100}, + {before: 200000, after: 200000 + defaultOverheadCycles, ref: 200}, + {before: 300000, after: 300000 + defaultOverheadCycles, ref: 300}, + {before: 400000, after: 400000 + defaultOverheadCycles, ref: 400}, + {before: 500000, after: 500000 + defaultOverheadCycles, ref: 500}, + {before: 600000, after: 600000 + defaultOverheadCycles, ref: 600}, + {before: 700000, after: 700000 + defaultOverheadCycles, ref: 700}, + } + + c := newTestCalibratedClock(samples, nil) + + // Update from all samples. + for range samples { + c.Update() + } + + c.mu.RLock() + if !c.ready { + c.mu.RUnlock() + t.Fatalf("clock not ready") + } + // A bit after the last sample. + now, ok := c.params.ComputeTime(750000) + c.mu.RUnlock() + if !ok { + t.Fatalf("ComputeTime ok got %v want true", ok) + } + + t.Logf("now: %v", now) + + // Time should be between the current sample and where we'd expect the + // next sample. + if now < 700 || now > 800 { + t.Errorf("now got %v want > 700 && < 800", now) + } +} + +func TestErrorCorrection(t *testing.T) { + testCases := []struct { + name string + samples [5]sample + projectedTimeStart int64 + projectedTimeEnd int64 + }{ + // Initial calibration should be ~1MHz for each of these, and + // the reference clock changes in samples[2]. + { + name: "slow-down", + samples: [5]sample{ + {before: 1000000, after: 1000001, ref: ReferenceNS(1 * ApproxUpdateInterval.Nanoseconds())}, + {before: 2000000, after: 2000001, ref: ReferenceNS(2 * ApproxUpdateInterval.Nanoseconds())}, + // Reference clock has slowed down, causing 100ms of error. + {before: 3010000, after: 3010001, ref: ReferenceNS(3 * ApproxUpdateInterval.Nanoseconds())}, + {before: 4020000, after: 4020001, ref: ReferenceNS(4 * ApproxUpdateInterval.Nanoseconds())}, + {before: 5030000, after: 5030001, ref: ReferenceNS(5 * ApproxUpdateInterval.Nanoseconds())}, + }, + projectedTimeStart: 3005 * time.Millisecond.Nanoseconds(), + projectedTimeEnd: 3015 * time.Millisecond.Nanoseconds(), + }, + { + name: "speed-up", + samples: [5]sample{ + {before: 1000000, after: 1000001, ref: ReferenceNS(1 * ApproxUpdateInterval.Nanoseconds())}, + {before: 2000000, after: 2000001, ref: ReferenceNS(2 * ApproxUpdateInterval.Nanoseconds())}, + // Reference clock has sped up, causing 100ms of error. + {before: 2990000, after: 2990001, ref: ReferenceNS(3 * ApproxUpdateInterval.Nanoseconds())}, + {before: 3980000, after: 3980001, ref: ReferenceNS(4 * ApproxUpdateInterval.Nanoseconds())}, + {before: 4970000, after: 4970001, ref: ReferenceNS(5 * ApproxUpdateInterval.Nanoseconds())}, + }, + projectedTimeStart: 2985 * time.Millisecond.Nanoseconds(), + projectedTimeEnd: 2995 * time.Millisecond.Nanoseconds(), + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := newTestCalibratedClock(tc.samples[:], nil) + + // Initial calibration takes two updates. + _, ok := c.Update() + if ok { + t.Fatalf("Update ready too early") + } + + params, ok := c.Update() + if !ok { + t.Fatalf("Update not ready") + } + + // Initial calibration is ~1MHz. + hz := params.Frequency + if hz < 990000 || hz > 1010000 { + t.Fatalf("Frequency got %v want > 990kHz && < 1010kHz", hz) + } + + // Project time at the next update. Given the 1MHz + // calibration, it is expected to be ~3.1s/2.9s, not + // the actual 3s. + // + // N.B. the next update time is the "after" time above. + projected, ok := params.ComputeTime(tc.samples[2].after) + if !ok { + t.Fatalf("ComputeTime ok got %v want true", ok) + } + if projected < tc.projectedTimeStart || projected > tc.projectedTimeEnd { + t.Fatalf("ComputeTime(%v) got %v want > %v && < %v", tc.samples[2].after, projected, tc.projectedTimeStart, tc.projectedTimeEnd) + } + + // Update again to see the changed reference clock. + params, ok = c.Update() + if !ok { + t.Fatalf("Update not ready") + } + + // We now know that TSC = tc.samples[2].after -> 3s, + // but with the previous params indicated that TSC + // tc.samples[2].after -> 3.5s/2.5s. We can't allow the + // clock to go backwards, and having the clock jump + // forwards is undesirable. There should be a smooth + // transition that corrects the clock error over time. + // Check that the clock is continuous at TSC = + // tc.samples[2].after. + newProjected, ok := params.ComputeTime(tc.samples[2].after) + if !ok { + t.Fatalf("ComputeTime ok got %v want true", ok) + } + if newProjected != projected { + t.Errorf("Discontinuous time; ComputeTime(%v) got %v want %v", tc.samples[2].after, newProjected, projected) + } + + // As the reference clock stablizes, ensure that the clock error + // decreases. + initialErr := c.errorNS + t.Logf("initial error: %v ns", initialErr) + + _, ok = c.Update() + if !ok { + t.Fatalf("Update not ready") + } + if c.errorNS.Magnitude() > initialErr.Magnitude() { + t.Errorf("errorNS increased, got %v want |%v| <= |%v|", c.errorNS, c.errorNS, initialErr) + } + + _, ok = c.Update() + if !ok { + t.Fatalf("Update not ready") + } + if c.errorNS.Magnitude() > initialErr.Magnitude() { + t.Errorf("errorNS increased, got %v want |%v| <= |%v|", c.errorNS, c.errorNS, initialErr) + } + + t.Logf("final error: %v ns", c.errorNS) + }) + } +} diff --git a/pkg/sentry/time/clock_id.go b/pkg/sentry/time/clock_id.go new file mode 100644 index 000000000..500102e58 --- /dev/null +++ b/pkg/sentry/time/clock_id.go @@ -0,0 +1,40 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "strconv" +) + +// ClockID is a Linux clock identifier. +type ClockID int32 + +// These are the supported Linux clock identifiers. +const ( + Realtime ClockID = iota + Monotonic +) + +// String implements fmt.Stringer.String. +func (c ClockID) String() string { + switch c { + case Realtime: + return "Realtime" + case Monotonic: + return "Monotonic" + default: + return strconv.Itoa(int(c)) + } +} diff --git a/pkg/sentry/time/clocks.go b/pkg/sentry/time/clocks.go new file mode 100644 index 000000000..9925b407d --- /dev/null +++ b/pkg/sentry/time/clocks.go @@ -0,0 +1,31 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +// Clocks represents a clock source that contains both a monotonic and realtime +// clock. +type Clocks interface { + // Update performs an update step, keeping the clocks in sync with the + // reference host clocks, and returning the new timekeeping parameters. + // + // Update should be called at approximately ApproxUpdateInterval. + Update() (monotonicParams Parameters, monotonicOk bool, realtimeParam Parameters, realtimeOk bool) + + // GetTime returns the current time in nanoseconds for the given clock. + // + // Clocks implementations must support at least Monotonic and + // Realtime. + GetTime(c ClockID) (int64, error) +} diff --git a/pkg/sentry/time/muldiv_amd64.s b/pkg/sentry/time/muldiv_amd64.s new file mode 100644 index 000000000..291940b1d --- /dev/null +++ b/pkg/sentry/time/muldiv_amd64.s @@ -0,0 +1,44 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +// Documentation is available in parameters.go. +// +// func muldiv64(value, multiplier, divisor uint64) (uint64, bool) +TEXT ·muldiv64(SB),NOSPLIT,$0-33 + MOVQ value+0(FP), AX + MOVQ multiplier+8(FP), BX + MOVQ divisor+16(FP), CX + + // Multiply AX*BX and store result in DX:AX. + MULQ BX + + // If divisor <= (value*multiplier) / 2^64, then the division will overflow. + // + // (value*multiplier) / 2^64 is DX:AX >> 64, or simply DX. + CMPQ CX, DX + JLE overflow + + // Divide DX:AX by CX. + DIVQ CX + + MOVQ AX, result+24(FP) + MOVB $1, ok+32(FP) + RET + +overflow: + MOVQ $0, result+24(FP) + MOVB $0, ok+32(FP) + RET diff --git a/pkg/sentry/time/parameters.go b/pkg/sentry/time/parameters.go new file mode 100644 index 000000000..594b4874b --- /dev/null +++ b/pkg/sentry/time/parameters.go @@ -0,0 +1,239 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "fmt" + "time" + + "gvisor.googlesource.com/gvisor/pkg/log" +) + +const ( + // ApproxUpdateInterval is the approximate interval that parameters + // should be updated at. + // + // Error correction assumes that the next update will occur after this + // much time. + // + // If an update occurs before ApproxUpdateInterval passes, it has no + // adverse effect on error correction behavior. + // + // If an update occurs after ApproxUpdateInterval passes, the clock + // will overshoot its error correction target and begin accumulating + // error in the other direction. + // + // If updates occur after more than 2*ApproxUpdateInterval passes, the + // clock becomes unstable, accumulating more error than it had + // originally. Repeated updates after more than 2*ApproxUpdateInterval + // will cause unbounded increases in error. + // + // These statements assume that the host clock does not change. Actual + // error will depend upon host clock changes. + // + // TODO: make error correction more robust to delayed + // updates. + ApproxUpdateInterval = 1 * time.Second + + // MaxClockError is the maximum amount of error that the clocks will + // try to correct. + // + // This limit: + // + // * Puts a limit on cases of otherwise unbounded increases in error. + // + // * Avoids unreasonably large frequency adjustments required to + // correct large errors over a single update interval. + MaxClockError = ReferenceNS(ApproxUpdateInterval) / 4 +) + +// Parameters are the timekeeping parameters needed to compute the current +// time. +type Parameters struct { + // BaseCycles was the TSC counter value when the time was BaseRef. + BaseCycles TSCValue + + // BaseRef is the reference clock time in nanoseconds corresponding to + // BaseCycles. + BaseRef ReferenceNS + + // Frequency is the frequency of the cycle clock in Hertz. + Frequency uint64 +} + +// muldiv64 multiplies two 64-bit numbers, then divides the result by another +// 64-bit number. +// +// It requires that the result fit in 64 bits, but doesn't require that +// intermediate values do; in particular, the result of the multiplication may +// require 128 bits. +// +// It returns !ok if divisor is zero or the result does not fit in 64 bits. +func muldiv64(value, multiplier, divisor uint64) (uint64, bool) + +// ComputeTime calculates the current time from a "now" TSC value. +// +// time = ref + (now - base) / f +func (p Parameters) ComputeTime(nowCycles TSCValue) (int64, bool) { + diffCycles := nowCycles - p.BaseCycles + if diffCycles < 0 { + log.Warningf("now cycles %v < base cycles %v", nowCycles, p.BaseCycles) + diffCycles = 0 + } + + // Overflow "won't ever happen". If diffCycles is the max value + // (2^63 - 1), then to overflow, + // + // frequency <= ((2^63 - 1) * 10^9) / 2^64 = 500Mhz + // + // A TSC running at 2GHz takes 201 years to reach 2^63-1. 805 years at + // 500MHz. + diffNS, ok := muldiv64(uint64(diffCycles), uint64(time.Second.Nanoseconds()), p.Frequency) + return int64(uint64(p.BaseRef) + diffNS), ok +} + +// errorAdjust returns a new Parameters struct "adjusted" that satisfies: +// +// 1. adjusted.ComputeTime(now) = prevParams.ComputeTime(now) +// * i.e., the current time does not jump. +// +// 2. adjusted.ComputeTime(TSC at next update) = newParams.ComputeTime(TSC at next update) +// * i.e., Any error between prevParams and newParams will be corrected over +// the course of the next update period. +// +// errorAdjust also returns the current clock error. +// +// Preconditions: +// * newParams.BaseCycles >= prevParams.BaseCycles; i.e., TSC must not go +// backwards. +// * newParams.BaseCycles <= now; i.e., the new parameters be computed at or +// before now. +func errorAdjust(prevParams Parameters, newParams Parameters, now TSCValue) (Parameters, ReferenceNS, error) { + if newParams.BaseCycles < prevParams.BaseCycles { + // Oh dear! Something is very wrong. + return Parameters{}, 0, fmt.Errorf("TSC went backwards in updated clock params: %v < %v", newParams.BaseCycles, prevParams.BaseCycles) + } + if newParams.BaseCycles > now { + return Parameters{}, 0, fmt.Errorf("parameters contain base cycles later than now: %v > %v", newParams.BaseCycles, now) + } + + intervalNS := int64(ApproxUpdateInterval.Nanoseconds()) + nsPerSec := uint64(time.Second.Nanoseconds()) + + // Current time as computed by prevParams. + oldNowNS, ok := prevParams.ComputeTime(now) + if !ok { + return Parameters{}, 0, fmt.Errorf("old now time computation overflowed. params = %+v, now = %v", prevParams, now) + } + + // We expect the update ticker to run based on this clock (i.e., it has + // been using prevParams and will use the returned adjusted + // parameters). Hence it will decide to fire intervalNS from the + // current (oldNowNS) "now". + nextNS := oldNowNS + intervalNS + + if nextNS <= int64(newParams.BaseRef) { + // The next update time already passed before the new + // parameters were created! We definitely can't correct the + // error by then. + return Parameters{}, 0, fmt.Errorf("unable to correct error in single period. oldNowNS = %v, nextNS = %v, p = %v", oldNowNS, nextNS, newParams) + } + + // For what TSC value next will newParams.ComputeTime(next) = nextNS? + // + // Solve ComputeTime for next: + // + // next = newParams.Frequency * (nextNS - newParams.BaseRef) + newParams.BaseCycles + c, ok := muldiv64(newParams.Frequency, uint64(nextNS-int64(newParams.BaseRef)), nsPerSec) + if !ok { + return Parameters{}, 0, fmt.Errorf("%v * (%v - %v) / %v overflows", newParams.Frequency, nextNS, newParams.BaseRef, nsPerSec) + } + + cycles := TSCValue(c) + next := cycles + newParams.BaseCycles + + if next <= now { + // The next update time already passed now with the new + // parameters! We can't correct the error in a single period. + return Parameters{}, 0, fmt.Errorf("unable to correct error in single period. oldNowNS = %v, nextNS = %v, now = %v, next = %v", oldNowNS, nextNS, now, next) + } + + // We want to solve for parameters that satisfy: + // + // adjusted.ComputeTime(now) = oldNowNS + // + // adjusted.ComputeTime(next) = nextNS + // + // i.e., the current time does not change, but by the time we reach + // next we reach the same time as newParams. + + // We choose to keep BaseCycles fixed. + adjusted := Parameters{ + BaseCycles: newParams.BaseCycles, + } + + // We want a slope such that time goes from oldNowNS to nextNS when + // we reach next. + // + // In other words, cycles should increase by next - now in the next + // interval. + + cycles = next - now + ns := intervalNS + + // adjusted.Frequency = cycles / ns + adjusted.Frequency, ok = muldiv64(uint64(cycles), nsPerSec, uint64(ns)) + if !ok { + return Parameters{}, 0, fmt.Errorf("(%v - %v) * %v / %v overflows", next, now, nsPerSec, ns) + } + + // Now choose a base reference such that the current time remains the + // same. Note that this is just ComputeTime, solving for BaseRef: + // + // oldNowNS = BaseRef + (now - BaseCycles) / Frequency + // BaseRef = oldNowNS - (now - BaseCycles) / Frequency + diffNS, ok := muldiv64(uint64(now-adjusted.BaseCycles), nsPerSec, adjusted.Frequency) + if !ok { + return Parameters{}, 0, fmt.Errorf("(%v - %v) * %v / %v overflows", now, adjusted.BaseCycles, nsPerSec, adjusted.Frequency) + } + + adjusted.BaseRef = ReferenceNS(oldNowNS - int64(diffNS)) + + // The error is the difference between the current time and what the + // new parameters say the current time should be. + newNowNS, ok := newParams.ComputeTime(now) + if !ok { + return Parameters{}, 0, fmt.Errorf("new now time computation overflowed. params = %+v, now = %v", newParams, now) + } + + errorNS := ReferenceNS(oldNowNS - newNowNS) + + return adjusted, errorNS, nil +} + +// logErrorAdjustment logs the clock error and associated error correction +// frequency adjustment. +// +// The log level is determined by the error severity. +func logErrorAdjustment(clock ClockID, errorNS ReferenceNS, orig, adjusted Parameters) { + fn := log.Debugf + if int64(errorNS.Magnitude()) > time.Millisecond.Nanoseconds() { + fn = log.Warningf + } else if int64(errorNS.Magnitude()) > 10*time.Microsecond.Nanoseconds() { + fn = log.Infof + } + + fn("Clock(%v): error: %v ns, adjusted frequency from %v Hz to %v Hz", clock, errorNS, orig.Frequency, adjusted.Frequency) +} diff --git a/pkg/sentry/time/parameters_test.go b/pkg/sentry/time/parameters_test.go new file mode 100644 index 000000000..7394fc5ee --- /dev/null +++ b/pkg/sentry/time/parameters_test.go @@ -0,0 +1,486 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "math" + "testing" + "time" +) + +func TestParametersComputeTime(t *testing.T) { + testCases := []struct { + name string + params Parameters + now TSCValue + want int64 + }{ + { + // Now is the same as the base cycles. + name: "base-cycles", + params: Parameters{ + BaseCycles: 10000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + now: 10000, + want: 5000 * time.Millisecond.Nanoseconds(), + }, + { + // Now is the behind the base cycles. Time is frozen. + name: "backwards", + params: Parameters{ + BaseCycles: 10000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + now: 9000, + want: 5000 * time.Millisecond.Nanoseconds(), + }, + { + // Now is ahead of the base cycles. + name: "ahead", + params: Parameters{ + BaseCycles: 10000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + now: 15000, + want: 5500 * time.Millisecond.Nanoseconds(), + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got, ok := tc.params.ComputeTime(tc.now) + if !ok { + t.Errorf("ComputeTime ok got %v want true", got) + } + if got != tc.want { + t.Errorf("ComputeTime got %+v want %+v", got, tc.want) + } + }) + } +} + +func TestParametersErrorAdjust(t *testing.T) { + testCases := []struct { + name string + oldParams Parameters + now TSCValue + newParams Parameters + want Parameters + errorNS ReferenceNS + wantErr bool + }{ + { + // newParams are perfectly continuous with oldParams + // and don't need adjustment. + name: "continuous", + oldParams: Parameters{ + BaseCycles: 0, + BaseRef: 0, + Frequency: 10000, + }, + now: 50000, + newParams: Parameters{ + BaseCycles: 50000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + want: Parameters{ + BaseCycles: 50000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + }, + { + // Same as "continuous", but with now ahead of + // newParams.BaseCycles. The result is the same as + // there is no error to correct. + name: "continuous-nowdiff", + oldParams: Parameters{ + BaseCycles: 0, + BaseRef: 0, + Frequency: 10000, + }, + now: 60000, + newParams: Parameters{ + BaseCycles: 50000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + want: Parameters{ + BaseCycles: 50000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + }, + { + // errorAdjust bails out if the TSC goes backwards. + name: "tsc-backwards", + oldParams: Parameters{ + BaseCycles: 10000, + BaseRef: ReferenceNS(1000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + now: 9000, + newParams: Parameters{ + BaseCycles: 9000, + BaseRef: ReferenceNS(1100 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + wantErr: true, + }, + { + // errorAdjust bails out if new params are from after now. + name: "params-after-now", + oldParams: Parameters{ + BaseCycles: 10000, + BaseRef: ReferenceNS(1000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + now: 11000, + newParams: Parameters{ + BaseCycles: 12000, + BaseRef: ReferenceNS(1200 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + wantErr: true, + }, + { + // Host clock sped up. + name: "speed-up", + oldParams: Parameters{ + BaseCycles: 0, + BaseRef: 0, + Frequency: 10000, + }, + now: 45000, + // Host frequency changed to 9000 immediately after + // oldParams was returned. + newParams: Parameters{ + BaseCycles: 45000, + // From oldParams, we think ref = 4.5s at cycles = 45000. + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 9000, + }, + want: Parameters{ + BaseCycles: 45000, + BaseRef: ReferenceNS(4500 * time.Millisecond.Nanoseconds()), + // We must decrease the new frequency by 50% to + // correct 0.5s of error in 1s + // (ApproxUpdateInterval). + Frequency: 4500, + }, + errorNS: ReferenceNS(-500 * time.Millisecond.Nanoseconds()), + }, + { + // Host clock sped up, with now ahead of newParams. + name: "speed-up-nowdiff", + oldParams: Parameters{ + BaseCycles: 0, + BaseRef: 0, + Frequency: 10000, + }, + now: 50000, + // Host frequency changed to 9000 immediately after + // oldParams was returned. + newParams: Parameters{ + BaseCycles: 45000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 9000, + }, + // nextRef = 6000ms + // nextCycles = 9000 * (6000ms - 5000ms) + 45000 + // nextCycles = 9000 * (1s) + 45000 + // nextCycles = 54000 + // f = (54000 - 50000) / 1s = 4000 + // + // ref = 5000ms - (50000 - 45000) / 4000 + // ref = 3.75s + want: Parameters{ + BaseCycles: 45000, + BaseRef: ReferenceNS(3750 * time.Millisecond.Nanoseconds()), + Frequency: 4000, + }, + // oldNow = 50000 * 10000 = 5s + // newNow = (50000 - 45000) / 9000 + 5s = 5.555s + errorNS: ReferenceNS((5000*time.Millisecond - 5555555555).Nanoseconds()), + }, + { + // Host clock sped up. The new parameters are so far + // ahead that the next update time already passed. + name: "speed-up-uncorrectable-baseref", + oldParams: Parameters{ + BaseCycles: 0, + BaseRef: 0, + Frequency: 10000, + }, + now: 50000, + // Host frequency changed to 5000 immediately after + // oldParams was returned. + newParams: Parameters{ + BaseCycles: 45000, + BaseRef: ReferenceNS(9000 * time.Millisecond.Nanoseconds()), + Frequency: 5000, + }, + // The next update should be at 10s, but newParams + // already passed 6s. Thus it is impossible to correct + // the clock by then. + wantErr: true, + }, + { + // Host clock sped up. The new parameters are moving so + // fast that the next update should be before now. + name: "speed-up-uncorrectable-frequency", + oldParams: Parameters{ + BaseCycles: 0, + BaseRef: 0, + Frequency: 10000, + }, + now: 55000, + // Host frequency changed to 7500 immediately after + // oldParams was returned. + newParams: Parameters{ + BaseCycles: 45000, + BaseRef: ReferenceNS(6000 * time.Millisecond.Nanoseconds()), + Frequency: 7500, + }, + // The next update should be at 6.5s, but newParams are + // so far ahead and fast that they reach 6.5s at cycle + // 48750, which before now! Thus it is impossible to + // correct the clock by then. + wantErr: true, + }, + { + // Host clock slowed down. + name: "slow-down", + oldParams: Parameters{ + BaseCycles: 0, + BaseRef: 0, + Frequency: 10000, + }, + now: 55000, + // Host frequency changed to 11000 immediately after + // oldParams was returned. + newParams: Parameters{ + BaseCycles: 55000, + // From oldParams, we think ref = 5.5s at cycles = 55000. + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 11000, + }, + want: Parameters{ + BaseCycles: 55000, + BaseRef: ReferenceNS(5500 * time.Millisecond.Nanoseconds()), + // We must increase the new frequency by 50% to + // correct 0.5s of error in 1s + // (ApproxUpdateInterval). + Frequency: 16500, + }, + errorNS: ReferenceNS(500 * time.Millisecond.Nanoseconds()), + }, + { + // Host clock slowed down, with now ahead of newParams. + name: "slow-down-nowdiff", + oldParams: Parameters{ + BaseCycles: 0, + BaseRef: 0, + Frequency: 10000, + }, + now: 60000, + // Host frequency changed to 11000 immediately after + // oldParams was returned. + newParams: Parameters{ + BaseCycles: 55000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 11000, + }, + // nextRef = 7000ms + // nextCycles = 11000 * (7000ms - 5000ms) + 55000 + // nextCycles = 11000 * (2000ms) + 55000 + // nextCycles = 77000 + // f = (77000 - 60000) / 1s = 17000 + // + // ref = 6000ms - (60000 - 55000) / 17000 + // ref = 5705882353ns + want: Parameters{ + BaseCycles: 55000, + BaseRef: ReferenceNS(5705882353), + Frequency: 17000, + }, + // oldNow = 60000 * 10000 = 6s + // newNow = (60000 - 55000) / 11000 + 5s = 5.4545s + errorNS: ReferenceNS((6*time.Second - 5454545454).Nanoseconds()), + }, + { + // Host time went backwards. + name: "time-backwards", + oldParams: Parameters{ + BaseCycles: 50000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + now: 60000, + newParams: Parameters{ + BaseCycles: 60000, + // From oldParams, we think ref = 6s at cycles = 60000. + BaseRef: ReferenceNS(4000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + want: Parameters{ + BaseCycles: 60000, + BaseRef: ReferenceNS(6000 * time.Millisecond.Nanoseconds()), + // We must increase the frequency by 200% to + // correct 2s of error in 1s + // (ApproxUpdateInterval). + Frequency: 30000, + }, + errorNS: ReferenceNS(2000 * time.Millisecond.Nanoseconds()), + }, + { + // Host time went backwards, with now ahead of newParams. + name: "time-backwards-nowdiff", + oldParams: Parameters{ + BaseCycles: 50000, + BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + now: 65000, + // nextRef = 7500ms + // nextCycles = 10000 * (7500ms - 4000ms) + 60000 + // nextCycles = 10000 * (3500ms) + 60000 + // nextCycles = 95000 + // f = (95000 - 65000) / 1s = 30000 + // + // ref = 6500ms - (65000 - 60000) / 30000 + // ref = 6333333333ns + newParams: Parameters{ + BaseCycles: 60000, + BaseRef: ReferenceNS(4000 * time.Millisecond.Nanoseconds()), + Frequency: 10000, + }, + want: Parameters{ + BaseCycles: 60000, + BaseRef: ReferenceNS(6333333334), + Frequency: 30000, + }, + // oldNow = 65000 * 10000 = 6.5s + // newNow = (65000 - 60000) / 10000 + 4s = 4.5s + errorNS: ReferenceNS(2000 * time.Millisecond.Nanoseconds()), + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got, errorNS, err := errorAdjust(tc.oldParams, tc.newParams, tc.now) + if err != nil && !tc.wantErr { + t.Errorf("err got %v want nil", err) + } else if err == nil && tc.wantErr { + t.Errorf("err got nil want non-nil") + } + + if got != tc.want { + t.Errorf("Parameters got %+v want %+v", got, tc.want) + } + if errorNS != tc.errorNS { + t.Errorf("errorNS got %v want %v", errorNS, tc.errorNS) + } + }) + } +} + +func testMuldiv(t *testing.T, v uint64) { + for i := uint64(1); i <= 1000000; i++ { + mult := uint64(1000000000) + div := i * mult + res, ok := muldiv64(v, mult, div) + if !ok { + t.Errorf("Result of %v * %v / %v ok got false want true", v, mult, div) + } + if want := v / i; res != want { + t.Errorf("Bad result of %v * %v / %v: got %v, want %v", v, mult, div, res, want) + } + } +} + +func TestMulDiv(t *testing.T) { + testMuldiv(t, math.MaxUint64) + for i := int64(-10); i <= 10; i++ { + testMuldiv(t, uint64(i)) + } +} + +func TestMulDivZero(t *testing.T) { + if r, ok := muldiv64(2, 4, 0); ok { + t.Errorf("muldiv64(2, 4, 0) got %d, ok want !ok", r) + } + + if r, ok := muldiv64(0, 0, 0); ok { + t.Errorf("muldiv64(0, 0, 0) got %d, ok want !ok", r) + } +} + +func TestMulDivOverflow(t *testing.T) { + testCases := []struct { + name string + val uint64 + mult uint64 + div uint64 + ok bool + ret uint64 + }{ + { + name: "2^62", + val: 1 << 63, + mult: 4, + div: 8, + ok: true, + ret: 1 << 62, + }, + { + name: "2^64-1", + val: 0xffffffffffffffff, + mult: 1, + div: 1, + ok: true, + ret: 0xffffffffffffffff, + }, + { + name: "2^64", + val: 1 << 63, + mult: 4, + div: 2, + ok: false, + }, + { + name: "2^125", + val: 1 << 63, + mult: 1 << 63, + div: 2, + ok: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + r, ok := muldiv64(tc.val, tc.mult, tc.div) + if ok != tc.ok { + t.Errorf("ok got %v want %v", ok, tc.ok) + } + if tc.ok && r != tc.ret { + t.Errorf("ret got %v want %v", r, tc.ret) + } + }) + } +} diff --git a/pkg/sentry/time/sampler.go b/pkg/sentry/time/sampler.go new file mode 100644 index 000000000..cf581b5fa --- /dev/null +++ b/pkg/sentry/time/sampler.go @@ -0,0 +1,225 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "errors" + + "gvisor.googlesource.com/gvisor/pkg/log" +) + +const ( + // defaultOverheadTSC is the default estimated syscall overhead in TSC cycles. + // It is further refined as syscalls are made. + defaultOverheadCycles = 1 * 1000 + + // maxOverheadCycles is the maximum allowed syscall overhead in TSC cycles. + maxOverheadCycles = 100 * defaultOverheadCycles + + // maxSampleLoops is the maximum number of times to try to get a clock sample + // under the expected overhead. + maxSampleLoops = 5 + + // maxSamples is the maximum number of samples to collect. + maxSamples = 10 +) + +// errOverheadTooHigh is returned from sampler.Sample if the syscall +// overhead is too high. +var errOverheadTooHigh = errors.New("time syscall overhead exceeds maximum") + +// TSCValue is a value from the TSC. +type TSCValue int64 + +// Rdtsc reads the TSC. +// +// Intel SDM, Vol 3, Ch 17.15: +// "The RDTSC instruction reads the time-stamp counter and is guaranteed to +// return a monotonically increasing unique value whenever executed, except for +// a 64-bit counter wraparound. Intel guarantees that the time-stamp counter +// will not wraparound within 10 years after being reset." +// +// We use int64, so we have 5 years before wrap-around. +func Rdtsc() TSCValue + +// ReferenceNS are nanoseconds in the reference clock domain. +// int64 gives us ~290 years before this overflows. +type ReferenceNS int64 + +// Magnitude returns the absolute value of r. +func (r ReferenceNS) Magnitude() ReferenceNS { + if r < 0 { + return -r + } + return r +} + +// cycleClock is a TSC-based cycle clock. +type cycleClock interface { + // Cycles returns a count value from the TSC. + Cycles() TSCValue +} + +// tscCycleClock is a cycleClock that uses the real TSC. +type tscCycleClock struct{} + +// Cycles implements cycleClock.Cycles. +func (tscCycleClock) Cycles() TSCValue { + return Rdtsc() +} + +// sample contains a sample from the reference clock, with TSC values from +// before and after the reference clock value was captured. +type sample struct { + before TSCValue + after TSCValue + ref ReferenceNS +} + +// Overhead returns the sample overhead in TSC cycles. +func (s *sample) Overhead() TSCValue { + return s.after - s.before +} + +// referenceClocks collects individual samples from a reference clock ID and +// TSC. +type referenceClocks interface { + cycleClock + + // Sample returns a single sample from the reference clock ID. + Sample(c ClockID) (sample, error) +} + +// sampler collects samples from a reference system clock, minimizing +// the overhead in each sample. +type sampler struct { + // clockID is the reference clock ID (e.g., CLOCK_MONOTONIC). + clockID ClockID + + // clocks provides raw samples. + clocks referenceClocks + + // overhead is the estimated sample overhead in TSC cycles. + overhead TSCValue + + // samples is a ring buffer of the latest samples collected. + samples []sample +} + +// newSampler creates a sampler for clockID. +func newSampler(c ClockID) *sampler { + return &sampler{ + clockID: c, + clocks: syscallTSCReferenceClocks{}, + overhead: defaultOverheadCycles, + } +} + +// Reset discards previously collected clock samples. +func (s *sampler) Reset() { + s.overhead = defaultOverheadCycles + s.samples = []sample{} +} + +// lowOverheadSample returns a reference clock sample with minimized syscall overhead. +func (s *sampler) lowOverheadSample() (sample, error) { + for { + for i := 0; i < maxSampleLoops; i++ { + samp, err := s.clocks.Sample(s.clockID) + if err != nil { + return sample{}, err + } + + if samp.before > samp.after { + log.Warningf("TSC went backwards: %v > %v", samp.before, samp.after) + continue + } + + if samp.Overhead() <= s.overhead { + return samp, nil + } + } + + // Couldn't get a sample with the current overhead. Increase it. + newOverhead := 2 * s.overhead + if newOverhead > maxOverheadCycles { + // We'll give it one more shot with the max overhead. + + if s.overhead == maxOverheadCycles { + return sample{}, errOverheadTooHigh + } + + newOverhead = maxOverheadCycles + } + + s.overhead = newOverhead + log.Debugf("Time: Adjusting syscall overhead up to %v", s.overhead) + } +} + +// Sample collects a reference clock sample. +func (s *sampler) Sample() error { + sample, err := s.lowOverheadSample() + if err != nil { + return err + } + + s.samples = append(s.samples, sample) + if len(s.samples) > maxSamples { + s.samples = s.samples[1:] + } + + // If the 4 most recent samples all have an overhead less than half the + // expected overhead, adjust downwards. + if len(s.samples) < 4 { + return nil + } + + for _, sample := range s.samples[len(s.samples)-4:] { + if sample.Overhead() > s.overhead/2 { + return nil + } + } + + s.overhead -= s.overhead / 8 + log.Debugf("Time: Adjusting syscall overhead down to %v", s.overhead) + + return nil +} + +// Syscall returns the current raw reference time without storing TSC +// samples. +func (s *sampler) Syscall() (ReferenceNS, error) { + sample, err := s.clocks.Sample(s.clockID) + if err != nil { + return 0, err + } + + return sample.ref, nil +} + +// Cycles returns a raw TSC value. +func (s *sampler) Cycles() TSCValue { + return s.clocks.Cycles() +} + +// Range returns the widest range of clock samples available. +func (s *sampler) Range() (sample, sample, bool) { + if len(s.samples) < 2 { + return sample{}, sample{}, false + } + + return s.samples[0], s.samples[len(s.samples)-1], true +} diff --git a/pkg/sentry/time/sampler_test.go b/pkg/sentry/time/sampler_test.go new file mode 100644 index 000000000..caf7e5c53 --- /dev/null +++ b/pkg/sentry/time/sampler_test.go @@ -0,0 +1,183 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "errors" + "testing" +) + +// errNoSamples is returned when testReferenceClocks runs out of samples. +var errNoSamples = errors.New("no samples available") + +// testReferenceClocks returns a preset list of samples and cycle counts. +type testReferenceClocks struct { + samples []sample + cycles []TSCValue +} + +// Sample implements referenceClocks.Sample, returning the next sample in the list. +func (t *testReferenceClocks) Sample(_ ClockID) (sample, error) { + if len(t.samples) == 0 { + return sample{}, errNoSamples + } + + s := t.samples[0] + if len(t.samples) == 1 { + t.samples = nil + } else { + t.samples = t.samples[1:] + } + + return s, nil +} + +// Cycles implements referenceClocks.Cycles, returning the next TSCValue in the list. +func (t *testReferenceClocks) Cycles() TSCValue { + if len(t.cycles) == 0 { + return 0 + } + + c := t.cycles[0] + if len(t.cycles) == 1 { + t.cycles = nil + } else { + t.cycles = t.cycles[1:] + } + + return c +} + +// newTestSampler returns a sampler that collects samples from +// the given sample list and cycle counts from the given cycle list. +func newTestSampler(samples []sample, cycles []TSCValue) *sampler { + return &sampler{ + clocks: &testReferenceClocks{ + samples: samples, + cycles: cycles, + }, + overhead: defaultOverheadCycles, + } +} + +// generateSamples generates n samples with the given overhead. +func generateSamples(n int, overhead TSCValue) []sample { + samples := []sample{{before: 1000000, after: 1000000 + overhead, ref: 100}} + for i := 0; i < n-1; i++ { + prev := samples[len(samples)-1] + samples = append(samples, sample{ + before: prev.before + 1000000, + after: prev.after + 1000000, + ref: prev.ref + 100, + }) + } + return samples +} + +// TestSample ensures that samples can be collected. +func TestSample(t *testing.T) { + testCases := []struct { + name string + samples []sample + err error + }{ + { + name: "basic", + samples: []sample{ + {before: 100000, after: 100000 + defaultOverheadCycles, ref: 100}, + }, + err: nil, + }, + { + // Sample with backwards TSC ignored. + // referenceClock should retry and get errNoSamples. + name: "backwards-tsc-ignored", + samples: []sample{ + {before: 100000, after: 90000, ref: 100}, + }, + err: errNoSamples, + }, + { + // Sample far above overhead skipped. + // referenceClock should retry and get errNoSamples. + name: "reject-overhead", + samples: []sample{ + {before: 100000, after: 100000 + 5*defaultOverheadCycles, ref: 100}, + }, + err: errNoSamples, + }, + { + // Maximum overhead allowed is bounded. + name: "over-max-overhead", + // Generate a bunch of samples. The reference clock + // needs a while to ramp up its expected overhead. + samples: generateSamples(100, 2*maxOverheadCycles), + err: errOverheadTooHigh, + }, + { + // Overhead at maximum overhead is allowed. + name: "max-overhead", + // Generate a bunch of samples. The reference clock + // needs a while to ramp up its expected overhead. + samples: generateSamples(100, maxOverheadCycles), + err: nil, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + s := newTestSampler(tc.samples, nil) + err := s.Sample() + if err != tc.err { + t.Errorf("Sample err got %v want %v", err, tc.err) + } + }) + } +} + +// TestOutliersIgnored tests that referenceClock ignores samples with very high +// overhead. +func TestOutliersIgnored(t *testing.T) { + s := newTestSampler([]sample{ + {before: 100000, after: 100000 + defaultOverheadCycles, ref: 100}, + {before: 200000, after: 200000 + defaultOverheadCycles, ref: 200}, + {before: 300000, after: 300000 + defaultOverheadCycles, ref: 300}, + {before: 400000, after: 400000 + defaultOverheadCycles, ref: 400}, + {before: 500000, after: 500000 + 5*defaultOverheadCycles, ref: 500}, // Ignored + {before: 600000, after: 600000 + defaultOverheadCycles, ref: 600}, + {before: 700000, after: 700000 + defaultOverheadCycles, ref: 700}, + }, nil) + + // Collect 5 samples. + for i := 0; i < 5; i++ { + err := s.Sample() + if err != nil { + t.Fatalf("Unexpected error while sampling: %v", err) + } + } + + oldest, newest, ok := s.Range() + if !ok { + t.Fatalf("Range not ok") + } + + if oldest.ref != 100 { + t.Errorf("oldest.ref got %v want %v", oldest.ref, 100) + } + + // We skipped the high-overhead sample. + if newest.ref != 600 { + t.Errorf("newest.ref got %v want %v", newest.ref, 600) + } +} diff --git a/pkg/sentry/time/sampler_unsafe.go b/pkg/sentry/time/sampler_unsafe.go new file mode 100644 index 000000000..7ea19d387 --- /dev/null +++ b/pkg/sentry/time/sampler_unsafe.go @@ -0,0 +1,56 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package time + +import ( + "syscall" + "unsafe" +) + +// syscallTSCReferenceClocks is the standard referenceClocks, collecting +// samples using CLOCK_GETTIME and RDTSC. +type syscallTSCReferenceClocks struct { + tscCycleClock +} + +// Sample implements sampler.Sample. +func (syscallTSCReferenceClocks) Sample(c ClockID) (sample, error) { + var s sample + + s.before = Rdtsc() + + // Don't call clockGettime to avoid a call which may call morestack. + var ts syscall.Timespec + _, _, e := syscall.RawSyscall(syscall.SYS_CLOCK_GETTIME, uintptr(c), uintptr(unsafe.Pointer(&ts)), 0) + if e != 0 { + return sample{}, e + } + + s.after = Rdtsc() + s.ref = ReferenceNS(ts.Nano()) + + return s, nil +} + +// clockGettime calls SYS_CLOCK_GETTIME, returning time in nanoseconds. +func clockGettime(c ClockID) (ReferenceNS, error) { + var ts syscall.Timespec + _, _, e := syscall.RawSyscall(syscall.SYS_CLOCK_GETTIME, uintptr(c), uintptr(unsafe.Pointer(&ts)), 0) + if e != 0 { + return 0, e + } + + return ReferenceNS(ts.Nano()), nil +} diff --git a/pkg/sentry/time/tsc_amd64.s b/pkg/sentry/time/tsc_amd64.s new file mode 100644 index 000000000..4cc604392 --- /dev/null +++ b/pkg/sentry/time/tsc_amd64.s @@ -0,0 +1,27 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +TEXT ·Rdtsc(SB),NOSPLIT,$0-8 + // N.B. We need LFENCE on Intel, AMD is more complicated. + // Modern AMD CPUs with modern kernels make LFENCE behave like it does + // on Intel with MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT. MFENCE is + // otherwise needed on AMD. + LFENCE + RDTSC + SHLQ $32, DX + ADDQ DX, AX + MOVQ AX, ret+0(FP) + RET |