Check in gVisor.

PiperOrigin-RevId: 194583126 Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
author: Googler <noreply@google.com> 2018-04-27 10:37:02 -0700
committer: Adin Scannell <ascannell@google.com> 2018-04-28 01:44:26 -0400
commit: d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch)
tree: 54f95eef73aee6bacbfc736fffc631be2605ed53 /pkg/sentry/time
parent: f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff)
12 files changed, 1834 insertions, 0 deletions
diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD
new file mode 100644
index 000000000..cbcd699d5
--- /dev/null
+++ b/pkg/sentry/time/BUILD
@@ -0,0 +1,48 @@
+package(licenses = ["notice"])  # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
+
+go_template_instance(
+    name = "seqatomic_parameters",
+    out = "seqatomic_parameters.go",
+    package = "time",
+    suffix = "Parameters",
+    template = "//pkg/sync:generic_seqatomic",
+    types = {
+        "Value": "Parameters",
+    },
+)
+
+go_library(
+    name = "time",
+    srcs = [
+        "calibrated_clock.go",
+        "clock_id.go",
+        "clocks.go",
+        "muldiv_amd64.s",
+        "parameters.go",
+        "sampler.go",
+        "sampler_unsafe.go",
+        "seqatomic_parameters.go",
+        "tsc_amd64.s",
+    ],
+    importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/time",
+    visibility = ["//pkg/sentry:internal"],
+    deps = [
+        "//pkg/log",
+        "//pkg/metric",
+        "//pkg/sync",
+        "//pkg/syserror",
+    ],
+)
+
+go_test(
+    name = "time_test",
+    srcs = [
+        "calibrated_clock_test.go",
+        "parameters_test.go",
+        "sampler_test.go",
+    ],
+    embed = [":time"],
+)
diff --git a/pkg/sentry/time/calibrated_clock.go b/pkg/sentry/time/calibrated_clock.go
new file mode 100644
index 000000000..cbb95e2d7
--- /dev/null
+++ b/pkg/sentry/time/calibrated_clock.go
@@ -0,0 +1,269 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package time provides a calibrated clock synchronized to a system reference
+// clock.
+package time
+
+import (
+	"sync"
+	"time"
+
+	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/pkg/metric"
+	"gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// fallbackMetric tracks failed updates. It is not sync, as it is not critical
+// that all occurrences are captured and CalibratedClock may fallback many
+// times.
+var fallbackMetric = metric.MustCreateNewUint64Metric("/time/fallback", false /* sync */, "Incremented when a clock falls back to system calls due to a failed update")
+
+// CalibratedClock implements a clock that tracks a reference clock.
+//
+// Users should call Update at regular intervals of around approxUpdateInterval
+// to ensure that the clock does not drift significantly from the reference
+// clock.
+type CalibratedClock struct {
+	// mu protects the fields below.
+	// TODO: consider a sequence counter for read locking.
+	mu sync.RWMutex
+
+	// ref sample the reference clock that this clock is calibrated
+	// against.
+	ref *sampler
+
+	// ready indicates that the fields below are ready for use calculating
+	// time.
+	ready bool
+
+	// params are the current timekeeping parameters.
+	params Parameters
+
+	// errorNS is the estimated clock error in nanoseconds.
+	errorNS ReferenceNS
+}
+
+// NewCalibratedClock creates a CalibratedClock that tracks the given ClockID.
+func NewCalibratedClock(c ClockID) *CalibratedClock {
+	return &CalibratedClock{
+		ref: newSampler(c),
+	}
+}
+
+// Debugf logs at debug level.
+func (c *CalibratedClock) Debugf(format string, v ...interface{}) {
+	if log.IsLogging(log.Debug) {
+		args := []interface{}{c.ref.clockID}
+		args = append(args, v...)
+		log.Debugf("CalibratedClock(%v): "+format, args...)
+	}
+}
+
+// Infof logs at debug level.
+func (c *CalibratedClock) Infof(format string, v ...interface{}) {
+	if log.IsLogging(log.Info) {
+		args := []interface{}{c.ref.clockID}
+		args = append(args, v...)
+		log.Infof("CalibratedClock(%v): "+format, args...)
+	}
+}
+
+// Warningf logs at debug level.
+func (c *CalibratedClock) Warningf(format string, v ...interface{}) {
+	if log.IsLogging(log.Warning) {
+		args := []interface{}{c.ref.clockID}
+		args = append(args, v...)
+		log.Warningf("CalibratedClock(%v): "+format, args...)
+	}
+}
+
+// reset forces the clock to restart the calibration process, logging the
+// passed message.
+func (c *CalibratedClock) reset(str string, v ...interface{}) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.resetLocked(str, v...)
+}
+
+// resetLocked is equivalent to reset with c.mu already held for writing.
+func (c *CalibratedClock) resetLocked(str string, v ...interface{}) {
+	c.Warningf(str+" Resetting clock; time may jump.", v...)
+	c.ready = false
+	c.ref.Reset()
+	fallbackMetric.Increment()
+}
+
+// updateParams updates the timekeeping parameters based on the passed
+// parameters.
+//
+// actual is the actual estimated timekeeping parameters. The stored parameters
+// may need to be adjusted slightly from these values to compensate for error.
+//
+// Preconditions: c.mu must be held for writing.
+func (c *CalibratedClock) updateParams(actual Parameters) {
+	if !c.ready {
+		// At initial calibration there is nothing to correct.
+		c.params = actual
+		c.ready = true
+
+		c.Infof("ready")
+
+		return
+	}
+
+	// Otherwise, adjust the params to correct for errors.
+	newParams, errorNS, err := errorAdjust(c.params, actual, actual.BaseCycles)
+	if err != nil {
+		// Something is very wrong. Reset and try again from the
+		// beginning.
+		c.resetLocked("Unable to update params: %v.", err)
+		return
+	}
+	logErrorAdjustment(c.ref.clockID, errorNS, c.params, newParams)
+
+	if errorNS.Magnitude() >= MaxClockError {
+		// We should never get such extreme error, something is very
+		// wrong. Reset everything and start again.
+		//
+		// N.B. logErrorAdjustment will have already logged the error
+		// at warning level.
+		//
+		// TODO: We could allow Realtime clock jumps here.
+		c.resetLocked("Extreme clock error.")
+		return
+	}
+
+	c.params = newParams
+	c.errorNS = errorNS
+}
+
+// Update runs the update step of the clock, updating its synchronization with
+// the reference clock.
+//
+// Update returns timekeeping and true with the new timekeeping parameters if
+// the clock is calibrated. Update should be called regularly to prevent the
+// clock from getting significantly out of sync from the reference clock.
+//
+// The returned timekeeping parameters are invalidated on the next call to
+// Update.
+func (c *CalibratedClock) Update() (Parameters, bool) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if err := c.ref.Sample(); err != nil {
+		c.resetLocked("Unable to update calibrated clock: %v.", err)
+		return Parameters{}, false
+	}
+
+	oldest, newest, ok := c.ref.Range()
+	if !ok {
+		// Not ready yet.
+		return Parameters{}, false
+	}
+
+	minCount := uint64(newest.before - oldest.after)
+	maxCount := uint64(newest.after - oldest.before)
+	refInterval := uint64(newest.ref - oldest.ref)
+
+	// freq hz = count / (interval ns) * (nsPerS ns) / (1 s)
+	nsPerS := uint64(time.Second.Nanoseconds())
+
+	minHz, ok := muldiv64(minCount, nsPerS, refInterval)
+	if !ok {
+		c.resetLocked("Unable to update calibrated clock: (%v - %v) * %v / %v overflows.", newest.before, oldest.after, nsPerS, refInterval)
+		return Parameters{}, false
+	}
+
+	maxHz, ok := muldiv64(maxCount, nsPerS, refInterval)
+	if !ok {
+		c.resetLocked("Unable to update calibrated clock: (%v - %v) * %v / %v overflows.", newest.after, oldest.before, nsPerS, refInterval)
+		return Parameters{}, false
+	}
+
+	c.updateParams(Parameters{
+		Frequency:  (minHz + maxHz) / 2,
+		BaseRef:    newest.ref,
+		BaseCycles: newest.after,
+	})
+
+	return c.params, true
+}
+
+// GetTime returns the current time based on the clock calibration.
+func (c *CalibratedClock) GetTime() (int64, error) {
+	c.mu.RLock()
+
+	if !c.ready {
+		// Fallback to a syscall.
+		now, err := c.ref.Syscall()
+		c.mu.RUnlock()
+		return int64(now), err
+	}
+
+	now := c.ref.Cycles()
+	v, ok := c.params.ComputeTime(now)
+	if !ok {
+		// Something is seriously wrong with the clock. Try
+		// again with syscalls.
+		c.resetLocked("Time computation overflowed. params = %+v, now = %v.", c.params, now)
+		now, err := c.ref.Syscall()
+		c.mu.RUnlock()
+		return int64(now), err
+	}
+
+	c.mu.RUnlock()
+	return v, nil
+}
+
+// CalibratedClocks contains calibrated monotonic and realtime clocks.
+//
+// TODO: We know that Linux runs the monotonic and realtime clocks at
+// the same rate, so rather than tracking both individually, we could do one
+// calibration for both clocks.
+type CalibratedClocks struct {
+	// monotonic is the clock tracking the system monotonic clock.
+	monotonic *CalibratedClock
+
+	// realtime is the realtime equivalent of monotonic.
+	realtime *CalibratedClock
+}
+
+// NewCalibratedClocks creates a CalibratedClocks.
+func NewCalibratedClocks() *CalibratedClocks {
+	return &CalibratedClocks{
+		monotonic: NewCalibratedClock(Monotonic),
+		realtime:  NewCalibratedClock(Realtime),
+	}
+}
+
+// Update implements Clocks.Update.
+func (c *CalibratedClocks) Update() (Parameters, bool, Parameters, bool) {
+	monotonicParams, monotonicOk := c.monotonic.Update()
+	realtimeParams, realtimeOk := c.realtime.Update()
+
+	return monotonicParams, monotonicOk, realtimeParams, realtimeOk
+}
+
+// GetTime implements Clocks.GetTime.
+func (c *CalibratedClocks) GetTime(id ClockID) (int64, error) {
+	switch id {
+	case Monotonic:
+		return c.monotonic.GetTime()
+	case Realtime:
+		return c.realtime.GetTime()
+	default:
+		return 0, syserror.EINVAL
+	}
+}
diff --git a/pkg/sentry/time/calibrated_clock_test.go b/pkg/sentry/time/calibrated_clock_test.go
new file mode 100644
index 000000000..8b6dd5592
--- /dev/null
+++ b/pkg/sentry/time/calibrated_clock_test.go
@@ -0,0 +1,186 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"testing"
+	"time"
+)
+
+// newTestCalibratedClock returns a CalibratedClock that collects samples from
+// the given sample list and cycle counts from the given cycle list.
+func newTestCalibratedClock(samples []sample, cycles []TSCValue) *CalibratedClock {
+	return &CalibratedClock{
+		ref: newTestSampler(samples, cycles),
+	}
+}
+
+func TestConstantFrequency(t *testing.T) {
+	// Perfectly constant frequency.
+	samples := []sample{
+		{before: 100000, after: 100000 + defaultOverheadCycles, ref: 100},
+		{before: 200000, after: 200000 + defaultOverheadCycles, ref: 200},
+		{before: 300000, after: 300000 + defaultOverheadCycles, ref: 300},
+		{before: 400000, after: 400000 + defaultOverheadCycles, ref: 400},
+		{before: 500000, after: 500000 + defaultOverheadCycles, ref: 500},
+		{before: 600000, after: 600000 + defaultOverheadCycles, ref: 600},
+		{before: 700000, after: 700000 + defaultOverheadCycles, ref: 700},
+	}
+
+	c := newTestCalibratedClock(samples, nil)
+
+	// Update from all samples.
+	for range samples {
+		c.Update()
+	}
+
+	c.mu.RLock()
+	if !c.ready {
+		c.mu.RUnlock()
+		t.Fatalf("clock not ready")
+	}
+	// A bit after the last sample.
+	now, ok := c.params.ComputeTime(750000)
+	c.mu.RUnlock()
+	if !ok {
+		t.Fatalf("ComputeTime ok got %v want true", ok)
+	}
+
+	t.Logf("now: %v", now)
+
+	// Time should be between the current sample and where we'd expect the
+	// next sample.
+	if now < 700 || now > 800 {
+		t.Errorf("now got %v want > 700 && < 800", now)
+	}
+}
+
+func TestErrorCorrection(t *testing.T) {
+	testCases := []struct {
+		name               string
+		samples            [5]sample
+		projectedTimeStart int64
+		projectedTimeEnd   int64
+	}{
+		// Initial calibration should be ~1MHz for each of these, and
+		// the reference clock changes in samples[2].
+		{
+			name: "slow-down",
+			samples: [5]sample{
+				{before: 1000000, after: 1000001, ref: ReferenceNS(1 * ApproxUpdateInterval.Nanoseconds())},
+				{before: 2000000, after: 2000001, ref: ReferenceNS(2 * ApproxUpdateInterval.Nanoseconds())},
+				// Reference clock has slowed down, causing 100ms of error.
+				{before: 3010000, after: 3010001, ref: ReferenceNS(3 * ApproxUpdateInterval.Nanoseconds())},
+				{before: 4020000, after: 4020001, ref: ReferenceNS(4 * ApproxUpdateInterval.Nanoseconds())},
+				{before: 5030000, after: 5030001, ref: ReferenceNS(5 * ApproxUpdateInterval.Nanoseconds())},
+			},
+			projectedTimeStart: 3005 * time.Millisecond.Nanoseconds(),
+			projectedTimeEnd:   3015 * time.Millisecond.Nanoseconds(),
+		},
+		{
+			name: "speed-up",
+			samples: [5]sample{
+				{before: 1000000, after: 1000001, ref: ReferenceNS(1 * ApproxUpdateInterval.Nanoseconds())},
+				{before: 2000000, after: 2000001, ref: ReferenceNS(2 * ApproxUpdateInterval.Nanoseconds())},
+				// Reference clock has sped up, causing 100ms of error.
+				{before: 2990000, after: 2990001, ref: ReferenceNS(3 * ApproxUpdateInterval.Nanoseconds())},
+				{before: 3980000, after: 3980001, ref: ReferenceNS(4 * ApproxUpdateInterval.Nanoseconds())},
+				{before: 4970000, after: 4970001, ref: ReferenceNS(5 * ApproxUpdateInterval.Nanoseconds())},
+			},
+			projectedTimeStart: 2985 * time.Millisecond.Nanoseconds(),
+			projectedTimeEnd:   2995 * time.Millisecond.Nanoseconds(),
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			c := newTestCalibratedClock(tc.samples[:], nil)
+
+			// Initial calibration takes two updates.
+			_, ok := c.Update()
+			if ok {
+				t.Fatalf("Update ready too early")
+			}
+
+			params, ok := c.Update()
+			if !ok {
+				t.Fatalf("Update not ready")
+			}
+
+			// Initial calibration is ~1MHz.
+			hz := params.Frequency
+			if hz < 990000 || hz > 1010000 {
+				t.Fatalf("Frequency got %v want > 990kHz && < 1010kHz", hz)
+			}
+
+			// Project time at the next update. Given the 1MHz
+			// calibration, it is expected to be ~3.1s/2.9s, not
+			// the actual 3s.
+			//
+			// N.B. the next update time is the "after" time above.
+			projected, ok := params.ComputeTime(tc.samples[2].after)
+			if !ok {
+				t.Fatalf("ComputeTime ok got %v want true", ok)
+			}
+			if projected < tc.projectedTimeStart || projected > tc.projectedTimeEnd {
+				t.Fatalf("ComputeTime(%v) got %v want > %v && < %v", tc.samples[2].after, projected, tc.projectedTimeStart, tc.projectedTimeEnd)
+			}
+
+			// Update again to see the changed reference clock.
+			params, ok = c.Update()
+			if !ok {
+				t.Fatalf("Update not ready")
+			}
+
+			// We now know that TSC = tc.samples[2].after -> 3s,
+			// but with the previous params indicated that TSC
+			// tc.samples[2].after -> 3.5s/2.5s. We can't allow the
+			// clock to go backwards, and having the clock jump
+			// forwards is undesirable. There should be a smooth
+			// transition that corrects the clock error over time.
+			// Check that the clock is continuous at TSC =
+			// tc.samples[2].after.
+			newProjected, ok := params.ComputeTime(tc.samples[2].after)
+			if !ok {
+				t.Fatalf("ComputeTime ok got %v want true", ok)
+			}
+			if newProjected != projected {
+				t.Errorf("Discontinuous time; ComputeTime(%v) got %v want %v", tc.samples[2].after, newProjected, projected)
+			}
+
+			// As the reference clock stablizes, ensure that the clock error
+			// decreases.
+			initialErr := c.errorNS
+			t.Logf("initial error: %v ns", initialErr)
+
+			_, ok = c.Update()
+			if !ok {
+				t.Fatalf("Update not ready")
+			}
+			if c.errorNS.Magnitude() > initialErr.Magnitude() {
+				t.Errorf("errorNS increased, got %v want |%v| <= |%v|", c.errorNS, c.errorNS, initialErr)
+			}
+
+			_, ok = c.Update()
+			if !ok {
+				t.Fatalf("Update not ready")
+			}
+			if c.errorNS.Magnitude() > initialErr.Magnitude() {
+				t.Errorf("errorNS increased, got %v want |%v| <= |%v|", c.errorNS, c.errorNS, initialErr)
+			}
+
+			t.Logf("final error: %v ns", c.errorNS)
+		})
+	}
+}
diff --git a/pkg/sentry/time/clock_id.go b/pkg/sentry/time/clock_id.go
new file mode 100644
index 000000000..500102e58
--- /dev/null
+++ b/pkg/sentry/time/clock_id.go
@@ -0,0 +1,40 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"strconv"
+)
+
+// ClockID is a Linux clock identifier.
+type ClockID int32
+
+// These are the supported Linux clock identifiers.
+const (
+	Realtime ClockID = iota
+	Monotonic
+)
+
+// String implements fmt.Stringer.String.
+func (c ClockID) String() string {
+	switch c {
+	case Realtime:
+		return "Realtime"
+	case Monotonic:
+		return "Monotonic"
+	default:
+		return strconv.Itoa(int(c))
+	}
+}
diff --git a/pkg/sentry/time/clocks.go b/pkg/sentry/time/clocks.go
new file mode 100644
index 000000000..9925b407d
--- /dev/null
+++ b/pkg/sentry/time/clocks.go
@@ -0,0 +1,31 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+// Clocks represents a clock source that contains both a monotonic and realtime
+// clock.
+type Clocks interface {
+	// Update performs an update step, keeping the clocks in sync with the
+	// reference host clocks, and returning the new timekeeping parameters.
+	//
+	// Update should be called at approximately ApproxUpdateInterval.
+	Update() (monotonicParams Parameters, monotonicOk bool, realtimeParam Parameters, realtimeOk bool)
+
+	// GetTime returns the current time in nanoseconds for the given clock.
+	//
+	// Clocks implementations must support at least Monotonic and
+	// Realtime.
+	GetTime(c ClockID) (int64, error)
+}
diff --git a/pkg/sentry/time/muldiv_amd64.s b/pkg/sentry/time/muldiv_amd64.s
new file mode 100644
index 000000000..291940b1d
--- /dev/null
+++ b/pkg/sentry/time/muldiv_amd64.s
@@ -0,0 +1,44 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "textflag.h"
+
+// Documentation is available in parameters.go.
+//
+// func muldiv64(value, multiplier, divisor uint64) (uint64, bool)
+TEXT ·muldiv64(SB),NOSPLIT,$0-33
+	MOVQ value+0(FP), AX
+	MOVQ multiplier+8(FP), BX
+	MOVQ divisor+16(FP), CX
+
+	// Multiply AX*BX and store result in DX:AX.
+	MULQ BX
+
+	// If divisor <= (value*multiplier) / 2^64, then the division will overflow.
+	//
+	// (value*multiplier) / 2^64 is DX:AX >> 64, or simply DX.
+	CMPQ CX, DX
+	JLE overflow
+
+	// Divide DX:AX by CX.
+	DIVQ CX
+
+	MOVQ AX, result+24(FP)
+	MOVB $1, ok+32(FP)
+	RET
+
+overflow:
+	MOVQ $0, result+24(FP)
+	MOVB $0, ok+32(FP)
+	RET
diff --git a/pkg/sentry/time/parameters.go b/pkg/sentry/time/parameters.go
new file mode 100644
index 000000000..594b4874b
--- /dev/null
+++ b/pkg/sentry/time/parameters.go
@@ -0,0 +1,239 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"fmt"
+	"time"
+
+	"gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+const (
+	// ApproxUpdateInterval is the approximate interval that parameters
+	// should be updated at.
+	//
+	// Error correction assumes that the next update will occur after this
+	// much time.
+	//
+	// If an update occurs before ApproxUpdateInterval passes, it has no
+	// adverse effect on error correction behavior.
+	//
+	// If an update occurs after ApproxUpdateInterval passes, the clock
+	// will overshoot its error correction target and begin accumulating
+	// error in the other direction.
+	//
+	// If updates occur after more than 2*ApproxUpdateInterval passes, the
+	// clock becomes unstable, accumulating more error than it had
+	// originally. Repeated updates after more than 2*ApproxUpdateInterval
+	// will cause unbounded increases in error.
+	//
+	// These statements assume that the host clock does not change. Actual
+	// error will depend upon host clock changes.
+	//
+	// TODO: make error correction more robust to delayed
+	// updates.
+	ApproxUpdateInterval = 1 * time.Second
+
+	// MaxClockError is the maximum amount of error that the clocks will
+	// try to correct.
+	//
+	// This limit:
+	//
+	//  * Puts a limit on cases of otherwise unbounded increases in error.
+	//
+	//  * Avoids unreasonably large frequency adjustments required to
+	//    correct large errors over a single update interval.
+	MaxClockError = ReferenceNS(ApproxUpdateInterval) / 4
+)
+
+// Parameters are the timekeeping parameters needed to compute the current
+// time.
+type Parameters struct {
+	// BaseCycles was the TSC counter value when the time was BaseRef.
+	BaseCycles TSCValue
+
+	// BaseRef is the reference clock time in nanoseconds corresponding to
+	// BaseCycles.
+	BaseRef ReferenceNS
+
+	// Frequency is the frequency of the cycle clock in Hertz.
+	Frequency uint64
+}
+
+// muldiv64 multiplies two 64-bit numbers, then divides the result by another
+// 64-bit number.
+//
+// It requires that the result fit in 64 bits, but doesn't require that
+// intermediate values do; in particular, the result of the multiplication may
+// require 128 bits.
+//
+// It returns !ok if divisor is zero or the result does not fit in 64 bits.
+func muldiv64(value, multiplier, divisor uint64) (uint64, bool)
+
+// ComputeTime calculates the current time from a "now" TSC value.
+//
+// time = ref + (now - base) / f
+func (p Parameters) ComputeTime(nowCycles TSCValue) (int64, bool) {
+	diffCycles := nowCycles - p.BaseCycles
+	if diffCycles < 0 {
+		log.Warningf("now cycles %v < base cycles %v", nowCycles, p.BaseCycles)
+		diffCycles = 0
+	}
+
+	// Overflow "won't ever happen". If diffCycles is the max value
+	// (2^63 - 1), then to overflow,
+	//
+	// frequency <= ((2^63 - 1) * 10^9) / 2^64 = 500Mhz
+	//
+	// A TSC running at 2GHz takes 201 years to reach 2^63-1. 805 years at
+	// 500MHz.
+	diffNS, ok := muldiv64(uint64(diffCycles), uint64(time.Second.Nanoseconds()), p.Frequency)
+	return int64(uint64(p.BaseRef) + diffNS), ok
+}
+
+// errorAdjust returns a new Parameters struct "adjusted" that satisfies:
+//
+// 1. adjusted.ComputeTime(now) = prevParams.ComputeTime(now)
+//   * i.e., the current time does not jump.
+//
+// 2. adjusted.ComputeTime(TSC at next update) = newParams.ComputeTime(TSC at next update)
+//   * i.e., Any error between prevParams and newParams will be corrected over
+//     the course of the next update period.
+//
+// errorAdjust also returns the current clock error.
+//
+// Preconditions:
+// * newParams.BaseCycles >= prevParams.BaseCycles; i.e., TSC must not go
+//   backwards.
+// * newParams.BaseCycles <= now; i.e., the new parameters be computed at or
+//   before now.
+func errorAdjust(prevParams Parameters, newParams Parameters, now TSCValue) (Parameters, ReferenceNS, error) {
+	if newParams.BaseCycles < prevParams.BaseCycles {
+		// Oh dear! Something is very wrong.
+		return Parameters{}, 0, fmt.Errorf("TSC went backwards in updated clock params: %v < %v", newParams.BaseCycles, prevParams.BaseCycles)
+	}
+	if newParams.BaseCycles > now {
+		return Parameters{}, 0, fmt.Errorf("parameters contain base cycles later than now: %v > %v", newParams.BaseCycles, now)
+	}
+
+	intervalNS := int64(ApproxUpdateInterval.Nanoseconds())
+	nsPerSec := uint64(time.Second.Nanoseconds())
+
+	// Current time as computed by prevParams.
+	oldNowNS, ok := prevParams.ComputeTime(now)
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("old now time computation overflowed. params = %+v, now = %v", prevParams, now)
+	}
+
+	// We expect the update ticker to run based on this clock (i.e., it has
+	// been using prevParams and will use the returned adjusted
+	// parameters). Hence it will decide to fire intervalNS from the
+	// current (oldNowNS) "now".
+	nextNS := oldNowNS + intervalNS
+
+	if nextNS <= int64(newParams.BaseRef) {
+		// The next update time already passed before the new
+		// parameters were created! We definitely can't correct the
+		// error by then.
+		return Parameters{}, 0, fmt.Errorf("unable to correct error in single period. oldNowNS = %v, nextNS = %v, p = %v", oldNowNS, nextNS, newParams)
+	}
+
+	// For what TSC value next will newParams.ComputeTime(next) = nextNS?
+	//
+	// Solve ComputeTime for next:
+	//
+	// next = newParams.Frequency * (nextNS - newParams.BaseRef) + newParams.BaseCycles
+	c, ok := muldiv64(newParams.Frequency, uint64(nextNS-int64(newParams.BaseRef)), nsPerSec)
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("%v * (%v - %v) / %v overflows", newParams.Frequency, nextNS, newParams.BaseRef, nsPerSec)
+	}
+
+	cycles := TSCValue(c)
+	next := cycles + newParams.BaseCycles
+
+	if next <= now {
+		// The next update time already passed now with the new
+		// parameters! We can't correct the error in a single period.
+		return Parameters{}, 0, fmt.Errorf("unable to correct error in single period. oldNowNS = %v, nextNS = %v, now = %v, next = %v", oldNowNS, nextNS, now, next)
+	}
+
+	// We want to solve for parameters that satisfy:
+	//
+	// adjusted.ComputeTime(now) = oldNowNS
+	//
+	// adjusted.ComputeTime(next) = nextNS
+	//
+	// i.e., the current time does not change, but by the time we reach
+	// next we reach the same time as newParams.
+
+	// We choose to keep BaseCycles fixed.
+	adjusted := Parameters{
+		BaseCycles: newParams.BaseCycles,
+	}
+
+	// We want a slope such that time goes from oldNowNS to nextNS when
+	// we reach next.
+	//
+	// In other words, cycles should increase by next - now in the next
+	// interval.
+
+	cycles = next - now
+	ns := intervalNS
+
+	// adjusted.Frequency = cycles / ns
+	adjusted.Frequency, ok = muldiv64(uint64(cycles), nsPerSec, uint64(ns))
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("(%v - %v) * %v / %v overflows", next, now, nsPerSec, ns)
+	}
+
+	// Now choose a base reference such that the current time remains the
+	// same. Note that this is just ComputeTime, solving for BaseRef:
+	//
+	// oldNowNS = BaseRef + (now - BaseCycles) / Frequency
+	// BaseRef = oldNowNS - (now - BaseCycles) / Frequency
+	diffNS, ok := muldiv64(uint64(now-adjusted.BaseCycles), nsPerSec, adjusted.Frequency)
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("(%v - %v) * %v / %v overflows", now, adjusted.BaseCycles, nsPerSec, adjusted.Frequency)
+	}
+
+	adjusted.BaseRef = ReferenceNS(oldNowNS - int64(diffNS))
+
+	// The error is the difference between the current time and what the
+	// new parameters say the current time should be.
+	newNowNS, ok := newParams.ComputeTime(now)
+	if !ok {
+		return Parameters{}, 0, fmt.Errorf("new now time computation overflowed. params = %+v, now = %v", newParams, now)
+	}
+
+	errorNS := ReferenceNS(oldNowNS - newNowNS)
+
+	return adjusted, errorNS, nil
+}
+
+// logErrorAdjustment logs the clock error and associated error correction
+// frequency adjustment.
+//
+// The log level is determined by the error severity.
+func logErrorAdjustment(clock ClockID, errorNS ReferenceNS, orig, adjusted Parameters) {
+	fn := log.Debugf
+	if int64(errorNS.Magnitude()) > time.Millisecond.Nanoseconds() {
+		fn = log.Warningf
+	} else if int64(errorNS.Magnitude()) > 10*time.Microsecond.Nanoseconds() {
+		fn = log.Infof
+	}
+
+	fn("Clock(%v): error: %v ns, adjusted frequency from %v Hz to %v Hz", clock, errorNS, orig.Frequency, adjusted.Frequency)
+}
diff --git a/pkg/sentry/time/parameters_test.go b/pkg/sentry/time/parameters_test.go
new file mode 100644
index 000000000..7394fc5ee
--- /dev/null
+++ b/pkg/sentry/time/parameters_test.go
@@ -0,0 +1,486 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"math"
+	"testing"
+	"time"
+)
+
+func TestParametersComputeTime(t *testing.T) {
+	testCases := []struct {
+		name   string
+		params Parameters
+		now    TSCValue
+		want   int64
+	}{
+		{
+			// Now is the same as the base cycles.
+			name: "base-cycles",
+			params: Parameters{
+				BaseCycles: 10000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			now:  10000,
+			want: 5000 * time.Millisecond.Nanoseconds(),
+		},
+		{
+			// Now is the behind the base cycles. Time is frozen.
+			name: "backwards",
+			params: Parameters{
+				BaseCycles: 10000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			now:  9000,
+			want: 5000 * time.Millisecond.Nanoseconds(),
+		},
+		{
+			// Now is ahead of the base cycles.
+			name: "ahead",
+			params: Parameters{
+				BaseCycles: 10000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			now:  15000,
+			want: 5500 * time.Millisecond.Nanoseconds(),
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, ok := tc.params.ComputeTime(tc.now)
+			if !ok {
+				t.Errorf("ComputeTime ok got %v want true", got)
+			}
+			if got != tc.want {
+				t.Errorf("ComputeTime got %+v want %+v", got, tc.want)
+			}
+		})
+	}
+}
+
+func TestParametersErrorAdjust(t *testing.T) {
+	testCases := []struct {
+		name      string
+		oldParams Parameters
+		now       TSCValue
+		newParams Parameters
+		want      Parameters
+		errorNS   ReferenceNS
+		wantErr   bool
+	}{
+		{
+			// newParams are perfectly continuous with oldParams
+			// and don't need adjustment.
+			name: "continuous",
+			oldParams: Parameters{
+				BaseCycles: 0,
+				BaseRef:    0,
+				Frequency:  10000,
+			},
+			now: 50000,
+			newParams: Parameters{
+				BaseCycles: 50000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			want: Parameters{
+				BaseCycles: 50000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+		},
+		{
+			// Same as "continuous", but with now ahead of
+			// newParams.BaseCycles. The result is the same as
+			// there is no error to correct.
+			name: "continuous-nowdiff",
+			oldParams: Parameters{
+				BaseCycles: 0,
+				BaseRef:    0,
+				Frequency:  10000,
+			},
+			now: 60000,
+			newParams: Parameters{
+				BaseCycles: 50000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			want: Parameters{
+				BaseCycles: 50000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+		},
+		{
+			// errorAdjust bails out if the TSC goes backwards.
+			name: "tsc-backwards",
+			oldParams: Parameters{
+				BaseCycles: 10000,
+				BaseRef:    ReferenceNS(1000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			now: 9000,
+			newParams: Parameters{
+				BaseCycles: 9000,
+				BaseRef:    ReferenceNS(1100 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			wantErr: true,
+		},
+		{
+			// errorAdjust bails out if new params are from after now.
+			name: "params-after-now",
+			oldParams: Parameters{
+				BaseCycles: 10000,
+				BaseRef:    ReferenceNS(1000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			now: 11000,
+			newParams: Parameters{
+				BaseCycles: 12000,
+				BaseRef:    ReferenceNS(1200 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			wantErr: true,
+		},
+		{
+			// Host clock sped up.
+			name: "speed-up",
+			oldParams: Parameters{
+				BaseCycles: 0,
+				BaseRef:    0,
+				Frequency:  10000,
+			},
+			now: 45000,
+			// Host frequency changed to 9000 immediately after
+			// oldParams was returned.
+			newParams: Parameters{
+				BaseCycles: 45000,
+				// From oldParams, we think ref = 4.5s at cycles = 45000.
+				BaseRef:   ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency: 9000,
+			},
+			want: Parameters{
+				BaseCycles: 45000,
+				BaseRef:    ReferenceNS(4500 * time.Millisecond.Nanoseconds()),
+				// We must decrease the new frequency by 50% to
+				// correct 0.5s of error in 1s
+				// (ApproxUpdateInterval).
+				Frequency: 4500,
+			},
+			errorNS: ReferenceNS(-500 * time.Millisecond.Nanoseconds()),
+		},
+		{
+			// Host clock sped up, with now ahead of newParams.
+			name: "speed-up-nowdiff",
+			oldParams: Parameters{
+				BaseCycles: 0,
+				BaseRef:    0,
+				Frequency:  10000,
+			},
+			now: 50000,
+			// Host frequency changed to 9000 immediately after
+			// oldParams was returned.
+			newParams: Parameters{
+				BaseCycles: 45000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  9000,
+			},
+			// nextRef = 6000ms
+			// nextCycles = 9000 * (6000ms - 5000ms) + 45000
+			// nextCycles = 9000 * (1s) + 45000
+			// nextCycles = 54000
+			// f = (54000 - 50000) / 1s = 4000
+			//
+			// ref = 5000ms - (50000 - 45000) / 4000
+			// ref = 3.75s
+			want: Parameters{
+				BaseCycles: 45000,
+				BaseRef:    ReferenceNS(3750 * time.Millisecond.Nanoseconds()),
+				Frequency:  4000,
+			},
+			// oldNow = 50000 * 10000 = 5s
+			// newNow = (50000 - 45000) / 9000 + 5s = 5.555s
+			errorNS: ReferenceNS((5000*time.Millisecond - 5555555555).Nanoseconds()),
+		},
+		{
+			// Host clock sped up. The new parameters are so far
+			// ahead that the next update time already passed.
+			name: "speed-up-uncorrectable-baseref",
+			oldParams: Parameters{
+				BaseCycles: 0,
+				BaseRef:    0,
+				Frequency:  10000,
+			},
+			now: 50000,
+			// Host frequency changed to 5000 immediately after
+			// oldParams was returned.
+			newParams: Parameters{
+				BaseCycles: 45000,
+				BaseRef:    ReferenceNS(9000 * time.Millisecond.Nanoseconds()),
+				Frequency:  5000,
+			},
+			// The next update should be at 10s, but newParams
+			// already passed 6s.  Thus it is impossible to correct
+			// the clock by then.
+			wantErr: true,
+		},
+		{
+			// Host clock sped up. The new parameters are moving so
+			// fast that the next update should be before now.
+			name: "speed-up-uncorrectable-frequency",
+			oldParams: Parameters{
+				BaseCycles: 0,
+				BaseRef:    0,
+				Frequency:  10000,
+			},
+			now: 55000,
+			// Host frequency changed to 7500 immediately after
+			// oldParams was returned.
+			newParams: Parameters{
+				BaseCycles: 45000,
+				BaseRef:    ReferenceNS(6000 * time.Millisecond.Nanoseconds()),
+				Frequency:  7500,
+			},
+			// The next update should be at 6.5s, but newParams are
+			// so far ahead and fast that they reach 6.5s at cycle
+			// 48750, which before now! Thus it is impossible to
+			// correct the clock by then.
+			wantErr: true,
+		},
+		{
+			// Host clock slowed down.
+			name: "slow-down",
+			oldParams: Parameters{
+				BaseCycles: 0,
+				BaseRef:    0,
+				Frequency:  10000,
+			},
+			now: 55000,
+			// Host frequency changed to 11000 immediately after
+			// oldParams was returned.
+			newParams: Parameters{
+				BaseCycles: 55000,
+				// From oldParams, we think ref = 5.5s at cycles = 55000.
+				BaseRef:   ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency: 11000,
+			},
+			want: Parameters{
+				BaseCycles: 55000,
+				BaseRef:    ReferenceNS(5500 * time.Millisecond.Nanoseconds()),
+				// We must increase the new frequency by 50% to
+				// correct 0.5s of error in 1s
+				// (ApproxUpdateInterval).
+				Frequency: 16500,
+			},
+			errorNS: ReferenceNS(500 * time.Millisecond.Nanoseconds()),
+		},
+		{
+			// Host clock slowed down, with now ahead of newParams.
+			name: "slow-down-nowdiff",
+			oldParams: Parameters{
+				BaseCycles: 0,
+				BaseRef:    0,
+				Frequency:  10000,
+			},
+			now: 60000,
+			// Host frequency changed to 11000 immediately after
+			// oldParams was returned.
+			newParams: Parameters{
+				BaseCycles: 55000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  11000,
+			},
+			// nextRef = 7000ms
+			// nextCycles = 11000 * (7000ms - 5000ms) + 55000
+			// nextCycles = 11000 * (2000ms) + 55000
+			// nextCycles = 77000
+			// f = (77000 - 60000) / 1s = 17000
+			//
+			// ref = 6000ms - (60000 - 55000) / 17000
+			// ref = 5705882353ns
+			want: Parameters{
+				BaseCycles: 55000,
+				BaseRef:    ReferenceNS(5705882353),
+				Frequency:  17000,
+			},
+			// oldNow = 60000 * 10000 = 6s
+			// newNow = (60000 - 55000) / 11000 + 5s = 5.4545s
+			errorNS: ReferenceNS((6*time.Second - 5454545454).Nanoseconds()),
+		},
+		{
+			// Host time went backwards.
+			name: "time-backwards",
+			oldParams: Parameters{
+				BaseCycles: 50000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			now: 60000,
+			newParams: Parameters{
+				BaseCycles: 60000,
+				// From oldParams, we think ref = 6s at cycles = 60000.
+				BaseRef:   ReferenceNS(4000 * time.Millisecond.Nanoseconds()),
+				Frequency: 10000,
+			},
+			want: Parameters{
+				BaseCycles: 60000,
+				BaseRef:    ReferenceNS(6000 * time.Millisecond.Nanoseconds()),
+				// We must increase the frequency by 200% to
+				// correct 2s of error in 1s
+				// (ApproxUpdateInterval).
+				Frequency: 30000,
+			},
+			errorNS: ReferenceNS(2000 * time.Millisecond.Nanoseconds()),
+		},
+		{
+			// Host time went backwards, with now ahead of newParams.
+			name: "time-backwards-nowdiff",
+			oldParams: Parameters{
+				BaseCycles: 50000,
+				BaseRef:    ReferenceNS(5000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			now: 65000,
+			// nextRef = 7500ms
+			// nextCycles = 10000 * (7500ms - 4000ms) + 60000
+			// nextCycles = 10000 * (3500ms) + 60000
+			// nextCycles = 95000
+			// f = (95000 - 65000) / 1s = 30000
+			//
+			// ref = 6500ms - (65000 - 60000) / 30000
+			// ref = 6333333333ns
+			newParams: Parameters{
+				BaseCycles: 60000,
+				BaseRef:    ReferenceNS(4000 * time.Millisecond.Nanoseconds()),
+				Frequency:  10000,
+			},
+			want: Parameters{
+				BaseCycles: 60000,
+				BaseRef:    ReferenceNS(6333333334),
+				Frequency:  30000,
+			},
+			// oldNow = 65000 * 10000 = 6.5s
+			// newNow = (65000 - 60000) / 10000 + 4s = 4.5s
+			errorNS: ReferenceNS(2000 * time.Millisecond.Nanoseconds()),
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, errorNS, err := errorAdjust(tc.oldParams, tc.newParams, tc.now)
+			if err != nil && !tc.wantErr {
+				t.Errorf("err got %v want nil", err)
+			} else if err == nil && tc.wantErr {
+				t.Errorf("err got nil want non-nil")
+			}
+
+			if got != tc.want {
+				t.Errorf("Parameters got %+v want %+v", got, tc.want)
+			}
+			if errorNS != tc.errorNS {
+				t.Errorf("errorNS got %v want %v", errorNS, tc.errorNS)
+			}
+		})
+	}
+}
+
+func testMuldiv(t *testing.T, v uint64) {
+	for i := uint64(1); i <= 1000000; i++ {
+		mult := uint64(1000000000)
+		div := i * mult
+		res, ok := muldiv64(v, mult, div)
+		if !ok {
+			t.Errorf("Result of %v * %v / %v ok got false want true", v, mult, div)
+		}
+		if want := v / i; res != want {
+			t.Errorf("Bad result of %v * %v / %v: got %v, want %v", v, mult, div, res, want)
+		}
+	}
+}
+
+func TestMulDiv(t *testing.T) {
+	testMuldiv(t, math.MaxUint64)
+	for i := int64(-10); i <= 10; i++ {
+		testMuldiv(t, uint64(i))
+	}
+}
+
+func TestMulDivZero(t *testing.T) {
+	if r, ok := muldiv64(2, 4, 0); ok {
+		t.Errorf("muldiv64(2, 4, 0) got %d, ok want !ok", r)
+	}
+
+	if r, ok := muldiv64(0, 0, 0); ok {
+		t.Errorf("muldiv64(0, 0, 0) got %d, ok want !ok", r)
+	}
+}
+
+func TestMulDivOverflow(t *testing.T) {
+	testCases := []struct {
+		name string
+		val  uint64
+		mult uint64
+		div  uint64
+		ok   bool
+		ret  uint64
+	}{
+		{
+			name: "2^62",
+			val:  1 << 63,
+			mult: 4,
+			div:  8,
+			ok:   true,
+			ret:  1 << 62,
+		},
+		{
+			name: "2^64-1",
+			val:  0xffffffffffffffff,
+			mult: 1,
+			div:  1,
+			ok:   true,
+			ret:  0xffffffffffffffff,
+		},
+		{
+			name: "2^64",
+			val:  1 << 63,
+			mult: 4,
+			div:  2,
+			ok:   false,
+		},
+		{
+			name: "2^125",
+			val:  1 << 63,
+			mult: 1 << 63,
+			div:  2,
+			ok:   false,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			r, ok := muldiv64(tc.val, tc.mult, tc.div)
+			if ok != tc.ok {
+				t.Errorf("ok got %v want %v", ok, tc.ok)
+			}
+			if tc.ok && r != tc.ret {
+				t.Errorf("ret got %v want %v", r, tc.ret)
+			}
+		})
+	}
+}
diff --git a/pkg/sentry/time/sampler.go b/pkg/sentry/time/sampler.go
new file mode 100644
index 000000000..cf581b5fa
--- /dev/null
+++ b/pkg/sentry/time/sampler.go
@@ -0,0 +1,225 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"errors"
+
+	"gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+const (
+	// defaultOverheadTSC is the default estimated syscall overhead in TSC cycles.
+	// It is further refined as syscalls are made.
+	defaultOverheadCycles = 1 * 1000
+
+	// maxOverheadCycles is the maximum allowed syscall overhead in TSC cycles.
+	maxOverheadCycles = 100 * defaultOverheadCycles
+
+	// maxSampleLoops is the maximum number of times to try to get a clock sample
+	// under the expected overhead.
+	maxSampleLoops = 5
+
+	// maxSamples is the maximum number of samples to collect.
+	maxSamples = 10
+)
+
+// errOverheadTooHigh is returned from sampler.Sample if the syscall
+// overhead is too high.
+var errOverheadTooHigh = errors.New("time syscall overhead exceeds maximum")
+
+// TSCValue is a value from the TSC.
+type TSCValue int64
+
+// Rdtsc reads the TSC.
+//
+// Intel SDM, Vol 3, Ch 17.15:
+// "The RDTSC instruction reads the time-stamp counter and is guaranteed to
+// return a monotonically increasing unique value whenever executed, except for
+// a 64-bit counter wraparound. Intel guarantees that the time-stamp counter
+// will not wraparound within 10 years after being reset."
+//
+// We use int64, so we have 5 years before wrap-around.
+func Rdtsc() TSCValue
+
+// ReferenceNS are nanoseconds in the reference clock domain.
+// int64 gives us ~290 years before this overflows.
+type ReferenceNS int64
+
+// Magnitude returns the absolute value of r.
+func (r ReferenceNS) Magnitude() ReferenceNS {
+	if r < 0 {
+		return -r
+	}
+	return r
+}
+
+// cycleClock is a TSC-based cycle clock.
+type cycleClock interface {
+	// Cycles returns a count value from the TSC.
+	Cycles() TSCValue
+}
+
+// tscCycleClock is a cycleClock that uses the real TSC.
+type tscCycleClock struct{}
+
+// Cycles implements cycleClock.Cycles.
+func (tscCycleClock) Cycles() TSCValue {
+	return Rdtsc()
+}
+
+// sample contains a sample from the reference clock, with TSC values from
+// before and after the reference clock value was captured.
+type sample struct {
+	before TSCValue
+	after  TSCValue
+	ref    ReferenceNS
+}
+
+// Overhead returns the sample overhead in TSC cycles.
+func (s *sample) Overhead() TSCValue {
+	return s.after - s.before
+}
+
+// referenceClocks collects individual samples from a reference clock ID and
+// TSC.
+type referenceClocks interface {
+	cycleClock
+
+	// Sample returns a single sample from the reference clock ID.
+	Sample(c ClockID) (sample, error)
+}
+
+// sampler collects samples from a reference system clock, minimizing
+// the overhead in each sample.
+type sampler struct {
+	// clockID is the reference clock ID (e.g., CLOCK_MONOTONIC).
+	clockID ClockID
+
+	// clocks provides raw samples.
+	clocks referenceClocks
+
+	// overhead is the estimated sample overhead in TSC cycles.
+	overhead TSCValue
+
+	// samples is a ring buffer of the latest samples collected.
+	samples []sample
+}
+
+// newSampler creates a sampler for clockID.
+func newSampler(c ClockID) *sampler {
+	return &sampler{
+		clockID:  c,
+		clocks:   syscallTSCReferenceClocks{},
+		overhead: defaultOverheadCycles,
+	}
+}
+
+// Reset discards previously collected clock samples.
+func (s *sampler) Reset() {
+	s.overhead = defaultOverheadCycles
+	s.samples = []sample{}
+}
+
+// lowOverheadSample returns a reference clock sample with minimized syscall overhead.
+func (s *sampler) lowOverheadSample() (sample, error) {
+	for {
+		for i := 0; i < maxSampleLoops; i++ {
+			samp, err := s.clocks.Sample(s.clockID)
+			if err != nil {
+				return sample{}, err
+			}
+
+			if samp.before > samp.after {
+				log.Warningf("TSC went backwards: %v > %v", samp.before, samp.after)
+				continue
+			}
+
+			if samp.Overhead() <= s.overhead {
+				return samp, nil
+			}
+		}
+
+		// Couldn't get a sample with the current overhead. Increase it.
+		newOverhead := 2 * s.overhead
+		if newOverhead > maxOverheadCycles {
+			// We'll give it one more shot with the max overhead.
+
+			if s.overhead == maxOverheadCycles {
+				return sample{}, errOverheadTooHigh
+			}
+
+			newOverhead = maxOverheadCycles
+		}
+
+		s.overhead = newOverhead
+		log.Debugf("Time: Adjusting syscall overhead up to %v", s.overhead)
+	}
+}
+
+// Sample collects a reference clock sample.
+func (s *sampler) Sample() error {
+	sample, err := s.lowOverheadSample()
+	if err != nil {
+		return err
+	}
+
+	s.samples = append(s.samples, sample)
+	if len(s.samples) > maxSamples {
+		s.samples = s.samples[1:]
+	}
+
+	// If the 4 most recent samples all have an overhead less than half the
+	// expected overhead, adjust downwards.
+	if len(s.samples) < 4 {
+		return nil
+	}
+
+	for _, sample := range s.samples[len(s.samples)-4:] {
+		if sample.Overhead() > s.overhead/2 {
+			return nil
+		}
+	}
+
+	s.overhead -= s.overhead / 8
+	log.Debugf("Time: Adjusting syscall overhead down to %v", s.overhead)
+
+	return nil
+}
+
+// Syscall returns the current raw reference time without storing TSC
+// samples.
+func (s *sampler) Syscall() (ReferenceNS, error) {
+	sample, err := s.clocks.Sample(s.clockID)
+	if err != nil {
+		return 0, err
+	}
+
+	return sample.ref, nil
+}
+
+// Cycles returns a raw TSC value.
+func (s *sampler) Cycles() TSCValue {
+	return s.clocks.Cycles()
+}
+
+// Range returns the widest range of clock samples available.
+func (s *sampler) Range() (sample, sample, bool) {
+	if len(s.samples) < 2 {
+		return sample{}, sample{}, false
+	}
+
+	return s.samples[0], s.samples[len(s.samples)-1], true
+}
diff --git a/pkg/sentry/time/sampler_test.go b/pkg/sentry/time/sampler_test.go
new file mode 100644
index 000000000..caf7e5c53
--- /dev/null
+++ b/pkg/sentry/time/sampler_test.go
@@ -0,0 +1,183 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"errors"
+	"testing"
+)
+
+// errNoSamples is returned when testReferenceClocks runs out of samples.
+var errNoSamples = errors.New("no samples available")
+
+// testReferenceClocks returns a preset list of samples and cycle counts.
+type testReferenceClocks struct {
+	samples []sample
+	cycles  []TSCValue
+}
+
+// Sample implements referenceClocks.Sample, returning the next sample in the list.
+func (t *testReferenceClocks) Sample(_ ClockID) (sample, error) {
+	if len(t.samples) == 0 {
+		return sample{}, errNoSamples
+	}
+
+	s := t.samples[0]
+	if len(t.samples) == 1 {
+		t.samples = nil
+	} else {
+		t.samples = t.samples[1:]
+	}
+
+	return s, nil
+}
+
+// Cycles implements referenceClocks.Cycles, returning the next TSCValue in the list.
+func (t *testReferenceClocks) Cycles() TSCValue {
+	if len(t.cycles) == 0 {
+		return 0
+	}
+
+	c := t.cycles[0]
+	if len(t.cycles) == 1 {
+		t.cycles = nil
+	} else {
+		t.cycles = t.cycles[1:]
+	}
+
+	return c
+}
+
+// newTestSampler returns a sampler that collects samples from
+// the given sample list and cycle counts from the given cycle list.
+func newTestSampler(samples []sample, cycles []TSCValue) *sampler {
+	return &sampler{
+		clocks: &testReferenceClocks{
+			samples: samples,
+			cycles:  cycles,
+		},
+		overhead: defaultOverheadCycles,
+	}
+}
+
+// generateSamples generates n samples with the given overhead.
+func generateSamples(n int, overhead TSCValue) []sample {
+	samples := []sample{{before: 1000000, after: 1000000 + overhead, ref: 100}}
+	for i := 0; i < n-1; i++ {
+		prev := samples[len(samples)-1]
+		samples = append(samples, sample{
+			before: prev.before + 1000000,
+			after:  prev.after + 1000000,
+			ref:    prev.ref + 100,
+		})
+	}
+	return samples
+}
+
+// TestSample ensures that samples can be collected.
+func TestSample(t *testing.T) {
+	testCases := []struct {
+		name    string
+		samples []sample
+		err     error
+	}{
+		{
+			name: "basic",
+			samples: []sample{
+				{before: 100000, after: 100000 + defaultOverheadCycles, ref: 100},
+			},
+			err: nil,
+		},
+		{
+			// Sample with backwards TSC ignored.
+			// referenceClock should retry and get errNoSamples.
+			name: "backwards-tsc-ignored",
+			samples: []sample{
+				{before: 100000, after: 90000, ref: 100},
+			},
+			err: errNoSamples,
+		},
+		{
+			// Sample far above overhead skipped.
+			// referenceClock should retry and get errNoSamples.
+			name: "reject-overhead",
+			samples: []sample{
+				{before: 100000, after: 100000 + 5*defaultOverheadCycles, ref: 100},
+			},
+			err: errNoSamples,
+		},
+		{
+			// Maximum overhead allowed is bounded.
+			name: "over-max-overhead",
+			// Generate a bunch of samples. The reference clock
+			// needs a while to ramp up its expected overhead.
+			samples: generateSamples(100, 2*maxOverheadCycles),
+			err:     errOverheadTooHigh,
+		},
+		{
+			// Overhead at maximum overhead is allowed.
+			name: "max-overhead",
+			// Generate a bunch of samples. The reference clock
+			// needs a while to ramp up its expected overhead.
+			samples: generateSamples(100, maxOverheadCycles),
+			err:     nil,
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			s := newTestSampler(tc.samples, nil)
+			err := s.Sample()
+			if err != tc.err {
+				t.Errorf("Sample err got %v want %v", err, tc.err)
+			}
+		})
+	}
+}
+
+// TestOutliersIgnored tests that referenceClock ignores samples with very high
+// overhead.
+func TestOutliersIgnored(t *testing.T) {
+	s := newTestSampler([]sample{
+		{before: 100000, after: 100000 + defaultOverheadCycles, ref: 100},
+		{before: 200000, after: 200000 + defaultOverheadCycles, ref: 200},
+		{before: 300000, after: 300000 + defaultOverheadCycles, ref: 300},
+		{before: 400000, after: 400000 + defaultOverheadCycles, ref: 400},
+		{before: 500000, after: 500000 + 5*defaultOverheadCycles, ref: 500}, // Ignored
+		{before: 600000, after: 600000 + defaultOverheadCycles, ref: 600},
+		{before: 700000, after: 700000 + defaultOverheadCycles, ref: 700},
+	}, nil)
+
+	// Collect 5 samples.
+	for i := 0; i < 5; i++ {
+		err := s.Sample()
+		if err != nil {
+			t.Fatalf("Unexpected error while sampling: %v", err)
+		}
+	}
+
+	oldest, newest, ok := s.Range()
+	if !ok {
+		t.Fatalf("Range not ok")
+	}
+
+	if oldest.ref != 100 {
+		t.Errorf("oldest.ref got %v want %v", oldest.ref, 100)
+	}
+
+	// We skipped the high-overhead sample.
+	if newest.ref != 600 {
+		t.Errorf("newest.ref got %v want %v", newest.ref, 600)
+	}
+}
diff --git a/pkg/sentry/time/sampler_unsafe.go b/pkg/sentry/time/sampler_unsafe.go
new file mode 100644
index 000000000..7ea19d387
--- /dev/null
+++ b/pkg/sentry/time/sampler_unsafe.go
@@ -0,0 +1,56 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// syscallTSCReferenceClocks is the standard referenceClocks, collecting
+// samples using CLOCK_GETTIME and RDTSC.
+type syscallTSCReferenceClocks struct {
+	tscCycleClock
+}
+
+// Sample implements sampler.Sample.
+func (syscallTSCReferenceClocks) Sample(c ClockID) (sample, error) {
+	var s sample
+
+	s.before = Rdtsc()
+
+	// Don't call clockGettime to avoid a call which may call morestack.
+	var ts syscall.Timespec
+	_, _, e := syscall.RawSyscall(syscall.SYS_CLOCK_GETTIME, uintptr(c), uintptr(unsafe.Pointer(&ts)), 0)
+	if e != 0 {
+		return sample{}, e
+	}
+
+	s.after = Rdtsc()
+	s.ref = ReferenceNS(ts.Nano())
+
+	return s, nil
+}
+
+// clockGettime calls SYS_CLOCK_GETTIME, returning time in nanoseconds.
+func clockGettime(c ClockID) (ReferenceNS, error) {
+	var ts syscall.Timespec
+	_, _, e := syscall.RawSyscall(syscall.SYS_CLOCK_GETTIME, uintptr(c), uintptr(unsafe.Pointer(&ts)), 0)
+	if e != 0 {
+		return 0, e
+	}
+
+	return ReferenceNS(ts.Nano()), nil
+}
diff --git a/pkg/sentry/time/tsc_amd64.s b/pkg/sentry/time/tsc_amd64.s
new file mode 100644
index 000000000..4cc604392
--- /dev/null
+++ b/pkg/sentry/time/tsc_amd64.s
@@ -0,0 +1,27 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "textflag.h"
+
+TEXT ·Rdtsc(SB),NOSPLIT,$0-8
+	// N.B. We need LFENCE on Intel, AMD is more complicated.
+	// Modern AMD CPUs with modern kernels make LFENCE behave like it does
+	// on Intel with MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT. MFENCE is
+	// otherwise needed on AMD.
+	LFENCE
+	RDTSC
+	SHLQ	$32, DX
+	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
+	RET
author	Googler <noreply@google.com>	2018-04-27 10:37:02 -0700
committer	Adin Scannell <ascannell@google.com>	2018-04-28 01:44:26 -0400
commit	d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch)
tree	54f95eef73aee6bacbfc736fffc631be2605ed53 /pkg/sentry/time
parent	f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff)