diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 69 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel_state_autogen.go | 124 | ||||
-rw-r--r-- | pkg/sentry/kernel/thread_group.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/timekeeper.go | 42 | ||||
-rw-r--r-- | pkg/sentry/state/state.go | 4 |
5 files changed, 124 insertions, 117 deletions
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index d537e608a..352c36ba9 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -143,12 +143,6 @@ type Kernel struct { // to CreateProcess, and is protected by extMu. globalInit *ThreadGroup - // realtimeClock is a ktime.Clock based on timekeeper's Realtime. - realtimeClock *timekeeperClock - - // monotonicClock is a ktime.Clock based on timekeeper's Monotonic. - monotonicClock *timekeeperClock - // syslog is the kernel log. syslog syslog @@ -306,6 +300,9 @@ type InitKernelArgs struct { // FeatureSet is the emulated CPU feature set. FeatureSet *cpuid.FeatureSet + // Timekeeper manages time for all tasks in the system. + Timekeeper *Timekeeper + // RootUserNamespace is the root user namespace. RootUserNamespace *auth.UserNamespace @@ -345,24 +342,18 @@ type InitKernelArgs struct { PIDNamespace *PIDNamespace } -// SetTimekeeper sets Kernel.timekeeper. SetTimekeeper must be called before -// Init. -func (k *Kernel) SetTimekeeper(tk *Timekeeper) { - k.timekeeper = tk -} - // Init initialize the Kernel with no tasks. // // Callers must manually set Kernel.Platform and call Kernel.SetMemoryFile -// and Kernel.SetTimekeeper before calling Init. +// before calling Init. func (k *Kernel) Init(args InitKernelArgs) error { if args.FeatureSet == nil { return fmt.Errorf("args.FeatureSet is nil") } - if k.timekeeper == nil { - return fmt.Errorf("timekeeper is nil") + if args.Timekeeper == nil { + return fmt.Errorf("args.Timekeeper is nil") } - if k.timekeeper.clocks == nil { + if args.Timekeeper.clocks == nil { return fmt.Errorf("must call Timekeeper.SetClocks() before Kernel.Init()") } if args.RootUserNamespace == nil { @@ -373,6 +364,7 @@ func (k *Kernel) Init(args InitKernelArgs) error { } k.featureSet = args.FeatureSet + k.timekeeper = args.Timekeeper k.tasks = newTaskSet(args.PIDNamespace) k.rootUserNamespace = args.RootUserNamespace k.rootUTSNamespace = args.RootUTSNamespace @@ -397,8 +389,6 @@ func (k *Kernel) Init(args InitKernelArgs) error { } k.extraAuxv = args.ExtraAuxv k.vdso = args.Vdso - k.realtimeClock = &timekeeperClock{tk: k.timekeeper, c: sentrytime.Realtime} - k.monotonicClock = &timekeeperClock{tk: k.timekeeper, c: sentrytime.Monotonic} k.futexes = futex.NewManager() k.netlinkPorts = port.New() k.ptraceExceptions = make(map[*Task]*Task) @@ -531,6 +521,8 @@ func (k *Kernel) SaveTo(ctx context.Context, w wire.Writer) error { } log.Infof("CPUID save took [%s].", time.Since(cpuidStart)) + // Save the timekeeper's state. + // Save the kernel state. kernelStart := time.Now() stats, err := state.Save(ctx, w, k) @@ -675,7 +667,7 @@ func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error { } // LoadFrom returns a new Kernel loaded from args. -func (k *Kernel) LoadFrom(ctx context.Context, r wire.Reader, net inet.Stack, clocks sentrytime.Clocks, vfsOpts *vfs.CompleteRestoreOptions) error { +func (k *Kernel) LoadFrom(ctx context.Context, r wire.Reader, timeReady chan struct{}, net inet.Stack, clocks sentrytime.Clocks, vfsOpts *vfs.CompleteRestoreOptions) error { loadStart := time.Now() initAppCores := k.applicationCores @@ -722,6 +714,11 @@ func (k *Kernel) LoadFrom(ctx context.Context, r wire.Reader, net inet.Stack, cl log.Infof("Overall load took [%s]", time.Since(loadStart)) k.Timekeeper().SetClocks(clocks) + + if timeReady != nil { + close(timeReady) + } + if net != nil { net.Resume() } @@ -1103,7 +1100,7 @@ func (k *Kernel) Start() error { } k.started = true - k.cpuClockTicker = ktime.NewTimer(k.monotonicClock, newKernelCPUClockTicker(k)) + k.cpuClockTicker = ktime.NewTimer(k.timekeeper.monotonicClock, newKernelCPUClockTicker(k)) k.cpuClockTicker.Swap(ktime.Setting{ Enabled: true, Period: linux.ClockTick, @@ -1258,7 +1255,7 @@ func (k *Kernel) incRunningTasks() { // These cause very different value of cpuClock. But again, since // nothing was running while the ticker was disabled, those differences // don't matter. - setting, exp := k.cpuClockTickerSetting.At(k.monotonicClock.Now()) + setting, exp := k.cpuClockTickerSetting.At(k.timekeeper.monotonicClock.Now()) if exp > 0 { atomic.AddUint64(&k.cpuClock, exp) } @@ -1468,12 +1465,12 @@ func (k *Kernel) ApplicationCores() uint { // RealtimeClock returns the application CLOCK_REALTIME clock. func (k *Kernel) RealtimeClock() ktime.Clock { - return k.realtimeClock + return k.timekeeper.realtimeClock } // MonotonicClock returns the application CLOCK_MONOTONIC clock. func (k *Kernel) MonotonicClock() ktime.Clock { - return k.monotonicClock + return k.timekeeper.monotonicClock } // CPUClockNow returns the current value of k.cpuClock. @@ -1553,32 +1550,6 @@ func (k *Kernel) SetSaveError(err error) { } } -var _ tcpip.Clock = (*Kernel)(nil) - -// Now implements tcpip.Clock.NowNanoseconds. -func (k *Kernel) Now() time.Time { - nsec, err := k.timekeeper.GetTime(sentrytime.Realtime) - if err != nil { - panic("timekeeper.GetTime(sentrytime.Realtime): " + err.Error()) - } - return time.Unix(0, nsec) -} - -// NowMonotonic implements tcpip.Clock.NowMonotonic. -func (k *Kernel) NowMonotonic() tcpip.MonotonicTime { - nsec, err := k.timekeeper.GetTime(sentrytime.Monotonic) - if err != nil { - panic("timekeeper.GetTime(sentrytime.Monotonic): " + err.Error()) - } - var mt tcpip.MonotonicTime - return mt.Add(time.Duration(nsec) * time.Nanosecond) -} - -// AfterFunc implements tcpip.Clock.AfterFunc. -func (k *Kernel) AfterFunc(d time.Duration, f func()) tcpip.Timer { - return ktime.TcpipAfterFunc(k.realtimeClock, d, f) -} - // SetMemoryFile sets Kernel.mf. SetMemoryFile must be called before Init or // LoadFrom. func (k *Kernel) SetMemoryFile(mf *pgalloc.MemoryFile) { diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go index ac1d8141c..200af304d 100644 --- a/pkg/sentry/kernel/kernel_state_autogen.go +++ b/pkg/sentry/kernel/kernel_state_autogen.go @@ -418,8 +418,6 @@ func (k *Kernel) StateFields() []string { "rootAbstractSocketNamespace", "futexes", "globalInit", - "realtimeClock", - "monotonicClock", "syslog", "runningTasks", "cpuClock", @@ -453,9 +451,9 @@ func (k *Kernel) beforeSave() {} func (k *Kernel) StateSave(stateSinkObject state.Sink) { k.beforeSave() var danglingEndpointsValue []tcpip.Endpoint = k.saveDanglingEndpoints() - stateSinkObject.SaveValue(24, danglingEndpointsValue) + stateSinkObject.SaveValue(22, danglingEndpointsValue) var deviceRegistryValue *device.Registry = k.saveDeviceRegistry() - stateSinkObject.SaveValue(28, deviceRegistryValue) + stateSinkObject.SaveValue(26, deviceRegistryValue) stateSinkObject.Save(0, &k.featureSet) stateSinkObject.Save(1, &k.timekeeper) stateSinkObject.Save(2, &k.tasks) @@ -470,30 +468,28 @@ func (k *Kernel) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(11, &k.rootAbstractSocketNamespace) stateSinkObject.Save(12, &k.futexes) stateSinkObject.Save(13, &k.globalInit) - stateSinkObject.Save(14, &k.realtimeClock) - stateSinkObject.Save(15, &k.monotonicClock) - stateSinkObject.Save(16, &k.syslog) - stateSinkObject.Save(17, &k.runningTasks) - stateSinkObject.Save(18, &k.cpuClock) - stateSinkObject.Save(19, &k.cpuClockTickerDisabled) - stateSinkObject.Save(20, &k.cpuClockTickerSetting) - stateSinkObject.Save(21, &k.uniqueID) - stateSinkObject.Save(22, &k.nextInotifyCookie) - stateSinkObject.Save(23, &k.netlinkPorts) - stateSinkObject.Save(25, &k.sockets) - stateSinkObject.Save(26, &k.socketsVFS2) - stateSinkObject.Save(27, &k.nextSocketRecord) - stateSinkObject.Save(29, &k.DirentCacheLimiter) - stateSinkObject.Save(30, &k.SpecialOpts) - stateSinkObject.Save(31, &k.vfs) - stateSinkObject.Save(32, &k.hostMount) - stateSinkObject.Save(33, &k.pipeMount) - stateSinkObject.Save(34, &k.shmMount) - stateSinkObject.Save(35, &k.socketMount) - stateSinkObject.Save(36, &k.SleepForAddressSpaceActivation) - stateSinkObject.Save(37, &k.ptraceExceptions) - stateSinkObject.Save(38, &k.YAMAPtraceScope) - stateSinkObject.Save(39, &k.cgroupRegistry) + stateSinkObject.Save(14, &k.syslog) + stateSinkObject.Save(15, &k.runningTasks) + stateSinkObject.Save(16, &k.cpuClock) + stateSinkObject.Save(17, &k.cpuClockTickerDisabled) + stateSinkObject.Save(18, &k.cpuClockTickerSetting) + stateSinkObject.Save(19, &k.uniqueID) + stateSinkObject.Save(20, &k.nextInotifyCookie) + stateSinkObject.Save(21, &k.netlinkPorts) + stateSinkObject.Save(23, &k.sockets) + stateSinkObject.Save(24, &k.socketsVFS2) + stateSinkObject.Save(25, &k.nextSocketRecord) + stateSinkObject.Save(27, &k.DirentCacheLimiter) + stateSinkObject.Save(28, &k.SpecialOpts) + stateSinkObject.Save(29, &k.vfs) + stateSinkObject.Save(30, &k.hostMount) + stateSinkObject.Save(31, &k.pipeMount) + stateSinkObject.Save(32, &k.shmMount) + stateSinkObject.Save(33, &k.socketMount) + stateSinkObject.Save(34, &k.SleepForAddressSpaceActivation) + stateSinkObject.Save(35, &k.ptraceExceptions) + stateSinkObject.Save(36, &k.YAMAPtraceScope) + stateSinkObject.Save(37, &k.cgroupRegistry) } func (k *Kernel) afterLoad() {} @@ -514,32 +510,30 @@ func (k *Kernel) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(11, &k.rootAbstractSocketNamespace) stateSourceObject.Load(12, &k.futexes) stateSourceObject.Load(13, &k.globalInit) - stateSourceObject.Load(14, &k.realtimeClock) - stateSourceObject.Load(15, &k.monotonicClock) - stateSourceObject.Load(16, &k.syslog) - stateSourceObject.Load(17, &k.runningTasks) - stateSourceObject.Load(18, &k.cpuClock) - stateSourceObject.Load(19, &k.cpuClockTickerDisabled) - stateSourceObject.Load(20, &k.cpuClockTickerSetting) - stateSourceObject.Load(21, &k.uniqueID) - stateSourceObject.Load(22, &k.nextInotifyCookie) - stateSourceObject.Load(23, &k.netlinkPorts) - stateSourceObject.Load(25, &k.sockets) - stateSourceObject.Load(26, &k.socketsVFS2) - stateSourceObject.Load(27, &k.nextSocketRecord) - stateSourceObject.Load(29, &k.DirentCacheLimiter) - stateSourceObject.Load(30, &k.SpecialOpts) - stateSourceObject.Load(31, &k.vfs) - stateSourceObject.Load(32, &k.hostMount) - stateSourceObject.Load(33, &k.pipeMount) - stateSourceObject.Load(34, &k.shmMount) - stateSourceObject.Load(35, &k.socketMount) - stateSourceObject.Load(36, &k.SleepForAddressSpaceActivation) - stateSourceObject.Load(37, &k.ptraceExceptions) - stateSourceObject.Load(38, &k.YAMAPtraceScope) - stateSourceObject.Load(39, &k.cgroupRegistry) - stateSourceObject.LoadValue(24, new([]tcpip.Endpoint), func(y interface{}) { k.loadDanglingEndpoints(y.([]tcpip.Endpoint)) }) - stateSourceObject.LoadValue(28, new(*device.Registry), func(y interface{}) { k.loadDeviceRegistry(y.(*device.Registry)) }) + stateSourceObject.Load(14, &k.syslog) + stateSourceObject.Load(15, &k.runningTasks) + stateSourceObject.Load(16, &k.cpuClock) + stateSourceObject.Load(17, &k.cpuClockTickerDisabled) + stateSourceObject.Load(18, &k.cpuClockTickerSetting) + stateSourceObject.Load(19, &k.uniqueID) + stateSourceObject.Load(20, &k.nextInotifyCookie) + stateSourceObject.Load(21, &k.netlinkPorts) + stateSourceObject.Load(23, &k.sockets) + stateSourceObject.Load(24, &k.socketsVFS2) + stateSourceObject.Load(25, &k.nextSocketRecord) + stateSourceObject.Load(27, &k.DirentCacheLimiter) + stateSourceObject.Load(28, &k.SpecialOpts) + stateSourceObject.Load(29, &k.vfs) + stateSourceObject.Load(30, &k.hostMount) + stateSourceObject.Load(31, &k.pipeMount) + stateSourceObject.Load(32, &k.shmMount) + stateSourceObject.Load(33, &k.socketMount) + stateSourceObject.Load(34, &k.SleepForAddressSpaceActivation) + stateSourceObject.Load(35, &k.ptraceExceptions) + stateSourceObject.Load(36, &k.YAMAPtraceScope) + stateSourceObject.Load(37, &k.cgroupRegistry) + stateSourceObject.LoadValue(22, new([]tcpip.Endpoint), func(y interface{}) { k.loadDanglingEndpoints(y.([]tcpip.Endpoint)) }) + stateSourceObject.LoadValue(26, new(*device.Registry), func(y interface{}) { k.loadDeviceRegistry(y.(*device.Registry)) }) } func (s *SocketRecord) StateTypeName() string { @@ -2404,6 +2398,8 @@ func (t *Timekeeper) StateTypeName() string { func (t *Timekeeper) StateFields() []string { return []string{ + "realtimeClock", + "monotonicClock", "bootTime", "saveMonotonic", "saveRealtime", @@ -2414,18 +2410,22 @@ func (t *Timekeeper) StateFields() []string { // +checklocksignore func (t *Timekeeper) StateSave(stateSinkObject state.Sink) { t.beforeSave() - stateSinkObject.Save(0, &t.bootTime) - stateSinkObject.Save(1, &t.saveMonotonic) - stateSinkObject.Save(2, &t.saveRealtime) - stateSinkObject.Save(3, &t.params) + stateSinkObject.Save(0, &t.realtimeClock) + stateSinkObject.Save(1, &t.monotonicClock) + stateSinkObject.Save(2, &t.bootTime) + stateSinkObject.Save(3, &t.saveMonotonic) + stateSinkObject.Save(4, &t.saveRealtime) + stateSinkObject.Save(5, &t.params) } // +checklocksignore func (t *Timekeeper) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.bootTime) - stateSourceObject.Load(1, &t.saveMonotonic) - stateSourceObject.Load(2, &t.saveRealtime) - stateSourceObject.Load(3, &t.params) + stateSourceObject.Load(0, &t.realtimeClock) + stateSourceObject.Load(1, &t.monotonicClock) + stateSourceObject.Load(2, &t.bootTime) + stateSourceObject.Load(3, &t.saveMonotonic) + stateSourceObject.Load(4, &t.saveRealtime) + stateSourceObject.Load(5, &t.params) stateSourceObject.AfterLoad(t.afterLoad) } diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index 891e2201d..4566e4c7c 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -278,7 +278,7 @@ func (k *Kernel) NewThreadGroup(mntns *fs.MountNamespace, pidns *PIDNamespace, s limits: limits, mounts: mntns, } - tg.itimerRealTimer = ktime.NewTimer(k.monotonicClock, &itimerRealListener{tg: tg}) + tg.itimerRealTimer = ktime.NewTimer(k.timekeeper.monotonicClock, &itimerRealListener{tg: tg}) tg.timers = make(map[linux.TimerID]*IntervalTimer) tg.oldRSeqCritical.Store(&OldRSeqCriticalRegion{}) return tg diff --git a/pkg/sentry/kernel/timekeeper.go b/pkg/sentry/kernel/timekeeper.go index 7c4fefb16..6255bae7a 100644 --- a/pkg/sentry/kernel/timekeeper.go +++ b/pkg/sentry/kernel/timekeeper.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" sentrytime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/tcpip" ) // Timekeeper manages all of the kernel clocks. @@ -39,6 +40,12 @@ type Timekeeper struct { // It is set only once, by SetClocks. clocks sentrytime.Clocks `state:"nosave"` + // realtimeClock is a ktime.Clock based on timekeeper's Realtime. + realtimeClock *timekeeperClock + + // monotonicClock is a ktime.Clock based on timekeeper's Monotonic. + monotonicClock *timekeeperClock + // bootTime is the realtime when the system "booted". i.e., when // SetClocks was called in the initial (not restored) run. bootTime ktime.Time @@ -90,10 +97,13 @@ type Timekeeper struct { // NewTimekeeper does not take ownership of paramPage. // // SetClocks must be called on the returned Timekeeper before it is usable. -func NewTimekeeper(mfp pgalloc.MemoryFileProvider, paramPage memmap.FileRange) (*Timekeeper, error) { - return &Timekeeper{ +func NewTimekeeper(mfp pgalloc.MemoryFileProvider, paramPage memmap.FileRange) *Timekeeper { + t := Timekeeper{ params: NewVDSOParamPage(mfp, paramPage), - }, nil + } + t.realtimeClock = &timekeeperClock{tk: &t, c: sentrytime.Realtime} + t.monotonicClock = &timekeeperClock{tk: &t, c: sentrytime.Monotonic} + return &t } // SetClocks the backing clock source. @@ -167,6 +177,32 @@ func (t *Timekeeper) SetClocks(c sentrytime.Clocks) { } } +var _ tcpip.Clock = (*Timekeeper)(nil) + +// Now implements tcpip.Clock. +func (t *Timekeeper) Now() time.Time { + nsec, err := t.GetTime(sentrytime.Realtime) + if err != nil { + panic("timekeeper.GetTime(sentrytime.Realtime): " + err.Error()) + } + return time.Unix(0, nsec) +} + +// NowMonotonic implements tcpip.Clock. +func (t *Timekeeper) NowMonotonic() tcpip.MonotonicTime { + nsec, err := t.GetTime(sentrytime.Monotonic) + if err != nil { + panic("timekeeper.GetTime(sentrytime.Monotonic): " + err.Error()) + } + var mt tcpip.MonotonicTime + return mt.Add(time.Duration(nsec) * time.Nanosecond) +} + +// AfterFunc implements tcpip.Clock. +func (t *Timekeeper) AfterFunc(d time.Duration, f func()) tcpip.Timer { + return ktime.TcpipAfterFunc(t.realtimeClock, d, f) +} + // startUpdater starts an update goroutine that keeps the clocks updated. // // mu must be held. diff --git a/pkg/sentry/state/state.go b/pkg/sentry/state/state.go index 167754537..2f0aba4e2 100644 --- a/pkg/sentry/state/state.go +++ b/pkg/sentry/state/state.go @@ -110,7 +110,7 @@ type LoadOpts struct { } // Load loads the given kernel, setting the provided platform and stack. -func (opts LoadOpts) Load(ctx context.Context, k *kernel.Kernel, n inet.Stack, clocks time.Clocks, vfsOpts *vfs.CompleteRestoreOptions) error { +func (opts LoadOpts) Load(ctx context.Context, k *kernel.Kernel, timeReady chan struct{}, n inet.Stack, clocks time.Clocks, vfsOpts *vfs.CompleteRestoreOptions) error { // Open the file. r, m, err := statefile.NewReader(opts.Source, opts.Key) if err != nil { @@ -120,5 +120,5 @@ func (opts LoadOpts) Load(ctx context.Context, k *kernel.Kernel, n inet.Stack, c previousMetadata = m // Restore the Kernel object graph. - return k.LoadFrom(ctx, r, n, clocks, vfsOpts) + return k.LoadFrom(ctx, r, timeReady, n, clocks, vfsOpts) } |