diff options
Diffstat (limited to 'pkg/sentry/pgalloc/save_restore.go')
-rw-r--r-- | pkg/sentry/pgalloc/save_restore.go | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/pkg/sentry/pgalloc/save_restore.go b/pkg/sentry/pgalloc/save_restore.go new file mode 100644 index 000000000..d4ba384b1 --- /dev/null +++ b/pkg/sentry/pgalloc/save_restore.go @@ -0,0 +1,210 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pgalloc + +import ( + "bytes" + "fmt" + "io" + "runtime" + "sync/atomic" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/state" +) + +// SaveTo writes f's state to the given stream. +func (f *MemoryFile) SaveTo(w io.Writer) error { + // Wait for reclaim. + f.mu.Lock() + defer f.mu.Unlock() + for f.reclaimable { + f.reclaimCond.Signal() + f.mu.Unlock() + runtime.Gosched() + f.mu.Lock() + } + + // Ensure that there are no pending evictions. + if len(f.evictable) != 0 { + panic(fmt.Sprintf("evictions still pending for %d users; call StartEvictions and WaitForEvictions before SaveTo", len(f.evictable))) + } + + // Ensure that all pages that contain data have knownCommitted set, since + // we only store knownCommitted pages below. + zeroPage := make([]byte, usermem.PageSize) + err := f.updateUsageLocked(0, func(bs []byte, committed []byte) error { + for pgoff := 0; pgoff < len(bs); pgoff += usermem.PageSize { + i := pgoff / usermem.PageSize + pg := bs[pgoff : pgoff+usermem.PageSize] + if !bytes.Equal(pg, zeroPage) { + committed[i] = 1 + continue + } + committed[i] = 0 + // Reading the page caused it to be committed; decommit it to + // reduce memory usage. + // + // "MADV_REMOVE [...] Free up a given range of pages and its + // associated backing store. This is equivalent to punching a hole + // in the corresponding byte range of the backing store (see + // fallocate(2))." - madvise(2) + if err := syscall.Madvise(pg, syscall.MADV_REMOVE); err != nil { + // This doesn't impact the correctness of saved memory, it + // just means that we're incrementally more likely to OOM. + // Complain, but don't abort saving. + log.Warningf("Decommitting page %p while saving failed: %v", pg, err) + } + } + return nil + }) + if err != nil { + return err + } + + // Save metadata. + if err := state.Save(w, &f.fileSize, nil); err != nil { + return err + } + if err := state.Save(w, &f.usage, nil); err != nil { + return err + } + + // Dump out committed pages. + for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + if !seg.Value().knownCommitted { + continue + } + // Write a header to distinguish from objects. + if err := state.WriteHeader(w, uint64(seg.Range().Length()), false); err != nil { + return err + } + // Write out data. + var ioErr error + err := f.forEachMappingSlice(seg.Range(), func(s []byte) { + if ioErr != nil { + return + } + _, ioErr = w.Write(s) + }) + if ioErr != nil { + return ioErr + } + if err != nil { + return err + } + } + + return nil +} + +// LoadFrom loads MemoryFile state from the given stream. +func (f *MemoryFile) LoadFrom(r io.Reader) error { + // Load metadata. + if err := state.Load(r, &f.fileSize, nil); err != nil { + return err + } + if err := f.file.Truncate(f.fileSize); err != nil { + return err + } + newMappings := make([]uintptr, f.fileSize>>chunkShift) + f.mappings.Store(newMappings) + if err := state.Load(r, &f.usage, nil); err != nil { + return err + } + + // Try to map committed chunks concurrently: For any given chunk, either + // this loop or the following one will mmap the chunk first and cache it in + // f.mappings for the other, but this loop is likely to run ahead of the + // other since it doesn't do any work between mmaps. The rest of this + // function doesn't mutate f.usage, so it's safe to iterate concurrently. + mapperDone := make(chan struct{}) + mapperCanceled := int32(0) + go func() { // S/R-SAFE: see comment + defer func() { close(mapperDone) }() + for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + if atomic.LoadInt32(&mapperCanceled) != 0 { + return + } + if seg.Value().knownCommitted { + f.forEachMappingSlice(seg.Range(), func(s []byte) {}) + } + } + }() + defer func() { + atomic.StoreInt32(&mapperCanceled, 1) + <-mapperDone + }() + + // Load committed pages. + for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + if !seg.Value().knownCommitted { + continue + } + // Verify header. + length, object, err := state.ReadHeader(r) + if err != nil { + return err + } + if object { + // Not expected. + return fmt.Errorf("unexpected object") + } + if expected := uint64(seg.Range().Length()); length != expected { + // Size mismatch. + return fmt.Errorf("mismatched segment: expected %d, got %d", expected, length) + } + // Read data. + var ioErr error + err = f.forEachMappingSlice(seg.Range(), func(s []byte) { + if ioErr != nil { + return + } + _, ioErr = io.ReadFull(r, s) + }) + if ioErr != nil { + return ioErr + } + if err != nil { + return err + } + + // Update accounting for restored pages. We need to do this here since + // these segments are marked as "known committed", and will be skipped + // over on accounting scans. + usage.MemoryAccounting.Inc(seg.End()-seg.Start(), seg.Value().kind) + } + + return nil +} + +// MemoryFileProvider provides the MemoryFile method. +// +// This type exists to work around a save/restore defect. The only object in a +// saved object graph that S/R allows to be replaced at time of restore is the +// starting point of the restore, kernel.Kernel. However, the MemoryFile +// changes between save and restore as well, so objects that need persistent +// access to the MemoryFile must instead store a pointer to the Kernel and call +// Kernel.MemoryFile() as required. In most cases, depending on the kernel +// package directly would create a package dependency loop, so the stored +// pointer must instead be a MemoryProvider interface object. Correspondingly, +// kernel.Kernel is the only implementation of this interface. +type MemoryFileProvider interface { + // MemoryFile returns the Kernel MemoryFile. + MemoryFile() *MemoryFile +} |