Diffstat (limited to 'pkg/sentry/pgalloc/save_restore.go')
-rw-r--r--  pkg/sentry/pgalloc/save_restore.go  210
1 file changed, 210 insertions, 0 deletions
diff --git a/pkg/sentry/pgalloc/save_restore.go b/pkg/sentry/pgalloc/save_restore.go
new file mode 100644
index 000000000..d4ba384b1
--- /dev/null
+++ b/pkg/sentry/pgalloc/save_restore.go
@@ -0,0 +1,210 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pgalloc
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "runtime"
+ "sync/atomic"
+ "syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/state"
+)
+
+// SaveTo writes f's state to the given stream.
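+//
+// Callers must ensure that there are no pending evictions before calling
+// SaveTo, e.g. via StartEvictions and WaitForEvictions; SaveTo panics
+// otherwise.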
+func (f *MemoryFile) SaveTo(w io.Writer) error {
+	// Wait for reclaim.
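+	//
+	// Each iteration signals the reclaimer goroutine and then yields the
+	// lock and the CPU so that it can make progress; the loop exits once
+	// no reclaimable pages remain, so reclaim cannot race with the scan
+	// below.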
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	for f.reclaimable {
+		f.reclaimCond.Signal()
+		f.mu.Unlock()
+		runtime.Gosched()
+		f.mu.Lock()
+	}
+
+	// Ensure that there are no pending evictions.
+	if len(f.evictable) != 0 {
+		panic(fmt.Sprintf("evictions still pending for %d users; call StartEvictions and WaitForEvictions before SaveTo", len(f.evictable)))
+	}
+
+	// Ensure that all pages that contain data have knownCommitted set, since
+	// we only store knownCommitted pages below.
+	zeroPage := make([]byte, usermem.PageSize)
+	err := f.updateUsageLocked(0, func(bs []byte, committed []byte) error {
+		for pgoff := 0; pgoff < len(bs); pgoff += usermem.PageSize {
+			i := pgoff / usermem.PageSize
+			pg := bs[pgoff : pgoff+usermem.PageSize]
+			if !bytes.Equal(pg, zeroPage) {
+				committed[i] = 1
+				continue
+			}
+			committed[i] = 0
+			// Reading the page caused it to be committed; decommit it to
+			// reduce memory usage.
+			//
+			// "MADV_REMOVE [...] Free up a given range of pages and its
+			// associated backing store. This is equivalent to punching a hole
+			// in the corresponding byte range of the backing store (see
+			// fallocate(2))." - madvise(2)
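+			//
+			// MADV_REMOVE is only supported on filesystems that
+			// implement hole punching (such as tmpfs), which the
+			// MemoryFile's backing file is expected to do.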
+			if err := syscall.Madvise(pg, syscall.MADV_REMOVE); err != nil {
+				// This doesn't impact the correctness of saved memory;
+				// it just means that we're incrementally more likely
+				// to OOM. Complain, but don't abort saving.
+				log.Warningf("Decommitting page %p while saving failed: %v", pg, err)
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+
+	// Save metadata.
+	if err := state.Save(w, &f.fileSize, nil); err != nil {
+		return err
+	}
+	if err := state.Save(w, &f.usage, nil); err != nil {
+		return err
+	}
+
+	// Dump out committed pages.
+	for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		if !seg.Value().knownCommitted {
+			continue
+		}
+		// Write a header to distinguish from objects.
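+		// The header records the segment's length with the object flag
+		// unset; LoadFrom verifies both before reading the raw bytes
+		// back into the corresponding segment.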
+		if err := state.WriteHeader(w, uint64(seg.Range().Length()), false); err != nil {
+			return err
+		}
+		// Write out data.
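+		// forEachMappingSlice's callback cannot return an error, so the
+		// first write error is captured in ioErr and later calls become
+		// no-ops.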
+		var ioErr error
+		err := f.forEachMappingSlice(seg.Range(), func(s []byte) {
+			if ioErr != nil {
+				return
+			}
+			_, ioErr = w.Write(s)
+		})
+		if ioErr != nil {
+			return ioErr
+		}
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// LoadFrom loads MemoryFile state from the given stream.
+func (f *MemoryFile) LoadFrom(r io.Reader) error {
+	// Load metadata.
+	if err := state.Load(r, &f.fileSize, nil); err != nil {
+		return err
+	}
+	if err := f.file.Truncate(f.fileSize); err != nil {
+		return err
+	}
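+	// f.mappings caches one mapping address per chunk of the file; size
+	// it for the restored file before any chunk is mapped below.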
+	newMappings := make([]uintptr, f.fileSize>>chunkShift)
+	f.mappings.Store(newMappings)
+	if err := state.Load(r, &f.usage, nil); err != nil {
+		return err
+	}
+
+	// Try to map committed chunks concurrently: For any given chunk, either
+	// this loop or the following one will mmap the chunk first and cache it in
+	// f.mappings for the other, but this loop is likely to run ahead of the
+	// other since it doesn't do any work between mmaps. The rest of this
+	// function doesn't mutate f.usage, so it's safe to iterate concurrently.
+	mapperDone := make(chan struct{})
+	mapperCanceled := int32(0)
+	go func() { // S/R-SAFE: see comment
+		defer func() { close(mapperDone) }()
+		for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+			if atomic.LoadInt32(&mapperCanceled) != 0 {
+				return
+			}
+			if seg.Value().knownCommitted {
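+				// The callback is a no-op: forEachMappingSlice is
+				// called only for its side effect of mapping the
+				// chunks and caching them in f.mappings.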
+				f.forEachMappingSlice(seg.Range(), func(s []byte) {})
+			}
+		}
+	}()
+	defer func() {
+		atomic.StoreInt32(&mapperCanceled, 1)
+		<-mapperDone
+	}()
+
+	// Load committed pages.
+	for seg := f.usage.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		if !seg.Value().knownCommitted {
+			continue
+		}
+		// Verify header.
+		length, object, err := state.ReadHeader(r)
+		if err != nil {
+			return err
+		}
+		if object {
+			// Not expected.
+			return fmt.Errorf("unexpected object")
+		}
+		if expected := uint64(seg.Range().Length()); length != expected {
+			// Size mismatch.
+			return fmt.Errorf("mismatched segment: expected %d, got %d", expected, length)
+		}
+		// Read data.
+		var ioErr error
+		err = f.forEachMappingSlice(seg.Range(), func(s []byte) {
+			if ioErr != nil {
+				return
+			}
+			_, ioErr = io.ReadFull(r, s)
+		})
+		if ioErr != nil {
+			return ioErr
+		}
+		if err != nil {
+			return err
+		}
+
+		// Update accounting for restored pages. We need to do this here since
+		// these segments are marked as "known committed", and will be skipped
+		// over on accounting scans.
+		usage.MemoryAccounting.Inc(seg.End()-seg.Start(), seg.Value().kind)
+	}
+
+	return nil
+}
+
+// MemoryFileProvider provides the MemoryFile method.
+//
+// This type exists to work around a save/restore defect. The only object in a
+// saved object graph that S/R allows to be replaced at time of restore is the
+// starting point of the restore, kernel.Kernel. However, the MemoryFile
+// changes between save and restore as well, so objects that need persistent
+// access to the MemoryFile must instead store a pointer to the Kernel and call
+// Kernel.MemoryFile() as required. In most cases, depending on the kernel
+// package directly would create a package dependency loop, so the stored
+// pointer must instead be a MemoryFileProvider interface object.
+// Correspondingly, kernel.Kernel is the only implementation of this
+// interface.
+type MemoryFileProvider interface {
+	// MemoryFile returns the Kernel's MemoryFile.
+	MemoryFile() *MemoryFile
+}
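
As a rough sketch of the pattern described above (the fileData type and its
members are invented for illustration and are not part of this change), an
object that needs the MemoryFile after a restore stores the provider and
resolves it on every use:

	package example

	import "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"

	// fileData is a hypothetical object that survives save/restore. It
	// stores a MemoryFileProvider rather than a *MemoryFile, since the
	// MemoryFile is replaced at restore time.
	type fileData struct {
		mfp pgalloc.MemoryFileProvider
	}

	// memoryFile resolves the current MemoryFile on each call, so it
	// remains correct after a restore has swapped the MemoryFile out.
	func (d *fileData) memoryFile() *pgalloc.MemoryFile {
		return d.mfp.MemoryFile()
	}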