From 174161013de22be6a42b02ee06611a9de9e20b18 Mon Sep 17 00:00:00 2001 From: Zhaozhong Ni Date: Tue, 8 May 2018 11:36:11 -0700 Subject: Capture restore file system corruption errors in exit error. PiperOrigin-RevId: 195850822 Change-Id: I4d7bdd8fe129c5ed461b73e1d7458be2cf5680c2 --- pkg/sentry/fs/fdpipe/pipe_state.go | 9 +++--- pkg/sentry/fs/fs.go | 63 ++++++++++++++++++++++++++++++++++++-- pkg/sentry/fs/gofer/file_state.go | 9 ++++-- pkg/sentry/fs/gofer/inode_state.go | 23 ++++++++------ pkg/sentry/fs/host/inode_state.go | 6 ++-- pkg/sentry/kernel/kernel.go | 4 ++- 6 files changed, 90 insertions(+), 24 deletions(-) diff --git a/pkg/sentry/fs/fdpipe/pipe_state.go b/pkg/sentry/fs/fdpipe/pipe_state.go index 8996a2178..99c40d8ed 100644 --- a/pkg/sentry/fs/fdpipe/pipe_state.go +++ b/pkg/sentry/fs/fdpipe/pipe_state.go @@ -63,7 +63,7 @@ func (p *pipeOperations) loadFlags(flags fs.FileFlags) { // afterLoad is invoked by stateify. func (p *pipeOperations) afterLoad() { - load := func() { + load := func() error { if !p.flags.Read { readPipeOperationsLoading.Wait() } else { @@ -75,14 +75,15 @@ func (p *pipeOperations) afterLoad() { Write: p.flags.Write, }) if err != nil { - panic(fmt.Sprintf("unable to open pipe %v: %v", p, err)) + return fmt.Errorf("unable to open pipe %v: %v", p, err) } if err := p.init(); err != nil { - panic(fmt.Sprintf("unable to initialize pipe %v: %v", p, err)) + return fmt.Errorf("unable to initialize pipe %v: %v", p, err) } + return nil } // Do background opening of pipe ends. Note for write-only pipe ends we // have to do it asynchronously to avoid blocking the restore. - fs.Async(load) + fs.Async(fs.CatchError(load)) } diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go index f54f767d3..6ec9ff446 100644 --- a/pkg/sentry/fs/fs.go +++ b/pkg/sentry/fs/fs.go @@ -55,11 +55,19 @@ package fs import ( "sync" + + "gvisor.googlesource.com/gvisor/pkg/log" ) -// work is a sync.WaitGroup that can be used to queue asynchronous operations -// via Do. Callers can use Barrier to ensure no operations are outstanding. -var work sync.WaitGroup +var ( + // work is a sync.WaitGroup that can be used to queue asynchronous + // operations via Do. Callers can use Barrier to ensure no operations + // are outstanding. + work sync.WaitGroup + + // asyncError is used to store up to one asynchronous execution error. + asyncError = make(chan error, 1) +) // AsyncBarrier waits for all outstanding asynchronous work to complete. func AsyncBarrier() { @@ -75,6 +83,43 @@ func Async(f func()) { }() } +// AsyncErrorBarrier waits for all outstanding asynchronous work to complete, or +// the first async error to arrive. Other unfinished async executions will +// continue in the background. Other past and future async errors are ignored. +func AsyncErrorBarrier() error { + wait := make(chan struct{}, 1) + go func() { // S/R-SAFE: Does not touch persistent state. + work.Wait() + wait <- struct{}{} + }() + select { + case <-wait: + select { + case err := <-asyncError: + return err + default: + return nil + } + case err := <-asyncError: + return err + } +} + +// CatchError tries to capture the potential async error returned by the +// function. At most one async error will be captured globally so excessive +// errors will be dropped. +func CatchError(f func() error) func() { + return func() { + if err := f(); err != nil { + select { + case asyncError <- err: + default: + log.Warningf("excessive async error dropped: %v", err) + } + } + } +} + // ErrSaveRejection indicates a failed save due to unsupported file system state // such as dangling open fd, etc. type ErrSaveRejection struct { @@ -86,3 +131,15 @@ type ErrSaveRejection struct { func (e ErrSaveRejection) Error() string { return "save rejected due to unsupported file system state: " + e.Err.Error() } + +// ErrCorruption indicates a failed restore due to external file system state in +// corruption. +type ErrCorruption struct { + // Err is the wrapped error. + Err error +} + +// Error returns a sensible description of the save rejection error. +func (e ErrCorruption) Error() string { + return "restore failed due to external file system state in corruption: " + e.Err.Error() +} diff --git a/pkg/sentry/fs/gofer/file_state.go b/pkg/sentry/fs/gofer/file_state.go index 1d63e33ec..715af8f16 100644 --- a/pkg/sentry/fs/gofer/file_state.go +++ b/pkg/sentry/fs/gofer/file_state.go @@ -15,13 +15,15 @@ package gofer import ( + "fmt" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" ) // afterLoad is invoked by stateify. func (f *fileOperations) afterLoad() { - load := func() { + load := func() error { f.inodeOperations.fileState.waitForLoad() // Manually load the open handles. @@ -29,9 +31,10 @@ func (f *fileOperations) afterLoad() { // TODO: Context is not plumbed to save/restore. f.handles, err = newHandles(context.Background(), f.inodeOperations.fileState.file, f.flags) if err != nil { - panic("failed to re-open handle: " + err.Error()) + return fmt.Errorf("failed to re-open handle: %v", err) } f.inodeOperations.fileState.setHandlesForCachedIO(f.flags, f.handles) + return nil } - fs.Async(load) + fs.Async(fs.CatchError(load)) } diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go index 997a7d1c1..82d1dd4da 100644 --- a/pkg/sentry/fs/gofer/inode_state.go +++ b/pkg/sentry/fs/gofer/inode_state.go @@ -15,6 +15,7 @@ package gofer import ( + "errors" "fmt" "strings" @@ -83,7 +84,7 @@ func (i *inodeFileState) loadLoading(_ struct{}) { // afterLoad is invoked by stateify. func (i *inodeFileState) afterLoad() { - load := func() { + load := func() error { // See comment on i.loading(). defer i.loading.Unlock() @@ -92,14 +93,14 @@ func (i *inodeFileState) afterLoad() { if !ok { // This should be impossible, see assertion in // beforeSave. - panic(fmt.Sprintf("failed to find path for inode number %d. Device %s contains %s", i.sattr.InodeID, i.s.connID, fs.InodeMappings(i.s.inodeMappings))) + return fmt.Errorf("failed to find path for inode number %d. Device %s contains %s", i.sattr.InodeID, i.s.connID, fs.InodeMappings(i.s.inodeMappings)) } // TODO: Context is not plumbed to save/restore. ctx := &dummyClockContext{context.Background()} var err error _, i.file, err = i.s.attach.walk(ctx, strings.Split(name, "/")) if err != nil { - panic(fmt.Sprintf("failed to walk to %q: %v", name, err)) + return fmt.Errorf("failed to walk to %q: %v", name, err) } // Remap the saved inode number into the gofer device using the @@ -107,10 +108,10 @@ func (i *inodeFileState) afterLoad() { // environment. qid, mask, attrs, err := i.file.getAttr(ctx, p9.AttrMaskAll()) if err != nil { - panic(fmt.Sprintf("failed to get file attributes of %s: %v", name, err)) + return fmt.Errorf("failed to get file attributes of %s: %v", name, err) } if !mask.RDev { - panic(fmt.Sprintf("file %s lacks device", name)) + return fs.ErrCorruption{fmt.Errorf("file %s lacks device", name)} } i.key = device.MultiDeviceKey{ Device: attrs.RDev, @@ -118,24 +119,26 @@ func (i *inodeFileState) afterLoad() { Inode: qid.Path, } if !goferDevice.Load(i.key, i.sattr.InodeID) { - panic(fmt.Sprintf("gofer device %s -> %d conflict in gofer device mappings: %s", i.key, i.sattr.InodeID, goferDevice)) + return fs.ErrCorruption{fmt.Errorf("gofer device %s -> %d conflict in gofer device mappings: %s", i.key, i.sattr.InodeID, goferDevice)} } if i.sattr.Type == fs.RegularFile { env, ok := fs.CurrentRestoreEnvironment() if !ok { - panic("missing restore environment") + return errors.New("missing restore environment") } uattr := unstable(ctx, mask, attrs, i.s.mounter, i.s.client) if env.ValidateFileSize && uattr.Size != i.savedUAttr.Size { - panic(fmt.Errorf("file size has changed for %s: previously %d, now %d", i.s.inodeMappings[i.sattr.InodeID], i.savedUAttr.Size, uattr.Size)) + return fs.ErrCorruption{fmt.Errorf("file size has changed for %s: previously %d, now %d", i.s.inodeMappings[i.sattr.InodeID], i.savedUAttr.Size, uattr.Size)} } if env.ValidateFileTimestamp && uattr.ModificationTime != i.savedUAttr.ModificationTime { - panic(fmt.Errorf("file modification time has changed for %s: previously %v, now %v", i.s.inodeMappings[i.sattr.InodeID], i.savedUAttr.ModificationTime, uattr.ModificationTime)) + return fs.ErrCorruption{fmt.Errorf("file modification time has changed for %s: previously %v, now %v", i.s.inodeMappings[i.sattr.InodeID], i.savedUAttr.ModificationTime, uattr.ModificationTime)} } i.savedUAttr = nil } + + return nil } - fs.Async(load) + fs.Async(fs.CatchError(load)) } diff --git a/pkg/sentry/fs/host/inode_state.go b/pkg/sentry/fs/host/inode_state.go index 80066512a..135c75fd5 100644 --- a/pkg/sentry/fs/host/inode_state.go +++ b/pkg/sentry/fs/host/inode_state.go @@ -59,7 +59,7 @@ func (i *inodeFileState) afterLoad() { // saved filesystem are no longer unique on this filesystem. // Since this violates the contract that filesystems cannot // change across save and restore, error out. - panic(fmt.Sprintf("host %s conflict in host device mappings: %s", key, hostFileDevice)) + panic(fs.ErrCorruption{fmt.Errorf("host %s conflict in host device mappings: %s", key, hostFileDevice)}) } if !i.descriptor.donated && i.sattr.Type == fs.RegularFile { @@ -69,10 +69,10 @@ func (i *inodeFileState) afterLoad() { } uattr := unstableAttr(i.mops, &s) if env.ValidateFileSize && uattr.Size != i.savedUAttr.Size { - panic(fmt.Errorf("file size has changed for %s: previously %d, now %d", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.Size, uattr.Size)) + panic(fs.ErrCorruption{fmt.Errorf("file size has changed for %s: previously %d, now %d", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.Size, uattr.Size)}) } if env.ValidateFileTimestamp && uattr.ModificationTime != i.savedUAttr.ModificationTime { - panic(fmt.Errorf("file modification time has changed for %s: previously %v, now %v", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.ModificationTime, uattr.ModificationTime)) + panic(fs.ErrCorruption{fmt.Errorf("file modification time has changed for %s: previously %v, now %v", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.ModificationTime, uattr.ModificationTime)}) } i.savedUAttr = nil } diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 25c8dd885..536461bbd 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -418,7 +418,9 @@ func (k *Kernel) LoadFrom(r io.Reader, p platform.Platform, net inet.Stack) erro // Ensure that all pending asynchronous work is complete: // - namedpipe opening // - inode file opening - fs.AsyncBarrier() + if err := fs.AsyncErrorBarrier(); err != nil { + return err + } log.Infof("Overall load took [%s]", time.Since(loadStart)) -- cgit v1.2.3