diff options
author | Fabricio Voznika <fvoznika@google.com> | 2018-10-31 11:27:10 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-10-31 11:28:27 -0700 |
commit | ccc3d7ca11a2a623587c651a6690aaa46d2c2665 (patch) | |
tree | baa04e2e5ebe4a1ee638dbfbbd86dbcbc613696c /runsc | |
parent | e9dbd5ab67bc31e59910930e6c1b551c0fd05ee6 (diff) |
Make lazy open the mode of operation for fsgofer
With recent changes to 9P server, path walks are now safe inside
open, create, rename and setattr calls. To simplify the code, remove
the lazyopen=false mode that was used for bind mounts, and converge
all mounts to using lazy open.
PiperOrigin-RevId: 219508628
Change-Id: I073e7e1e2e9a9972d150eaf4cb29e553997a9b76
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/cmd/gofer.go | 8 | ||||
-rw-r--r-- | runsc/fsgofer/BUILD | 1 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer.go | 97 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer_test.go | 12 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer_unsafe.go | 71 |
5 files changed, 97 insertions, 92 deletions
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 3842fdf64..7cc666e10 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -124,9 +124,6 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) ats = append(ats, fsgofer.NewAttachPoint("/", fsgofer.Config{ ROMount: spec.Root.Readonly, PanicOnWrite: g.panicOnWrite, - // Docker uses overlay2 by default for the root mount, and overlay2 does a copy-up when - // each file is opened as writable. Thus, we open files lazily to avoid copy-up. - LazyOpenForWrite: true, })) log.Infof("Serving %q mapped to %q on FD %d (ro: %t)", "/", root, g.ioFDs[0], spec.Root.Readonly) @@ -134,9 +131,8 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) for _, m := range spec.Mounts { if specutils.Is9PMount(m) { cfg := fsgofer.Config{ - ROMount: isReadonlyMount(m.Options), - PanicOnWrite: g.panicOnWrite, - LazyOpenForWrite: false, + ROMount: isReadonlyMount(m.Options), + PanicOnWrite: g.panicOnWrite, } ats = append(ats, fsgofer.NewAttachPoint(m.Destination, cfg)) diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index f28e4fa77..ab12388ab 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -17,6 +17,7 @@ go_library( "//pkg/fd", "//pkg/log", "//pkg/p9", + "//pkg/syserr", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index fd913831a..4412d7e2f 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -77,13 +77,6 @@ type Config struct { // PanicOnWrite panics on attempts to write to RO mounts. PanicOnWrite bool - - // LazyOpenForWrite makes the underlying file to be opened in RDONLY - // mode initially and be reopened in case write access is desired. - // This is done to workaround the behavior in 'overlay2' that - // copies the entire file up eagerly when it's opened in write mode - // even if the file is never actually written to. - LazyOpenForWrite bool } type attachPoint struct { @@ -182,9 +175,10 @@ func (a *attachPoint) makeQID(stat syscall.Stat_t) p9.QID { // localFile implements p9.File wrapping a local file. The underlying file // is opened during Walk() and stored in 'controlFile' to be used with other -// operations. The mode in which the file is opened varies depending on the -// configuration (see below). 'controlFile' is dup'ed when Walk(nil) is called -// to clone the file. +// operations. The control file is opened as readonly, unless it's a symlink +// which requires O_PATH. 'controlFile' is dup'ed when Walk(nil) is called +// to clone the file. This reduces the number of walks that need to be done by +// the host file system when files are reused. // // 'openedFile' is assigned when Open() is called. If requested open mode is // a subset of controlFile's mode, it's possible to use the same file. If mode @@ -193,22 +187,10 @@ func (a *attachPoint) makeQID(stat syscall.Stat_t) p9.QID { // operations. Before the file is opened and after it's closed, 'mode' is set to // an invalid value to prevent an unopened file from being used. // -// localFile has 2 modes of operation based on the configuration: -// -// ** conf.lazyRWOpen == false ** -// This is the preferred mode. 'controlFile' is opened in RW mode in Walk() -// and used across all functions. The file is never reopened as the mode will -// always be a super set of the requested open mode. This reduces the number of -// syscalls required per operation and makes it resilient to renames anywhere -// in the path to the file. -// -// ** conf.lazyRWOpen == true ** -// This mode is used for better performance with 'overlay2' storage driver. -// overlay2 eagerly copies the entire file up when it's opened in write mode -// which makes the mode above perform badly when serveral of files are opened -// for read (esp. startup). In this mode, 'controlFile' is opened as readonly -// (or O_PATH for symlinks). Reopening the file is required if write mode -// is requested in Open(). +// The reason that the control file is never opened as read-write is for better +// performance with 'overlay2' storage driver. overlay2 eagerly copies the +// entire file up when it's opened in write mode, and would perform badly when +// multiple files are being opened for read-only (esp. startup). type localFile struct { p9.DefaultWalkGetAttr @@ -238,23 +220,14 @@ type localFile struct { func openAnyFile(parent *localFile, name string) (*os.File, string, error) { // Attempt to open file in the following mode in order: - // 1. RDWR: for files with rw mounts and LazyOpenForWrite disabled - // 2. RDONLY: for directories, ro mounts or LazyOpenForWrite enabled - // 3. PATH: for symlinks - modes := []int{syscall.O_RDWR, syscall.O_RDONLY, unix.O_PATH} - symlinkIdx := len(modes) - 1 - - startIdx := 0 - conf := parent.attachPoint.conf - if conf.ROMount || conf.LazyOpenForWrite { - // Skip attempt to open in RDWR based on configuration. - startIdx = 1 - } + // 1. RDONLY: for all files, works for directories and ro mounts too + // 2. PATH: for symlinks + modes := []int{syscall.O_RDONLY, unix.O_PATH} var err error var fd int - for i := startIdx; i < len(modes); i++ { - fd, err = syscall.Openat(parent.controlFD(), name, openFlags|modes[i], 0) + for i, mode := range modes { + fd, err = syscall.Openat(parent.controlFD(), name, openFlags|mode, 0) if err == nil { // openat succeeded, we're done. break @@ -263,16 +236,10 @@ func openAnyFile(parent *localFile, name string) (*os.File, string, error) { case syscall.ENOENT: // File doesn't exist, no point in retrying. return nil, "", e - case syscall.ELOOP: - if i < symlinkIdx { - // File was opened with O_NOFOLLOW, so this error can only happen when - // trying ot open a symlink. Jump straight to flags compatible with symlink. - i = symlinkIdx - 1 - } } - // openat failed. Try again with next mode, preserving 'err' in - // case this was the last attempt. - log.Debugf("Attempt %d to open file failed, mode: %#x, path: %s/%s, err: %v", i, openFlags|modes[i], parent.controlFile.Name(), name, err) + // openat failed. Try again with next mode, preserving 'err' in case this + // was the last attempt. + log.Debugf("Attempt %d to open file failed, mode: %#x, path: %s/%s, err: %v", i, openFlags|mode, parent.controlFile.Name(), name, err) } if err != nil { // All attempts to open file have failed, return the last error. @@ -353,13 +320,13 @@ func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { // Check if control file can be used or if a new open must be created. var newFile *os.File - if mode == p9.ReadOnly || !l.attachPoint.conf.LazyOpenForWrite { + if mode == p9.ReadOnly { log.Debugf("Open reusing control file, mode: %v, %q", mode, l.controlFile.Name()) newFile = l.controlFile } else { - // Ideally reopen would call name_to_handle_at (with empty name) and open_by_handle_at - // to reopen the file without using 'hostPath'. However, name_to_handle_at and - // open_by_handle_at aren't supported by overlay2. + // Ideally reopen would call name_to_handle_at (with empty name) and + // open_by_handle_at to reopen the file without using 'hostPath'. However, + // name_to_handle_at and open_by_handle_at aren't supported by overlay2. log.Debugf("Open reopening file, mode: %v, %q", mode, l.controlFile.Name()) var err error @@ -397,9 +364,10 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid return nil, nil, p9.QID{}, 0, syscall.EBADF } - // Use a single file for both 'controlFile' and 'openedFile'. Mode must include read for control - // and whichever else was requested by caller. Note that resulting file might have a wider mode - // than needed for each particular case. + // Use a single file for both 'controlFile' and 'openedFile'. Mode must + // include read for control and whichever else was requested by caller. Note + // that resulting file might have a wider mode than needed for each particular + // case. flags := openFlags | syscall.O_CREAT | syscall.O_EXCL if mode == p9.WriteOnly { flags |= syscall.O_RDWR @@ -622,9 +590,9 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { } fd := l.controlFD() - if conf.LazyOpenForWrite && l.ft == regular { - // Regular files are opened in RO mode when lazy open is set. - // Thus it needs to be reopened here for write. + if l.ft == regular { + // Regular files are opened in RO mode, thus it needs to be reopened here + // for write. f, err := os.OpenFile(l.hostPath, openFlags|os.O_WRONLY, 0) if err != nil { return extractErrno(err) @@ -728,8 +696,6 @@ func (l *localFile) Rename(p9.File, string) error { } // RenameAt implements p9.File.RenameAt. -// -// TODO: change to renameat(2). func (l *localFile) RenameAt(oldName string, directory p9.File, newName string) error { conf := l.attachPoint.conf if conf.ROMount { @@ -740,9 +706,7 @@ func (l *localFile) RenameAt(oldName string, directory p9.File, newName string) } newParent := directory.(*localFile) - oldPath := path.Join(l.hostPath, oldName) - newPath := path.Join(newParent.hostPath, newName) - if err := syscall.Rename(oldPath, newPath); err != nil { + if err := renameat(l.controlFD(), oldName, newParent.controlFD(), newName); err != nil { return extractErrno(err) } return nil @@ -863,7 +827,8 @@ func (l *localFile) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) { } // Readdirnames is a cursor over directories, so seek back to 0 to ensure it's - // reading all directory contents. Take a lock because this operation is stateful. + // reading all directory contents. Take a lock because this operation is + // stateful. l.readDirMu.Lock() if _, err := l.openedFile.Seek(0, 0); err != nil { l.readDirMu.Unlock() @@ -944,7 +909,7 @@ func (l *localFile) Renamed(newDir p9.File, newName string) { func extractErrno(err error) syscall.Errno { if err == nil { // This should never happen. The likely result will be that - // some user gets the frustration "error: SUCCESS" message. + // some user gets the frustrating "error: SUCCESS" message. log.Warningf("extractErrno called with nil error!") return 0 } diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go index 34033245b..f799b1e25 100644 --- a/runsc/fsgofer/fsgofer_test.go +++ b/runsc/fsgofer/fsgofer_test.go @@ -48,14 +48,8 @@ var ( // allConfs is set in init() above. allConfs []Config - rwConfs = []Config{ - {ROMount: false, LazyOpenForWrite: false}, - {ROMount: false, LazyOpenForWrite: true}, - } - roConfs = []Config{ - {ROMount: true, LazyOpenForWrite: false}, - {ROMount: true, LazyOpenForWrite: true}, - } + rwConfs = []Config{{ROMount: false}} + roConfs = []Config{{ROMount: true}} ) type state struct { @@ -66,7 +60,7 @@ type state struct { } func (s state) String() string { - return fmt.Sprintf("lazyopen(%v)-%v", s.conf.LazyOpenForWrite, s.ft) + return fmt.Sprintf("type(%v)", s.ft) } func runAll(t *testing.T, test func(*testing.T, state)) { diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go index 99bc25ec1..94413db86 100644 --- a/runsc/fsgofer/fsgofer_unsafe.go +++ b/runsc/fsgofer/fsgofer_unsafe.go @@ -19,20 +19,29 @@ import ( "unsafe" "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/syserr" ) func statAt(dirFd int, name string) (syscall.Stat_t, error) { nameBytes, err := syscall.BytePtrFromString(name) if err != nil { - return syscall.Stat_t{}, extractErrno(err) + return syscall.Stat_t{}, err } - namePtr := uintptr(unsafe.Pointer(nameBytes)) + namePtr := unsafe.Pointer(nameBytes) var stat syscall.Stat_t - statPtr := uintptr(unsafe.Pointer(&stat)) + statPtr := unsafe.Pointer(&stat) - if _, _, err := syscall.Syscall6(syscall.SYS_NEWFSTATAT, uintptr(dirFd), namePtr, statPtr, linux.AT_SYMLINK_NOFOLLOW, 0, 0); err != 0 { - return syscall.Stat_t{}, err + if _, _, errno := syscall.Syscall6( + syscall.SYS_NEWFSTATAT, + uintptr(dirFd), + uintptr(namePtr), + uintptr(statPtr), + linux.AT_SYMLINK_NOFOLLOW, + 0, + 0); errno != 0 { + + return syscall.Stat_t{}, syserr.FromHost(errno).ToError() } return stat, nil } @@ -40,19 +49,59 @@ func statAt(dirFd int, name string) (syscall.Stat_t, error) { func utimensat(dirFd int, name string, times [2]syscall.Timespec, flags int) error { // utimensat(2) doesn't accept empty name, instead name must be nil to make it // operate directly on 'dirFd' unlike other *at syscalls. - var namePtr uintptr + var namePtr unsafe.Pointer if name != "" { nameBytes, err := syscall.BytePtrFromString(name) if err != nil { - return extractErrno(err) + return err + } + namePtr = unsafe.Pointer(nameBytes) + } + + timesPtr := unsafe.Pointer(×[0]) + + if _, _, errno := syscall.Syscall6( + syscall.SYS_UTIMENSAT, + uintptr(dirFd), + uintptr(namePtr), + uintptr(timesPtr), + uintptr(flags), + 0, + 0); errno != 0 { + + return syserr.FromHost(errno).ToError() + } + return nil +} + +func renameat(oldDirFD int, oldName string, newDirFD int, newName string) error { + var oldNamePtr unsafe.Pointer + if oldName != "" { + nameBytes, err := syscall.BytePtrFromString(oldName) + if err != nil { + return err + } + oldNamePtr = unsafe.Pointer(nameBytes) + } + var newNamePtr unsafe.Pointer + if newName != "" { + nameBytes, err := syscall.BytePtrFromString(newName) + if err != nil { + return err } - namePtr = uintptr(unsafe.Pointer(nameBytes)) + newNamePtr = unsafe.Pointer(nameBytes) } - timesPtr := uintptr(unsafe.Pointer(×[0])) + if _, _, errno := syscall.Syscall6( + syscall.SYS_RENAMEAT, + uintptr(oldDirFD), + uintptr(oldNamePtr), + uintptr(newDirFD), + uintptr(newNamePtr), + 0, + 0); errno != 0 { - if _, _, err := syscall.Syscall6(syscall.SYS_UTIMENSAT, uintptr(dirFd), namePtr, timesPtr, uintptr(flags), 0, 0); err != 0 { - return err + return syserr.FromHost(errno).ToError() } return nil } |