diff options
-rw-r--r-- | pkg/abi/linux/linux_abi_autogen_unsafe.go | 50 | ||||
-rw-r--r-- | pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go | 6 | ||||
-rw-r--r-- | pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go | 6 | ||||
-rw-r--r-- | pkg/sentry/time/seqatomic_parameters_unsafe.go | 6 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 184 |
6 files changed, 178 insertions, 76 deletions
diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go index a2ed4b058..d7ec3b0fd 100644 --- a/pkg/abi/linux/linux_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go @@ -1099,7 +1099,7 @@ func (s *Statx) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (s *Statx) Packed() bool { - return s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() + return s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. @@ -1125,7 +1125,7 @@ func (s *Statx) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (s *Statx) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { - if !s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() { + if !s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() { // Type Statx doesn't have a packed layout in memory, fall back to MarshalBytes. buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. s.MarshalBytes(buf) // escapes: fallback. @@ -1155,7 +1155,7 @@ func (s *Statx) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (s *Statx) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) { - if !s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() { + if !s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() { // Type Statx doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. length, err := cc.CopyInBytes(addr, buf) // escapes: okay. @@ -1181,7 +1181,7 @@ func (s *Statx) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) { // WriteTo implements io.WriterTo.WriteTo. func (s *Statx) WriteTo(writer io.Writer) (int64, error) { - if !s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() { + if !s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() { // Type Statx doesn't have a packed layout in memory, fall back to MarshalBytes. buf := make([]byte, s.SizeBytes()) s.MarshalBytes(buf) @@ -1600,7 +1600,7 @@ func (f *FUSEHeaderIn) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (f *FUSEHeaderIn) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { - if !f.Opcode.Packed() && f.Unique.Packed() { + if !f.Unique.Packed() && f.Opcode.Packed() { // Type FUSEHeaderIn doesn't have a packed layout in memory, fall back to MarshalBytes. buf := cc.CopyScratchBuffer(f.SizeBytes()) // escapes: okay. f.MarshalBytes(buf) // escapes: fallback. @@ -4442,7 +4442,7 @@ func (i *IPTEntry) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (i *IPTEntry) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { - if !i.IP.Packed() && i.Counters.Packed() { + if !i.Counters.Packed() && i.IP.Packed() { // Type IPTEntry doesn't have a packed layout in memory, fall back to MarshalBytes. buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. i.MarshalBytes(buf) // escapes: fallback. @@ -4619,7 +4619,7 @@ func (i *IPTIP) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (i *IPTIP) UnmarshalUnsafe(src []byte) { - if i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { + if i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() { safecopy.CopyOut(unsafe.Pointer(i), src) } else { // Type IPTIP doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -4686,7 +4686,7 @@ func (i *IPTIP) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) { // WriteTo implements io.WriterTo.WriteTo. func (i *IPTIP) WriteTo(writer io.Writer) (int64, error) { - if !i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() { + if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { // Type IPTIP doesn't have a packed layout in memory, fall back to MarshalBytes. buf := make([]byte, i.SizeBytes()) i.MarshalBytes(buf) @@ -5410,7 +5410,7 @@ func (i *IP6TEntry) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (i *IP6TEntry) UnmarshalUnsafe(src []byte) { - if i.Counters.Packed() && i.IPv6.Packed() { + if i.IPv6.Packed() && i.Counters.Packed() { safecopy.CopyOut(unsafe.Pointer(i), src) } else { // Type IP6TEntry doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -5421,7 +5421,7 @@ func (i *IP6TEntry) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (i *IP6TEntry) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { - if !i.Counters.Packed() && i.IPv6.Packed() { + if !i.IPv6.Packed() && i.Counters.Packed() { // Type IP6TEntry doesn't have a packed layout in memory, fall back to MarshalBytes. buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. i.MarshalBytes(buf) // escapes: fallback. @@ -5451,7 +5451,7 @@ func (i *IP6TEntry) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, err // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (i *IP6TEntry) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) { - if !i.IPv6.Packed() && i.Counters.Packed() { + if !i.Counters.Packed() && i.IPv6.Packed() { // Type IP6TEntry doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. length, err := cc.CopyInBytes(addr, buf) // escapes: okay. @@ -5592,12 +5592,12 @@ func (i *IP6TIP) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (i *IP6TIP) Packed() bool { - return i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() + return i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. func (i *IP6TIP) MarshalUnsafe(dst []byte) { - if i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { + if i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() { safecopy.CopyIn(dst, unsafe.Pointer(i)) } else { // Type IP6TIP doesn't have a packed layout in memory, fallback to MarshalBytes. @@ -5607,7 +5607,7 @@ func (i *IP6TIP) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (i *IP6TIP) UnmarshalUnsafe(src []byte) { - if i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { + if i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() { safecopy.CopyOut(unsafe.Pointer(i), src) } else { // Type IP6TIP doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -5618,7 +5618,7 @@ func (i *IP6TIP) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (i *IP6TIP) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { - if !i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() { + if !i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() { // Type IP6TIP doesn't have a packed layout in memory, fall back to MarshalBytes. buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. i.MarshalBytes(buf) // escapes: fallback. @@ -5648,7 +5648,7 @@ func (i *IP6TIP) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (i *IP6TIP) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) { - if !i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() { + if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { // Type IP6TIP doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. length, err := cc.CopyInBytes(addr, buf) // escapes: okay. @@ -6319,7 +6319,7 @@ func (s *SemidDS) Packed() bool { // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. func (s *SemidDS) MarshalUnsafe(dst []byte) { - if s.SemPerm.Packed() && s.SemOTime.Packed() && s.SemCTime.Packed() { + if s.SemCTime.Packed() && s.SemPerm.Packed() && s.SemOTime.Packed() { safecopy.CopyIn(dst, unsafe.Pointer(s)) } else { // Type SemidDS doesn't have a packed layout in memory, fallback to MarshalBytes. @@ -6329,7 +6329,7 @@ func (s *SemidDS) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (s *SemidDS) UnmarshalUnsafe(src []byte) { - if s.SemPerm.Packed() && s.SemOTime.Packed() && s.SemCTime.Packed() { + if s.SemOTime.Packed() && s.SemCTime.Packed() && s.SemPerm.Packed() { safecopy.CopyOut(unsafe.Pointer(s), src) } else { // Type SemidDS doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -6370,7 +6370,7 @@ func (s *SemidDS) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (s *SemidDS) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) { - if !s.SemCTime.Packed() && s.SemPerm.Packed() && s.SemOTime.Packed() { + if !s.SemPerm.Packed() && s.SemOTime.Packed() && s.SemCTime.Packed() { // Type SemidDS doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. length, err := cc.CopyInBytes(addr, buf) // escapes: okay. @@ -6743,7 +6743,7 @@ func (s *ShmidDS) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) // WriteTo implements io.WriterTo.WriteTo. func (s *ShmidDS) WriteTo(writer io.Writer) (int64, error) { - if !s.ShmCtime.Packed() && s.ShmPerm.Packed() && s.ShmAtime.Packed() && s.ShmDtime.Packed() { + if !s.ShmAtime.Packed() && s.ShmDtime.Packed() && s.ShmCtime.Packed() && s.ShmPerm.Packed() { // Type ShmidDS doesn't have a packed layout in memory, fall back to MarshalBytes. buf := make([]byte, s.SizeBytes()) s.MarshalBytes(buf) @@ -8851,7 +8851,7 @@ func (i *Itimerspec) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, err // WriteTo implements io.WriterTo.WriteTo. func (i *Itimerspec) WriteTo(writer io.Writer) (int64, error) { - if !i.Interval.Packed() && i.Value.Packed() { + if !i.Value.Packed() && i.Interval.Packed() { // Type Itimerspec doesn't have a packed layout in memory, fall back to MarshalBytes. buf := make([]byte, i.SizeBytes()) i.MarshalBytes(buf) @@ -9133,7 +9133,7 @@ func (t *Tms) Packed() bool { // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. func (t *Tms) MarshalUnsafe(dst []byte) { - if t.CUTime.Packed() && t.CSTime.Packed() && t.UTime.Packed() && t.STime.Packed() { + if t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() && t.CSTime.Packed() { safecopy.CopyIn(dst, unsafe.Pointer(t)) } else { // Type Tms doesn't have a packed layout in memory, fallback to MarshalBytes. @@ -9143,7 +9143,7 @@ func (t *Tms) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (t *Tms) UnmarshalUnsafe(src []byte) { - if t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() && t.CSTime.Packed() { + if t.STime.Packed() && t.CUTime.Packed() && t.CSTime.Packed() && t.UTime.Packed() { safecopy.CopyOut(unsafe.Pointer(t), src) } else { // Type Tms doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -9154,7 +9154,7 @@ func (t *Tms) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (t *Tms) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { - if !t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() && t.CSTime.Packed() { + if !t.CSTime.Packed() && t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() { // Type Tms doesn't have a packed layout in memory, fall back to MarshalBytes. buf := cc.CopyScratchBuffer(t.SizeBytes()) // escapes: okay. t.MarshalBytes(buf) // escapes: fallback. @@ -9184,7 +9184,7 @@ func (t *Tms) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) { // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (t *Tms) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) { - if !t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() && t.CSTime.Packed() { + if !t.CSTime.Packed() && t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() { // Type Tms doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := cc.CopyScratchBuffer(t.SizeBytes()) // escapes: okay. length, err := cc.CopyInBytes(addr, buf) // escapes: okay. diff --git a/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go b/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go index 44296e32e..eed5e6475 100644 --- a/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go @@ -288,12 +288,12 @@ func (s *Stat) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (s *Stat) Packed() bool { - return s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() + return s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. func (s *Stat) MarshalUnsafe(dst []byte) { - if s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed() { + if s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() { safecopy.CopyIn(dst, unsafe.Pointer(s)) } else { // Type Stat doesn't have a packed layout in memory, fallback to MarshalBytes. @@ -314,7 +314,7 @@ func (s *Stat) UnmarshalUnsafe(src []byte) { // CopyOutN implements marshal.Marshallable.CopyOutN. //go:nosplit func (s *Stat) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { - if !s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() { + if !s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed() { // Type Stat doesn't have a packed layout in memory, fall back to MarshalBytes. buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. s.MarshalBytes(buf) // escapes: fallback. diff --git a/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go b/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go index bcf5496c0..e448fd95d 100644 --- a/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go @@ -300,7 +300,7 @@ func (s *Stat) Packed() bool { // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. func (s *Stat) MarshalUnsafe(dst []byte) { - if s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() { + if s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() { safecopy.CopyIn(dst, unsafe.Pointer(s)) } else { // Type Stat doesn't have a packed layout in memory, fallback to MarshalBytes. diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go index 90148bbb2..a0f2fe45c 100644 --- a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go +++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go @@ -1,12 +1,12 @@ package kernel import ( - "fmt" - "reflect" - "strings" "unsafe" + "fmt" "gvisor.dev/gvisor/pkg/sync" + "reflect" + "strings" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race diff --git a/pkg/sentry/time/seqatomic_parameters_unsafe.go b/pkg/sentry/time/seqatomic_parameters_unsafe.go index 2cb001080..f18440378 100644 --- a/pkg/sentry/time/seqatomic_parameters_unsafe.go +++ b/pkg/sentry/time/seqatomic_parameters_unsafe.go @@ -1,12 +1,12 @@ package time import ( - "fmt" - "reflect" - "strings" "unsafe" + "fmt" "gvisor.dev/gvisor/pkg/sync" + "reflect" + "strings" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 7844ea28c..e36664938 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -21,6 +21,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" @@ -168,26 +169,30 @@ func (c *containerMounter) mountAll(conf *config.Config, procArgs *kernel.Create } rootProcArgs.MountNamespaceVFS2 = mns + root := mns.Root() + defer root.DecRef(rootCtx) + if root.Mount().ReadOnly() { + // Switch to ReadWrite while we setup submounts. + if err := c.k.VFS().SetMountReadOnly(root.Mount(), false); err != nil { + return nil, fmt.Errorf(`failed to set mount at "/" readwrite: %w`, err) + } + // Restore back to ReadOnly at the end. + defer func() { + if err := c.k.VFS().SetMountReadOnly(root.Mount(), true); err != nil { + panic(fmt.Sprintf(`failed to restore mount at "/" back to readonly: %v`, err)) + } + }() + } + // Mount submounts. if err := c.mountSubmountsVFS2(rootCtx, conf, mns, rootCreds); err != nil { return nil, fmt.Errorf("mounting submounts vfs2: %w", err) } - if c.root.Readonly || conf.Overlay { - // Switch to ReadOnly after all submounts were setup. - root := mns.Root() - defer root.DecRef(rootCtx) - if err := c.k.VFS().SetMountReadOnly(root.Mount(), true); err != nil { - return nil, fmt.Errorf(`failed to set mount at "/" readonly: %v`, err) - } - } - return mns, nil } // createMountNamespaceVFS2 creates the container's root mount and namespace. -// The mount is created ReadWrite to allow mount point for submounts to be -// created. ** The caller is responsible to switch to ReadOnly if needed ** func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *config.Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { fd := c.fds.remove() data := p9MountData(fd, conf.FileAccess, true /* vfs2 */) @@ -201,21 +206,71 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c log.Infof("Mounting root over 9P, ioFD: %d", fd) opts := &vfs.MountOptions{ - // Always mount as ReadWrite to allow other mounts on top of it. It'll be - // made ReadOnly in the caller (if needed). - ReadOnly: false, + ReadOnly: c.root.Readonly, GetFilesystemOptions: vfs.GetFilesystemOptions{ Data: strings.Join(data, ","), }, InternalMount: true, } - mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, opts) + + fsName := gofer.Name + if conf.Overlay && !c.root.Readonly { + log.Infof("Adding overlay on top of root") + var err error + var cleanup func() + opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) + if err != nil { + return nil, fmt.Errorf("mounting root with overlay: %w", err) + } + defer cleanup() + fsName = overlay.Name + } + + mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", fsName, opts) if err != nil { return nil, fmt.Errorf("setting up mount namespace: %w", err) } return mns, nil } +// configureOverlay mounts the lower layer using "lowerOpts", mounts the upper +// layer using tmpfs, and return overlay mount options. "cleanup" must be called +// after the options have been used to mount the overlay, to release refs on +// lower and upper mounts. +func (c *containerMounter) configureOverlay(ctx context.Context, creds *auth.Credentials, lowerOpts *vfs.MountOptions, lowerFSName string) (*vfs.MountOptions, func(), error) { + // First copy options from lower layer to upper layer and overlay. Clear + // filesystem specific options. + upperOpts := *lowerOpts + upperOpts.GetFilesystemOptions = vfs.GetFilesystemOptions{} + + overlayOpts := *lowerOpts + overlayOpts.GetFilesystemOptions = vfs.GetFilesystemOptions{} + + // Next mount upper and lower. Upper is a tmpfs mount to keep all + // modifications inside the sandbox. + upper, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, tmpfs.Name, &upperOpts) + if err != nil { + return nil, nil, fmt.Errorf("failed to create upper layer for overlay, opts: %+v: %v", upperOpts, err) + } + cu := cleanup.Make(func() { upper.DecRef(ctx) }) + defer cu.Clean() + + // All writes go to the upper layer, be paranoid and make lower readonly. + lowerOpts.ReadOnly = true + lower, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, lowerFSName, lowerOpts) + if err != nil { + return nil, nil, err + } + cu.Add(func() { lower.DecRef(ctx) }) + + // Configure overlay with both layers. + overlayOpts.GetFilesystemOptions.InternalData = overlay.FilesystemOptions{ + UpperRoot: vfs.MakeVirtualDentry(upper, upper.Root()), + LowerRoots: []vfs.VirtualDentry{vfs.MakeVirtualDentry(lower, lower.Root())}, + } + return &overlayOpts, cu.Release(), nil +} + func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials) error { mounts, err := c.prepareMountsVFS2() if err != nil { @@ -245,7 +300,7 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config. if mnt != nil && mnt.ReadOnly() { // Switch to ReadWrite while we setup submounts. if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil { - return fmt.Errorf("failed to set mount at %q readwrite: %v", submount.Destination, err) + return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.Destination, err) } // Restore back to ReadOnly at the end. defer func() { @@ -297,14 +352,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { } func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) (*vfs.Mount, error) { - root := mns.Root() - defer root.DecRef(ctx) - target := &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(submount.Destination), - } - fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount) + fsName, opts, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount) if err != nil { return nil, fmt.Errorf("mountOptions failed: %w", err) } @@ -313,8 +361,27 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C return nil, nil } - if err := c.k.VFS().MakeSyntheticMountpoint(ctx, submount.Destination, root, creds); err != nil { - return nil, err + if err := c.makeMountPoint(ctx, creds, mns, submount.Destination); err != nil { + return nil, fmt.Errorf("creating mount point %q: %w", submount.Destination, err) + } + + if useOverlay { + log.Infof("Adding overlay on top of mount %q", submount.Destination) + var cleanup func() + opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) + if err != nil { + return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.Destination, err) + } + defer cleanup() + fsName = overlay.Name + } + + root := mns.Root() + defer root.DecRef(ctx) + target := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(submount.Destination), } mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts) if err != nil { @@ -326,8 +393,9 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values // used for mounts. -func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mountAndFD) (string, *vfs.MountOptions, error) { +func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mountAndFD) (string, *vfs.MountOptions, bool, error) { fsName := m.Type + useOverlay := false var data []string // Find filesystem name and FS specific data field. @@ -342,7 +410,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo var err error data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) if err != nil { - return "", nil, err + return "", nil, false, err } case bind: @@ -350,13 +418,16 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo if m.fd == 0 { // Check that an FD was provided to fails fast. Technically FD=0 is valid, // but unlikely to be correct in this context. - return "", nil, fmt.Errorf("9P mount requires a connection FD") + return "", nil, false, fmt.Errorf("9P mount requires a connection FD") } data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) + // If configured, add overlay to all writable mounts. + useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly + default: log.Warningf("ignoring unknown filesystem type %q", m.Type) - return "", nil, nil + return "", nil, false, nil } opts := &vfs.MountOptions{ @@ -381,11 +452,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo } } - if conf.Overlay { - // All writes go to upper, be paranoid and make lower readonly. - opts.ReadOnly = true - } - return fsName, opts, nil + return fsName, opts, useOverlay, nil } // mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so. @@ -488,13 +555,25 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *conf // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. mntFD := &mountAndFD{Mount: hint.mount} - fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, mntFD) + fsName, opts, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, mntFD) if err != nil { return nil, err } if len(fsName) == 0 { return nil, fmt.Errorf("mount type not supported %q", hint.mount.Type) } + + if useOverlay { + log.Infof("Adding overlay on top of shared mount %q", mntFD.Destination) + var cleanup func() + opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) + if err != nil { + return nil, fmt.Errorf("mounting shared volume with overlay at %q: %w", mntFD.Destination, err) + } + defer cleanup() + fsName = overlay.Name + } + return c.k.VFS().MountDisconnected(ctx, creds, "", fsName, opts) } @@ -505,7 +584,9 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co return nil, err } - _, opts, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{Mount: mount}) + // Ignore data and useOverlay because these were already applied to + // the master mount. + _, opts, _, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{Mount: mount}) if err != nil { return nil, err } @@ -517,18 +598,39 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co root := mns.Root() defer root.DecRef(ctx) - if err := c.k.VFS().MakeSyntheticMountpoint(ctx, mount.Destination, root, creds); err != nil { - return nil, err - } - target := &vfs.PathOperation{ Root: root, Start: root, Path: fspath.Parse(mount.Destination), } + + if err := c.makeMountPoint(ctx, creds, mns, mount.Destination); err != nil { + return nil, fmt.Errorf("creating mount point %q: %w", mount.Destination, err) + } + if err := c.k.VFS().ConnectMountAt(ctx, creds, newMnt, target); err != nil { return nil, err } log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name) return newMnt, nil } + +func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, dest string) error { + root := mns.Root() + defer root.DecRef(ctx) + target := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(dest), + } + // First check if mount point exists. When overlay is enabled, gofer doesn't + // allow changes to the FS, making MakeSytheticMountpoint() ineffective + // because MkdirAt fails with EROFS even if file exists. + vd, err := c.k.VFS().GetDentryAt(ctx, creds, target, &vfs.GetDentryOptions{}) + if err == nil { + // File exists, we're done. + vd.DecRef(ctx) + return nil + } + return c.k.VFS().MakeSyntheticMountpoint(ctx, dest, root, creds) +} |