diff options
-rw-r--r-- | pkg/abi/linux/linux_abi_autogen_unsafe.go | 24 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/connected_endpoint_refs.go | 3 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/inode_refs.go | 3 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/inode_refs.go | 3 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/socket_refs.go | 3 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/mount.go | 4 | ||||
-rw-r--r-- | pkg/sentry/vfs/context.go | 24 | ||||
-rw-r--r-- | pkg/sentry/vfs/mount.go | 19 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 55 |
9 files changed, 99 insertions, 39 deletions
diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go index 93a6b4cd4..64994692a 100644 --- a/pkg/abi/linux/linux_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go @@ -167,7 +167,7 @@ func (s *Statx) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (s *Statx) UnmarshalUnsafe(src []byte) { - if s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() { + if s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() { safecopy.CopyOut(unsafe.Pointer(s), src) } else { // Type Statx doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -208,7 +208,7 @@ func (s *Statx) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) { // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (s *Statx) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { - if !s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() { + if !s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() { // Type Statx doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := task.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. length, err := task.CopyInBytes(addr, buf) // escapes: okay. @@ -642,7 +642,7 @@ func (f *FUSEHeaderIn) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (f *FUSEHeaderIn) UnmarshalUnsafe(src []byte) { - if f.Opcode.Packed() && f.Unique.Packed() { + if f.Unique.Packed() && f.Opcode.Packed() { safecopy.CopyOut(unsafe.Pointer(f), src) } else { // Type FUSEHeaderIn doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -683,7 +683,7 @@ func (f *FUSEHeaderIn) CopyOut(task marshal.Task, addr usermem.Addr) (int, error // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (f *FUSEHeaderIn) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { - if !f.Opcode.Packed() && f.Unique.Packed() { + if !f.Unique.Packed() && f.Opcode.Packed() { // Type FUSEHeaderIn doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := task.CopyScratchBuffer(f.SizeBytes()) // escapes: okay. length, err := task.CopyInBytes(addr, buf) // escapes: okay. @@ -2020,7 +2020,7 @@ func (i *IPTEntry) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (i *IPTEntry) Packed() bool { - return i.Counters.Packed() && i.IP.Packed() + return i.IP.Packed() && i.Counters.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. @@ -2035,7 +2035,7 @@ func (i *IPTEntry) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (i *IPTEntry) UnmarshalUnsafe(src []byte) { - if i.IP.Packed() && i.Counters.Packed() { + if i.Counters.Packed() && i.IP.Packed() { safecopy.CopyOut(unsafe.Pointer(i), src) } else { // Type IPTEntry doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -2208,7 +2208,7 @@ func (i *IPTIP) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (i *IPTIP) Packed() bool { - return i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() + return i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. @@ -2223,7 +2223,7 @@ func (i *IPTIP) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (i *IPTIP) UnmarshalUnsafe(src []byte) { - if i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() { + if i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { safecopy.CopyOut(unsafe.Pointer(i), src) } else { // Type IPTIP doesn't have a packed layout in memory, fallback to UnmarshalBytes. @@ -2290,7 +2290,7 @@ func (i *IPTIP) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { // WriteTo implements io.WriterTo.WriteTo. func (i *IPTIP) WriteTo(w io.Writer) (int64, error) { - if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { + if !i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() { // Type IPTIP doesn't have a packed layout in memory, fall back to MarshalBytes. buf := make([]byte, i.SizeBytes()) i.MarshalBytes(buf) @@ -3055,7 +3055,7 @@ func (i *IP6TEntry) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) { // CopyIn implements marshal.Marshallable.CopyIn. //go:nosplit func (i *IP6TEntry) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { - if !i.IPv6.Packed() && i.Counters.Packed() { + if !i.Counters.Packed() && i.IPv6.Packed() { // Type IP6TEntry doesn't have a packed layout in memory, fall back to UnmarshalBytes. buf := task.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. length, err := task.CopyInBytes(addr, buf) // escapes: okay. @@ -3196,7 +3196,7 @@ func (i *IP6TIP) UnmarshalBytes(src []byte) { // Packed implements marshal.Marshallable.Packed. //go:nosplit func (i *IP6TIP) Packed() bool { - return i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() + return i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() } // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. @@ -3278,7 +3278,7 @@ func (i *IP6TIP) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) { // WriteTo implements io.WriterTo.WriteTo. func (i *IP6TIP) WriteTo(w io.Writer) (int64, error) { - if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() { + if !i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() { // Type IP6TIP doesn't have a packed layout in memory, fall back to MarshalBytes. buf := make([]byte, i.SizeBytes()) i.MarshalBytes(buf) diff --git a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go index 1fcf8ab73..3ef6500fa 100644 --- a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go +++ b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go @@ -1,10 +1,11 @@ package host import ( - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/host/inode_refs.go b/pkg/sentry/fsimpl/host/inode_refs.go index c920a3c4e..fc13b3cc7 100644 --- a/pkg/sentry/fsimpl/host/inode_refs.go +++ b/pkg/sentry/fsimpl/host/inode_refs.go @@ -1,10 +1,11 @@ package host import ( - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/fsimpl/tmpfs/inode_refs.go b/pkg/sentry/fsimpl/tmpfs/inode_refs.go index 38eddde7e..175dc3409 100644 --- a/pkg/sentry/fsimpl/tmpfs/inode_refs.go +++ b/pkg/sentry/fsimpl/tmpfs/inode_refs.go @@ -1,10 +1,11 @@ package tmpfs import ( - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/socket/unix/socket_refs.go b/pkg/sentry/socket/unix/socket_refs.go index 8eb0c9327..090395057 100644 --- a/pkg/sentry/socket/unix/socket_refs.go +++ b/pkg/sentry/socket/unix/socket_refs.go @@ -1,10 +1,11 @@ package unix import ( - "gvisor.dev/gvisor/pkg/log" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" "runtime" "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // ownerType is used to customize logging. Note that we use a pointer to T so diff --git a/pkg/sentry/syscalls/linux/vfs2/mount.go b/pkg/sentry/syscalls/linux/vfs2/mount.go index 4bd5c7ca2..769c9b92f 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mount.go +++ b/pkg/sentry/syscalls/linux/vfs2/mount.go @@ -109,8 +109,8 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } defer target.Release(t) - - return 0, nil, t.Kernel().VFS().MountAt(t, creds, source, &target.pop, fsType, &opts) + _, err = t.Kernel().VFS().MountAt(t, creds, source, &target.pop, fsType, &opts) + return 0, nil, err } // Umount2 implements Linux syscall umount2(2). diff --git a/pkg/sentry/vfs/context.go b/pkg/sentry/vfs/context.go index c9e724fef..97018651f 100644 --- a/pkg/sentry/vfs/context.go +++ b/pkg/sentry/vfs/context.go @@ -40,6 +40,30 @@ func MountNamespaceFromContext(ctx context.Context) *MountNamespace { return nil } +type mountNamespaceContext struct { + context.Context + mntns *MountNamespace +} + +// Value implements Context.Value. +func (mc mountNamespaceContext) Value(key interface{}) interface{} { + switch key { + case CtxMountNamespace: + mc.mntns.IncRef() + return mc.mntns + default: + return mc.Context.Value(key) + } +} + +// WithMountNamespace returns a copy of ctx with the given MountNamespace. +func WithMountNamespace(ctx context.Context, mntns *MountNamespace) context.Context { + return &mountNamespaceContext{ + Context: ctx, + mntns: mntns, + } +} + // RootFromContext returns the VFS root used by ctx. It takes a reference on // the returned VirtualDentry. If ctx does not have a specific VFS root, // RootFromContext returns a zero-value VirtualDentry. diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 714af6907..09fea3628 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -263,16 +263,20 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr } // MountAt creates and mounts a Filesystem configured by the given arguments. -func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *MountOptions) error { +// The VirtualFilesystem will hold a reference to the Mount until it is unmounted. +// +// This method returns the mounted Mount without a reference, for convenience +// during VFS setup when there is no chance of racing with unmount. +func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *MountOptions) (*Mount, error) { mnt, err := vfs.MountDisconnected(ctx, creds, source, fsTypeName, opts) if err != nil { - return err + return nil, err } defer mnt.DecRef(ctx) if err := vfs.ConnectMountAt(ctx, creds, mnt, target); err != nil { - return err + return nil, err } - return nil + return mnt, nil } // UmountAt removes the Mount at the given path. @@ -657,6 +661,13 @@ retryFirst: return VirtualDentry{mnt, d} } +// SetMountReadOnly sets the mount as ReadOnly. +func (vfs *VirtualFilesystem) SetMountReadOnly(mnt *Mount, ro bool) error { + vfs.mountMu.Lock() + defer vfs.mountMu.Unlock() + return mnt.setReadOnlyLocked(ro) +} + // CheckBeginWrite increments the counter of in-progress write operations on // mnt. If mnt is mounted MS_RDONLY, CheckBeginWrite does nothing and returns // EROFS. diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index f27a6ff6b..fb200e988 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -205,15 +205,34 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config. for i := range mounts { submount := &mounts[i] log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options) + var ( + mnt *vfs.Mount + err error + ) + if hint := c.hints.findMount(submount.Mount); hint != nil && hint.isSupported() { - if err := c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.Mount, hint); err != nil { + mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.Mount, hint) + if err != nil { return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.Destination, err) } } else { - if err := c.mountSubmountVFS2(ctx, conf, mns, creds, submount); err != nil { + mnt, err = c.mountSubmountVFS2(ctx, conf, mns, creds, submount) + if err != nil { return fmt.Errorf("mount submount %q: %w", submount.Destination, err) } } + + if mnt != nil && mnt.ReadOnly() { + // Switch to ReadWrite while we setup submounts. + if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil { + return fmt.Errorf("failed to set mount at %q readwrite: %v", submount.Destination, err) + } + defer func() { + if err := c.k.VFS().SetMountReadOnly(mnt, true); err != nil { + panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.Destination, err)) + } + }() + } } if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil { @@ -256,7 +275,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { return mounts, nil } -func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error { +func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) (*vfs.Mount, error) { root := mns.Root() defer root.DecRef(ctx) target := &vfs.PathOperation{ @@ -266,21 +285,22 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C } fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount) if err != nil { - return fmt.Errorf("mountOptions failed: %w", err) + return nil, fmt.Errorf("mountOptions failed: %w", err) } if len(fsName) == 0 { // Filesystem is not supported (e.g. cgroup), just skip it. - return nil + return nil, nil } if err := c.k.VFS().MkdirAllAt(ctx, submount.Destination, root, creds, &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}); err != nil { - return err + return nil, err } - if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil { - return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts) + mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts) + if err != nil { + return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts) } log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts.GetFilesystemOptions.Data) - return nil + return mnt, nil } // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values @@ -407,7 +427,8 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config // another user. This is normally done for /tmp. Options: []string{"mode=01777"}, } - return c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount}) + _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount}) + return err case syserror.ENOTDIR: // Not a dir?! Let it be. @@ -458,25 +479,25 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *conf // mountSharedSubmount binds mount to a previously mounted volume that is shared // among containers in the same pod. -func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) error { +func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) (*vfs.Mount, error) { if err := source.checkCompatible(mount); err != nil { - return err + return nil, err } _, opts, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{Mount: mount}) if err != nil { - return err + return nil, err } newMnt, err := c.k.VFS().NewDisconnectedMount(source.vfsMount.Filesystem(), source.vfsMount.Root(), opts) if err != nil { - return err + return nil, err } defer newMnt.DecRef(ctx) root := mns.Root() defer root.DecRef(ctx) if err := c.k.VFS().MkdirAllAt(ctx, mount.Destination, root, creds, &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}); err != nil { - return err + return nil, err } target := &vfs.PathOperation{ @@ -485,8 +506,8 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co Path: fspath.Parse(mount.Destination), } if err := c.k.VFS().ConnectMountAt(ctx, creds, newMnt, target); err != nil { - return err + return nil, err } log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name) - return nil + return newMnt, nil } |