diff options
Diffstat (limited to 'runsc/boot/fs.go')
-rw-r--r-- | runsc/boot/fs.go | 87 |
1 files changed, 47 insertions, 40 deletions
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index e83584b82..ddf288456 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -29,10 +29,12 @@ import ( _ "gvisor.dev/gvisor/pkg/sentry/fs/sys" _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" _ "gvisor.dev/gvisor/pkg/sentry/fs/tty" + "gvisor.dev/gvisor/pkg/sentry/vfs" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/gofer" @@ -47,6 +49,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/specutils" ) @@ -65,7 +68,7 @@ const ( // tmpfs has some extra supported options that we must pass through. var tmpfsAllowedData = []string{"mode", "uid", "gid"} -func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) { +func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) { // Upper layer uses the same flags as lower, but it must be read-write. upperFlags := lowerFlags upperFlags.ReadOnly = false @@ -155,7 +158,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount { } // p9MountData creates a slice of p9 mount data. -func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string { +func p9MountData(fd int, fa config.FileAccessType, vfs2 bool) []string { opts := []string{ "trans=fd", "rfdno=" + strconv.Itoa(fd), @@ -166,7 +169,7 @@ func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string { // enablement. opts = append(opts, "privateunixsocket=true") } - if fa == FileAccessShared { + if fa == config.FileAccessShared { opts = append(opts, "cache=remote_revalidating") } return opts @@ -251,7 +254,7 @@ func mustFindFilesystem(name string) fs.Filesystem { // addSubmountOverlay overlays the inode over a ramfs tree containing the given // paths. -func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) { +func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string, mf fs.MountSourceFlags) (*fs.Inode, error) { // Construct a ramfs tree of mount points. The contents never // change, so this can be fully caching. There's no real // filesystem backing this tree, so we set the filesystem to @@ -261,7 +264,7 @@ func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string if err != nil { return nil, fmt.Errorf("creating mount tree: %v", err) } - overlayInode, err := fs.NewOverlayRoot(ctx, inode, mountTree, fs.MountSourceFlags{}) + overlayInode, err := fs.NewOverlayRoot(ctx, inode, mountTree, mf) if err != nil { return nil, fmt.Errorf("adding mount overlay: %v", err) } @@ -280,7 +283,7 @@ func subtargets(root string, mnts []specs.Mount) []string { return targets } -func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error { +func setupContainerFS(ctx context.Context, conf *config.Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error { if conf.VFS2 { return setupContainerVFS2(ctx, conf, mntr, procArgs) } @@ -318,14 +321,14 @@ func adjustDirentCache(k *kernel.Kernel) error { } type fdDispenser struct { - fds []int + fds []*fd.FD } func (f *fdDispenser) remove() int { if f.empty() { panic("fdDispenser out of fds") } - rv := f.fds[0] + rv := f.fds[0].Release() f.fds = f.fds[1:] return rv } @@ -390,6 +393,10 @@ type mountHint struct { // root is the inode where the volume is mounted. For mounts with 'pod' share // the volume is mounted once and then bind mounted inside the containers. root *fs.Inode + + // vfsMount is the master mount for the volume. For mounts with 'pod' share + // the master volume is bind mounted inside the containers. + vfsMount *vfs.Mount } func (m *mountHint) setField(key, val string) error { @@ -447,27 +454,27 @@ func (m *mountHint) isSupported() bool { func (m *mountHint) checkCompatible(mount specs.Mount) error { // Remove options that don't affect to mount's behavior. masterOpts := filterUnsupportedOptions(m.mount) - slaveOpts := filterUnsupportedOptions(mount) + replicaOpts := filterUnsupportedOptions(mount) - if len(masterOpts) != len(slaveOpts) { - return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts) + if len(masterOpts) != len(replicaOpts) { + return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, replicaOpts) } sort.Strings(masterOpts) - sort.Strings(slaveOpts) + sort.Strings(replicaOpts) for i, opt := range masterOpts { - if opt != slaveOpts[i] { - return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts) + if opt != replicaOpts[i] { + return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, replicaOpts) } } return nil } -func (m *mountHint) fileAccessType() FileAccessType { +func (m *mountHint) fileAccessType() config.FileAccessType { if m.share == container { - return FileAccessExclusive + return config.FileAccessExclusive } - return FileAccessShared + return config.FileAccessShared } func filterUnsupportedOptions(mount specs.Mount) []string { @@ -558,7 +565,7 @@ type containerMounter struct { hints *podMountHints } -func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter { +func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints) *containerMounter { return &containerMounter{ root: spec.Root, mounts: compileMounts(spec), @@ -571,9 +578,9 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin // processHints processes annotations that container hints about how volumes // should be mounted (e.g. a volume shared between containers). It must be // called for the root container only. -func (c *containerMounter) processHints(conf *Config) error { +func (c *containerMounter) processHints(conf *config.Config, creds *auth.Credentials) error { if conf.VFS2 { - return nil + return c.processHintsVFS2(conf, creds) } ctx := c.k.SupervisorContext() for _, hint := range c.hints.mounts { @@ -595,7 +602,7 @@ func (c *containerMounter) processHints(conf *Config) error { // setupFS is used to set up the file system for all containers. This is the // main entry point method, with most of the other being internal only. It // returns the mount namespace that is created for the container. -func (c *containerMounter) setupFS(conf *Config, procArgs *kernel.CreateProcessArgs) (*fs.MountNamespace, error) { +func (c *containerMounter) setupFS(conf *config.Config, procArgs *kernel.CreateProcessArgs) (*fs.MountNamespace, error) { log.Infof("Configuring container's file system") // Create context with root credentials to mount the filesystem (the current @@ -621,7 +628,7 @@ func (c *containerMounter) setupFS(conf *Config, procArgs *kernel.CreateProcessA return mns, nil } -func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Config) (*fs.MountNamespace, error) { +func (c *containerMounter) createMountNamespace(ctx context.Context, conf *config.Config) (*fs.MountNamespace, error) { rootInode, err := c.createRootMount(ctx, conf) if err != nil { return nil, fmt.Errorf("creating filesystem for container: %v", err) @@ -633,9 +640,9 @@ func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Confi return mns, nil } -func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace) error { +func (c *containerMounter) mountSubmounts(ctx context.Context, conf *config.Config, mns *fs.MountNamespace) error { root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) for _, m := range c.mounts { log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options) @@ -669,7 +676,7 @@ func (c *containerMounter) checkDispenser() error { // mountSharedMaster mounts the master of a volume that is shared among // containers in a pod. It returns the root mount's inode. -func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config, hint *mountHint) (*fs.Inode, error) { +func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.Config, hint *mountHint) (*fs.Inode, error) { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount) @@ -709,7 +716,7 @@ func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config, } // createRootMount creates the root filesystem. -func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*fs.Inode, error) { +func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Config) (*fs.Inode, error) { // First construct the filesystem from the spec.Root. mf := fs.MountSourceFlags{ReadOnly: c.root.Readonly || conf.Overlay} @@ -734,7 +741,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (* // for submount paths. "/dev" "/sys" "/proc" and "/tmp" are always // mounted even if they are not in the spec. submounts := append(subtargets("/", c.mounts), "/dev", "/sys", "/proc", "/tmp") - rootInode, err = addSubmountOverlay(ctx, rootInode, submounts) + rootInode, err = addSubmountOverlay(ctx, rootInode, submounts, mf) if err != nil { return nil, fmt.Errorf("adding submount overlay: %v", err) } @@ -754,7 +761,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (* // getMountNameAndOptions retrieves the fsName, opts, and useOverlay values // used for mounts. -func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (string, []string, bool, error) { +func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.Mount) (string, []string, bool, error) { var ( fsName string opts []string @@ -788,19 +795,19 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( return fsName, opts, useOverlay, nil } -func (c *containerMounter) getMountAccessType(mount specs.Mount) FileAccessType { +func (c *containerMounter) getMountAccessType(mount specs.Mount) config.FileAccessType { if hint := c.hints.findMount(mount); hint != nil { return hint.fileAccessType() } // Non-root bind mounts are always shared if no hints were provided. - return FileAccessShared + return config.FileAccessShared } // mountSubmount mounts volumes inside the container's root. Because mounts may // be readonly, a lower ramfs overlay is added to create the mount point dir. // Another overlay is added with tmpfs on top if Config.Overlay is true. // 'm.Destination' must be an absolute path with '..' and symlinks resolved. -func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error { +func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m) @@ -844,7 +851,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns submounts := subtargets(m.Destination, c.mounts) if len(submounts) > 0 { log.Infof("Adding submount overlay over %q", m.Destination) - inode, err = addSubmountOverlay(ctx, inode, submounts) + inode, err = addSubmountOverlay(ctx, inode, submounts, mf) if err != nil { return fmt.Errorf("adding submount overlay: %v", err) } @@ -863,7 +870,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns if err != nil { return fmt.Errorf("can't find mount destination %q: %v", m.Destination, err) } - defer dirent.DecRef() + defer dirent.DecRef(ctx) if err := mns.Mount(ctx, dirent, inode); err != nil { return fmt.Errorf("mount %q error: %v", m.Destination, err) } @@ -884,12 +891,12 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun if err != nil { return fmt.Errorf("can't find mount destination %q: %v", mount.Destination, err) } - defer target.DecRef() + defer target.DecRef(ctx) // Take a ref on the inode that is about to be (re)-mounted. source.root.IncRef() if err := mns.Mount(ctx, target, source.root); err != nil { - source.root.DecRef() + source.root.DecRef(ctx) return fmt.Errorf("bind mount %q error: %v", mount.Destination, err) } @@ -899,7 +906,7 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun // addRestoreMount adds a mount to the MountSources map used for restoring a // checkpointed container. -func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount) error { +func (c *containerMounter) addRestoreMount(conf *config.Config, renv *fs.RestoreEnvironment, m specs.Mount) error { fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m) if err != nil { return err @@ -924,7 +931,7 @@ func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnviron // createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding // the mounts to the environment. -func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEnvironment, error) { +func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.RestoreEnvironment, error) { renv := &fs.RestoreEnvironment{ MountSources: make(map[string][]fs.MountArgs), } @@ -979,7 +986,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // // Note that when there are submounts inside of '/tmp', directories for the // mount points must be present, making '/tmp' not empty anymore. -func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error { +func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent) error { for _, m := range c.mounts { if filepath.Clean(m.Destination) == "/tmp" { log.Debugf("Explict %q mount found, skipping internal tmpfs, mount: %+v", "/tmp", m) @@ -992,12 +999,12 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M switch err { case nil: // Found '/tmp' in filesystem, check if it's empty. - defer tmp.DecRef() + defer tmp.DecRef(ctx) f, err := tmp.Inode.GetFile(ctx, tmp, fs.FileFlags{Read: true, Directory: true}) if err != nil { return err } - defer f.DecRef() + defer f.DecRef(ctx) serializer := &fs.CollectEntriesSerializer{} if err := f.Readdir(ctx, serializer); err != nil { return err |