summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot/fs.go
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot/fs.go')
-rw-r--r--runsc/boot/fs.go87
1 files changed, 47 insertions, 40 deletions
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index e83584b82..ddf288456 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -29,10 +29,12 @@ import (
_ "gvisor.dev/gvisor/pkg/sentry/fs/sys"
_ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
_ "gvisor.dev/gvisor/pkg/sentry/fs/tty"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
@@ -47,6 +49,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/runsc/config"
"gvisor.dev/gvisor/runsc/specutils"
)
@@ -65,7 +68,7 @@ const (
// tmpfs has some extra supported options that we must pass through.
var tmpfsAllowedData = []string{"mode", "uid", "gid"}
-func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
+func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
// Upper layer uses the same flags as lower, but it must be read-write.
upperFlags := lowerFlags
upperFlags.ReadOnly = false
@@ -155,7 +158,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
}
// p9MountData creates a slice of p9 mount data.
-func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string {
+func p9MountData(fd int, fa config.FileAccessType, vfs2 bool) []string {
opts := []string{
"trans=fd",
"rfdno=" + strconv.Itoa(fd),
@@ -166,7 +169,7 @@ func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string {
// enablement.
opts = append(opts, "privateunixsocket=true")
}
- if fa == FileAccessShared {
+ if fa == config.FileAccessShared {
opts = append(opts, "cache=remote_revalidating")
}
return opts
@@ -251,7 +254,7 @@ func mustFindFilesystem(name string) fs.Filesystem {
// addSubmountOverlay overlays the inode over a ramfs tree containing the given
// paths.
-func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) {
+func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string, mf fs.MountSourceFlags) (*fs.Inode, error) {
// Construct a ramfs tree of mount points. The contents never
// change, so this can be fully caching. There's no real
// filesystem backing this tree, so we set the filesystem to
@@ -261,7 +264,7 @@ func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string
if err != nil {
return nil, fmt.Errorf("creating mount tree: %v", err)
}
- overlayInode, err := fs.NewOverlayRoot(ctx, inode, mountTree, fs.MountSourceFlags{})
+ overlayInode, err := fs.NewOverlayRoot(ctx, inode, mountTree, mf)
if err != nil {
return nil, fmt.Errorf("adding mount overlay: %v", err)
}
@@ -280,7 +283,7 @@ func subtargets(root string, mnts []specs.Mount) []string {
return targets
}
-func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+func setupContainerFS(ctx context.Context, conf *config.Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
if conf.VFS2 {
return setupContainerVFS2(ctx, conf, mntr, procArgs)
}
@@ -318,14 +321,14 @@ func adjustDirentCache(k *kernel.Kernel) error {
}
type fdDispenser struct {
- fds []int
+ fds []*fd.FD
}
func (f *fdDispenser) remove() int {
if f.empty() {
panic("fdDispenser out of fds")
}
- rv := f.fds[0]
+ rv := f.fds[0].Release()
f.fds = f.fds[1:]
return rv
}
@@ -390,6 +393,10 @@ type mountHint struct {
// root is the inode where the volume is mounted. For mounts with 'pod' share
// the volume is mounted once and then bind mounted inside the containers.
root *fs.Inode
+
+ // vfsMount is the master mount for the volume. For mounts with 'pod' share
+ // the master volume is bind mounted inside the containers.
+ vfsMount *vfs.Mount
}
func (m *mountHint) setField(key, val string) error {
@@ -447,27 +454,27 @@ func (m *mountHint) isSupported() bool {
func (m *mountHint) checkCompatible(mount specs.Mount) error {
// Remove options that don't affect to mount's behavior.
masterOpts := filterUnsupportedOptions(m.mount)
- slaveOpts := filterUnsupportedOptions(mount)
+ replicaOpts := filterUnsupportedOptions(mount)
- if len(masterOpts) != len(slaveOpts) {
- return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts)
+ if len(masterOpts) != len(replicaOpts) {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, replicaOpts)
}
sort.Strings(masterOpts)
- sort.Strings(slaveOpts)
+ sort.Strings(replicaOpts)
for i, opt := range masterOpts {
- if opt != slaveOpts[i] {
- return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts)
+ if opt != replicaOpts[i] {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, replicaOpts)
}
}
return nil
}
-func (m *mountHint) fileAccessType() FileAccessType {
+func (m *mountHint) fileAccessType() config.FileAccessType {
if m.share == container {
- return FileAccessExclusive
+ return config.FileAccessExclusive
}
- return FileAccessShared
+ return config.FileAccessShared
}
func filterUnsupportedOptions(mount specs.Mount) []string {
@@ -558,7 +565,7 @@ type containerMounter struct {
hints *podMountHints
}
-func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
+func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints) *containerMounter {
return &containerMounter{
root: spec.Root,
mounts: compileMounts(spec),
@@ -571,9 +578,9 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin
// processHints processes annotations that container hints about how volumes
// should be mounted (e.g. a volume shared between containers). It must be
// called for the root container only.
-func (c *containerMounter) processHints(conf *Config) error {
+func (c *containerMounter) processHints(conf *config.Config, creds *auth.Credentials) error {
if conf.VFS2 {
- return nil
+ return c.processHintsVFS2(conf, creds)
}
ctx := c.k.SupervisorContext()
for _, hint := range c.hints.mounts {
@@ -595,7 +602,7 @@ func (c *containerMounter) processHints(conf *Config) error {
// setupFS is used to set up the file system for all containers. This is the
// main entry point method, with most of the other being internal only. It
// returns the mount namespace that is created for the container.
-func (c *containerMounter) setupFS(conf *Config, procArgs *kernel.CreateProcessArgs) (*fs.MountNamespace, error) {
+func (c *containerMounter) setupFS(conf *config.Config, procArgs *kernel.CreateProcessArgs) (*fs.MountNamespace, error) {
log.Infof("Configuring container's file system")
// Create context with root credentials to mount the filesystem (the current
@@ -621,7 +628,7 @@ func (c *containerMounter) setupFS(conf *Config, procArgs *kernel.CreateProcessA
return mns, nil
}
-func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Config) (*fs.MountNamespace, error) {
+func (c *containerMounter) createMountNamespace(ctx context.Context, conf *config.Config) (*fs.MountNamespace, error) {
rootInode, err := c.createRootMount(ctx, conf)
if err != nil {
return nil, fmt.Errorf("creating filesystem for container: %v", err)
@@ -633,9 +640,9 @@ func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Confi
return mns, nil
}
-func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace) error {
+func (c *containerMounter) mountSubmounts(ctx context.Context, conf *config.Config, mns *fs.MountNamespace) error {
root := mns.Root()
- defer root.DecRef()
+ defer root.DecRef(ctx)
for _, m := range c.mounts {
log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options)
@@ -669,7 +676,7 @@ func (c *containerMounter) checkDispenser() error {
// mountSharedMaster mounts the master of a volume that is shared among
// containers in a pod. It returns the root mount's inode.
-func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config, hint *mountHint) (*fs.Inode, error) {
+func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.Config, hint *mountHint) (*fs.Inode, error) {
// Map mount type to filesystem name, and parse out the options that we are
// capable of dealing with.
fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount)
@@ -709,7 +716,7 @@ func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config,
}
// createRootMount creates the root filesystem.
-func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*fs.Inode, error) {
+func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Config) (*fs.Inode, error) {
// First construct the filesystem from the spec.Root.
mf := fs.MountSourceFlags{ReadOnly: c.root.Readonly || conf.Overlay}
@@ -734,7 +741,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
// for submount paths. "/dev" "/sys" "/proc" and "/tmp" are always
// mounted even if they are not in the spec.
submounts := append(subtargets("/", c.mounts), "/dev", "/sys", "/proc", "/tmp")
- rootInode, err = addSubmountOverlay(ctx, rootInode, submounts)
+ rootInode, err = addSubmountOverlay(ctx, rootInode, submounts, mf)
if err != nil {
return nil, fmt.Errorf("adding submount overlay: %v", err)
}
@@ -754,7 +761,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
// getMountNameAndOptions retrieves the fsName, opts, and useOverlay values
// used for mounts.
-func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (string, []string, bool, error) {
+func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.Mount) (string, []string, bool, error) {
var (
fsName string
opts []string
@@ -788,19 +795,19 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
return fsName, opts, useOverlay, nil
}
-func (c *containerMounter) getMountAccessType(mount specs.Mount) FileAccessType {
+func (c *containerMounter) getMountAccessType(mount specs.Mount) config.FileAccessType {
if hint := c.hints.findMount(mount); hint != nil {
return hint.fileAccessType()
}
// Non-root bind mounts are always shared if no hints were provided.
- return FileAccessShared
+ return config.FileAccessShared
}
// mountSubmount mounts volumes inside the container's root. Because mounts may
// be readonly, a lower ramfs overlay is added to create the mount point dir.
// Another overlay is added with tmpfs on top if Config.Overlay is true.
// 'm.Destination' must be an absolute path with '..' and symlinks resolved.
-func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error {
+func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error {
// Map mount type to filesystem name, and parse out the options that we are
// capable of dealing with.
fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m)
@@ -844,7 +851,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
submounts := subtargets(m.Destination, c.mounts)
if len(submounts) > 0 {
log.Infof("Adding submount overlay over %q", m.Destination)
- inode, err = addSubmountOverlay(ctx, inode, submounts)
+ inode, err = addSubmountOverlay(ctx, inode, submounts, mf)
if err != nil {
return fmt.Errorf("adding submount overlay: %v", err)
}
@@ -863,7 +870,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
if err != nil {
return fmt.Errorf("can't find mount destination %q: %v", m.Destination, err)
}
- defer dirent.DecRef()
+ defer dirent.DecRef(ctx)
if err := mns.Mount(ctx, dirent, inode); err != nil {
return fmt.Errorf("mount %q error: %v", m.Destination, err)
}
@@ -884,12 +891,12 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun
if err != nil {
return fmt.Errorf("can't find mount destination %q: %v", mount.Destination, err)
}
- defer target.DecRef()
+ defer target.DecRef(ctx)
// Take a ref on the inode that is about to be (re)-mounted.
source.root.IncRef()
if err := mns.Mount(ctx, target, source.root); err != nil {
- source.root.DecRef()
+ source.root.DecRef(ctx)
return fmt.Errorf("bind mount %q error: %v", mount.Destination, err)
}
@@ -899,7 +906,7 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun
// addRestoreMount adds a mount to the MountSources map used for restoring a
// checkpointed container.
-func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount) error {
+func (c *containerMounter) addRestoreMount(conf *config.Config, renv *fs.RestoreEnvironment, m specs.Mount) error {
fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m)
if err != nil {
return err
@@ -924,7 +931,7 @@ func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnviron
// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding
// the mounts to the environment.
-func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEnvironment, error) {
+func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.RestoreEnvironment, error) {
renv := &fs.RestoreEnvironment{
MountSources: make(map[string][]fs.MountArgs),
}
@@ -979,7 +986,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
//
// Note that when there are submounts inside of '/tmp', directories for the
// mount points must be present, making '/tmp' not empty anymore.
-func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error {
+func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent) error {
for _, m := range c.mounts {
if filepath.Clean(m.Destination) == "/tmp" {
log.Debugf("Explict %q mount found, skipping internal tmpfs, mount: %+v", "/tmp", m)
@@ -992,12 +999,12 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M
switch err {
case nil:
// Found '/tmp' in filesystem, check if it's empty.
- defer tmp.DecRef()
+ defer tmp.DecRef(ctx)
f, err := tmp.Inode.GetFile(ctx, tmp, fs.FileFlags{Read: true, Directory: true})
if err != nil {
return err
}
- defer f.DecRef()
+ defer f.DecRef(ctx)
serializer := &fs.CollectEntriesSerializer{}
if err := f.Readdir(ctx, serializer); err != nil {
return err