diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/BUILD | 3 | ||||
-rw-r--r-- | runsc/boot/fds.go | 108 | ||||
-rw-r--r-- | runsc/boot/loader.go | 204 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 42 |
4 files changed, 150 insertions, 207 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index abcaf4206..0e71e800b 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -12,7 +12,6 @@ go_library( "controller.go", "debug.go", "events.go", - "fds.go", "fs.go", "limits.go", "loader.go", @@ -43,6 +42,7 @@ go_library( "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/control", "//pkg/sentry/devices/memdev", + "//pkg/sentry/fdimport", "//pkg/sentry/fs", "//pkg/sentry/fs/dev", "//pkg/sentry/fs/gofer", @@ -53,6 +53,7 @@ go_library( "//pkg/sentry/fs/tmpfs", "//pkg/sentry/fs/tty", "//pkg/sentry/fs/user", + "//pkg/sentry/fsimpl/devpts", "//pkg/sentry/fsimpl/devtmpfs", "//pkg/sentry/fsimpl/gofer", "//pkg/sentry/fsimpl/host", diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go deleted file mode 100644 index 7e7a31fbd..000000000 --- a/runsc/boot/fds.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -import ( - "fmt" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/host" - vfshost "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" - "gvisor.dev/gvisor/pkg/sentry/kernel" -) - -// createFDTable creates an FD table that contains stdin, stdout, and stderr. -// If console is true, then ioctl calls will be passed through to the host FD. -// Upon success, createFDMap dups then closes stdioFDs. 
-func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) { - if len(stdioFDs) != 3 { - return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) - } - - if kernel.VFS2Enabled { - return createFDTableVFS2(ctx, console, stdioFDs) - } - - k := kernel.KernelFromContext(ctx) - fdTable := k.NewFDTable() - defer fdTable.DecRef() - - var ttyFile *fs.File - for appFD, hostFD := range stdioFDs { - var appFile *fs.File - - if console && appFD < 3 { - // Import the file as a host TTY file. - if ttyFile == nil { - var err error - appFile, err = host.ImportFile(ctx, hostFD, true /* isTTY */) - if err != nil { - return nil, err - } - defer appFile.DecRef() - - // Remember this in the TTY file, as we will - // use it for the other stdio FDs. - ttyFile = appFile - } else { - // Re-use the existing TTY file, as all three - // stdio FDs must point to the same fs.File in - // order to share TTY state, specifically the - // foreground process group id. - appFile = ttyFile - } - } else { - // Import the file as a regular host file. - var err error - appFile, err = host.ImportFile(ctx, hostFD, false /* isTTY */) - if err != nil { - return nil, err - } - defer appFile.DecRef() - } - - // Add the file to the FD map. - if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { - return nil, err - } - } - - fdTable.IncRef() - return fdTable, nil -} - -func createFDTableVFS2(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) { - k := kernel.KernelFromContext(ctx) - fdTable := k.NewFDTable() - defer fdTable.DecRef() - - for appFD, hostFD := range stdioFDs { - // TODO(gvisor.dev/issue/1482): Add TTY support. 
- appFile, err := vfshost.ImportFD(ctx, k.HostMount(), hostFD, false) - if err != nil { - return nil, err - } - - if err := fdTable.NewFDAtVFS2(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { - appFile.DecRef() - return nil, err - } - appFile.DecRef() - } - - fdTable.IncRef() - return fdTable, nil -} diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index f6ea4c102..79ef3a880 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -27,16 +27,18 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/memutil" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/fdimport" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" "gvisor.dev/gvisor/pkg/sentry/fs/user" - vfs2host "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" + hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -143,6 +145,9 @@ type execProcess struct { // tty will be nil if the process is not attached to a terminal. tty *host.TTYFileOperations + // ttyVFS2 will be nil if the process is not attached to a terminal. + ttyVFS2 *hostvfs2.TTYFileDescription + // pidnsPath is the pid namespace path in spec pidnsPath string } @@ -333,7 +338,7 @@ func New(args Args) (*Loader, error) { if kernel.VFS2Enabled { // Set up host mount that will be used for imported fds. 
- hostFilesystem := vfs2host.NewFilesystem(k.VFS()) + hostFilesystem := hostvfs2.NewFilesystem(k.VFS()) defer hostFilesystem.DecRef() hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{}) if err != nil { @@ -528,6 +533,8 @@ func (l *Loader) run() error { // If we are restoring, we do not want to create a process. // l.restore is set by the container manager when a restore call is made. + var ttyFile *host.TTYFileOperations + var ttyFileVFS2 *hostvfs2.TTYFileDescription if !l.restore { if l.conf.ProfileEnable { pprof.Initialize() @@ -542,13 +549,14 @@ func (l *Loader) run() error { // Create the FD map, which will set stdin, stdout, and stderr. If console // is true, then ioctl calls will be passed through to the host fd. ctx := l.rootProcArgs.NewContext(l.k) - fdTable, err := createFDTable(ctx, l.console, l.stdioFDs) + var err error + + // CreateProcess takes a reference on FDMap if successful. We won't need + // ours either way. + l.rootProcArgs.FDTable, ttyFile, ttyFileVFS2, err = createFDTable(ctx, l.console, l.stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } - // CreateProcess takes a reference on FDMap if successful. We won't need - // ours either way. - l.rootProcArgs.FDTable = fdTable // Setup the root container file system. l.startGoferMonitor(l.sandboxID, l.goferFDs) @@ -591,14 +599,16 @@ func (l *Loader) run() error { ep.pidnsPath = ns.Path } if l.console { - ttyFile, _ := l.rootProcArgs.FDTable.Get(0) - defer ttyFile.DecRef() - ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations) - - // Set the foreground process group on the TTY to the global - // init process group, since that is what we are about to - // start running. - ep.tty.InitForegroundProcessGroup(ep.tg.ProcessGroup()) + // Set the foreground process group on the TTY to the global init process + // group, since that is what we are about to start running. 
+ switch { + case ttyFileVFS2 != nil: + ep.ttyVFS2 = ttyFileVFS2 + ttyFileVFS2.InitForegroundProcessGroup(ep.tg.ProcessGroup()) + case ttyFile != nil: + ep.tty = ttyFile + ttyFile.InitForegroundProcessGroup(ep.tg.ProcessGroup()) + } } // Handle signals by forwarding them to the root container process @@ -719,7 +729,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file // Create the FD map, which will set stdin, stdout, and stderr. ctx := procArgs.NewContext(l.k) - fdTable, err := createFDTable(ctx, false, stdioFDs) + fdTable, _, _, err := createFDTable(ctx, false, stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } @@ -804,14 +814,14 @@ func (l *Loader) destroyContainer(cid string) error { l.mu.Lock() defer l.mu.Unlock() - _, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid}) + tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid}) if err != nil { // Container doesn't exist. return err } - // The container exists, has it been started? - if started { + // The container exists, but has it been started? + if tg != nil { if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil { return fmt.Errorf("sending SIGKILL to all container processes: %v", err) } @@ -853,48 +863,65 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { l.mu.Lock() defer l.mu.Unlock() - tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID}) + tg, err := l.tryThreadGroupFromIDLocked(execID{cid: args.ContainerID}) if err != nil { return 0, err } - if !started { + if tg == nil { return 0, fmt.Errorf("container %q not started", args.ContainerID) } - // TODO(gvisor.dev/issue/1623): Add VFS2 support - // Get the container MountNamespace from the Task. - tg.Leader().WithMuLocked(func(t *kernel.Task) { + if kernel.VFS2Enabled { // task.MountNamespace() does not take a ref, so we must do so ourselves. 
- args.MountNamespace = t.MountNamespace() - args.MountNamespace.IncRef() - }) - if args.MountNamespace != nil { - defer args.MountNamespace.DecRef() + args.MountNamespaceVFS2 = tg.Leader().MountNamespaceVFS2() + args.MountNamespaceVFS2.IncRef() + } else { + tg.Leader().WithMuLocked(func(t *kernel.Task) { + // task.MountNamespace() does not take a ref, so we must do so ourselves. + args.MountNamespace = t.MountNamespace() + args.MountNamespace.IncRef() + }) } // Add the HOME environment variable if it is not already set. - root := args.MountNamespace.Root() - defer root.DecRef() - ctx := fs.WithRoot(l.k.SupervisorContext(), root) - envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) - if err != nil { - return 0, err + if kernel.VFS2Enabled { + defer args.MountNamespaceVFS2.DecRef() + + root := args.MountNamespaceVFS2.Root() + defer root.DecRef() + ctx := vfs.WithRoot(l.k.SupervisorContext(), root) + envv, err := user.MaybeAddExecUserHomeVFS2(ctx, args.MountNamespaceVFS2, args.KUID, args.Envv) + if err != nil { + return 0, err + } + args.Envv = envv + } else { + defer args.MountNamespace.DecRef() + + root := args.MountNamespace.Root() + defer root.DecRef() + ctx := fs.WithRoot(l.k.SupervisorContext(), root) + envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) + if err != nil { + return 0, err + } + args.Envv = envv } - args.Envv = envv // Start the process. 
proc := control.Proc{Kernel: l.k} args.PIDNamespace = tg.PIDNamespace() - newTG, tgid, ttyFile, err := control.ExecAsync(&proc, args) + newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args) if err != nil { return 0, err } eid := execID{cid: args.ContainerID, pid: tgid} l.processes[eid] = &execProcess{ - tg: newTG, - tty: ttyFile, + tg: newTG, + tty: ttyFile, + ttyVFS2: ttyFileVFS2, } log.Debugf("updated processes: %v", l.processes) @@ -905,7 +932,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // Don't defer unlock, as doing so would make it impossible for // multiple clients to wait on the same container. - tg, _, err := l.threadGroupFromID(execID{cid: cid}) + tg, err := l.threadGroupFromID(execID{cid: cid}) if err != nil { return fmt.Errorf("can't wait for container %q: %v", cid, err) } @@ -924,7 +951,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e // Try to find a process that was exec'd eid := execID{cid: cid, pid: tgid} - execTG, _, err := l.threadGroupFromID(eid) + execTG, err := l.threadGroupFromID(eid) if err == nil { ws := l.wait(execTG) *waitStatus = ws @@ -938,7 +965,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e // The caller may be waiting on a process not started directly via exec. // In this case, find the process in the container's PID namespace. - initTG, _, err := l.threadGroupFromID(execID{cid: cid}) + initTG, err := l.threadGroupFromID(execID{cid: cid}) if err != nil { return fmt.Errorf("waiting for PID %d: %v", tgid, err) } @@ -1089,8 +1116,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e return fmt.Errorf("PID (%d) cannot be set when signaling all processes", pid) } // Check that the container has actually started before signaling it. 
- _, _, err := l.threadGroupFromID(execID{cid: cid}) - if err != nil { + if _, err := l.threadGroupFromID(execID{cid: cid}); err != nil { return err } if err := l.signalAllProcesses(cid, signo); err != nil { @@ -1104,7 +1130,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e } func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) error { - execTG, _, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) + execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) if err == nil { // Send signal directly to the identified process. return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo}) @@ -1113,7 +1139,7 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er // The caller may be signaling a process not started directly via exec. // In this case, find the process in the container's PID namespace and // signal it. - initTG, _, err := l.threadGroupFromID(execID{cid: cid}) + initTG, err := l.threadGroupFromID(execID{cid: cid}) if err != nil { return fmt.Errorf("no thread group found: %v", err) } @@ -1127,17 +1153,35 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}) } +// signalForegrondProcessGroup looks up the foreground process group from the +// TTY for the given "tgid" inside container "cid", and sends the signal to it. func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, signo int32) error { - // Lookup foreground process group from the TTY for the given process, - // and send the signal to it. 
- tg, tty, err := l.threadGroupFromID(execID{cid: cid, pid: tgid}) + l.mu.Lock() + tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid, pid: tgid}) if err != nil { + l.mu.Unlock() return fmt.Errorf("no thread group found: %v", err) } - if tty == nil { + if tg == nil { + l.mu.Unlock() + return fmt.Errorf("container %q not started", cid) + } + + tty, ttyVFS2, err := l.ttyFromIDLocked(execID{cid: cid, pid: tgid}) + l.mu.Unlock() + if err != nil { + return fmt.Errorf("no thread group found: %v", err) + } + + var pg *kernel.ProcessGroup + switch { + case ttyVFS2 != nil: + pg = ttyVFS2.ForegroundProcessGroup() + case tty != nil: + pg = tty.ForegroundProcessGroup() + default: return fmt.Errorf("no TTY attached") } - pg := tty.ForegroundProcessGroup() if pg == nil { // No foreground process group has been set. Signal the // original thread group. @@ -1168,33 +1212,57 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error { return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo}) } -// threadGroupFromID same as threadGroupFromIDLocked except that it acquires -// mutex before calling it. -func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) { +// threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it +// acquires mutex before calling it and fails in case container hasn't started +// yet. +func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, error) { l.mu.Lock() defer l.mu.Unlock() - tg, tty, ok, err := l.threadGroupFromIDLocked(key) + tg, err := l.tryThreadGroupFromIDLocked(key) if err != nil { - return nil, nil, err + return nil, err } - if !ok { - return nil, nil, fmt.Errorf("container %q not started", key.cid) + if tg == nil { + return nil, fmt.Errorf("container %q not started", key.cid) } - return tg, tty, nil + return tg, nil } -// threadGroupFromIDLocked returns the thread group and TTY for the given -// execution ID. 
TTY may be nil if the process is not attached to a terminal. -// Also returns a boolean indicating whether the container has already started. -// Returns error if execution ID is invalid or if the container cannot be -// found (maybe it has been deleted). Caller must hold 'mu'. -func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) { +// tryThreadGroupFromIDLocked returns the thread group for the given execution +// ID. It may return nil in case the container has not started yet. Returns +// error if execution ID is invalid or if the container cannot be found (maybe +// it has been deleted). Caller must hold 'mu'. +func (l *Loader) tryThreadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, error) { ep := l.processes[key] if ep == nil { - return nil, nil, false, fmt.Errorf("container %q not found", key.cid) + return nil, fmt.Errorf("container %q not found", key.cid) } - if ep.tg == nil { - return nil, nil, false, nil + return ep.tg, nil +} + +// ttyFromIDLocked returns the TTY files for the given execution ID. Both may +// be nil if the process is not attached to a terminal or the container has +// not started yet. Returns error if execution ID is invalid or if the +// container cannot be found (maybe it has been deleted). Caller must hold 'mu'. 
+func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { + ep := l.processes[key] + if ep == nil { + return nil, nil, fmt.Errorf("container %q not found", key.cid) + } + return ep.tty, ep.ttyVFS2, nil +} + +func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { + if len(stdioFDs) != 3 { + return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) + } + + k := kernel.KernelFromContext(ctx) + fdTable := k.NewFDTable() + ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs) + if err != nil { + fdTable.DecRef() + return nil, nil, nil, err } - return ep.tg, ep.tty, true, nil + return fdTable, ttyFile, ttyFileVFS2, nil } diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 0b9b0b436..448fc4459 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/devices/memdev" "gvisor.dev/gvisor/pkg/sentry/fs" + devpts2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" devtmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" goferimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" procimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" @@ -41,37 +42,28 @@ import ( ) func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error { - - vfsObj.MustRegisterFilesystemType(rootFsName, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserList: true, - }) - - vfsObj.MustRegisterFilesystemType(bind, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(devpts2.Name, &devpts2.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, + // TODO(b/29356795): Users may mount this once the terminals are in a + // usable state. 
+ AllowUserMount: false, }) - - vfsObj.MustRegisterFilesystemType(devpts, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(devtmpfsimpl.Name, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) - - vfsObj.MustRegisterFilesystemType(devtmpfs, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - AllowUserList: true, - }) - vfsObj.MustRegisterFilesystemType(proc, &procimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - AllowUserList: true, + vfsObj.MustRegisterFilesystemType(goferimpl.Name, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(sysfs, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(procimpl.Name, &procimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(tmpfs, &tmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(sysimpl.Name, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(nonefs, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(tmpfsimpl.Name, &tmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) @@ -108,7 +100,6 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte } func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - exe := procArgs.Argv[0] // Absolute paths can be used directly. 
@@ -120,11 +111,9 @@ func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessAr // Paths with '/' in them should be joined to the working directory, or // to the root if working directory is not set. if strings.IndexByte(exe, '/') > 0 { - if !path.IsAbs(procArgs.WorkingDirectory) { return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory) } - procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe) return nil } @@ -144,21 +133,17 @@ func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessAr creds := procArgs.Credentials for _, p := range paths { - binPath := path.Join(p, exe) - pop := &vfs.PathOperation{ Root: root, Start: root, Path: fspath.Parse(binPath), FollowFinalSymlink: true, } - opts := &vfs.OpenOptions{ FileExec: true, Flags: linux.O_RDONLY, } - dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts) if err == syserror.ENOENT || err == syserror.EACCES { // Didn't find it here. @@ -209,7 +194,6 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs } func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { - fd := c.fds.remove() opts := strings.Join(p9MountOptionsVFS2(fd, conf.FileAccess), ",") @@ -222,7 +206,6 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *C } func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error { - c.prepareMountsVFS2() for _, submount := range c.mounts { @@ -256,7 +239,6 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, if err != nil { return fmt.Errorf("mountOptions failed: %w", err) } - if fsName == "" { // Filesystem is not supported (e.g. cgroup), just skip it. 
return nil @@ -277,7 +259,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, // All writes go to upper, be paranoid and make lower readonly. opts.ReadOnly = useOverlay - if err := c.k.VFS().MountAt(ctx, creds, "", target, submount.Type, opts); err != nil { + if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil { return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts) } log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts) |