summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot/loader.go
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot/loader.go')
-rw-r--r--runsc/boot/loader.go92
1 files changed, 78 insertions, 14 deletions
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index e7ca98134..3f41d8357 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -26,7 +26,6 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
@@ -36,6 +35,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
+ vfs2host "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -46,6 +47,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/syscalls/linux/vfs2"
"gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sentry/watchdog"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -73,6 +75,8 @@ import (
_ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
)
+var syscallTable *kernel.SyscallTable
+
// Loader keeps state needed to start the kernel and run the container..
type Loader struct {
// k is the kernel.
@@ -156,13 +160,17 @@ type Args struct {
Spec *specs.Spec
// Conf is the system configuration.
Conf *Config
- // ControllerFD is the FD to the URPC controller.
+ // ControllerFD is the FD to the URPC controller. The Loader takes ownership
+ // of this FD and may close it at any time.
ControllerFD int
- // Device is an optional argument that is passed to the platform.
+ // Device is an optional argument that is passed to the platform. The Loader
+ // takes ownership of this file and may close it at any time.
Device *os.File
- // GoferFDs is an array of FDs used to connect with the Gofer.
+ // GoferFDs is an array of FDs used to connect with the Gofer. The Loader
+ // takes ownership of these FDs and may close them at any time.
GoferFDs []int
- // StdioFDs is the stdio for the application.
+ // StdioFDs is the stdio for the application. The Loader takes ownership of
+ // these FDs and may close them at any time.
StdioFDs []int
// Console is set to true if using TTY.
Console bool
@@ -175,6 +183,9 @@ type Args struct {
UserLogFD int
}
+// make sure stdioFDs are always the same on initial start and on restore
+const startingStdioFD = 64
+
// New initializes a new kernel loader configured by spec.
// New also handles setting up a kernel for restoring a container.
func New(args Args) (*Loader, error) {
@@ -188,13 +199,14 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("setting up memory usage: %v", err)
}
- if args.Conf.VFS2 {
- st, ok := kernel.LookupSyscallTable(abi.Linux, arch.Host)
- if ok {
- vfs2.Override(st.Table)
- }
+ // Patch the syscall table.
+ kernel.VFS2Enabled = args.Conf.VFS2
+ if kernel.VFS2Enabled {
+ vfs2.Override(syscallTable.Table)
}
+ kernel.RegisterSyscallTable(syscallTable)
+
// Create kernel and platform.
p, err := createPlatform(args.Conf, args.Device)
if err != nil {
@@ -319,6 +331,35 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("creating pod mount hints: %v", err)
}
+ if kernel.VFS2Enabled {
+ // Set up host mount that will be used for imported fds.
+ hostFilesystem := vfs2host.NewFilesystem(k.VFS())
+ defer hostFilesystem.DecRef()
+ hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{})
+ if err != nil {
+ return nil, fmt.Errorf("failed to create hostfs mount: %v", err)
+ }
+ k.SetHostMount(hostMount)
+ }
+
+ // Make host FDs stable between invocations. Host FDs must map to the exact
+ // same number when the sandbox is restored. Otherwise the wrong FD will be
+ // used.
+ var stdioFDs []int
+ newfd := startingStdioFD
+ for _, fd := range args.StdioFDs {
+ err := syscall.Dup3(fd, newfd, syscall.O_CLOEXEC)
+ if err != nil {
+ return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err)
+ }
+ stdioFDs = append(stdioFDs, newfd)
+ err = syscall.Close(fd)
+ if err != nil {
+ return nil, fmt.Errorf("close original stdioFDs failed: %v", err)
+ }
+ newfd++
+ }
+
eid := execID{cid: args.ID}
l := &Loader{
k: k,
@@ -327,7 +368,7 @@ func New(args Args) (*Loader, error) {
watchdog: dog,
spec: args.Spec,
goferFDs: args.GoferFDs,
- stdioFDs: args.StdioFDs,
+ stdioFDs: stdioFDs,
rootProcArgs: procArgs,
sandboxID: args.ID,
processes: map[execID]*execProcess{eid: {}},
@@ -367,11 +408,16 @@ func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.
return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %v", err)
}
+ wd := spec.Process.Cwd
+ if wd == "" {
+ wd = "/"
+ }
+
// Create the process arguments.
procArgs := kernel.CreateProcessArgs{
Argv: spec.Process.Args,
Envv: spec.Process.Env,
- WorkingDirectory: spec.Process.Cwd, // Defaults to '/' if empty.
+ WorkingDirectory: wd,
Credentials: creds,
Umask: 0022,
Limits: ls,
@@ -516,7 +562,15 @@ func (l *Loader) run() error {
}
// Add the HOME enviroment variable if it is not already set.
- envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+ var envv []string
+ if kernel.VFS2Enabled {
+ envv, err = user.MaybeAddExecUserHomeVFS2(ctx, l.rootProcArgs.MountNamespaceVFS2,
+ l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+
+ } else {
+ envv, err = user.MaybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace,
+ l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+ }
if err != nil {
return err
}
@@ -569,6 +623,16 @@ func (l *Loader) run() error {
}
})
+ // l.stdioFDs are derived from dup() in boot.New() and they are now dup()ed again
+ // either in createFDTable() during initial start or in descriptor.initAfterLoad()
+ // during restore, we can release l.stdioFDs now.
+ for _, fd := range l.stdioFDs {
+ err := syscall.Close(fd)
+ if err != nil {
+ return fmt.Errorf("close dup()ed stdioFDs: %v", err)
+ }
+ }
+
log.Infof("Process should have started...")
l.watchdog.Start()
return l.k.Start()
@@ -810,7 +874,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
root := args.MountNamespace.Root()
defer root.DecRef()
ctx := fs.WithRoot(l.k.SupervisorContext(), root)
- envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
+ envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
if err != nil {
return 0, err
}