Diffstat (limited to 'runsc')
-rw-r--r--  runsc/boot/BUILD                         |  2
-rw-r--r--  runsc/boot/controller.go                 | 12
-rw-r--r--  runsc/boot/fs.go                         |  7
-rw-r--r--  runsc/boot/loader.go                     | 99
-rw-r--r--  runsc/boot/loader_test.go                |  9
-rw-r--r--  runsc/container/multi_container_test.go  |  9
6 files changed, 64 insertions, 74 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 040f6a72d..704c66742 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -30,6 +30,7 @@ go_library(
         "//pkg/control/server",
         "//pkg/cpuid",
         "//pkg/eventchannel",
+        "//pkg/fd",
         "//pkg/fspath",
         "//pkg/log",
         "//pkg/memutil",
@@ -123,6 +124,7 @@ go_test(
     library = ":boot",
     deps = [
         "//pkg/control/server",
+        "//pkg/fd",
         "//pkg/fspath",
         "//pkg/log",
         "//pkg/p9",
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 68a2b45cf..894651519 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -22,6 +22,7 @@ import (
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/control/server"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -257,13 +258,20 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
 
 	// All validation passed, logs the spec for debugging.
 	specutils.LogSpec(args.Spec)
 
-	err := cm.l.startContainer(args.Spec, args.Conf, args.CID, args.FilePayload.Files)
+	fds, err := fd.NewFromFiles(args.FilePayload.Files)
 	if err != nil {
+		return err
+	}
+	defer func() {
+		for _, fd := range fds {
+			_ = fd.Close()
+		}
+	}()
+	if err := cm.l.startContainer(args.Spec, args.Conf, args.CID, fds); err != nil {
 		log.Debugf("containerManager.Start failed %q: %+v: %v", args.CID, args, err)
 		return err
 	}
 	log.Debugf("Container %q started", args.CID)
-
 	return nil
 }
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index ea0461a3d..e2c5f5fb1 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -34,6 +34,7 @@ import (
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
@@ -320,14 +321,14 @@ func adjustDirentCache(k *kernel.Kernel) error {
 }
 
 type fdDispenser struct {
-	fds []int
+	fds []*fd.FD
 }
 
 func (f *fdDispenser) remove() int {
 	if f.empty() {
 		panic("fdDispenser out of fds")
 	}
-	rv := f.fds[0]
+	rv := f.fds[0].Release()
 	f.fds = f.fds[1:]
 	return rv
 }
@@ -564,7 +565,7 @@ type containerMounter struct {
 	hints *podMountHints
 }
 
-func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
+func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints) *containerMounter {
 	return &containerMounter{
 		root:   spec.Root,
 		mounts: compileMounts(spec),
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 882cf270b..246ae3c3e 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -29,6 +29,7 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/cpuid"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/memutil"
 	"gvisor.dev/gvisor/pkg/rand"
@@ -89,10 +90,10 @@ type containerInfo struct {
 	procArgs kernel.CreateProcessArgs
 
 	// stdioFDs contains stdin, stdout, and stderr.
-	stdioFDs []int
+	stdioFDs []*fd.FD
 
 	// goferFDs are the FDs that attach the sandbox to the gofers.
-	goferFDs []int
+	goferFDs []*fd.FD
 }
 
 // Loader keeps state needed to start the kernel and run the container..
@@ -356,12 +357,17 @@ func New(args Args) (*Loader, error) {
 		k.SetHostMount(hostMount)
 	}
 
+	info := containerInfo{
+		conf:     args.Conf,
+		spec:     args.Spec,
+		procArgs: procArgs,
+	}
+
 	// Make host FDs stable between invocations. Host FDs must map to the exact
 	// same number when the sandbox is restored. Otherwise the wrong FD will be
 	// used.
-	var stdioFDs []int
 	newfd := startingStdioFD
-	for _, fd := range args.StdioFDs {
+	for _, stdioFD := range args.StdioFDs {
 		// Check that newfd is unused to avoid clobbering over it.
 		if _, err := unix.FcntlInt(uintptr(newfd), unix.F_GETFD, 0); !errors.Is(err, unix.EBADF) {
 			if err != nil {
@@ -370,14 +376,17 @@ func New(args Args) (*Loader, error) {
 			return nil, fmt.Errorf("unable to remap stdios, FD %d is already in use", newfd)
 		}
 
-		err := unix.Dup3(fd, newfd, unix.O_CLOEXEC)
+		err := unix.Dup3(stdioFD, newfd, unix.O_CLOEXEC)
 		if err != nil {
-			return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err)
+			return nil, fmt.Errorf("dup3 of stdios failed: %w", err)
 		}
-		stdioFDs = append(stdioFDs, newfd)
-		_ = unix.Close(fd)
+		info.stdioFDs = append(info.stdioFDs, fd.New(newfd))
+		_ = unix.Close(stdioFD)
 		newfd++
 	}
+	for _, goferFD := range args.GoferFDs {
+		info.goferFDs = append(info.goferFDs, fd.New(goferFD))
+	}
 
 	eid := execID{cid: args.ID}
 	l := &Loader{
@@ -386,13 +395,7 @@ func New(args Args) (*Loader, error) {
 		sandboxID:  args.ID,
 		processes:  map[execID]*execProcess{eid: {}},
 		mountHints: mountHints,
-		root: containerInfo{
-			conf:     args.Conf,
-			stdioFDs: stdioFDs,
-			goferFDs: args.GoferFDs,
-			spec:     args.Spec,
-			procArgs: procArgs,
-		},
+		root:       info,
 	}
 
 	// We don't care about child signals; some platforms can generate a
@@ -466,9 +469,14 @@ func (l *Loader) Destroy() {
 	}
 	l.watchdog.Stop()
 
-	for i, fd := range l.root.stdioFDs {
-		_ = unix.Close(fd)
-		l.root.stdioFDs[i] = -1
+	// In the success case, stdioFDs and goferFDs will only contain
+	// released/closed FDs that ownership has been passed over to host FDs and
+	// gofer sessions. Close them here in case on failure.
+	for _, fd := range l.root.stdioFDs {
+		_ = fd.Close()
+	}
+	for _, fd := range l.root.goferFDs {
+		_ = fd.Close()
 	}
 }
 
@@ -598,17 +606,6 @@ func (l *Loader) run() error {
 		}
 	})
 
-	// l.stdioFDs are derived from dup() in boot.New() and they are now dup()ed again
-	// either in createFDTable() during initial start or in descriptor.initAfterLoad()
-	// during restore, we can release l.stdioFDs now. VFS2 takes ownership of the
-	// passed FDs, so only close for VFS1.
-	if !kernel.VFS2Enabled {
-		for i, fd := range l.root.stdioFDs {
-			_ = unix.Close(fd)
-			l.root.stdioFDs[i] = -1
-		}
-	}
-
 	log.Infof("Process should have started...")
 	l.watchdog.Start()
 	return l.k.Start()
@@ -628,9 +625,9 @@ func (l *Loader) createContainer(cid string) error {
 }
 
 // startContainer starts a child container. It returns the thread group ID of
-// the newly created process. Caller owns 'files' and may close them after
-// this method returns.
-func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid string, files []*os.File) error {
+// the newly created process. Used FDs are either closed or released. It's safe
+// for the caller to close any remaining files upon return.
+func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid string, files []*fd.FD) error {
 	// Create capabilities.
 	caps, err := specutils.Capabilities(conf.EnableRaw, spec.Process.Capabilities)
 	if err != nil {
@@ -681,37 +678,15 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin
 	}
 	info := &containerInfo{
-		conf: conf,
-		spec: spec,
+		conf:     conf,
+		spec:     spec,
+		stdioFDs: files[:3],
+		goferFDs: files[3:],
 	}
 
 	info.procArgs, err = createProcessArgs(cid, spec, creds, l.k, pidns)
 	if err != nil {
 		return fmt.Errorf("creating new process: %v", err)
 	}
-
-	// VFS1 dups stdioFDs, so we don't need to dup them here. VFS2 takes
-	// ownership of the passed FDs, and we need to dup them here.
-	for _, f := range files[:3] {
-		if !kernel.VFS2Enabled {
-			info.stdioFDs = append(info.stdioFDs, int(f.Fd()))
-		} else {
-			fd, err := unix.Dup(int(f.Fd()))
-			if err != nil {
-				return fmt.Errorf("failed to dup file: %v", err)
-			}
-			info.stdioFDs = append(info.stdioFDs, fd)
-		}
-	}
-
-	// Can't take ownership away from os.File. dup them to get a new FDs.
-	for _, f := range files[3:] {
-		fd, err := unix.Dup(int(f.Fd()))
-		if err != nil {
-			return fmt.Errorf("failed to dup file: %v", err)
-		}
-		info.goferFDs = append(info.goferFDs, fd)
-	}
-
 	tg, err := l.createContainerProcess(false, cid, info, ep)
 	if err != nil {
 		return err
@@ -795,13 +770,13 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
 // startGoferMonitor runs a goroutine to monitor gofer's health. It polls on
 // the gofer FDs looking for disconnects, and destroys the container if a
 // disconnect occurs in any of the gofer FDs.
-func (l *Loader) startGoferMonitor(cid string, goferFDs []int) {
+func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) {
 	go func() {
 		log.Debugf("Monitoring gofer health for container %q", cid)
 		var events []unix.PollFd
-		for _, fd := range goferFDs {
+		for _, goferFD := range goferFDs {
 			events = append(events, unix.PollFd{
-				Fd:     int32(fd),
+				Fd:     int32(goferFD.FD()),
 				Events: unix.POLLHUP | unix.POLLRDHUP,
 			})
 		}
@@ -1281,7 +1256,7 @@ func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2
 	return ep.tty, ep.ttyVFS2, nil
 }
 
-func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
 	if len(stdioFDs) != 3 {
 		return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
 	}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 2343ce76c..dc9861389 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -26,6 +26,7 @@ import (
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/control/server"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/fspath"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/p9"
@@ -444,7 +445,7 @@ func TestCreateMountNamespace(t *testing.T) {
 			}
 			defer cleanup()
 
-			mntr := newContainerMounter(&tc.spec, []int{sandEnd}, nil, &podMountHints{})
+			mntr := newContainerMounter(&tc.spec, []*fd.FD{fd.New(sandEnd)}, nil, &podMountHints{})
 			mns, err := mntr.createMountNamespace(ctx, conf)
 			if err != nil {
 				t.Fatalf("failed to create mount namespace: %v", err)
@@ -702,7 +703,11 @@ func TestRestoreEnvironment(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			conf := testConfig()
-			mntr := newContainerMounter(tc.spec, tc.ioFDs, nil, &podMountHints{})
+			var ioFDs []*fd.FD
+			for _, ioFD := range tc.ioFDs {
+				ioFDs = append(ioFDs, fd.New(ioFD))
+			}
+			mntr := newContainerMounter(tc.spec, ioFDs, nil, &podMountHints{})
 			actualRenv, err := mntr.createRestoreEnvironment(conf)
 			if !tc.errorExpected && err != nil {
 				t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index da1694280..5b790c6c8 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -169,7 +169,7 @@ func TestMultiContainerSanity(t *testing.T) {
 // TestMultiPIDNS checks that it is possible to run 2 dead-simple
 // containers in the same sandbox with different pidns.
 func TestMultiPIDNS(t *testing.T) {
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
 			rootDir, cleanup, err := testutil.SetupRootDir()
 			if err != nil {
@@ -214,7 +214,7 @@ func TestMultiPIDNS(t *testing.T) {
 
 // TestMultiPIDNSPath checks the pidns path.
 func TestMultiPIDNSPath(t *testing.T) {
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
 			rootDir, cleanup, err := testutil.SetupRootDir()
 			if err != nil {
@@ -580,7 +580,7 @@ func TestMultiContainerDestroy(t *testing.T) {
 		t.Fatal("error finding test_app:", err)
 	}
 
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
 			rootDir, cleanup, err := testutil.SetupRootDir()
 			if err != nil {
@@ -1252,8 +1252,7 @@ func TestMultiContainerSharedMountReadonly(t *testing.T) {
 
 // Test that shared pod mounts continue to work after container is restarted.
 func TestMultiContainerSharedMountRestart(t *testing.T) {
-	//TODO(gvisor.dev/issue/1487): This is failing with VFS2.
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
 			rootDir, cleanup, err := testutil.SetupRootDir()
 			if err != nil {
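
Note: the change above replaces raw int descriptors throughout runsc/boot with gvisor's pkg/fd type, making ownership explicit: fd.New (and fd.NewFromFiles) takes ownership, Release hands the descriptor off (as in fdDispenser.remove), and Close is safe to call either way, which is what the deferred cleanup in containerManager.Start and Loader.Destroy relies on. The following is a minimal self-contained sketch of that ownership pattern, using a simplified FD type rather than gvisor's actual pkg/fd implementation.

// Sketch of the FD ownership pattern this commit adopts. Simplified
// illustration only, not gvisor's pkg/fd code.
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// FD owns a host file descriptor. Mirrors the pkg/fd API surface used in
// this change (New, FD, Release, Close).
type FD struct {
	raw int // -1 once ownership is gone
}

// New takes ownership of the given host FD.
func New(raw int) *FD { return &FD{raw: raw} }

// FD returns the underlying descriptor without giving up ownership,
// e.g. for poll(2) in startGoferMonitor.
func (f *FD) FD() int { return f.raw }

// Release transfers ownership to the caller; a later Close is a no-op.
func (f *FD) Release() int {
	raw := f.raw
	f.raw = -1
	return raw
}

// Close closes the descriptor only if it is still owned. Being safe to
// call after Release is what makes unconditional deferred cleanup correct
// on both success and error paths.
func (f *FD) Close() error {
	if f.raw < 0 {
		return nil
	}
	err := unix.Close(f.raw)
	f.raw = -1
	return err
}

func main() {
	// Wrap one end of a pipe, hand it off, and rely on Close being idempotent.
	var p [2]int
	if err := unix.Pipe(p[:]); err != nil {
		panic(err)
	}
	readEnd := New(p[0])
	defer readEnd.Close() // no-op, since ownership is released below

	raw := readEnd.Release() // e.g. fdDispenser.remove() handing the FD to a mount
	fmt.Println("released raw fd:", raw)
	_ = unix.Close(raw) // the new owner closes it
	_ = unix.Close(p[1])
}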
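The stdio-remapping loop in Loader.New dup3()s each stdio FD onto a fixed, predictable number so host FDs land on the same numbers when a sandbox is restored. Below is a standalone sketch of that loop; the startingStdioFD value is an assumption for illustration, and main demos it with a pipe end rather than real stdio so the example doesn't close FDs 0-2.

// Sketch of the stdio FD remapping in Loader.New (assumed constant value).
package main

import (
	"errors"
	"fmt"

	"golang.org/x/sys/unix"
)

const startingStdioFD = 256 // assumed value, for illustration only

func remapStdios(stdioFDs []int) ([]int, error) {
	var remapped []int
	newfd := startingStdioFD
	for _, stdioFD := range stdioFDs {
		// F_GETFD on an unused FD fails with EBADF; any other outcome means
		// newfd is already taken and dup3 would silently clobber it.
		if _, err := unix.FcntlInt(uintptr(newfd), unix.F_GETFD, 0); !errors.Is(err, unix.EBADF) {
			if err != nil {
				return nil, fmt.Errorf("checking FD %d: %w", newfd, err)
			}
			return nil, fmt.Errorf("unable to remap stdios, FD %d is already in use", newfd)
		}
		// dup3 atomically duplicates stdioFD onto newfd with CLOEXEC set.
		if err := unix.Dup3(stdioFD, newfd, unix.O_CLOEXEC); err != nil {
			return nil, fmt.Errorf("dup3 of stdios failed: %w", err)
		}
		remapped = append(remapped, newfd)
		_ = unix.Close(stdioFD) // the original number is no longer needed
		newfd++
	}
	return remapped, nil
}

func main() {
	var p [2]int
	if err := unix.Pipe(p[:]); err != nil {
		panic(err)
	}
	defer unix.Close(p[1])
	// A pipe end stands in for a real stdio FD in this demo.
	fds, err := remapStdios([]int{p[0]})
	if err != nil {
		panic(err)
	}
	fmt.Println("stdio now at:", fds)
}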
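startGoferMonitor, unchanged in behavior here, is why goferFD.FD() (peek at the descriptor without releasing ownership) exists: the monitor only polls the gofer connections for hangups. A hedged sketch of the same disconnect-detection pattern follows, using a pipe whose closed write end raises POLLHUP on the read end; the real loader destroys the container instead of invoking a callback.

// Sketch of poll-based disconnect detection (simplified illustration).
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func monitor(rawFDs []int, onDisconnect func()) {
	var events []unix.PollFd
	for _, raw := range rawFDs {
		events = append(events, unix.PollFd{
			Fd:     int32(raw),
			Events: unix.POLLHUP | unix.POLLRDHUP,
		})
	}
	// Block until any FD reports a hangup; EINTR just means retry.
	for {
		n, err := unix.Ppoll(events, nil, nil)
		if err == unix.EINTR {
			continue
		}
		if err != nil || n > 0 {
			onDisconnect()
			return
		}
	}
}

func main() {
	var p [2]int
	if err := unix.Pipe(p[:]); err != nil {
		panic(err)
	}
	done := make(chan struct{})
	go monitor([]int{p[0]}, func() {
		fmt.Println("peer hung up")
		close(done)
	})
	_ = unix.Close(p[1]) // closing the write end triggers POLLHUP on the read end
	<-done
}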