diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/BUILD | 22 | ||||
-rw-r--r-- | runsc/boot/BUILD | 3 | ||||
-rw-r--r-- | runsc/boot/compat.go | 2 | ||||
-rw-r--r-- | runsc/boot/controller.go | 19 | ||||
-rw-r--r-- | runsc/boot/fs.go | 33 | ||||
-rw-r--r-- | runsc/boot/loader.go | 9 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 57 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 60 | ||||
-rw-r--r-- | runsc/cgroup/cgroup.go | 53 | ||||
-rw-r--r-- | runsc/cgroup/cgroup_test.go | 80 | ||||
-rw-r--r-- | runsc/cli/BUILD | 22 | ||||
-rw-r--r-- | runsc/cli/main.go | 256 | ||||
-rw-r--r-- | runsc/cmd/boot.go | 6 | ||||
-rw-r--r-- | runsc/cmd/do.go | 45 | ||||
-rw-r--r-- | runsc/cmd/start.go | 7 | ||||
-rw-r--r-- | runsc/config/config.go | 3 | ||||
-rw-r--r-- | runsc/config/flags.go | 1 | ||||
-rw-r--r-- | runsc/container/container.go | 18 | ||||
-rw-r--r-- | runsc/container/container_test.go | 6 | ||||
-rw-r--r-- | runsc/main.go | 240 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 18 |
21 files changed, 595 insertions, 365 deletions
diff --git a/runsc/BUILD b/runsc/BUILD index 33d8554af..3b91b984a 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -13,16 +13,7 @@ go_binary( "//visibility:public", ], x_defs = {"main.version": "{STABLE_VERSION}"}, - deps = [ - "//pkg/log", - "//pkg/refs", - "//pkg/sentry/platform", - "//runsc/cmd", - "//runsc/config", - "//runsc/flag", - "//runsc/specutils", - "@com_github_google_subcommands//:go_default_library", - ], + deps = ["//runsc/cli"], ) # The runsc-race target is a race-compatible BUILD target. This must be built @@ -49,16 +40,7 @@ go_binary( "//visibility:public", ], x_defs = {"main.version": "{STABLE_VERSION}"}, - deps = [ - "//pkg/log", - "//pkg/refs", - "//pkg/sentry/platform", - "//runsc/cmd", - "//runsc/config", - "//runsc/flag", - "//runsc/specutils", - "@com_github_google_subcommands//:go_default_library", - ], + deps = ["//runsc/cli"], ) sh_test( diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 2d9517f4a..b97dc3c47 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -38,6 +38,7 @@ go_library( "//pkg/memutil", "//pkg/rand", "//pkg/refs", + "//pkg/refsvfs2", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/control", @@ -110,8 +111,8 @@ go_library( "//runsc/config", "//runsc/specutils", "//runsc/specutils/seccomp", - "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + "@org_golang_google_protobuf//proto:go_default_library", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index 84c67cbc2..7076ae2e2 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -19,7 +19,7 @@ import ( "os" "syscall" - "github.com/golang/protobuf/proto" + "google.golang.org/protobuf/proto" "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/log" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 894651519..4e0f0d57a 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -30,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/state" "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/urpc" @@ -367,12 +368,20 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { cm.l.k = k // Set up the restore environment. + ctx := k.SupervisorContext() mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints) - renv, err := mntr.createRestoreEnvironment(cm.l.root.conf) - if err != nil { - return fmt.Errorf("creating RestoreEnvironment: %v", err) + if kernel.VFS2Enabled { + ctx, err = mntr.configureRestore(ctx, cm.l.root.conf) + if err != nil { + return fmt.Errorf("configuring filesystem restore: %v", err) + } + } else { + renv, err := mntr.createRestoreEnvironment(cm.l.root.conf) + if err != nil { + return fmt.Errorf("creating RestoreEnvironment: %v", err) + } + fs.SetRestoreEnvironment(*renv) } - fs.SetRestoreEnvironment(*renv) // Prepare to load from the state file. if eps, ok := networkStack.(*netstack.Stack); ok { @@ -399,7 +408,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Load the state. loadOpts := state.LoadOpts{Source: specFile} - if err := loadOpts.Load(k, networkStack, time.NewCalibratedClocks()); err != nil { + if err := loadOpts.Load(ctx, k, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { return err } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index ddf288456..6b6ae98d7 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -105,33 +105,28 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name // mandatory mounts that are required by the OCI specification. func compileMounts(spec *specs.Spec) []specs.Mount { // Keep track of whether proc and sys were mounted. - var procMounted, sysMounted bool + var procMounted, sysMounted, devMounted, devptsMounted bool var mounts []specs.Mount - // Always mount /dev. - mounts = append(mounts, specs.Mount{ - Type: devtmpfs.Name, - Destination: "/dev", - }) - - mounts = append(mounts, specs.Mount{ - Type: devpts.Name, - Destination: "/dev/pts", - }) - // Mount all submounts from the spec. for _, m := range spec.Mounts { if !specutils.IsSupportedDevMount(m) { log.Warningf("ignoring dev mount at %q", m.Destination) continue } - mounts = append(mounts, m) switch filepath.Clean(m.Destination) { case "/proc": procMounted = true case "/sys": sysMounted = true + case "/dev": + m.Type = devtmpfs.Name + devMounted = true + case "/dev/pts": + m.Type = devpts.Name + devptsMounted = true } + mounts = append(mounts, m) } // Mount proc and sys even if the user did not ask for it, as the spec @@ -149,6 +144,18 @@ func compileMounts(spec *specs.Spec) []specs.Mount { Destination: "/sys", }) } + if !devMounted { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: devtmpfs.Name, + Destination: "/dev", + }) + } + if !devptsMounted { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: devpts.Name, + Destination: "/dev/pts", + }) + } // The mandatory mounts should be ordered right after the root, in case // there are submounts of these mandatory mounts already in the spec. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 8ad000497..8c6ab213d 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -35,6 +35,7 @@ import ( "gvisor.dev/gvisor/pkg/memutil" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/fdimport" @@ -476,6 +477,12 @@ func (l *Loader) Destroy() { // save/restore. l.k.Release() + // All sentry-created resources should have been released at this point; + // check for reference leaks. + if refsvfs2.LeakCheckEnabled() { + refsvfs2.DoLeakCheck() + } + // In the success case, stdioFDs and goferFDs will only contain // released/closed FDs that ownership has been passed over to host FDs and // gofer sessions. Close them here in case of failure. @@ -737,7 +744,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn return nil, err } - // Add the HOME enviroment variable if it is not already set. + // Add the HOME environment variable if it is not already set. var envv []string if kernel.VFS2Enabled { envv, err = user.MaybeAddExecUserHomeVFS2(ctx, info.procArgs.MountNamespaceVFS2, diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index e376f944b..b77b4762e 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -266,7 +266,7 @@ type CreateMountTestcase struct { func createMountTestcases() []*CreateMountTestcase { testCases := []*CreateMountTestcase{ - &CreateMountTestcase{ + { // Only proc. name: "only proc mount", spec: specs.Spec{ @@ -304,11 +304,10 @@ func createMountTestcases() []*CreateMountTestcase { }, }, }, - // /some/deep/path should be mounted, along with /proc, - // /dev, and /sys. + // /some/deep/path should be mounted, along with /proc, /dev, and /sys. expectedPaths: []string{"/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - &CreateMountTestcase{ + { // Mounts are nested inside each other. name: "nested mounts", spec: specs.Spec{ @@ -352,7 +351,7 @@ func createMountTestcases() []*CreateMountTestcase { expectedPaths: []string{"/foo", "/foo/bar", "/foo/bar/baz", "/foo/qux", "/foo/qux-quz", "/foo/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - &CreateMountTestcase{ + { name: "mount inside /dev", spec: specs.Spec{ Root: &specs.Root{ @@ -395,35 +394,37 @@ func createMountTestcases() []*CreateMountTestcase { }, expectedPaths: []string{"/proc", "/dev", "/dev/fd-foo", "/dev/foo", "/dev/bar", "/sys"}, }, - } - - vfsCase := &CreateMountTestcase{ - name: "mounts inside mandatory mounts", - spec: specs.Spec{ - Root: &specs.Root{ - Path: os.TempDir(), - Readonly: true, - }, - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "tmpfs", - }, - { - Destination: "/sys/bar", - Type: "tmpfs", + { + name: "mounts inside mandatory mounts", + spec: specs.Spec{ + Root: &specs.Root{ + Path: os.TempDir(), + Readonly: true, }, - - { - Destination: "/tmp/baz", - Type: "tmpfs", + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "tmpfs", + }, + { + Destination: "/sys/bar", + Type: "tmpfs", + }, + { + Destination: "/tmp/baz", + Type: "tmpfs", + }, + { + Destination: "/dev/goo", + Type: "tmpfs", + }, }, }, + expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz", "/dev/goo"}, }, - expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"}, } - return append(testCases, vfsCase) + return testCases } // Test that MountNamespace can be created with various specs. diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 82e459f46..b157387ef 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -210,6 +210,9 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c ReadOnly: c.root.Readonly, GetFilesystemOptions: vfs.GetFilesystemOptions{ Data: strings.Join(data, ","), + InternalData: gofer.InternalFilesystemOptions{ + UniqueID: "/", + }, }, InternalMount: true, } @@ -264,10 +267,38 @@ func (c *containerMounter) configureOverlay(ctx context.Context, creds *auth.Cre } cu.Add(func() { lower.DecRef(ctx) }) + // Propagate the lower layer's root's owner, group, and mode to the upper + // layer's root for consistency with VFS1. + upperRootVD := vfs.MakeVirtualDentry(upper, upper.Root()) + lowerRootVD := vfs.MakeVirtualDentry(lower, lower.Root()) + stat, err := c.k.VFS().StatAt(ctx, creds, &vfs.PathOperation{ + Root: lowerRootVD, + Start: lowerRootVD, + }, &vfs.StatOptions{ + Mask: linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE, + }) + if err != nil { + return nil, nil, err + } + err = c.k.VFS().SetStatAt(ctx, creds, &vfs.PathOperation{ + Root: upperRootVD, + Start: upperRootVD, + }, &vfs.SetStatOptions{ + Stat: linux.Statx{ + Mask: (linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE) & stat.Mask, + UID: stat.UID, + GID: stat.GID, + Mode: stat.Mode, + }, + }) + if err != nil { + return nil, nil, err + } + // Configure overlay with both layers. overlayOpts.GetFilesystemOptions.InternalData = overlay.FilesystemOptions{ - UpperRoot: vfs.MakeVirtualDentry(upper, upper.Root()), - LowerRoots: []vfs.VirtualDentry{vfs.MakeVirtualDentry(lower, lower.Root())}, + UpperRoot: upperRootVD, + LowerRoots: []vfs.VirtualDentry{lowerRootVD}, } return &overlayOpts, cu.Release(), nil } @@ -399,6 +430,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo fsName := m.Type useOverlay := false var data []string + var iopts interface{} // Find filesystem name and FS specific data field. switch m.Type { @@ -423,6 +455,9 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo return "", nil, false, fmt.Errorf("9P mount requires a connection FD") } data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) + iopts = gofer.InternalFilesystemOptions{ + UniqueID: m.Destination, + } // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -434,7 +469,8 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo opts := &vfs.MountOptions{ GetFilesystemOptions: vfs.GetFilesystemOptions{ - Data: strings.Join(data, ","), + Data: strings.Join(data, ","), + InternalData: iopts, }, InternalMount: true, } @@ -639,3 +675,21 @@ func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Crede } return c.k.VFS().MakeSyntheticMountpoint(ctx, dest, root, creds) } + +// configureRestore returns an updated context.Context including filesystem +// state used by restore defined by conf. +func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Config) (context.Context, error) { + fdmap := make(map[string]int) + fdmap["/"] = c.fds.remove() + mounts, err := c.prepareMountsVFS2() + if err != nil { + return ctx, err + } + for i := range c.mounts { + submount := &mounts[i] + if submount.fd >= 0 { + fdmap[submount.Destination] = submount.fd + } + } + return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil +} diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 8fbc3887a..5bd0afc52 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -21,6 +21,7 @@ import ( "context" "errors" "fmt" + "io" "io/ioutil" "os" "path/filepath" @@ -198,16 +199,26 @@ func LoadPaths(pid string) (map[string]string, error) { } defer f.Close() + return loadPathsHelper(f) +} + +func loadPathsHelper(cgroup io.Reader) (map[string]string, error) { paths := make(map[string]string) - scanner := bufio.NewScanner(f) + + scanner := bufio.NewScanner(cgroup) for scanner.Scan() { - // Format: ID:controller1,controller2:path + // Format: ID:[name=]controller1,controller2:path // Example: 2:cpu,cpuacct:/user.slice tokens := strings.Split(scanner.Text(), ":") if len(tokens) != 3 { return nil, fmt.Errorf("invalid cgroups file, line: %q", scanner.Text()) } + if len(tokens[1]) == 0 { + continue + } for _, ctrlr := range strings.Split(tokens[1], ",") { + // Remove prefix for cgroups with no controller, eg. systemd. + ctrlr = strings.TrimPrefix(ctrlr, "name=") paths[ctrlr] = tokens[2] } } @@ -237,7 +248,7 @@ func New(spec *specs.Spec) (*Cgroup, error) { var err error parents, err = LoadPaths("self") if err != nil { - return nil, fmt.Errorf("finding current cgroups: %v", err) + return nil, fmt.Errorf("finding current cgroups: %w", err) } } return &Cgroup{ @@ -276,10 +287,8 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { } return err } - if res != nil { - if err := cfg.ctrlr.set(res, path); err != nil { - return err - } + if err := cfg.ctrlr.set(res, path); err != nil { + return err } } clean.Release() @@ -304,14 +313,15 @@ func (c *Cgroup) Uninstall() error { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) - if err := backoff.Retry(func() error { + fn := func() error { err := syscall.Rmdir(path) if os.IsNotExist(err) { return nil } return err - }, b); err != nil { - return fmt.Errorf("removing cgroup path %q: %v", path, err) + } + if err := backoff.Retry(fn, b); err != nil { + return fmt.Errorf("removing cgroup path %q: %w", path, err) } } return nil @@ -332,7 +342,6 @@ func (c *Cgroup) Join() (func(), error) { if _, ok := controllers[ctrlr]; ok { fullPath := filepath.Join(cgroupRoot, ctrlr, path) undoPaths = append(undoPaths, fullPath) - break } } @@ -422,7 +431,7 @@ func (*noop) set(*specs.LinuxResources, string) error { type memory struct{} func (*memory) set(spec *specs.LinuxResources, path string) error { - if spec.Memory == nil { + if spec == nil || spec.Memory == nil { return nil } if err := setOptionalValueInt(path, "memory.limit_in_bytes", spec.Memory.Limit); err != nil { @@ -455,7 +464,7 @@ func (*memory) set(spec *specs.LinuxResources, path string) error { type cpu struct{} func (*cpu) set(spec *specs.LinuxResources, path string) error { - if spec.CPU == nil { + if spec == nil || spec.CPU == nil { return nil } if err := setOptionalValueUint(path, "cpu.shares", spec.CPU.Shares); err != nil { @@ -478,7 +487,7 @@ type cpuSet struct{} func (*cpuSet) set(spec *specs.LinuxResources, path string) error { // cpuset.cpus and mems are required fields, but are not set on a new cgroup. // If not set in the spec, get it from one of the ancestors cgroup. - if spec.CPU == nil || spec.CPU.Cpus == "" { + if spec == nil || spec.CPU == nil || spec.CPU.Cpus == "" { if _, err := fillFromAncestor(filepath.Join(path, "cpuset.cpus")); err != nil { return err } @@ -488,18 +497,17 @@ func (*cpuSet) set(spec *specs.LinuxResources, path string) error { } } - if spec.CPU == nil || spec.CPU.Mems == "" { + if spec == nil || spec.CPU == nil || spec.CPU.Mems == "" { _, err := fillFromAncestor(filepath.Join(path, "cpuset.mems")) return err } - mems := spec.CPU.Mems - return setValue(path, "cpuset.mems", mems) + return setValue(path, "cpuset.mems", spec.CPU.Mems) } type blockIO struct{} func (*blockIO) set(spec *specs.LinuxResources, path string) error { - if spec.BlockIO == nil { + if spec == nil || spec.BlockIO == nil { return nil } @@ -549,7 +557,7 @@ func setThrottle(path, name string, devs []specs.LinuxThrottleDevice) error { type networkClass struct{} func (*networkClass) set(spec *specs.LinuxResources, path string) error { - if spec.Network == nil { + if spec == nil || spec.Network == nil { return nil } return setOptionalValueUint32(path, "net_cls.classid", spec.Network.ClassID) @@ -558,7 +566,7 @@ func (*networkClass) set(spec *specs.LinuxResources, path string) error { type networkPrio struct{} func (*networkPrio) set(spec *specs.LinuxResources, path string) error { - if spec.Network == nil { + if spec == nil || spec.Network == nil { return nil } for _, prio := range spec.Network.Priorities { @@ -573,7 +581,7 @@ func (*networkPrio) set(spec *specs.LinuxResources, path string) error { type pids struct{} func (*pids) set(spec *specs.LinuxResources, path string) error { - if spec.Pids == nil || spec.Pids.Limit <= 0 { + if spec == nil || spec.Pids == nil || spec.Pids.Limit <= 0 { return nil } val := strconv.FormatInt(spec.Pids.Limit, 10) @@ -583,6 +591,9 @@ func (*pids) set(spec *specs.LinuxResources, path string) error { type hugeTLB struct{} func (*hugeTLB) set(spec *specs.LinuxResources, path string) error { + if spec == nil { + return nil + } for _, limit := range spec.HugepageLimits { name := fmt.Sprintf("hugetlb.%s.limit_in_bytes", limit.Pagesize) val := strconv.FormatUint(limit.Limit, 10) diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go index 4db5ee5c3..9794517a7 100644 --- a/runsc/cgroup/cgroup_test.go +++ b/runsc/cgroup/cgroup_test.go @@ -647,3 +647,83 @@ func TestPids(t *testing.T) { }) } } + +func TestLoadPaths(t *testing.T) { + for _, tc := range []struct { + name string + cgroups string + want map[string]string + err string + }{ + { + name: "abs-path", + cgroups: "0:ctr:/path", + want: map[string]string{"ctr": "/path"}, + }, + { + name: "rel-path", + cgroups: "0:ctr:rel-path", + want: map[string]string{"ctr": "rel-path"}, + }, + { + name: "non-controller", + cgroups: "0:name=systemd:/path", + want: map[string]string{"systemd": "/path"}, + }, + { + name: "empty", + }, + { + name: "multiple", + cgroups: "0:ctr0:/path0\n" + + "1:ctr1:/path1\n" + + "2::/empty\n", + want: map[string]string{ + "ctr0": "/path0", + "ctr1": "/path1", + }, + }, + { + name: "missing-field", + cgroups: "0:nopath\n", + err: "invalid cgroups file", + }, + { + name: "too-many-fields", + cgroups: "0:ctr:/path:extra\n", + err: "invalid cgroups file", + }, + { + name: "multiple-malformed", + cgroups: "0:ctr0:/path0\n" + + "1:ctr1:/path1\n" + + "2:\n", + err: "invalid cgroups file", + }, + } { + t.Run(tc.name, func(t *testing.T) { + r := strings.NewReader(tc.cgroups) + got, err := loadPathsHelper(r) + if len(tc.err) == 0 { + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + } else if !strings.Contains(err.Error(), tc.err) { + t.Fatalf("Wrong error message, want: *%s*, got: %v", tc.err, err) + } + for key, vWant := range tc.want { + vGot, ok := got[key] + if !ok { + t.Errorf("Missing controller %q", key) + } + if vWant != vGot { + t.Errorf("Wrong controller %q value, want: %q, got: %q", key, vWant, vGot) + } + delete(got, key) + } + for k, v := range got { + t.Errorf("Unexpected controller %q: %q", k, v) + } + }) + } +} diff --git a/runsc/cli/BUILD b/runsc/cli/BUILD new file mode 100644 index 000000000..32cce2a18 --- /dev/null +++ b/runsc/cli/BUILD @@ -0,0 +1,22 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "cli", + srcs = ["main.go"], + visibility = [ + "//:__pkg__", + "//runsc:__pkg__", + ], + deps = [ + "//pkg/log", + "//pkg/refs", + "//pkg/sentry/platform", + "//runsc/cmd", + "//runsc/config", + "//runsc/flag", + "//runsc/specutils", + "@com_github_google_subcommands//:go_default_library", + ], +) diff --git a/runsc/cli/main.go b/runsc/cli/main.go new file mode 100644 index 000000000..bca015db5 --- /dev/null +++ b/runsc/cli/main.go @@ -0,0 +1,256 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package cli is the main entrypoint for runsc. +package cli + +import ( + "context" + "fmt" + "io" + "io/ioutil" + "os" + "os/signal" + "syscall" + "time" + + "github.com/google/subcommands" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/runsc/cmd" + "gvisor.dev/gvisor/runsc/config" + "gvisor.dev/gvisor/runsc/flag" + "gvisor.dev/gvisor/runsc/specutils" +) + +var ( + // Although these flags are not part of the OCI spec, they are used by + // Docker, and thus should not be changed. + // TODO(gvisor.dev/issue/193): support systemd cgroups + systemdCgroup = flag.Bool("systemd-cgroup", false, "Use systemd for cgroups. NOT SUPPORTED.") + showVersion = flag.Bool("version", false, "show version and exit.") + + // These flags are unique to runsc, and are used to configure parts of the + // system that are not covered by the runtime spec. + + // Debugging flags. + logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.") + debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.") + panicLogFD = flag.Int("panic-log-fd", -1, "file descriptor to write Go's runtime messages.") +) + +// Main is the main entrypoint. +func Main(version string) { + // Help and flags commands are generated automatically. + help := cmd.NewHelp(subcommands.DefaultCommander) + help.Register(new(cmd.Syscalls)) + subcommands.Register(help, "") + subcommands.Register(subcommands.FlagsCommand(), "") + + // Installation helpers. + const helperGroup = "helpers" + subcommands.Register(new(cmd.Install), helperGroup) + subcommands.Register(new(cmd.Uninstall), helperGroup) + + // Register user-facing runsc commands. + subcommands.Register(new(cmd.Checkpoint), "") + subcommands.Register(new(cmd.Create), "") + subcommands.Register(new(cmd.Delete), "") + subcommands.Register(new(cmd.Do), "") + subcommands.Register(new(cmd.Events), "") + subcommands.Register(new(cmd.Exec), "") + subcommands.Register(new(cmd.Gofer), "") + subcommands.Register(new(cmd.Kill), "") + subcommands.Register(new(cmd.List), "") + subcommands.Register(new(cmd.Pause), "") + subcommands.Register(new(cmd.PS), "") + subcommands.Register(new(cmd.Restore), "") + subcommands.Register(new(cmd.Resume), "") + subcommands.Register(new(cmd.Run), "") + subcommands.Register(new(cmd.Spec), "") + subcommands.Register(new(cmd.State), "") + subcommands.Register(new(cmd.Start), "") + subcommands.Register(new(cmd.Wait), "") + + // Register internal commands with the internal group name. This causes + // them to be sorted below the user-facing commands with empty group. + // The string below will be printed above the commands. + const internalGroup = "internal use only" + subcommands.Register(new(cmd.Boot), internalGroup) + subcommands.Register(new(cmd.Debug), internalGroup) + subcommands.Register(new(cmd.Gofer), internalGroup) + subcommands.Register(new(cmd.Statefile), internalGroup) + + config.RegisterFlags() + + // All subcommands must be registered before flag parsing. + flag.Parse() + + // Are we showing the version? + if *showVersion { + // The format here is the same as runc. + fmt.Fprintf(os.Stdout, "runsc version %s\n", version) + fmt.Fprintf(os.Stdout, "spec: %s\n", specutils.Version) + os.Exit(0) + } + + // Create a new Config from the flags. + conf, err := config.NewFromFlags() + if err != nil { + cmd.Fatalf(err.Error()) + } + + // TODO(gvisor.dev/issue/193): support systemd cgroups + if *systemdCgroup { + fmt.Fprintln(os.Stderr, "systemd cgroup flag passed, but systemd cgroups not supported. See gvisor.dev/issue/193") + os.Exit(1) + } + + var errorLogger io.Writer + if *logFD > -1 { + errorLogger = os.NewFile(uintptr(*logFD), "error log file") + + } else if conf.LogFilename != "" { + // We must set O_APPEND and not O_TRUNC because Docker passes + // the same log file for all commands (and also parses these + // log files), so we can't destroy them on each command. + var err error + errorLogger, err = os.OpenFile(conf.LogFilename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + if err != nil { + cmd.Fatalf("error opening log file %q: %v", conf.LogFilename, err) + } + } + cmd.ErrorLogger = errorLogger + + if _, err := platform.Lookup(conf.Platform); err != nil { + cmd.Fatalf("%v", err) + } + + // Sets the reference leak check mode. Also set it in config below to + // propagate it to child processes. + refs.SetLeakMode(conf.ReferenceLeak) + + // Set up logging. + if conf.Debug { + log.SetLevel(log.Debug) + } + + // Logging will include the local date and time via the time package. + // + // On first use, time.Local initializes the local time zone, which + // involves opening tzdata files on the host. Since this requires + // opening host files, it must be done before syscall filter + // installation. + // + // Generally there will be a log message before filter installation + // that will force initialization, but force initialization here in + // case that does not occur. + _ = time.Local.String() + + subcommand := flag.CommandLine.Arg(0) + + var e log.Emitter + if *debugLogFD > -1 { + f := os.NewFile(uintptr(*debugLogFD), "debug log file") + + e = newEmitter(conf.DebugLogFormat, f) + + } else if conf.DebugLog != "" { + f, err := specutils.DebugLogFile(conf.DebugLog, subcommand, "" /* name */) + if err != nil { + cmd.Fatalf("error opening debug log file in %q: %v", conf.DebugLog, err) + } + e = newEmitter(conf.DebugLogFormat, f) + + } else { + // Stderr is reserved for the application, just discard the logs if no debug + // log is specified. + e = newEmitter("text", ioutil.Discard) + } + + if *panicLogFD > -1 || *debugLogFD > -1 { + fd := *panicLogFD + if fd < 0 { + fd = *debugLogFD + } + // Quick sanity check to make sure no other commands get passed + // a log fd (they should use log dir instead). + if subcommand != "boot" && subcommand != "gofer" { + cmd.Fatalf("flags --debug-log-fd and --panic-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand) + } + + // If we are the boot process, then we own our stdio FDs and can do what we + // want with them. Since Docker and Containerd both eat boot's stderr, we + // dup our stderr to the provided log FD so that panics will appear in the + // logs, rather than just disappear. + if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { + cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err) + } + } else if conf.AlsoLogToStderr { + e = &log.MultiEmitter{e, newEmitter(conf.DebugLogFormat, os.Stderr)} + } + + log.SetTarget(e) + + log.Infof("***************************") + log.Infof("Args: %s", os.Args) + log.Infof("Version %s", version) + log.Infof("PID: %d", os.Getpid()) + log.Infof("UID: %d, GID: %d", os.Getuid(), os.Getgid()) + log.Infof("Configuration:") + log.Infof("\t\tRootDir: %s", conf.RootDir) + log.Infof("\t\tPlatform: %v", conf.Platform) + log.Infof("\t\tFileAccess: %v, overlay: %t", conf.FileAccess, conf.Overlay) + log.Infof("\t\tNetwork: %v, logging: %t", conf.Network, conf.LogPackets) + log.Infof("\t\tStrace: %t, max size: %d, syscalls: %s", conf.Strace, conf.StraceLogSize, conf.StraceSyscalls) + log.Infof("\t\tVFS2 enabled: %v", conf.VFS2) + log.Infof("***************************") + + if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + // SIGTERM is sent to all processes if a test exceeds its + // timeout and this case is handled by syscall_test_runner. + log.Warningf("Block the TERM signal. This is only safe in tests!") + signal.Ignore(syscall.SIGTERM) + } + + // Call the subcommand and pass in the configuration. + var ws syscall.WaitStatus + subcmdCode := subcommands.Execute(context.Background(), conf, &ws) + if subcmdCode == subcommands.ExitSuccess { + log.Infof("Exiting with status: %v", ws) + if ws.Signaled() { + // No good way to return it, emulate what the shell does. Maybe raise + // signal to self? + os.Exit(128 + int(ws.Signal())) + } + os.Exit(ws.ExitStatus()) + } + // Return an error that is unlikely to be used by the application. + log.Warningf("Failure to execute command, err: %v", subcmdCode) + os.Exit(128) +} + +func newEmitter(format string, logFile io.Writer) log.Emitter { + switch format { + case "text": + return log.GoogleEmitter{&log.Writer{Next: logFile}} + case "json": + return log.JSONEmitter{&log.Writer{Next: logFile}} + case "json-k8s": + return log.K8sJSONEmitter{&log.Writer{Next: logFile}} + } + cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format) + panic("unreachable") +} diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index cd419e1aa..2c92e3067 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -131,11 +131,11 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) return subcommands.ExitUsageError } - // Ensure that if there is a panic, all goroutine stacks are printed. - debug.SetTraceback("system") - conf := args[0].(*config.Config) + // Set traceback level + debug.SetTraceback(conf.Traceback) + if b.attached { // Ensure this process is killed after parent process terminates when // attached mode is enabled. In the unfortunate event that the parent diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index d1f2e9e6d..640de4c47 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -17,6 +17,7 @@ package cmd import ( "context" "encoding/json" + "errors" "fmt" "io/ioutil" "math/rand" @@ -36,6 +37,8 @@ import ( "gvisor.dev/gvisor/runsc/specutils" ) +var errNoDefaultInterface = errors.New("no default interface found") + // Do implements subcommands.Command for the "do" command. It sets up a simple // sandbox and executes the command inside it. See Usage() for more details. type Do struct { @@ -126,26 +129,28 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000)) if conf.Network == config.NetworkNone { - netns := specs.LinuxNamespace{ - Type: specs.NetworkNamespace, - } - if spec.Linux != nil { - panic("spec.Linux is not nil") - } - spec.Linux = &specs.Linux{Namespaces: []specs.LinuxNamespace{netns}} + addNamespace(spec, specs.LinuxNamespace{Type: specs.NetworkNamespace}) } else if conf.Rootless { if conf.Network == config.NetworkSandbox { - c.notifyUser("*** Warning: using host network due to --rootless ***") + c.notifyUser("*** Warning: sandbox network isn't supported with --rootless, switching to host ***") conf.Network = config.NetworkHost } } else { - clean, err := c.setupNet(cid, spec) - if err != nil { + switch clean, err := c.setupNet(cid, spec); err { + case errNoDefaultInterface: + log.Warningf("Network interface not found, using internal network") + addNamespace(spec, specs.LinuxNamespace{Type: specs.NetworkNamespace}) + conf.Network = config.NetworkHost + + case nil: + // Setup successfull. + defer clean() + + default: return Errorf("Error setting up network: %v", err) } - defer clean() } out, err := json.Marshal(spec) @@ -199,6 +204,13 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su return subcommands.ExitSuccess } +func addNamespace(spec *specs.Spec, ns specs.LinuxNamespace) { + if spec.Linux == nil { + spec.Linux = &specs.Linux{} + } + spec.Linux.Namespaces = append(spec.Linux.Namespaces, ns) +} + func (c *Do) notifyUser(format string, v ...interface{}) { if !c.quiet { fmt.Printf(format+"\n", v...) @@ -219,10 +231,14 @@ func resolvePath(path string) (string, error) { return path, nil } +// setupNet setups up the sandbox network, including the creation of a network +// namespace, and iptable rules to redirect the traffic. Returns a cleanup +// function to tear down the network. Returns errNoDefaultInterface when there +// is no network interface available to setup the network. func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) { dev, err := defaultDevice() if err != nil { - return nil, err + return nil, errNoDefaultInterface } peerIP, err := calculatePeerIP(c.ip) if err != nil { @@ -279,14 +295,11 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) { return nil, err } - if spec.Linux == nil { - spec.Linux = &specs.Linux{} - } netns := specs.LinuxNamespace{ Type: specs.NetworkNamespace, Path: filepath.Join("/var/run/netns", cid), } - spec.Linux.Namespaces = append(spec.Linux.Namespaces, netns) + addNamespace(spec, netns) return func() { c.cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath) }, nil } diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go index 88991b521..139edbd49 100644 --- a/runsc/cmd/start.go +++ b/runsc/cmd/start.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/flag" + "gvisor.dev/gvisor/runsc/specutils" ) // Start implements subcommands.Command for the "start" command. @@ -58,6 +59,12 @@ func (*Start) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s if err != nil { Fatalf("loading container: %v", err) } + // Read the spec again here to ensure flag annotations from the spec are + // applied to "conf". + if _, err := specutils.ReadSpec(c.BundleDir, conf); err != nil { + Fatalf("reading spec: %v", err) + } + if err := c.Start(conf); err != nil { Fatalf("starting container: %v", err) } diff --git a/runsc/config/config.go b/runsc/config/config.go index f30f79f68..b02d8e2e1 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -37,6 +37,9 @@ type Config struct { // RootDir is the runtime root directory. RootDir string `flag:"root"` + // Traceback changes the Go runtime's traceback level. + Traceback string `flag:"traceback"` + // Debug indicates that debug logging should be enabled. Debug bool `flag:"debug"` diff --git a/runsc/config/flags.go b/runsc/config/flags.go index a5f25cfa2..d3203b565 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -49,6 +49,7 @@ func RegisterFlags() { flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.") flag.Bool("alsologtostderr", false, "send log messages to stderr.") flag.Bool("allow-flag-override", false, "allow OCI annotations (dev.gvisor.flag.<name>) to override flags for debugging.") + flag.String("traceback", "system", "golang runtime's traceback level") // Debugging flags: strace related flag.Bool("strace", false, "enable strace.") diff --git a/runsc/container/container.go b/runsc/container/container.go index 63478ba8c..52e1755ce 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -312,6 +312,14 @@ func New(conf *config.Config, args Args) (*Container, error) { if isRoot(args.Spec) { log.Debugf("Creating new sandbox for container %q", args.ID) + if args.Spec.Linux == nil { + args.Spec.Linux = &specs.Linux{} + } + // Don't force the use of cgroups in tests because they lack permission to do so. + if args.Spec.Linux.CgroupsPath == "" && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + args.Spec.Linux.CgroupsPath = "/" + args.ID + } + // Create and join cgroup before processes are created to ensure they are // part of the cgroup from the start (and all their children processes). cg, err := cgroup.New(args.Spec) @@ -321,7 +329,13 @@ func New(conf *config.Config, args Args) (*Container, error) { if cg != nil { // If there is cgroup config, install it before creating sandbox process. if err := cg.Install(args.Spec.Linux.Resources); err != nil { - return nil, fmt.Errorf("configuring cgroup: %v", err) + switch { + case errors.Is(err, syscall.EACCES) && conf.Rootless: + log.Warningf("Skipping cgroup configuration in rootless mode: %v", err) + cg = nil + default: + return nil, fmt.Errorf("configuring cgroup: %v", err) + } } } if err := runInCgroup(cg, func() error { @@ -985,7 +999,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu // Start the gofer in the given namespace. log.Debugf("Starting gofer: %s %v", binPath, args) if err := specutils.StartInNS(cmd, nss); err != nil { - return nil, nil, fmt.Errorf("Gofer: %v", err) + return nil, nil, fmt.Errorf("gofer: %v", err) } log.Infof("Gofer started, PID: %d", cmd.Process.Pid) c.GoferPid = cmd.Process.Pid diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 1f8e277cc..cc188f45b 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -2362,12 +2362,12 @@ func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, } // executeSync synchronously executes a new process. -func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) { - pid, err := cont.Execute(args) +func (c *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) { + pid, err := c.Execute(args) if err != nil { return 0, fmt.Errorf("error executing: %v", err) } - ws, err := cont.WaitPID(pid) + ws, err := c.WaitPID(pid) if err != nil { return 0, fmt.Errorf("error waiting: %v", err) } diff --git a/runsc/main.go b/runsc/main.go index ed244c4ba..4ce5ebee9 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -1,4 +1,4 @@ -// Copyright 2018 The gVisor Authors. +// Copyright 2020 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,245 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Binary runsc is an implementation of the Open Container Initiative Runtime -// that runs applications inside a sandbox. +// Binary runsc implements the OCI runtime interface. package main import ( - "context" - "fmt" - "io" - "io/ioutil" - "os" - "os/signal" - "syscall" - "time" - - "github.com/google/subcommands" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/runsc/cmd" - "gvisor.dev/gvisor/runsc/config" - "gvisor.dev/gvisor/runsc/flag" - "gvisor.dev/gvisor/runsc/specutils" -) - -var ( - // Although these flags are not part of the OCI spec, they are used by - // Docker, and thus should not be changed. - // TODO(gvisor.dev/issue/193): support systemd cgroups - systemdCgroup = flag.Bool("systemd-cgroup", false, "Use systemd for cgroups. NOT SUPPORTED.") - showVersion = flag.Bool("version", false, "show version and exit.") - - // These flags are unique to runsc, and are used to configure parts of the - // system that are not covered by the runtime spec. - - // Debugging flags. - logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.") - debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.") - panicLogFD = flag.Int("panic-log-fd", -1, "file descriptor to write Go's runtime messages.") + "gvisor.dev/gvisor/runsc/cli" ) func main() { - // Help and flags commands are generated automatically. - help := cmd.NewHelp(subcommands.DefaultCommander) - help.Register(new(cmd.Syscalls)) - subcommands.Register(help, "") - subcommands.Register(subcommands.FlagsCommand(), "") - - // Installation helpers. - const helperGroup = "helpers" - subcommands.Register(new(cmd.Install), helperGroup) - subcommands.Register(new(cmd.Uninstall), helperGroup) - - // Register user-facing runsc commands. - subcommands.Register(new(cmd.Checkpoint), "") - subcommands.Register(new(cmd.Create), "") - subcommands.Register(new(cmd.Delete), "") - subcommands.Register(new(cmd.Do), "") - subcommands.Register(new(cmd.Events), "") - subcommands.Register(new(cmd.Exec), "") - subcommands.Register(new(cmd.Gofer), "") - subcommands.Register(new(cmd.Kill), "") - subcommands.Register(new(cmd.List), "") - subcommands.Register(new(cmd.Pause), "") - subcommands.Register(new(cmd.PS), "") - subcommands.Register(new(cmd.Restore), "") - subcommands.Register(new(cmd.Resume), "") - subcommands.Register(new(cmd.Run), "") - subcommands.Register(new(cmd.Spec), "") - subcommands.Register(new(cmd.State), "") - subcommands.Register(new(cmd.Start), "") - subcommands.Register(new(cmd.Wait), "") - - // Register internal commands with the internal group name. This causes - // them to be sorted below the user-facing commands with empty group. - // The string below will be printed above the commands. - const internalGroup = "internal use only" - subcommands.Register(new(cmd.Boot), internalGroup) - subcommands.Register(new(cmd.Debug), internalGroup) - subcommands.Register(new(cmd.Gofer), internalGroup) - subcommands.Register(new(cmd.Statefile), internalGroup) - - config.RegisterFlags() - - // All subcommands must be registered before flag parsing. - flag.Parse() - - // Are we showing the version? - if *showVersion { - // The format here is the same as runc. - fmt.Fprintf(os.Stdout, "runsc version %s\n", version) - fmt.Fprintf(os.Stdout, "spec: %s\n", specutils.Version) - os.Exit(0) - } - - // Create a new Config from the flags. - conf, err := config.NewFromFlags() - if err != nil { - cmd.Fatalf(err.Error()) - } - - // TODO(gvisor.dev/issue/193): support systemd cgroups - if *systemdCgroup { - fmt.Fprintln(os.Stderr, "systemd cgroup flag passed, but systemd cgroups not supported. See gvisor.dev/issue/193") - os.Exit(1) - } - - var errorLogger io.Writer - if *logFD > -1 { - errorLogger = os.NewFile(uintptr(*logFD), "error log file") - - } else if conf.LogFilename != "" { - // We must set O_APPEND and not O_TRUNC because Docker passes - // the same log file for all commands (and also parses these - // log files), so we can't destroy them on each command. - var err error - errorLogger, err = os.OpenFile(conf.LogFilename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) - if err != nil { - cmd.Fatalf("error opening log file %q: %v", conf.LogFilename, err) - } - } - cmd.ErrorLogger = errorLogger - - if _, err := platform.Lookup(conf.Platform); err != nil { - cmd.Fatalf("%v", err) - } - - // Sets the reference leak check mode. Also set it in config below to - // propagate it to child processes. - refs.SetLeakMode(conf.ReferenceLeak) - - // Set up logging. - if conf.Debug { - log.SetLevel(log.Debug) - } - - // Logging will include the local date and time via the time package. - // - // On first use, time.Local initializes the local time zone, which - // involves opening tzdata files on the host. Since this requires - // opening host files, it must be done before syscall filter - // installation. - // - // Generally there will be a log message before filter installation - // that will force initialization, but force initialization here in - // case that does not occur. - _ = time.Local.String() - - subcommand := flag.CommandLine.Arg(0) - - var e log.Emitter - if *debugLogFD > -1 { - f := os.NewFile(uintptr(*debugLogFD), "debug log file") - - e = newEmitter(conf.DebugLogFormat, f) - - } else if conf.DebugLog != "" { - f, err := specutils.DebugLogFile(conf.DebugLog, subcommand, "" /* name */) - if err != nil { - cmd.Fatalf("error opening debug log file in %q: %v", conf.DebugLog, err) - } - e = newEmitter(conf.DebugLogFormat, f) - - } else { - // Stderr is reserved for the application, just discard the logs if no debug - // log is specified. - e = newEmitter("text", ioutil.Discard) - } - - if *panicLogFD > -1 || *debugLogFD > -1 { - fd := *panicLogFD - if fd < 0 { - fd = *debugLogFD - } - // Quick sanity check to make sure no other commands get passed - // a log fd (they should use log dir instead). - if subcommand != "boot" && subcommand != "gofer" { - cmd.Fatalf("flags --debug-log-fd and --panic-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand) - } - - // If we are the boot process, then we own our stdio FDs and can do what we - // want with them. Since Docker and Containerd both eat boot's stderr, we - // dup our stderr to the provided log FD so that panics will appear in the - // logs, rather than just disappear. - if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { - cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err) - } - } else if conf.AlsoLogToStderr { - e = &log.MultiEmitter{e, newEmitter(conf.DebugLogFormat, os.Stderr)} - } - - log.SetTarget(e) - - log.Infof("***************************") - log.Infof("Args: %s", os.Args) - log.Infof("Version %s", version) - log.Infof("PID: %d", os.Getpid()) - log.Infof("UID: %d, GID: %d", os.Getuid(), os.Getgid()) - log.Infof("Configuration:") - log.Infof("\t\tRootDir: %s", conf.RootDir) - log.Infof("\t\tPlatform: %v", conf.Platform) - log.Infof("\t\tFileAccess: %v, overlay: %t", conf.FileAccess, conf.Overlay) - log.Infof("\t\tNetwork: %v, logging: %t", conf.Network, conf.LogPackets) - log.Infof("\t\tStrace: %t, max size: %d, syscalls: %s", conf.Strace, conf.StraceLogSize, conf.StraceSyscalls) - log.Infof("\t\tVFS2 enabled: %v", conf.VFS2) - log.Infof("***************************") - - if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - // SIGTERM is sent to all processes if a test exceeds its - // timeout and this case is handled by syscall_test_runner. - log.Warningf("Block the TERM signal. This is only safe in tests!") - signal.Ignore(syscall.SIGTERM) - } - - // Call the subcommand and pass in the configuration. - var ws syscall.WaitStatus - subcmdCode := subcommands.Execute(context.Background(), conf, &ws) - if subcmdCode == subcommands.ExitSuccess { - log.Infof("Exiting with status: %v", ws) - if ws.Signaled() { - // No good way to return it, emulate what the shell does. Maybe raise - // signal to self? - os.Exit(128 + int(ws.Signal())) - } - os.Exit(ws.ExitStatus()) - } - // Return an error that is unlikely to be used by the application. - log.Warningf("Failure to execute command, err: %v", subcmdCode) - os.Exit(128) -} - -func newEmitter(format string, logFile io.Writer) log.Emitter { - switch format { - case "text": - return log.GoogleEmitter{&log.Writer{Next: logFile}} - case "json": - return log.JSONEmitter{&log.Writer{Next: logFile}} - case "json-k8s": - return log.K8sJSONEmitter{&log.Writer{Next: logFile}} - } - cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format) - panic("unreachable") + cli.Main(version) } diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 0392e3e83..45abc1425 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -344,15 +344,9 @@ func IsSupportedDevMount(m specs.Mount) bool { var existingDevices = []string{ "/dev/fd", "/dev/stdin", "/dev/stdout", "/dev/stderr", "/dev/null", "/dev/zero", "/dev/full", "/dev/random", - "/dev/urandom", "/dev/shm", "/dev/pts", "/dev/ptmx", + "/dev/urandom", "/dev/shm", "/dev/ptmx", } dst := filepath.Clean(m.Destination) - if dst == "/dev" { - // OCI spec uses many different mounts for the things inside of '/dev'. We - // have a single mount at '/dev' that is always mounted, regardless of - // whether it was asked for, as the spec says we SHOULD. - return false - } for _, dev := range existingDevices { if dst == dev || strings.HasPrefix(dst, dev+"/") { return false @@ -425,7 +419,7 @@ func Mount(src, dst, typ string, flags uint32) error { // Special case, as there is no source directory for proc mounts. isDir = true } else if fi, err := os.Stat(src); err != nil { - return fmt.Errorf("Stat(%q) failed: %v", src, err) + return fmt.Errorf("stat(%q) failed: %v", src, err) } else { isDir = fi.IsDir() } @@ -433,25 +427,25 @@ func Mount(src, dst, typ string, flags uint32) error { if isDir { // Create the destination directory. if err := os.MkdirAll(dst, 0777); err != nil { - return fmt.Errorf("Mkdir(%q) failed: %v", dst, err) + return fmt.Errorf("mkdir(%q) failed: %v", dst, err) } } else { // Create the parent destination directory. parent := path.Dir(dst) if err := os.MkdirAll(parent, 0777); err != nil { - return fmt.Errorf("Mkdir(%q) failed: %v", parent, err) + return fmt.Errorf("mkdir(%q) failed: %v", parent, err) } // Create the destination file if it does not exist. f, err := os.OpenFile(dst, syscall.O_CREAT, 0777) if err != nil { - return fmt.Errorf("Open(%q) failed: %v", dst, err) + return fmt.Errorf("open(%q) failed: %v", dst, err) } f.Close() } // Do the mount. if err := syscall.Mount(src, dst, typ, uintptr(flags), ""); err != nil { - return fmt.Errorf("Mount(%q, %q, %d) failed: %v", src, dst, flags, err) + return fmt.Errorf("mount(%q, %q, %d) failed: %v", src, dst, flags, err) } return nil } |