Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/events.go                    |  61
-rw-r--r-- | runsc/boot/loader.go                    |   2
-rw-r--r-- | runsc/cgroup/cgroup.go                  |  25
-rw-r--r-- | runsc/cmd/events.go                     |   4
-rw-r--r-- | runsc/container/console_test.go         |   6
-rw-r--r-- | runsc/container/container.go            |  63
-rw-r--r-- | runsc/container/container_test.go       |  45
-rw-r--r-- | runsc/container/multi_container_test.go |  81
-rw-r--r-- | runsc/container/state_file.go           |   2
-rw-r--r-- | runsc/mitigate/cpu.go                   | 192
-rw-r--r-- | runsc/mitigate/cpu_test.go              | 202
-rw-r--r-- | runsc/sandbox/sandbox.go                |  22
12 files changed, 563 insertions, 142 deletions
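The user-visible effect of this diff is that "runsc events" now reports CPU usage alongside memory and PIDs: the control RPC returns an EventOut wrapping the Event plus a per-container CPU usage map, while the CLI still prints one JSON-encoded Event per sample (see runsc/cmd/events.go below, which marshals ev.Event). The following is a minimal consumer sketch, assuming the JSON tags shown in runsc/boot/events.go below and a runc-style "current" tag on the Pids field; the struct names and sample values are illustrative only and not part of the change.

package main

import (
	"encoding/json"
	"fmt"
)

// statsEvent mirrors only the fields of boot.Event needed here; it is a
// consumer-side copy, not a type exported by the runsc packages.
type statsEvent struct {
	Type string `json:"type"`
	ID   string `json:"id"`
	Data struct {
		CPU struct {
			Usage struct {
				Total uint64 `json:"total"`
			} `json:"usage"`
		} `json:"cpu"`
		Pids struct {
			Current uint64 `json:"current"` // tag assumed to match runc's stats struct
		} `json:"pids"`
	} `json:"data"`
}

func main() {
	// One line of "runsc events" output as it might look; the values are made up.
	line := `{"type":"stats","id":"c1","data":{"cpu":{"usage":{"total":123456789}},"pids":{"current":2}}}`
	var ev statsEvent
	if err := json.Unmarshal([]byte(line), &ev); err != nil {
		panic(err)
	}
	fmt.Printf("%s: cpu=%d ns, pids=%d\n", ev.ID, ev.Data.CPU.Usage.Total, ev.Data.Pids.Current)
}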
diff --git a/runsc/boot/events.go b/runsc/boot/events.go index 422f4da00..0814b2a69 100644 --- a/runsc/boot/events.go +++ b/runsc/boot/events.go @@ -15,21 +15,30 @@ package boot import ( - "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/usage" ) +// EventOut is the return type of the Event command. +type EventOut struct { + Event Event `json:"event"` + + // ContainerUsage maps each container ID to its total CPU usage. + ContainerUsage map[string]uint64 `json:"containerUsage"` +} + // Event struct for encoding the event data to JSON. Corresponds to runc's // main.event struct. type Event struct { - Type string `json:"type"` - ID string `json:"id"` - Data interface{} `json:"data,omitempty"` + Type string `json:"type"` + ID string `json:"id"` + Data Stats `json:"data"` } // Stats is the runc specific stats structure for stability when encoding and // decoding stats. type Stats struct { + CPU CPU `json:"cpu"` Memory Memory `json:"memory"` Pids Pids `json:"pids"` } @@ -58,24 +67,42 @@ type Memory struct { Raw map[string]uint64 `json:"raw,omitempty"` } -// Event gets the events from the container. -func (cm *containerManager) Event(_ *struct{}, out *Event) error { - stats := &Stats{} - stats.populateMemory(cm.l.k) - stats.populatePIDs(cm.l.k) - *out = Event{Type: "stats", Data: stats} - return nil +// CPU contains stats on the CPU. +type CPU struct { + Usage CPUUsage `json:"usage"` +} + +// CPUUsage contains stats on CPU usage. +type CPUUsage struct { + Kernel uint64 `json:"kernel,omitempty"` + User uint64 `json:"user,omitempty"` + Total uint64 `json:"total,omitempty"` + PerCPU []uint64 `json:"percpu,omitempty"` } -func (s *Stats) populateMemory(k *kernel.Kernel) { - mem := k.MemoryFile() +// Event gets the events from the container. +func (cm *containerManager) Event(_ *struct{}, out *EventOut) error { + *out = EventOut{ + Event: Event{ + Type: "stats", + }, + } + + // Memory usage. + // TODO(gvisor.dev/issue/172): Per-container accounting. + mem := cm.l.k.MemoryFile() mem.UpdateUsage() _, totalUsage := usage.MemoryAccounting.Copy() - s.Memory.Usage = MemoryEntry{ + out.Event.Data.Memory.Usage = MemoryEntry{ Usage: totalUsage, } -} -func (s *Stats) populatePIDs(k *kernel.Kernel) { - s.Pids.Current = uint64(len(k.TaskSet().Root.ThreadGroups())) + // PIDs. + // TODO(gvisor.dev/issue/172): Per-container accounting. + out.Event.Data.Pids.Current = uint64(len(cm.l.k.TaskSet().Root.ThreadGroups())) + + // CPU usage by container. + out.ContainerUsage = control.ContainerUsage(cm.l.k) + + return nil } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index d37528ee7..77a7c530b 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -102,7 +102,7 @@ type containerInfo struct { goferFDs []*fd.FD } -// Loader keeps state needed to start the kernel and run the container.. +// Loader keeps state needed to start the kernel and run the container. type Loader struct { // k is the kernel. k *kernel.Kernel diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index e9ae59a92..797c1c2bc 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -281,8 +281,13 @@ func New(spec *specs.Spec) (*Cgroup, error) { if spec.Linux == nil || spec.Linux.CgroupsPath == "" { return nil, nil } + return NewFromPath(spec.Linux.CgroupsPath) +} + +// NewFromPath creates a new Cgroup instance. 
+func NewFromPath(cgroupsPath string) (*Cgroup, error) { var parents map[string]string - if !filepath.IsAbs(spec.Linux.CgroupsPath) { + if !filepath.IsAbs(cgroupsPath) { var err error parents, err = LoadPaths("self") if err != nil { @@ -291,7 +296,7 @@ func New(spec *specs.Spec) (*Cgroup, error) { } own := make(map[string]bool) return &Cgroup{ - Name: spec.Linux.CgroupsPath, + Name: cgroupsPath, Parents: parents, Own: own, }, nil @@ -389,6 +394,9 @@ func (c *Cgroup) Join() (func(), error) { undo = func() { for _, path := range undoPaths { log.Debugf("Restoring cgroup %q", path) + // Writing the value 0 to a cgroup.procs file causes + // the writing process to be moved to the corresponding + // cgroup. - cgroups(7). if err := setValue(path, "cgroup.procs", "0"); err != nil { log.Warningf("Error restoring cgroup %q: %v", path, err) } @@ -399,6 +407,9 @@ func (c *Cgroup) Join() (func(), error) { for key, cfg := range controllers { path := c.makePath(key) log.Debugf("Joining cgroup %q", path) + // Writing the value 0 to a cgroup.procs file causes the + // writing process to be moved to the corresponding cgroup. + // - cgroups(7). if err := setValue(path, "cgroup.procs", "0"); err != nil { if cfg.optional && os.IsNotExist(err) { continue @@ -426,6 +437,16 @@ func (c *Cgroup) CPUQuota() (float64, error) { return float64(quota) / float64(period), nil } +// CPUUsage returns the total CPU usage of the cgroup. +func (c *Cgroup) CPUUsage() (uint64, error) { + path := c.makePath("cpuacct") + usage, err := getValue(path, "cpuacct.usage") + if err != nil { + return 0, err + } + return strconv.ParseUint(strings.TrimSpace(usage), 10, 64) +} + // NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'. func (c *Cgroup) NumCPU() (int, error) { path := c.makePath("cpuset") diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go index 75b0aac8d..06f00e8e7 100644 --- a/runsc/cmd/events.go +++ b/runsc/cmd/events.go @@ -93,9 +93,9 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa // err must be preserved because it is used below when breaking // out of the loop. - b, err := json.Marshal(ev) + b, err := json.Marshal(ev.Event) if err != nil { - log.Warningf("Error while marshalling event %v: %v", ev, err) + log.Warningf("Error while marshalling event %v: %v", ev.Event, err) } else { os.Stdout.Write(b) } diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 1b0fdebd6..7a3d5a523 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -122,7 +122,7 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) { // Test that an pty FD is sent over the console socket if one is provided. func TestConsoleSocket(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { spec := testutil.NewSpecWithArgs("true") spec.Process.Terminal = true @@ -164,7 +164,7 @@ func TestConsoleSocket(t *testing.T) { // Test that an pty FD is sent over the console socket if one is provided. func TestMultiContainerConsoleSocket(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -495,7 +495,7 @@ func TestJobControlSignalRootContainer(t *testing.T) { // Test that terminal works with root and sub-containers. 
func TestMultiContainerTerminal(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { diff --git a/runsc/container/container.go b/runsc/container/container.go index 5a0f8d5dc..aae64ae1c 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -486,12 +486,20 @@ func (c *Container) Execute(args *control.ExecArgs) (int32, error) { } // Event returns events for the container. -func (c *Container) Event() (*boot.Event, error) { +func (c *Container) Event() (*boot.EventOut, error) { log.Debugf("Getting events for container, cid: %s", c.ID) if err := c.requireStatus("get events for", Created, Running, Paused); err != nil { return nil, err } - return c.Sandbox.Event(c.ID) + event, err := c.Sandbox.Event(c.ID) + if err != nil { + return nil, err + } + + // Some stats can utilize host cgroups for accuracy. + c.populateStats(event) + + return event, nil } // SandboxPid returns the Pid of the sandbox the container is running in, or -1 if the @@ -1110,3 +1118,54 @@ func setOOMScoreAdj(pid int, scoreAdj int) error { } return nil } + +// populateStats populates event with stats estimates based on cgroups and the +// sentry's accounting. +// TODO(gvisor.dev/issue/172): This is an estimation; we should do more +// detailed accounting. +func (c *Container) populateStats(event *boot.EventOut) { + // The events command, when run for all running containers, should + // account for the full cgroup CPU usage. We split cgroup usage + // proportionally according to the sentry-internal usage measurements, + // only counting Running containers. + log.Warningf("event.ContainerUsage: %v", event.ContainerUsage) + var containerUsage uint64 + var allContainersUsage uint64 + for ID, usage := range event.ContainerUsage { + allContainersUsage += usage + if ID == c.ID { + containerUsage = usage + } + } + + cgroup, err := c.Sandbox.FindCgroup() + if err != nil { + // No cgroup, so rely purely on the sentry's accounting. + log.Warningf("events: no cgroups") + event.Event.Data.CPU.Usage.Total = containerUsage + return + } + + // Get the host cgroup CPU usage. + cgroupsUsage, err := cgroup.CPUUsage() + if err != nil { + // No cgroup usage, so rely purely on the sentry's accounting. + log.Warningf("events: failed when getting cgroup CPU usage for container: %v", err) + event.Event.Data.CPU.Usage.Total = containerUsage + return + } + + // If the sentry reports no memory usage, fall back on cgroups and + // split usage equally across containers. + if allContainersUsage == 0 { + log.Warningf("events: no sentry CPU usage reported") + allContainersUsage = cgroupsUsage + containerUsage = cgroupsUsage / uint64(len(event.ContainerUsage)) + } + + log.Warningf("%f, %f, %f", containerUsage, cgroupsUsage, allContainersUsage) + // Scaling can easily overflow a uint64 (e.g. a containerUsage and + // cgroupsUsage of 16 seconds each will overflow), so use floats. + event.Event.Data.CPU.Usage.Total = uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage))) + return +} diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 3bbf86534..d50bbcd9f 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -312,8 +312,7 @@ var ( all = append(noOverlay, overlay) ) -// configs generates different configurations to run tests. 
-func configs(t *testing.T, opts ...configOption) map[string]*config.Config { +func configsHelper(t *testing.T, opts ...configOption) map[string]*config.Config { // Always load the default config. cs := make(map[string]*config.Config) testutil.TestConfig(t) @@ -339,10 +338,12 @@ func configs(t *testing.T, opts ...configOption) map[string]*config.Config { return cs } -// TODO(gvisor.dev/issue/1624): Merge with configs when VFS2 is the default. -func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*config.Config { - all := configs(t, opts...) - for key, value := range configs(t, opts...) { +// configs generates different configurations to run tests. +// +// TODO(gvisor.dev/issue/1624): Remove VFS1 dimension. +func configs(t *testing.T, opts ...configOption) map[string]*config.Config { + all := configsHelper(t, opts...) + for key, value := range configsHelper(t, opts...) { value.VFS2 = true all[key+"VFS2"] = value } @@ -358,7 +359,7 @@ func TestLifecycle(t *testing.T) { childReaper.Start() defer childReaper.Stop() - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { // The container will just sleep for a long time. We will kill it before // it finishes sleeping. @@ -529,7 +530,7 @@ func TestExePath(t *testing.T) { t.Fatalf("error making directory: %v", err) } - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { for _, test := range []struct { path string @@ -654,7 +655,7 @@ func doAppExitStatus(t *testing.T, vfs2 bool) { // TestExec verifies that a container can exec a new program. func TestExec(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { dir, err := ioutil.TempDir(testutil.TmpDir(), "exec-test") if err != nil { @@ -783,7 +784,7 @@ func TestExec(t *testing.T) { // TestExecProcList verifies that a container can exec a new program and it // shows correcly in the process list. func TestExecProcList(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { const uid = 343 spec := testutil.NewSpecWithArgs("sleep", "100") @@ -854,7 +855,7 @@ func TestExecProcList(t *testing.T) { // TestKillPid verifies that we can signal individual exec'd processes. func TestKillPid(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { @@ -930,7 +931,6 @@ func TestKillPid(t *testing.T) { // number after the last number from the checkpointed container. func TestCheckpointRestore(t *testing.T) { // Skip overlay because test requires writing to host file. - // TODO(gvisor.dev/issue/1663): Add VFS when S/R support is added. for name, conf := range configs(t, noOverlay...) { t.Run(name, func(t *testing.T) { dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test") @@ -1092,7 +1092,6 @@ func TestCheckpointRestore(t *testing.T) { // with filesystem Unix Domain Socket use. func TestUnixDomainSockets(t *testing.T) { // Skip overlay because test requires writing to host file. - // TODO(gvisor.dev/issue/1663): Add VFS when S/R support is added. for name, conf := range configs(t, noOverlay...) 
{ t.Run(name, func(t *testing.T) { // UDS path is limited to 108 chars for compatibility with older systems. @@ -1230,7 +1229,7 @@ func TestUnixDomainSockets(t *testing.T) { // recreated. Then it resumes the container, verify that the file gets created // again. func TestPauseResume(t *testing.T) { - for name, conf := range configsWithVFS2(t, noOverlay...) { + for name, conf := range configs(t, noOverlay...) { t.Run(name, func(t *testing.T) { tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "lock") if err != nil { @@ -1373,7 +1372,7 @@ func TestCapabilities(t *testing.T) { uid := auth.KUID(os.Getuid() + 1) gid := auth.KGID(os.Getgid() + 1) - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { spec := testutil.NewSpecWithArgs("sleep", "100") rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) @@ -1446,7 +1445,7 @@ func TestCapabilities(t *testing.T) { // TestRunNonRoot checks that sandbox can be configured when running as // non-privileged user. func TestRunNonRoot(t *testing.T) { - for name, conf := range configsWithVFS2(t, noOverlay...) { + for name, conf := range configs(t, noOverlay...) { t.Run(name, func(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/true") @@ -1490,7 +1489,7 @@ func TestRunNonRoot(t *testing.T) { // TestMountNewDir checks that runsc will create destination directory if it // doesn't exit. func TestMountNewDir(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { root, err := ioutil.TempDir(testutil.TmpDir(), "root") if err != nil { @@ -1521,7 +1520,7 @@ func TestMountNewDir(t *testing.T) { } func TestReadonlyRoot(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { spec := testutil.NewSpecWithArgs("sleep", "100") spec.Root.Readonly = true @@ -1569,7 +1568,7 @@ func TestReadonlyRoot(t *testing.T) { } func TestReadonlyMount(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount") if err != nil { @@ -1628,7 +1627,7 @@ func TestReadonlyMount(t *testing.T) { } func TestUIDMap(t *testing.T) { - for name, conf := range configsWithVFS2(t, noOverlay...) { + for name, conf := range configs(t, noOverlay...) { t.Run(name, func(t *testing.T) { testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") if err != nil { @@ -1916,7 +1915,7 @@ func TestUserLog(t *testing.T) { } func TestWaitOnExitedSandbox(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { // Run a shell that sleeps for 1 second and then exits with a // non-zero code. @@ -2058,7 +2057,7 @@ func doDestroyStartingTest(t *testing.T, vfs2 bool) { } func TestCreateWorkingDir(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") if err != nil { @@ -2173,7 +2172,7 @@ func TestMountPropagation(t *testing.T) { } func TestMountSymlink(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) 
{ t.Run(name, func(t *testing.T) { dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") if err != nil { diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index bc802e075..173332cc2 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -15,7 +15,6 @@ package container import ( - "encoding/json" "fmt" "io/ioutil" "math" @@ -132,7 +131,7 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) { // TestMultiContainerSanity checks that it is possible to run 2 dead-simple // containers in the same sandbox. func TestMultiContainerSanity(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -170,7 +169,7 @@ func TestMultiContainerSanity(t *testing.T) { // TestMultiPIDNS checks that it is possible to run 2 dead-simple // containers in the same sandbox with different pidns. func TestMultiPIDNS(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -215,7 +214,7 @@ func TestMultiPIDNS(t *testing.T) { // TestMultiPIDNSPath checks the pidns path. func TestMultiPIDNSPath(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -322,8 +321,8 @@ func TestMultiContainerWait(t *testing.T) { } } -// TestExecWait ensures what we can wait containers and individual processes in the -// sandbox that have already exited. +// TestExecWait ensures what we can wait on containers and individual processes +// in the sandbox that have already exited. func TestExecWait(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -448,7 +447,7 @@ func TestMultiContainerMount(t *testing.T) { // TestMultiContainerSignal checks that it is possible to signal individual // containers without killing the entire sandbox. func TestMultiContainerSignal(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -548,7 +547,7 @@ func TestMultiContainerDestroy(t *testing.T) { t.Fatal("error finding test_app:", err) } - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -1042,7 +1041,7 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { // Test that pod shared mounts are properly mounted in 2 containers and that // changes from one container is reflected in the other. func TestMultiContainerSharedMount(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -1155,7 +1154,7 @@ func TestMultiContainerSharedMount(t *testing.T) { // Test that pod mounts are mounted as readonly when requested. func TestMultiContainerSharedMountReadonly(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) 
{ + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -1220,7 +1219,7 @@ func TestMultiContainerSharedMountReadonly(t *testing.T) { // Test that shared pod mounts continue to work after container is restarted. func TestMultiContainerSharedMountRestart(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -1329,7 +1328,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) { // Test that unsupported pod mounts options are ignored when matching master and // replica mounts. func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { - for name, conf := range configsWithVFS2(t, all...) { + for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -1663,7 +1662,7 @@ func TestMultiContainerRunNonRoot(t *testing.T) { func TestMultiContainerHomeEnvDir(t *testing.T) { // NOTE: Don't use overlay since we need changes to persist to the temp dir // outside the sandbox. - for testName, conf := range configsWithVFS2(t, noOverlay...) { + for testName, conf := range configs(t, noOverlay...) { t.Run(testName, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() @@ -1743,8 +1742,9 @@ func TestMultiContainerEvent(t *testing.T) { // Setup the containers. sleep := []string{"/bin/sleep", "100"} + busy := []string{"/bin/bash", "-c", "i=0 ; while true ; do (( i += 1 )) ; done"} quick := []string{"/bin/true"} - podSpec, ids := createSpecs(sleep, sleep, quick) + podSpec, ids := createSpecs(sleep, busy, quick) containers, cleanup, err := startContainers(conf, podSpec, ids) if err != nil { t.Fatalf("error starting containers: %v", err) @@ -1755,37 +1755,58 @@ func TestMultiContainerEvent(t *testing.T) { t.Logf("Running containerd %s", cont.ID) } - // Wait for last container to stabilize the process count that is checked - // further below. + // Wait for last container to stabilize the process count that is + // checked further below. if ws, err := containers[2].Wait(); err != nil || ws != 0 { t.Fatalf("Container.Wait, status: %v, err: %v", ws, err) } + expectedPL := []*control.Process{ + newProcessBuilder().Cmd("sleep").Process(), + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + newProcessBuilder().Cmd("bash").Process(), + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for bash to start: %v", err) + } // Check events for running containers. + var prevUsage uint64 for _, cont := range containers[:2] { - evt, err := cont.Event() + ret, err := cont.Event() if err != nil { t.Errorf("Container.Events(): %v", err) } + evt := ret.Event if want := "stats"; evt.Type != want { - t.Errorf("Wrong event type, want: %s, got :%s", want, evt.Type) + t.Errorf("Wrong event type, want: %s, got: %s", want, evt.Type) } if cont.ID != evt.ID { - t.Errorf("Wrong container ID, want: %s, got :%s", cont.ID, evt.ID) + t.Errorf("Wrong container ID, want: %s, got: %s", cont.ID, evt.ID) } - // Event.Data is an interface, so it comes from the wire was - // map[string]string. Marshal and unmarshall again to the correc type. 
- data, err := json.Marshal(evt.Data) - if err != nil { - t.Fatalf("invalid event data: %v", err) + // One process per remaining container. + if got, want := evt.Data.Pids.Current, uint64(2); got != want { + t.Errorf("Wrong number of PIDs, want: %d, got: %d", want, got) } - var stats boot.Stats - if err := json.Unmarshal(data, &stats); err != nil { - t.Fatalf("invalid event data: %v", err) + + // Both remaining containers should have nonzero usage, and + // 'busy' should have higher usage than 'sleep'. + usage := evt.Data.CPU.Usage.Total + if usage == 0 { + t.Errorf("Running container should report nonzero CPU usage, but got %d", usage) } - // One process per remaining container. - if want := uint64(2); stats.Pids.Current != want { - t.Errorf("Wrong number of PIDs, want: %d, got :%d", want, stats.Pids.Current) + if usage <= prevUsage { + t.Errorf("Expected container %s to use more than %d ns of CPU, but used %d", cont.ID, prevUsage, usage) + } + t.Logf("Container %s usage: %d", cont.ID, usage) + prevUsage = usage + + // The exited container should have a usage of zero. + if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 { + t.Errorf("Exited container should report 0 CPU usage, but got %d", exited) } } diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go index dfbf1f2d3..c46322ba4 100644 --- a/runsc/container/state_file.go +++ b/runsc/container/state_file.go @@ -49,7 +49,7 @@ type LoadOpts struct { // Returns ErrNotExist if no container is found. Returns error in case more than // one containers matching the ID prefix is found. func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) { - //log.Debugf("Load container, rootDir: %q, partial cid: %s", rootDir, partialID) + log.Debugf("Load container, rootDir: %q, id: %+v, opts: %+v", rootDir, id, opts) if !opts.Exact { var err error id, err = findContainerID(rootDir, id.ContainerID) diff --git a/runsc/mitigate/cpu.go b/runsc/mitigate/cpu.go index 113b98159..ae4ce9579 100644 --- a/runsc/mitigate/cpu.go +++ b/runsc/mitigate/cpu.go @@ -16,6 +16,7 @@ package mitigate import ( "fmt" + "io/ioutil" "regexp" "strconv" "strings" @@ -31,16 +32,104 @@ const ( ) const ( - processorKey = "processor" - vendorIDKey = "vendor_id" - cpuFamilyKey = "cpu family" - modelKey = "model" - coreIDKey = "core id" - bugsKey = "bugs" + processorKey = "processor" + vendorIDKey = "vendor_id" + cpuFamilyKey = "cpu family" + modelKey = "model" + physicalIDKey = "physical id" + coreIDKey = "core id" + bugsKey = "bugs" ) -// getCPUSet returns cpu structs from reading /proc/cpuinfo. -func getCPUSet(data string) ([]*cpu, error) { +const ( + cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online" +) + +// cpuSet contains a map of all CPUs on the system, mapped +// by Physical ID and CoreIDs. threads with the same +// Core and Physical ID are Hyperthread pairs. +type cpuSet map[cpuID]*threadGroup + +// newCPUSet creates a CPUSet from data read from /proc/cpuinfo. +func newCPUSet(data []byte, vulnerable func(*thread) bool) (cpuSet, error) { + processors, err := getThreads(string(data)) + if err != nil { + return nil, err + } + + set := make(cpuSet) + for _, p := range processors { + // Each ID is of the form physicalID:coreID. Hyperthread pairs + // have identical physical and core IDs. We need to match + // Hyperthread pairs so that we can shutdown all but one per + // pair. 
+ core, ok := set[p.id] + if !ok { + core = &threadGroup{} + set[p.id] = core + } + core.isVulnerable = core.isVulnerable || vulnerable(p) + core.threads = append(core.threads, p) + } + return set, nil +} + +// String implements the String method for CPUSet. +func (c cpuSet) String() string { + ret := "" + for _, tg := range c { + ret += fmt.Sprintf("%s\n", tg) + } + return ret +} + +// getRemainingList returns the list of threads that will remain active +// after mitigation. +func (c cpuSet) getRemainingList() []*thread { + threads := make([]*thread, 0, len(c)) + for _, core := range c { + // If we're vulnerable, take only one thread from the pair. + if core.isVulnerable { + threads = append(threads, core.threads[0]) + continue + } + // Otherwise don't shutdown anything. + threads = append(threads, core.threads...) + } + return threads +} + +// getShutdownList returns the list of threads that will be shutdown on +// mitigation. +func (c cpuSet) getShutdownList() []*thread { + threads := make([]*thread, 0) + for _, core := range c { + // Only if we're vulnerable do shutdown anything. In this case, + // shutdown all but the first entry. + if core.isVulnerable && len(core.threads) > 1 { + threads = append(threads, core.threads[1:]...) + } + } + return threads +} + +// threadGroup represents Hyperthread pairs on the same physical/core ID. +type threadGroup struct { + threads []*thread + isVulnerable bool +} + +// String implements the String method for threadGroup. +func (c *threadGroup) String() string { + ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable) + for _, processor := range c.threads { + ret += fmt.Sprintf("%s\n", processor) + } + return ret +} + +// getThreads returns threads structs from reading /proc/cpuinfo. +func getThreads(data string) ([]*thread, error) { // Each processor entry should start with the // processor key. Find the beginings of each. r := buildRegex(processorKey, `\d+`) @@ -56,13 +145,13 @@ func getCPUSet(data string) ([]*cpu, error) { // indexes (e.g. data[index[i], index[i+1]]). // There should be len(indicies) - 1 CPUs // since the last index is the end of the string. - var cpus = make([]*cpu, 0, len(indices)-1) + var cpus = make([]*thread, 0, len(indices)-1) // Find each string that represents a CPU. These begin "processor". for i := 1; i < len(indices); i++ { start := indices[i-1][0] end := indices[i][0] // Parse the CPU entry, which should be between start/end. - c, err := getCPU(data[start:end]) + c, err := newThread(data[start:end]) if err != nil { return nil, err } @@ -71,18 +160,25 @@ func getCPUSet(data string) ([]*cpu, error) { return cpus, nil } +// cpuID for each thread is defined by the physical and +// core IDs. If equal, two threads are Hyperthread pairs. +type cpuID struct { + physicalID int64 + coreID int64 +} + // type cpu represents pertinent info about a cpu. -type cpu struct { +type thread struct { processorNumber int64 // the processor number of this CPU. vendorID string // the vendorID of CPU (e.g. AuthenticAMD). cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake). model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake). - coreID int64 // This CPU's core id to match Hyperthread Pairs + id cpuID // id for this thread bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field. } -// getCPU parses a CPU from a single cpu entry from /proc/cpuinfo. -func getCPU(data string) (*cpu, error) { +// newThread parses a CPU from a single cpu entry from /proc/cpuinfo. 
+func newThread(data string) (*thread, error) { processor, err := parseProcessor(data) if err != nil { return nil, err @@ -103,6 +199,11 @@ func getCPU(data string) (*cpu, error) { return nil, err } + physicalID, err := parsePhysicalID(data) + if err != nil { + return nil, err + } + coreID, err := parseCoreID(data) if err != nil { return nil, err @@ -113,16 +214,41 @@ func getCPU(data string) (*cpu, error) { return nil, err } - return &cpu{ + return &thread{ processorNumber: processor, vendorID: vendorID, cpuFamily: cpuFamily, model: model, - coreID: coreID, - bugs: bugs, + id: cpuID{ + physicalID: physicalID, + coreID: coreID, + }, + bugs: bugs, }, nil } +// String implements the String method for thread. +func (t *thread) String() string { + template := `CPU: %d +CPU ID: %+v +Vendor: %s +Family/Model: %d/%d +Bugs: %s +` + bugs := make([]string, 0) + for bug := range t.bugs { + bugs = append(bugs, bug) + } + + return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ",")) +} + +// shutdown turns off the CPU by writing 0 to /sys/devices/cpu/cpu{N}/online. +func (t *thread) shutdown() error { + cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) + return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644) +} + // List of pertinent side channel vulnerablilites. // For mds, see: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html. var vulnerabilities = []string{ @@ -134,35 +260,46 @@ var vulnerabilities = []string{ } // isVulnerable checks if a CPU is vulnerable to pertinent bugs. -func (c *cpu) isVulnerable() bool { +func (t *thread) isVulnerable() bool { for _, bug := range vulnerabilities { - if _, ok := c.bugs[bug]; ok { + if _, ok := t.bugs[bug]; ok { return true } } return false } +// isActive checks if a CPU is active from /sys/devices/system/cpu/cpu{N}/online +// If the file does not exist (ioutil returns in error), we assume the CPU is on. +func (t *thread) isActive() bool { + cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) + data, err := ioutil.ReadFile(cpuPath) + if err != nil { + return true + } + return len(data) > 0 && data[0] != '0' +} + // similarTo checks family/model/bugs fields for equality of two // processors. -func (c *cpu) similarTo(other *cpu) bool { - if c.vendorID != other.vendorID { +func (t *thread) similarTo(other *thread) bool { + if t.vendorID != other.vendorID { return false } - if other.cpuFamily != c.cpuFamily { + if other.cpuFamily != t.cpuFamily { return false } - if other.model != c.model { + if other.model != t.model { return false } - if len(other.bugs) != len(c.bugs) { + if len(other.bugs) != len(t.bugs) { return false } - for bug := range c.bugs { + for bug := range t.bugs { if _, ok := other.bugs[bug]; !ok { return false } @@ -190,6 +327,11 @@ func parseModel(data string) (int64, error) { return parseIntegerResult(data, modelKey) } +// parsePhysicalID parses the physical id field. +func parsePhysicalID(data string) (int64, error) { + return parseIntegerResult(data, physicalIDKey) +} + // parseCoreID parses the core id field. func parseCoreID(data string) (int64, error) { return parseIntegerResult(data, coreIDKey) diff --git a/runsc/mitigate/cpu_test.go b/runsc/mitigate/cpu_test.go index 77b714a02..21c12f586 100644 --- a/runsc/mitigate/cpu_test.go +++ b/runsc/mitigate/cpu_test.go @@ -15,26 +15,163 @@ package mitigate import ( + "fmt" "io/ioutil" "strings" "testing" ) -// CPU info for a Intel CascadeLake processor. 
Both Skylake and CascadeLake have -// the same family/model numbers, but with different bugs (e.g. skylake has -// cpu_meltdown). -var cascadeLake = &cpu{ - vendorID: "GenuineIntel", - cpuFamily: 6, - model: 85, - bugs: map[string]struct{}{ - "spectre_v1": struct{}{}, - "spectre_v2": struct{}{}, - "spec_store_bypass": struct{}{}, - mds: struct{}{}, - swapgs: struct{}{}, - taa: struct{}{}, - }, +// cpuTestCase represents data from CPUs that will be mitigated. +type cpuTestCase struct { + name string + vendorID string + family int + model int + modelName string + bugs string + physicalCores int + cores int + threadsPerCore int +} + +var cascadeLake4 = cpuTestCase{ + name: "CascadeLake", + vendorID: "GenuineIntel", + family: 6, + model: 85, + modelName: "Intel(R) Xeon(R) CPU", + bugs: "spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa", + physicalCores: 1, + cores: 2, + threadsPerCore: 2, +} + +var haswell2 = cpuTestCase{ + name: "Haswell", + vendorID: "GenuineIntel", + family: 6, + model: 63, + modelName: "Intel(R) Xeon(R) CPU", + bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs", + physicalCores: 1, + cores: 1, + threadsPerCore: 2, +} + +var haswell2core = cpuTestCase{ + name: "Haswell2Physical", + vendorID: "GenuineIntel", + family: 6, + model: 63, + modelName: "Intel(R) Xeon(R) CPU", + bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs", + physicalCores: 2, + cores: 1, + threadsPerCore: 1, +} + +var amd8 = cpuTestCase{ + name: "AMD", + vendorID: "AuthenticAMD", + family: 23, + model: 49, + modelName: "AMD EPYC 7B12", + bugs: "sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass", + physicalCores: 4, + cores: 1, + threadsPerCore: 2, +} + +// makeCPUString makes a string formated like /proc/cpuinfo for each cpuTestCase +func (tc cpuTestCase) makeCPUString() string { + template := `processor : %d +vendor_id : %s +cpu family : %d +model : %d +model name : %s +physical id : %d +core id : %d +cpu cores : %d +bugs : %s +` + ret := `` + for i := 0; i < tc.physicalCores; i++ { + for j := 0; j < tc.cores; j++ { + for k := 0; k < tc.threadsPerCore; k++ { + processorNum := (i*tc.cores+j)*tc.threadsPerCore + k + ret += fmt.Sprintf(template, + processorNum, /*processor*/ + tc.vendorID, /*vendor_id*/ + tc.family, /*cpu family*/ + tc.model, /*model*/ + tc.modelName, /*model name*/ + i, /*physical id*/ + j, /*core id*/ + tc.cores*tc.physicalCores, /*cpu cores*/ + tc.bugs /*bugs*/) + } + } + } + return ret +} + +// TestMockCPUSet tests mock cpu test cases against the cpuSet functions. +func TestMockCPUSet(t *testing.T) { + for _, tc := range []struct { + testCase cpuTestCase + isVulnerable bool + }{ + { + testCase: amd8, + isVulnerable: false, + }, + { + testCase: haswell2, + isVulnerable: true, + }, + { + testCase: haswell2core, + isVulnerable: true, + }, + + { + testCase: cascadeLake4, + isVulnerable: true, + }, + } { + t.Run(tc.testCase.name, func(t *testing.T) { + data := tc.testCase.makeCPUString() + vulnerable := func(t *thread) bool { + return t.isVulnerable() + } + set, err := newCPUSet([]byte(data), vulnerable) + if err != nil { + t.Fatalf("Failed to ") + } + remaining := set.getRemainingList() + // In the non-vulnerable case, no cores should be shutdown so all should remain. 
+ want := tc.testCase.physicalCores * tc.testCase.cores * tc.testCase.threadsPerCore + if tc.isVulnerable { + want = tc.testCase.physicalCores * tc.testCase.cores + } + + if want != len(remaining) { + t.Fatalf("Failed to shutdown the correct number of cores: want: %d got: %d", want, len(remaining)) + } + + if !tc.isVulnerable { + return + } + + // If the set is vulnerable, we expect only 1 thread per hyperthread pair. + for _, r := range remaining { + if _, ok := set[r.id]; !ok { + t.Fatalf("Entry %+v not in map, there must be two entries in the same thread group.", r) + } + delete(set, r.id) + } + }) + } } // TestGetCPU tests basic parsing of single CPU strings from reading @@ -44,15 +181,19 @@ func TestGetCPU(t *testing.T) { vendor_id : GenuineIntel cpu family : 6 model : 85 +physical id: 0 core id : 0 bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa itlb_multihit ` - want := cpu{ + want := thread{ processorNumber: 0, vendorID: "GenuineIntel", cpuFamily: 6, model: 85, - coreID: 0, + id: cpuID{ + physicalID: 0, + coreID: 0, + }, bugs: map[string]struct{}{ "cpu_meltdown": struct{}{}, "spectre_v1": struct{}{}, @@ -66,7 +207,7 @@ bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa }, } - got, err := getCPU(data) + got, err := newThread(data) if err != nil { t.Fatalf("getCpu failed with error: %v", err) } @@ -81,7 +222,7 @@ bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa } func TestInvalid(t *testing.T) { - result, err := getCPUSet(`something not a processor`) + result, err := getThreads(`something not a processor`) if err == nil { t.Fatalf("getCPU set didn't return an error: %+v", result) } @@ -148,7 +289,7 @@ cache_alignment : 64 address sizes : 46 bits physical, 48 bits virtual power management: ` - cpuSet, err := getCPUSet(data) + cpuSet, err := getThreads(data) if err != nil { t.Fatalf("getCPUSet failed: %v", err) } @@ -158,7 +299,7 @@ power management: t.Fatalf("Num CPU mismatch: want: %d, got: %d", wantCPULen, len(cpuSet)) } - wantCPU := cpu{ + wantCPU := thread{ vendorID: "GenuineIntel", cpuFamily: 6, model: 63, @@ -187,7 +328,11 @@ func TestReadFile(t *testing.T) { t.Fatalf("Failed to read cpuinfo: %v", err) } - set, err := getCPUSet(string(data)) + vulnerable := func(t *thread) bool { + return t.isVulnerable() + } + + set, err := newCPUSet(data, vulnerable) if err != nil { t.Fatalf("Failed to parse CPU data %v\n%s", err, data) } @@ -196,9 +341,7 @@ func TestReadFile(t *testing.T) { t.Fatalf("Failed to parse any CPUs: %d", len(set)) } - for _, c := range set { - t.Logf("CPU: %+v: %t", c, c.isVulnerable()) - } + t.Log(set) } // TestVulnerable tests if the isVulnerable method is correct @@ -332,17 +475,13 @@ power management:` cpuString: skylake, vulnerable: true, }, { - name: "cascadeLake", - cpuString: cascade, - vulnerable: false, - }, { name: "amd", cpuString: amd, vulnerable: false, }, } { t.Run(tc.name, func(t *testing.T) { - set, err := getCPUSet(tc.cpuString) + set, err := getThreads(tc.cpuString) if err != nil { t.Fatalf("Failed to getCPUSet:%v\n %s", err, tc.cpuString) } @@ -353,9 +492,6 @@ power management:` for _, c := range set { got := func() bool { - if cascadeLake.similarTo(c) { - return false - } return c.isVulnerable() }() diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 266bc0bdc..7fe65c7ba 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -308,6 +308,22 @@ func (s *Sandbox) Processes(cid string) ([]*control.Process, error) { return 
pl, nil } +// FindCgroup returns the sandbox's Cgroup, or an error if it does not have one. +func (s *Sandbox) FindCgroup() (*cgroup.Cgroup, error) { + paths, err := cgroup.LoadPaths(strconv.Itoa(s.Pid)) + if err != nil { + return nil, err + } + // runsc places sandboxes in the same cgroup for each controller, so we + // pick an arbitrary controller here to get the cgroup path. + const controller = "cpuacct" + controllerPath, ok := paths[controller] + if !ok { + return nil, fmt.Errorf("no %q controller found", controller) + } + return cgroup.NewFromPath(controllerPath) +} + // Execute runs the specified command in the container. It returns the PID of // the newly created process. func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) { @@ -327,7 +343,7 @@ func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) { } // Event retrieves stats about the sandbox such as memory and CPU utilization. -func (s *Sandbox) Event(cid string) (*boot.Event, error) { +func (s *Sandbox) Event(cid string) (*boot.EventOut, error) { log.Debugf("Getting events for container %q in sandbox %q", cid, s.ID) conn, err := s.sandboxConnect() if err != nil { @@ -335,13 +351,13 @@ func (s *Sandbox) Event(cid string) (*boot.Event, error) { } defer conn.Close() - var e boot.Event + var e boot.EventOut // TODO(b/129292330): Pass in the container id (cid) here. The sandbox // should return events only for that container. if err := conn.Call(boot.ContainerEvent, nil, &e); err != nil { return nil, fmt.Errorf("retrieving event data from sandbox: %v", err) } - e.ID = cid + e.Event.ID = cid return &e, nil }
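For reference, the proportional accounting introduced in (*Container).populateStats above can be summarized in a standalone sketch: the host cgroup's cpuacct.usage total is divided among containers in proportion to the sentry's own per-container measurements, an even split is used as the fallback when the sentry reports zero, and float math avoids overflowing uint64 nanosecond counters. This is an illustrative rewrite under those assumptions, not the code from the patch.

package main

import "fmt"

// splitCgroupUsage distributes the cgroup's total CPU usage (cpuacct.usage,
// in nanoseconds) across containers in proportion to the sentry-reported
// per-container usage, the same idea populateStats applies to one container.
func splitCgroupUsage(sentryUsage map[string]uint64, cgroupTotal uint64) map[string]uint64 {
	out := make(map[string]uint64, len(sentryUsage))
	if len(sentryUsage) == 0 {
		return out
	}
	var sum uint64
	for _, u := range sentryUsage {
		sum += u
	}
	if sum == 0 {
		// The sentry reported nothing; split the cgroup total evenly.
		for id := range sentryUsage {
			out[id] = cgroupTotal / uint64(len(sentryUsage))
		}
		return out
	}
	// Multiplying two nanosecond counters overflows uint64 quickly, so scale
	// with floats, as populateStats does.
	scale := float64(cgroupTotal) / float64(sum)
	for id, u := range sentryUsage {
		out[id] = uint64(float64(u) * scale)
	}
	return out
}

func main() {
	// Hypothetical numbers: the busy container gets most of the cgroup time.
	fmt.Println(splitCgroupUsage(map[string]uint64{"sleep": 1e6, "busy": 9e6}, 20e6))
	// map[busy:18000000 sleep:2000000]
}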