diff options
author | Kevin Krakauer <krakauer@google.com> | 2021-02-02 12:45:25 -0800 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-02-02 12:47:23 -0800 |
commit | 5f7bf3152652d36903f9659688321ae7c42995d0 (patch) | |
tree | 374c56830303dc412894baa4edfd04bcda4cda74 /runsc/container | |
parent | f884ea13b713143ff9978092ddb352c159346167 (diff) |
Stub out basic `runsc events --stats` CPU functionality
Because we lack gVisor-internal cgroups, we take the CPU usage of the entire pod
and divide it proportionally according to sentry-internal usage stats.
This fixes `kubectl top pods`, which gets a pod's CPU usage by summing the usage
of its containers.
Addresses #172.
PiperOrigin-RevId: 355229833
Diffstat (limited to 'runsc/container')
-rw-r--r-- | runsc/container/container.go | 63 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 61 | ||||
-rw-r--r-- | runsc/container/state_file.go | 2 |
3 files changed, 103 insertions, 23 deletions
diff --git a/runsc/container/container.go b/runsc/container/container.go index 5a0f8d5dc..aae64ae1c 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -486,12 +486,20 @@ func (c *Container) Execute(args *control.ExecArgs) (int32, error) { } // Event returns events for the container. -func (c *Container) Event() (*boot.Event, error) { +func (c *Container) Event() (*boot.EventOut, error) { log.Debugf("Getting events for container, cid: %s", c.ID) if err := c.requireStatus("get events for", Created, Running, Paused); err != nil { return nil, err } - return c.Sandbox.Event(c.ID) + event, err := c.Sandbox.Event(c.ID) + if err != nil { + return nil, err + } + + // Some stats can utilize host cgroups for accuracy. + c.populateStats(event) + + return event, nil } // SandboxPid returns the Pid of the sandbox the container is running in, or -1 if the @@ -1110,3 +1118,54 @@ func setOOMScoreAdj(pid int, scoreAdj int) error { } return nil } + +// populateStats populates event with stats estimates based on cgroups and the +// sentry's accounting. +// TODO(gvisor.dev/issue/172): This is an estimation; we should do more +// detailed accounting. +func (c *Container) populateStats(event *boot.EventOut) { + // The events command, when run for all running containers, should + // account for the full cgroup CPU usage. We split cgroup usage + // proportionally according to the sentry-internal usage measurements, + // only counting Running containers. + log.Warningf("event.ContainerUsage: %v", event.ContainerUsage) + var containerUsage uint64 + var allContainersUsage uint64 + for ID, usage := range event.ContainerUsage { + allContainersUsage += usage + if ID == c.ID { + containerUsage = usage + } + } + + cgroup, err := c.Sandbox.FindCgroup() + if err != nil { + // No cgroup, so rely purely on the sentry's accounting. + log.Warningf("events: no cgroups") + event.Event.Data.CPU.Usage.Total = containerUsage + return + } + + // Get the host cgroup CPU usage. 
+ cgroupsUsage, err := cgroup.CPUUsage() + if err != nil { + // No cgroup usage, so rely purely on the sentry's accounting. + log.Warningf("events: failed when getting cgroup CPU usage for container: %v", err) + event.Event.Data.CPU.Usage.Total = containerUsage + return + } + + // If the sentry reports no CPU usage, fall back on cgroups and + // split usage equally across containers. + if allContainersUsage == 0 { + log.Warningf("events: no sentry CPU usage reported") + allContainersUsage = cgroupsUsage + containerUsage = cgroupsUsage / uint64(len(event.ContainerUsage)) + } + + log.Warningf("%f, %f, %f", containerUsage, cgroupsUsage, allContainersUsage) + // Scaling can easily overflow a uint64 (e.g. a containerUsage and + // cgroupsUsage of 16 seconds each will overflow), so use floats. + event.Event.Data.CPU.Usage.Total = uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage))) + return +} diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 75fdcf4cc..173332cc2 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -15,7 +15,6 @@ package container import ( - "encoding/json" "fmt" "io/ioutil" "math" @@ -322,8 +321,8 @@ func TestMultiContainerWait(t *testing.T) { } } -// TestExecWait ensures what we can wait containers and individual processes in the -// sandbox that have already exited. +// TestExecWait ensures that we can wait on containers and individual processes +// in the sandbox that have already exited. func TestExecWait(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -1743,8 +1742,9 @@ func TestMultiContainerEvent(t *testing.T) { // Setup the containers. 
sleep := []string{"/bin/sleep", "100"} + busy := []string{"/bin/bash", "-c", "i=0 ; while true ; do (( i += 1 )) ; done"} quick := []string{"/bin/true"} - podSpec, ids := createSpecs(sleep, sleep, quick) + podSpec, ids := createSpecs(sleep, busy, quick) containers, cleanup, err := startContainers(conf, podSpec, ids) if err != nil { t.Fatalf("error starting containers: %v", err) @@ -1755,37 +1755,58 @@ func TestMultiContainerEvent(t *testing.T) { t.Logf("Running containerd %s", cont.ID) } - // Wait for last container to stabilize the process count that is checked - // further below. + // Wait for last container to stabilize the process count that is + // checked further below. if ws, err := containers[2].Wait(); err != nil || ws != 0 { t.Fatalf("Container.Wait, status: %v, err: %v", ws, err) } + expectedPL := []*control.Process{ + newProcessBuilder().Cmd("sleep").Process(), + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + newProcessBuilder().Cmd("bash").Process(), + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for bash to start: %v", err) + } // Check events for running containers. + var prevUsage uint64 for _, cont := range containers[:2] { - evt, err := cont.Event() + ret, err := cont.Event() if err != nil { t.Errorf("Container.Events(): %v", err) } + evt := ret.Event if want := "stats"; evt.Type != want { - t.Errorf("Wrong event type, want: %s, got :%s", want, evt.Type) + t.Errorf("Wrong event type, want: %s, got: %s", want, evt.Type) } if cont.ID != evt.ID { - t.Errorf("Wrong container ID, want: %s, got :%s", cont.ID, evt.ID) + t.Errorf("Wrong container ID, want: %s, got: %s", cont.ID, evt.ID) } - // Event.Data is an interface, so it comes from the wire was - // map[string]string. Marshal and unmarshall again to the correc type. 
- data, err := json.Marshal(evt.Data) - if err != nil { - t.Fatalf("invalid event data: %v", err) + // One process per remaining container. + if got, want := evt.Data.Pids.Current, uint64(2); got != want { + t.Errorf("Wrong number of PIDs, want: %d, got: %d", want, got) } - var stats boot.Stats - if err := json.Unmarshal(data, &stats); err != nil { - t.Fatalf("invalid event data: %v", err) + + // Both remaining containers should have nonzero usage, and + // 'busy' should have higher usage than 'sleep'. + usage := evt.Data.CPU.Usage.Total + if usage == 0 { + t.Errorf("Running container should report nonzero CPU usage, but got %d", usage) } - // One process per remaining container. - if want := uint64(2); stats.Pids.Current != want { - t.Errorf("Wrong number of PIDs, want: %d, got :%d", want, stats.Pids.Current) + if usage <= prevUsage { + t.Errorf("Expected container %s to use more than %d ns of CPU, but used %d", cont.ID, prevUsage, usage) + } + t.Logf("Container %s usage: %d", cont.ID, usage) + prevUsage = usage + + // The exited container should have a usage of zero. + if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 { + t.Errorf("Exited container should report 0 CPU usage, but got %d", exited) } } diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go index dfbf1f2d3..c46322ba4 100644 --- a/runsc/container/state_file.go +++ b/runsc/container/state_file.go @@ -49,7 +49,7 @@ type LoadOpts struct { // Returns ErrNotExist if no container is found. Returns error in case more than // one containers matching the ID prefix is found. func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) { - //log.Debugf("Load container, rootDir: %q, partial cid: %s", rootDir, partialID) + log.Debugf("Load container, rootDir: %q, id: %+v, opts: %+v", rootDir, id, opts) if !opts.Exact { var err error id, err = findContainerID(rootDir, id.ContainerID) |