From 9f33fe64f221de0eb2a290fd54357c954d9f38f8 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika
Date: Wed, 5 May 2021 17:36:58 -0700
Subject: Fixes to runsc cgroups

When loading cgroups for another process, `/proc/self` was used in a
few places, causing the end state to be a mix of the process and self.
This is now fixed to always use the proper `/proc/[pid]` path.

Added net_prio and net_cls to the list of optional controllers. This
allows runsc to execute when these cgroups are disabled, as long as
there are no net_prio and net_cls limits that need to be applied.

Deflake TestMultiContainerEvent.

Closes #5875
Closes #5887

PiperOrigin-RevId: 372242687
---
 runsc/container/container.go            | 15 +++++----
 runsc/container/multi_container_test.go | 58 ++++++++++++++++++++-------------
 2 files changed, 43 insertions(+), 30 deletions(-)

(limited to 'runsc/container')

diff --git a/runsc/container/container.go b/runsc/container/container.go
index e72ada311..0820edaec 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -233,7 +233,7 @@ func New(conf *config.Config, args Args) (*Container, error) {
 	}
 	// Create and join cgroup before processes are created to ensure they are
 	// part of the cgroup from the start (and all their children processes).
-	cg, err := cgroup.New(args.Spec)
+	cg, err := cgroup.NewFromSpec(args.Spec)
 	if err != nil {
 		return nil, err
 	}
@@ -1132,7 +1132,7 @@
 	// account for the full cgroup CPU usage. We split cgroup usage
 	// proportionally according to the sentry-internal usage measurements,
 	// only counting Running containers.
-	log.Warningf("event.ContainerUsage: %v", event.ContainerUsage)
+	log.Debugf("event.ContainerUsage: %v", event.ContainerUsage)
 	var containerUsage uint64
 	var allContainersUsage uint64
 	for ID, usage := range event.ContainerUsage {
@@ -1142,7 +1142,7 @@
 		}
 	}
 
-	cgroup, err := c.Sandbox.FindCgroup()
+	cgroup, err := c.Sandbox.NewCGroup()
 	if err != nil {
 		// No cgroup, so rely purely on the sentry's accounting.
 		log.Warningf("events: no cgroups")
@@ -1159,17 +1159,18 @@
 		return
 	}
 
-	// If the sentry reports no memory usage, fall back on cgroups and
-	// split usage equally across containers.
+	// If the sentry reports no CPU usage, fall back on cgroups and split usage
+	// equally across containers.
 	if allContainersUsage == 0 {
 		log.Warningf("events: no sentry CPU usage reported")
 		allContainersUsage = cgroupsUsage
 		containerUsage = cgroupsUsage / uint64(len(event.ContainerUsage))
 	}
 
-	log.Warningf("%f, %f, %f", containerUsage, cgroupsUsage, allContainersUsage)
 	// Scaling can easily overflow a uint64 (e.g. a containerUsage and
 	// cgroupsUsage of 16 seconds each will overflow), so use floats.
-	event.Event.Data.CPU.Usage.Total = uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage)))
+	total := float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage))
+	log.Debugf("Usage, container: %d, cgroups: %d, all: %d, total: %.0f", containerUsage, cgroupsUsage, allContainersUsage, total)
+	event.Event.Data.CPU.Usage.Total = uint64(total)
 	return
 }
 
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 37ad7d2e1..0dbe1e323 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -25,6 +25,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/cenkalti/backoff"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/cleanup"
@@ -1917,9 +1918,9 @@
 	}
 	defer cleanup()
 
-	for _, cont := range containers {
-		t.Logf("Running containerd %s", cont.ID)
-	}
+	t.Logf("Running container sleep %s", containers[0].ID)
+	t.Logf("Running container busy %s", containers[1].ID)
+	t.Logf("Running container quick %s", containers[2].ID)
 
 	// Wait for last container to stabilize the process count that is
 	// checked further below.
@@ -1940,50 +1941,61 @@
 	}
 
 	// Check events for running containers.
-	var prevUsage uint64
 	for _, cont := range containers[:2] {
 		ret, err := cont.Event()
 		if err != nil {
-			t.Errorf("Container.Events(): %v", err)
+			t.Errorf("Container.Event(%q): %v", cont.ID, err)
 		}
 		evt := ret.Event
 		if want := "stats"; evt.Type != want {
-			t.Errorf("Wrong event type, want: %s, got: %s", want, evt.Type)
+			t.Errorf("Wrong event type, cid: %q, want: %s, got: %s", cont.ID, want, evt.Type)
 		}
 		if cont.ID != evt.ID {
 			t.Errorf("Wrong container ID, want: %s, got: %s", cont.ID, evt.ID)
 		}
 		// One process per remaining container.
 		if got, want := evt.Data.Pids.Current, uint64(2); got != want {
-			t.Errorf("Wrong number of PIDs, want: %d, got: %d", want, got)
+			t.Errorf("Wrong number of PIDs, cid: %q, want: %d, got: %d", cont.ID, want, got)
 		}
 
-		// Both remaining containers should have nonzero usage, and
-		// 'busy' should have higher usage than 'sleep'.
-		usage := evt.Data.CPU.Usage.Total
-		if usage == 0 {
-			t.Errorf("Running container should report nonzero CPU usage, but got %d", usage)
+		// The exited container should always have a usage of zero.
+		if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 {
+			t.Errorf("Exited container should report 0 CPU usage, got: %d", exited)
+		}
+	}
+
+	// Check that CPU reported by busy container is higher than sleep.
+	cb := func() error {
+		sleepEvt, err := containers[0].Event()
+		if err != nil {
+			return &backoff.PermanentError{Err: err}
 		}
-		if usage <= prevUsage {
-			t.Errorf("Expected container %s to use more than %d ns of CPU, but used %d", cont.ID, prevUsage, usage)
+		sleepUsage := sleepEvt.Event.Data.CPU.Usage.Total
+
+		busyEvt, err := containers[1].Event()
+		if err != nil {
+			return &backoff.PermanentError{Err: err}
 		}
-		t.Logf("Container %s usage: %d", cont.ID, usage)
-		prevUsage = usage
+		busyUsage := busyEvt.Event.Data.CPU.Usage.Total
 
-		// The exited container should have a usage of zero.
-		if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 {
-			t.Errorf("Exited container should report 0 CPU usage, but got %d", exited)
+		if busyUsage <= sleepUsage {
+			t.Logf("Busy container usage lower than sleep (busy: %d, sleep: %d), retrying...", busyUsage, sleepUsage)
+			return fmt.Errorf("Busy container should have higher usage than sleep, busy: %d, sleep: %d", busyUsage, sleepUsage)
 		}
+		return nil
+	}
+	// Give time for busy container to run and use more CPU than sleep.
+	if err := testutil.Poll(cb, 10*time.Second); err != nil {
+		t.Fatal(err)
 	}
 
-	// Check that stop and destroyed containers return error.
+	// Check that stopped and destroyed containers return error.
 	if err := containers[1].Destroy(); err != nil {
 		t.Fatalf("container.Destroy: %v", err)
 	}
 	for _, cont := range containers[1:] {
-		_, err := cont.Event()
-		if err == nil {
-			t.Errorf("Container.Events() should have failed, cid:%s, state: %v", cont.ID, cont.Status)
+		if _, err := cont.Event(); err == nil {
+			t.Errorf("Container.Event() should have failed, cid: %q, state: %v", cont.ID, cont.Status)
 		}
 	}
 }
-- 
cgit v1.2.3
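
The `/proc/self` fix above comes down to parameterizing the pid when reading
cgroup state, so that inspecting another process never mixes in the caller's
own cgroups. A minimal, self-contained sketch of the idea; cgroupPaths is a
hypothetical helper written for illustration, not runsc's actual cgroup API:

package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// cgroupPaths parses /proc/[pid]/cgroup and returns a map of controller
// name -> cgroup path. Taking an explicit pid (instead of hard-coding
// "self") is the essence of the fix: the process being inspected is not
// necessarily the current one.
func cgroupPaths(pid int) (map[string]string, error) {
	f, err := os.Open(fmt.Sprintf("/proc/%d/cgroup", pid))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	paths := make(map[string]string)
	s := bufio.NewScanner(f)
	for s.Scan() {
		// Each line has the form "hierarchy-ID:controller[,controller]:path".
		parts := strings.SplitN(s.Text(), ":", 3)
		if len(parts) != 3 {
			continue
		}
		for _, ctrl := range strings.Split(parts[1], ",") {
			paths[ctrl] = parts[2]
		}
	}
	return paths, s.Err()
}

func main() {
	paths, err := cgroupPaths(os.Getpid())
	if err != nil {
		panic(err)
	}
	fmt.Println(paths)
}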
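
Treating net_prio and net_cls as optional means a missing controller is only
an error when the spec actually sets limits for it. A simplified sketch of
that policy under assumed cgroup v1 mount paths; setupController and the
exact contents of the optional list are illustrative, runsc's real logic
lives in its cgroup package:

package main

import (
	"fmt"
	"os"
)

// optionalControllers may be absent from the kernel without failing
// container start, as long as the spec sets no limits for them.
var optionalControllers = map[string]bool{"net_cls": true, "net_prio": true}

// setupController fails for a missing mandatory controller, but silently
// skips a missing optional one when there is nothing to apply.
func setupController(name string, hasLimits bool) error {
	if _, err := os.Stat("/sys/fs/cgroup/" + name); os.IsNotExist(err) {
		if optionalControllers[name] && !hasLimits {
			return nil // Controller disabled and no limits to apply: skip it.
		}
		return fmt.Errorf("cgroup controller %q is not available", name)
	}
	// ... create the cgroup directory and apply the limits here ...
	return nil
}

func main() {
	fmt.Println(setupController("net_prio", false)) // <nil> when skippable.
}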
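
The populateStats change keeps the proportional CPU split but performs the
arithmetic in float64. A small standalone illustration of why, mirroring the
expression in the diff (the 16-second figure comes from the comment in the
patched code):

package main

import "fmt"

// splitUsage distributes the cgroup-measured CPU total across containers in
// proportion to the sentry-internal measurements. The math is done in
// float64 because computing containerUsage*cgroupsUsage in uint64 overflows
// once both values exceed roughly 16s in nanoseconds (1.6e10 * 1.6e10 > 2^64).
func splitUsage(containerUsage, cgroupsUsage, allContainersUsage uint64) uint64 {
	return uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage)))
}

func main() {
	const sixteenSec = uint64(16_000_000_000) // 16s in ns.
	// Naive uint64 math would evaluate the product first and overflow.
	fmt.Println(splitUsage(sixteenSec, sixteenSec, 2*sixteenSec)) // 8000000000
}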
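
The test deflake replaces a one-shot busy-vs-sleep CPU assertion with
polling: the comparison is retried until it holds or a timeout expires, and
hard failures short-circuit via backoff.PermanentError. A sketch of that
pattern with assumed testutil.Poll-like semantics; pollUntil is a stand-in
written for illustration, not gVisor's testutil API:

package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/cenkalti/backoff"
)

// pollUntil retries cb with exponential backoff until it returns nil, the
// timeout elapses, or cb returns a *backoff.PermanentError.
func pollUntil(cb func() error, timeout time.Duration) error {
	b := backoff.NewExponentialBackOff()
	b.MaxElapsedTime = timeout
	return backoff.Retry(cb, b)
}

func main() {
	ready := time.Now().Add(50 * time.Millisecond)
	err := pollUntil(func() error {
		if time.Now().Before(ready) {
			return errors.New("not ready yet") // Transient: will be retried.
		}
		return nil
	}, time.Second)
	fmt.Println("poll result:", err)
}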