From 9f33fe64f221de0eb2a290fd54357c954d9f38f8 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika
Date: Wed, 5 May 2021 17:36:58 -0700
Subject: Fixes to runsc cgroups

When loading cgroups for another process, `/proc/self` was used in a
few places, causing the end state to be a mix of the process and self.
This is now fixed to always use the proper `/proc/[pid]` path.

Added net_prio and net_cls to the list of optional controllers. This
allows runsc to execute when these cgroups are disabled, as long as
there are no net_prio and net_cls limits that need to be applied.

Deflake TestMultiContainerEvent.

Closes #5875
Closes #5887

PiperOrigin-RevId: 372242687
---
 runsc/container/container.go            | 15 +++++----
 runsc/container/multi_container_test.go | 58 ++++++++++++++++++++-------------
 2 files changed, 43 insertions(+), 30 deletions(-)

(limited to 'runsc/container')

diff --git a/runsc/container/container.go b/runsc/container/container.go
index e72ada311..0820edaec 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -233,7 +233,7 @@ func New(conf *config.Config, args Args) (*Container, error) {
 	}
 	// Create and join cgroup before processes are created to ensure they are
 	// part of the cgroup from the start (and all their children processes).
-	cg, err := cgroup.New(args.Spec)
+	cg, err := cgroup.NewFromSpec(args.Spec)
 	if err != nil {
 		return nil, err
 	}
@@ -1132,7 +1132,7 @@
 	// account for the full cgroup CPU usage. We split cgroup usage
 	// proportionally according to the sentry-internal usage measurements,
 	// only counting Running containers.
-	log.Warningf("event.ContainerUsage: %v", event.ContainerUsage)
+	log.Debugf("event.ContainerUsage: %v", event.ContainerUsage)
 	var containerUsage uint64
 	var allContainersUsage uint64
 	for ID, usage := range event.ContainerUsage {
@@ -1142,7 +1142,7 @@
 		}
 	}
 
-	cgroup, err := c.Sandbox.FindCgroup()
+	cgroup, err := c.Sandbox.NewCGroup()
 	if err != nil {
 		// No cgroup, so rely purely on the sentry's accounting.
 		log.Warningf("events: no cgroups")
@@ -1159,17 +1159,18 @@
 		return
 	}
 
-	// If the sentry reports no memory usage, fall back on cgroups and
-	// split usage equally across containers.
+	// If the sentry reports no CPU usage, fall back on cgroups and split usage
+	// equally across containers.
 	if allContainersUsage == 0 {
 		log.Warningf("events: no sentry CPU usage reported")
 		allContainersUsage = cgroupsUsage
 		containerUsage = cgroupsUsage / uint64(len(event.ContainerUsage))
 	}
 
-	log.Warningf("%f, %f, %f", containerUsage, cgroupsUsage, allContainersUsage)
 	// Scaling can easily overflow a uint64 (e.g. a containerUsage and
 	// cgroupsUsage of 16 seconds each will overflow), so use floats.
-	event.Event.Data.CPU.Usage.Total = uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage)))
+	total := float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage))
+	log.Debugf("Usage, container: %d, cgroups: %d, all: %d, total: %.0f", containerUsage, cgroupsUsage, allContainersUsage, total)
+	event.Event.Data.CPU.Usage.Total = uint64(total)
 	return
 }
 
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 37ad7d2e1..0dbe1e323 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -25,6 +25,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/cenkalti/backoff"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/cleanup"
@@ -1917,9 +1918,9 @@
 	}
 	defer cleanup()
 
-	for _, cont := range containers {
-		t.Logf("Running containerd %s", cont.ID)
-	}
+	t.Logf("Running container sleep %s", containers[0].ID)
+	t.Logf("Running container busy %s", containers[1].ID)
+	t.Logf("Running container quick %s", containers[2].ID)
 
 	// Wait for last container to stabilize the process count that is
 	// checked further below.
@@ -1940,50 +1941,61 @@
 	}
 
 	// Check events for running containers.
-	var prevUsage uint64
 	for _, cont := range containers[:2] {
 		ret, err := cont.Event()
 		if err != nil {
-			t.Errorf("Container.Events(): %v", err)
+			t.Errorf("Container.Event(%q): %v", cont.ID, err)
 		}
 		evt := ret.Event
 		if want := "stats"; evt.Type != want {
-			t.Errorf("Wrong event type, want: %s, got: %s", want, evt.Type)
+			t.Errorf("Wrong event type, cid: %q, want: %s, got: %s", cont.ID, want, evt.Type)
 		}
 		if cont.ID != evt.ID {
 			t.Errorf("Wrong container ID, want: %s, got: %s", cont.ID, evt.ID)
 		}
 		// One process per remaining container.
 		if got, want := evt.Data.Pids.Current, uint64(2); got != want {
-			t.Errorf("Wrong number of PIDs, want: %d, got: %d", want, got)
+			t.Errorf("Wrong number of PIDs, cid: %q, want: %d, got: %d", cont.ID, want, got)
 		}
 
-		// Both remaining containers should have nonzero usage, and
-		// 'busy' should have higher usage than 'sleep'.
-		usage := evt.Data.CPU.Usage.Total
-		if usage == 0 {
-			t.Errorf("Running container should report nonzero CPU usage, but got %d", usage)
+		// The exited container should always have a usage of zero.
+		if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 {
+			t.Errorf("Exited container should report 0 CPU usage, got: %d", exited)
+		}
+	}
+
+	// Check that CPU reported by busy container is higher than sleep.
+	cb := func() error {
+		sleepEvt, err := containers[0].Event()
+		if err != nil {
+			return &backoff.PermanentError{Err: err}
 		}
-		if usage <= prevUsage {
-			t.Errorf("Expected container %s to use more than %d ns of CPU, but used %d", cont.ID, prevUsage, usage)
+		sleepUsage := sleepEvt.Event.Data.CPU.Usage.Total
+
+		busyEvt, err := containers[1].Event()
+		if err != nil {
+			return &backoff.PermanentError{Err: err}
 		}
-		t.Logf("Container %s usage: %d", cont.ID, usage)
-		prevUsage = usage
+		busyUsage := busyEvt.Event.Data.CPU.Usage.Total
 
-		// The exited container should have a usage of zero.
-		if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 {
-			t.Errorf("Exited container should report 0 CPU usage, but got %d", exited)
+		if busyUsage <= sleepUsage {
+			t.Logf("Busy container usage lower than sleep (busy: %d, sleep: %d), retrying...", busyUsage, sleepUsage)
+			return fmt.Errorf("Busy container should have higher usage than sleep, busy: %d, sleep: %d", busyUsage, sleepUsage)
 		}
+		return nil
+	}
+	// Give time for busy container to run and use more CPU than sleep.
+	if err := testutil.Poll(cb, 10*time.Second); err != nil {
+		t.Fatal(err)
 	}
 
-	// Check that stop and destroyed containers return error.
+	// Check that stopped and destroyed containers return error.
 	if err := containers[1].Destroy(); err != nil {
 		t.Fatalf("container.Destroy: %v", err)
 	}
 	for _, cont := range containers[1:] {
-		_, err := cont.Event()
-		if err == nil {
-			t.Errorf("Container.Events() should have failed, cid:%s, state: %v", cont.ID, cont.Status)
+		if _, err := cont.Event(); err == nil {
+			t.Errorf("Container.Event() should have failed, cid: %q, state: %v", cont.ID, cont.Status)
 		}
 	}
 }
-- 
cgit v1.2.3
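
The `/proc/self` fix above comes down to parameterizing the pid when reading
cgroup state, so that inspecting another process never mixes in the caller's
own cgroups. A minimal, self-contained sketch of the idea; cgroupPaths is a
hypothetical helper written for illustration, not runsc's actual cgroup API:

package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// cgroupPaths parses /proc/[pid]/cgroup and returns a map of controller
// name -> cgroup path. Taking an explicit pid (instead of hard-coding
// "self") is the essence of the fix: the process being inspected is not
// necessarily the current one.
func cgroupPaths(pid int) (map[string]string, error) {
	f, err := os.Open(fmt.Sprintf("/proc/%d/cgroup", pid))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	paths := make(map[string]string)
	s := bufio.NewScanner(f)
	for s.Scan() {
		// Each line has the form "hierarchy-ID:controller[,controller]:path".
		parts := strings.SplitN(s.Text(), ":", 3)
		if len(parts) != 3 {
			continue
		}
		for _, ctrl := range strings.Split(parts[1], ",") {
			paths[ctrl] = parts[2]
		}
	}
	return paths, s.Err()
}

func main() {
	paths, err := cgroupPaths(os.Getpid())
	if err != nil {
		panic(err)
	}
	fmt.Println(paths)
}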
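
Treating net_prio and net_cls as optional means a missing controller is only
an error when the spec actually sets limits for it. A simplified sketch of
that policy under assumed cgroup v1 mount paths; setupController and the
exact contents of the optional list are illustrative, runsc's real logic
lives in its cgroup package:

package main

import (
	"fmt"
	"os"
)

// optionalControllers may be absent from the kernel without failing
// container start, as long as the spec sets no limits for them.
var optionalControllers = map[string]bool{"net_cls": true, "net_prio": true}

// setupController fails for a missing mandatory controller, but silently
// skips a missing optional one when there is nothing to apply.
func setupController(name string, hasLimits bool) error {
	if _, err := os.Stat("/sys/fs/cgroup/" + name); os.IsNotExist(err) {
		if optionalControllers[name] && !hasLimits {
			return nil // Controller disabled and no limits to apply: skip it.
		}
		return fmt.Errorf("cgroup controller %q is not available", name)
	}
	// ... create the cgroup directory and apply the limits here ...
	return nil
}

func main() {
	fmt.Println(setupController("net_prio", false)) // <nil> when skippable.
}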
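
The populateStats change keeps the proportional CPU split but performs the
arithmetic in float64. A small standalone illustration of why, mirroring the
expression in the diff (the 16-second figure comes from the comment in the
patched code):

package main

import "fmt"

// splitUsage distributes the cgroup-measured CPU total across containers in
// proportion to the sentry-internal measurements. The math is done in
// float64 because computing containerUsage*cgroupsUsage in uint64 overflows
// once both values exceed roughly 16s in nanoseconds (1.6e10 * 1.6e10 > 2^64).
func splitUsage(containerUsage, cgroupsUsage, allContainersUsage uint64) uint64 {
	return uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage)))
}

func main() {
	const sixteenSec = uint64(16_000_000_000) // 16s in ns.
	// Naive uint64 math would evaluate the product first and overflow.
	fmt.Println(splitUsage(sixteenSec, sixteenSec, 2*sixteenSec)) // 8000000000
}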
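
The test deflake replaces a one-shot busy-vs-sleep CPU assertion with
polling: the comparison is retried until it holds or a timeout expires, and
hard failures short-circuit via backoff.PermanentError. A sketch of that
pattern with assumed testutil.Poll-like semantics; pollUntil is a stand-in
written for illustration, not gVisor's testutil API:

package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/cenkalti/backoff"
)

// pollUntil retries cb with exponential backoff until it returns nil, the
// timeout elapses, or cb returns a *backoff.PermanentError.
func pollUntil(cb func() error, timeout time.Duration) error {
	b := backoff.NewExponentialBackOff()
	b.MaxElapsedTime = timeout
	return backoff.Retry(cb, b)
}

func main() {
	ready := time.Now().Add(50 * time.Millisecond)
	err := pollUntil(func() error {
		if time.Now().Before(ready) {
			return errors.New("not ready yet") // Transient: will be retried.
		}
		return nil
	}, time.Second)
	fmt.Println("poll result:", err)
}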