path: root/runsc
Diffstat (limited to 'runsc')
-rw-r--r--  runsc/boot/events.go                     |  61
-rw-r--r--  runsc/boot/loader.go                     |   2
-rw-r--r--  runsc/cgroup/cgroup.go                   |  63
-rw-r--r--  runsc/cgroup/cgroup_test.go              | 127
-rw-r--r--  runsc/cmd/events.go                      |   4
-rw-r--r--  runsc/container/console_test.go          |   6
-rw-r--r--  runsc/container/container.go             |  63
-rw-r--r--  runsc/container/container_test.go        |  45
-rw-r--r--  runsc/container/multi_container_test.go  |  81
-rw-r--r--  runsc/container/state_file.go            |   2
-rw-r--r--  runsc/mitigate/BUILD                     |   2
-rw-r--r--  runsc/mitigate/cpu.go                    | 192
-rw-r--r--  runsc/mitigate/cpu_test.go               | 202
-rw-r--r--  runsc/sandbox/sandbox.go                 |  22
14 files changed, 708 insertions, 164 deletions
diff --git a/runsc/boot/events.go b/runsc/boot/events.go
index 422f4da00..0814b2a69 100644
--- a/runsc/boot/events.go
+++ b/runsc/boot/events.go
@@ -15,21 +15,30 @@
package boot
import (
- "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/usage"
)
+// EventOut is the return type of the Event command.
+type EventOut struct {
+ Event Event `json:"event"`
+
+ // ContainerUsage maps each container ID to its total CPU usage.
+ ContainerUsage map[string]uint64 `json:"containerUsage"`
+}
+
// Event struct for encoding the event data to JSON. Corresponds to runc's
// main.event struct.
type Event struct {
- Type string `json:"type"`
- ID string `json:"id"`
- Data interface{} `json:"data,omitempty"`
+ Type string `json:"type"`
+ ID string `json:"id"`
+ Data Stats `json:"data"`
}
// Stats is the runc specific stats structure for stability when encoding and
// decoding stats.
type Stats struct {
+ CPU CPU `json:"cpu"`
Memory Memory `json:"memory"`
Pids Pids `json:"pids"`
}
@@ -58,24 +67,42 @@ type Memory struct {
Raw map[string]uint64 `json:"raw,omitempty"`
}
-// Event gets the events from the container.
-func (cm *containerManager) Event(_ *struct{}, out *Event) error {
- stats := &Stats{}
- stats.populateMemory(cm.l.k)
- stats.populatePIDs(cm.l.k)
- *out = Event{Type: "stats", Data: stats}
- return nil
+// CPU contains stats on the CPU.
+type CPU struct {
+ Usage CPUUsage `json:"usage"`
+}
+
+// CPUUsage contains stats on CPU usage.
+type CPUUsage struct {
+ Kernel uint64 `json:"kernel,omitempty"`
+ User uint64 `json:"user,omitempty"`
+ Total uint64 `json:"total,omitempty"`
+ PerCPU []uint64 `json:"percpu,omitempty"`
}
-func (s *Stats) populateMemory(k *kernel.Kernel) {
- mem := k.MemoryFile()
+// Event gets the events from the container.
+func (cm *containerManager) Event(_ *struct{}, out *EventOut) error {
+ *out = EventOut{
+ Event: Event{
+ Type: "stats",
+ },
+ }
+
+ // Memory usage.
+ // TODO(gvisor.dev/issue/172): Per-container accounting.
+ mem := cm.l.k.MemoryFile()
mem.UpdateUsage()
_, totalUsage := usage.MemoryAccounting.Copy()
- s.Memory.Usage = MemoryEntry{
+ out.Event.Data.Memory.Usage = MemoryEntry{
Usage: totalUsage,
}
-}
-func (s *Stats) populatePIDs(k *kernel.Kernel) {
- s.Pids.Current = uint64(len(k.TaskSet().Root.ThreadGroups()))
+ // PIDs.
+ // TODO(gvisor.dev/issue/172): Per-container accounting.
+ out.Event.Data.Pids.Current = uint64(len(cm.l.k.TaskSet().Root.ThreadGroups()))
+
+ // CPU usage by container.
+ out.ContainerUsage = control.ContainerUsage(cm.l.k)
+
+ return nil
}
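
As a reading aid (not part of the change itself), here is a minimal sketch of the JSON shape the new EventOut encodes to and how a client might decode it. The field names come from the struct tags above; the program scaffolding and the sample payload are purely illustrative.

package main

import (
	"encoding/json"
	"fmt"
)

// eventOut mirrors the wire shape of boot.EventOut, for illustration only.
type eventOut struct {
	Event struct {
		Type string `json:"type"`
		ID   string `json:"id"`
		Data struct {
			CPU struct {
				Usage struct {
					Total uint64 `json:"total,omitempty"`
				} `json:"usage"`
			} `json:"cpu"`
		} `json:"data"`
	} `json:"event"`
	ContainerUsage map[string]uint64 `json:"containerUsage"`
}

func main() {
	raw := []byte(`{"event":{"type":"stats","id":"ctr1","data":{"cpu":{"usage":{"total":123}}}},"containerUsage":{"ctr1":123}}`)
	var out eventOut
	if err := json.Unmarshal(raw, &out); err != nil {
		panic(err)
	}
	fmt.Println(out.Event.Type, out.Event.Data.CPU.Usage.Total, out.ContainerUsage["ctr1"])
}

Note that the `runsc events` command (see the cmd/events.go hunk below) marshals only the inner Event, so the CLI output stays runc-compatible while ContainerUsage travels alongside it in EventOut.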
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index d37528ee7..77a7c530b 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -102,7 +102,7 @@ type containerInfo struct {
goferFDs []*fd.FD
}
-// Loader keeps state needed to start the kernel and run the container..
+// Loader keeps state needed to start the kernel and run the container.
type Loader struct {
// k is the kernel.
k *kernel.Kernel
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 13c6a16a0..797c1c2bc 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -203,6 +203,19 @@ func LoadPaths(pid string) (map[string]string, error) {
}
func loadPathsHelper(cgroup io.Reader) (map[string]string, error) {
+ // For nested containers, /proc/self/cgroup shows paths from the host,
+ // which don't exist inside the container, so recover the container paths
+ // here by cross-checking against /proc/self/mountinfo.
+ mountinfo, err := os.Open("/proc/self/mountinfo")
+ if err != nil {
+ return nil, err
+ }
+ defer mountinfo.Close()
+
+ return loadPathsHelperWithMountinfo(cgroup, mountinfo)
+}
+
+func loadPathsHelperWithMountinfo(cgroup, mountinfo io.Reader) (map[string]string, error) {
paths := make(map[string]string)
scanner := bufio.NewScanner(cgroup)
@@ -225,6 +238,31 @@ func loadPathsHelper(cgroup io.Reader) (map[string]string, error) {
if err := scanner.Err(); err != nil {
return nil, err
}
+
+ mfScanner := bufio.NewScanner(mountinfo)
+ for mfScanner.Scan() {
+ txt := mfScanner.Text()
+ fields := strings.Fields(txt)
+ if len(fields) < 9 || fields[len(fields)-3] != "cgroup" {
+ continue
+ }
+ for _, opt := range strings.Split(fields[len(fields)-1], ",") {
+ // Remove the prefix for cgroups with no controller, e.g. systemd.
+ opt = strings.TrimPrefix(opt, "name=")
+ if cgroupPath, ok := paths[opt]; ok {
+ root := fields[3]
+ relCgroupPath, err := filepath.Rel(root, cgroupPath)
+ if err != nil {
+ return nil, err
+ }
+ paths[opt] = relCgroupPath
+ }
+ }
+ }
+ if err := mfScanner.Err(); err != nil {
+ return nil, err
+ }
+
return paths, nil
}
@@ -243,8 +281,13 @@ func New(spec *specs.Spec) (*Cgroup, error) {
if spec.Linux == nil || spec.Linux.CgroupsPath == "" {
return nil, nil
}
+ return NewFromPath(spec.Linux.CgroupsPath)
+}
+
+// NewFromPath creates a new Cgroup instance.
+func NewFromPath(cgroupsPath string) (*Cgroup, error) {
var parents map[string]string
- if !filepath.IsAbs(spec.Linux.CgroupsPath) {
+ if !filepath.IsAbs(cgroupsPath) {
var err error
parents, err = LoadPaths("self")
if err != nil {
@@ -253,7 +296,7 @@ func New(spec *specs.Spec) (*Cgroup, error) {
}
own := make(map[string]bool)
return &Cgroup{
- Name: spec.Linux.CgroupsPath,
+ Name: cgroupsPath,
Parents: parents,
Own: own,
}, nil
@@ -351,6 +394,9 @@ func (c *Cgroup) Join() (func(), error) {
undo = func() {
for _, path := range undoPaths {
log.Debugf("Restoring cgroup %q", path)
+ // Writing the value 0 to a cgroup.procs file causes
+ // the writing process to be moved to the corresponding
+ // cgroup. - cgroups(7).
if err := setValue(path, "cgroup.procs", "0"); err != nil {
log.Warningf("Error restoring cgroup %q: %v", path, err)
}
@@ -361,6 +407,9 @@ func (c *Cgroup) Join() (func(), error) {
for key, cfg := range controllers {
path := c.makePath(key)
log.Debugf("Joining cgroup %q", path)
+ // Writing the value 0 to a cgroup.procs file causes the
+ // writing process to be moved to the corresponding cgroup.
+ // - cgroups(7).
if err := setValue(path, "cgroup.procs", "0"); err != nil {
if cfg.optional && os.IsNotExist(err) {
continue
@@ -388,6 +437,16 @@ func (c *Cgroup) CPUQuota() (float64, error) {
return float64(quota) / float64(period), nil
}
+// CPUUsage returns the total CPU usage of the cgroup.
+func (c *Cgroup) CPUUsage() (uint64, error) {
+ path := c.makePath("cpuacct")
+ usage, err := getValue(path, "cpuacct.usage")
+ if err != nil {
+ return 0, err
+ }
+ return strconv.ParseUint(strings.TrimSpace(usage), 10, 64)
+}
+
// NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'.
func (c *Cgroup) NumCPU() (int, error) {
path := c.makePath("cpuset")
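
To make the new mountinfo handling easier to follow, here is a standalone sketch of how a single /proc/self/mountinfo line is split, mirroring loadPathsHelperWithMountinfo: fields[3] is the mount root inside the cgroup hierarchy, the third-to-last field must be "cgroup", and the last field lists the controller options. The sample line is borrowed from the docker-in-docker fixture in the tests below; the paths map is a made-up stand-in for what was parsed from /proc/self/cgroup.

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func main() {
	// One cgroup mount line as seen inside a nested (docker-in-docker) container.
	line := "1307 1305 0:36 /docker/136 /sys/fs/cgroup/cpu,cpuacct ro master:16 - cgroup cgroup rw,cpu,cpuacct"
	// Controller paths as parsed from /proc/self/cgroup, before the mountinfo fix-up.
	paths := map[string]string{"cpu": "/docker/136", "cpuacct": "/docker/136"}

	fields := strings.Fields(line)
	if len(fields) < 9 || fields[len(fields)-3] != "cgroup" {
		return // not a cgroup mount
	}
	root := fields[3] // "/docker/136": the host path mounted at /sys/fs/cgroup/cpu,cpuacct
	for _, opt := range strings.Split(fields[len(fields)-1], ",") {
		opt = strings.TrimPrefix(opt, "name=")
		cgroupPath, ok := paths[opt]
		if !ok {
			continue // options like "rw" are not tracked controllers
		}
		rel, err := filepath.Rel(root, cgroupPath)
		if err != nil {
			continue
		}
		fmt.Printf("%s -> %s\n", opt, rel) // cpu -> . and cpuacct -> .
	}
}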
diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go
index 931144cf9..48d71cfa6 100644
--- a/runsc/cgroup/cgroup_test.go
+++ b/runsc/cgroup/cgroup_test.go
@@ -25,6 +25,39 @@ import (
"gvisor.dev/gvisor/pkg/test/testutil"
)
+var debianMountinfo = `
+35 24 0:30 / /sys/fs/cgroup ro shared:9 - tmpfs tmpfs ro
+36 35 0:31 / /sys/fs/cgroup/unified rw shared:10 - cgroup2 cgroup2 rw
+37 35 0:32 / /sys/fs/cgroup/systemd rw - cgroup cgroup rw,name=systemd
+41 35 0:36 / /sys/fs/cgroup/cpu,cpuacct rw shared:16 - cgroup cgroup rw,cpu,cpuacct
+42 35 0:37 / /sys/fs/cgroup/freezer rw shared:17 - cgroup cgroup rw,freezer
+43 35 0:38 / /sys/fs/cgroup/hugetlb rw shared:18 - cgroup cgroup rw,hugetlb
+44 35 0:39 / /sys/fs/cgroup/cpuset rw shared:19 - cgroup cgroup rw,cpuset
+45 35 0:40 / /sys/fs/cgroup/net_cls,net_prio rw shared:20 - cgroup cgroup rw,net_cls,net_prio
+46 35 0:41 / /sys/fs/cgroup/pids rw shared:21 - cgroup cgroup rw,pids
+47 35 0:42 / /sys/fs/cgroup/perf_event rw shared:22 - cgroup cgroup rw,perf_event
+48 35 0:43 / /sys/fs/cgroup/memory rw shared:23 - cgroup cgroup rw,memory
+49 35 0:44 / /sys/fs/cgroup/blkio rw shared:24 - cgroup cgroup rw,blkio
+50 35 0:45 / /sys/fs/cgroup/devices rw shared:25 - cgroup cgroup rw,devices
+51 35 0:46 / /sys/fs/cgroup/rdma rw shared:26 - cgroup cgroup rw,rdma
+`
+
+var dindMountinfo = `
+1305 1304 0:64 / /sys/fs/cgroup rw - tmpfs tmpfs rw,mode=755
+1306 1305 0:32 /docker/136 /sys/fs/cgroup/systemd ro master:11 - cgroup cgroup rw,xattr,name=systemd
+1307 1305 0:36 /docker/136 /sys/fs/cgroup/cpu,cpuacct ro master:16 - cgroup cgroup rw,cpu,cpuacct
+1308 1305 0:37 /docker/136 /sys/fs/cgroup/freezer ro master:17 - cgroup cgroup rw,freezer
+1309 1305 0:38 /docker/136 /sys/fs/cgroup/hugetlb ro master:18 - cgroup cgroup rw,hugetlb
+1310 1305 0:39 /docker/136 /sys/fs/cgroup/cpuset ro master:19 - cgroup cgroup rw,cpuset
+1311 1305 0:40 /docker/136 /sys/fs/cgroup/net_cls,net_prio ro master:20 - cgroup cgroup rw,net_cls,net_prio
+1312 1305 0:41 /docker/136 /sys/fs/cgroup/pids ro master:21 - cgroup cgroup rw,pids
+1313 1305 0:42 /docker/136 /sys/fs/cgroup/perf_event ro master:22 - cgroup cgroup rw,perf_event
+1314 1305 0:43 /docker/136 /sys/fs/cgroup/memory ro master:23 - cgroup cgroup rw,memory
+1316 1305 0:44 /docker/136 /sys/fs/cgroup/blkio ro master:24 - cgroup cgroup rw,blkio
+1317 1305 0:45 /docker/136 /sys/fs/cgroup/devices ro master:25 - cgroup cgroup rw,devices
+1318 1305 0:46 / /sys/fs/cgroup/rdma ro master:26 - cgroup cgroup rw,rdma
+`
+
func TestUninstallEnoent(t *testing.T) {
c := Cgroup{
// set a non-existent name
@@ -653,60 +686,110 @@ func TestPids(t *testing.T) {
func TestLoadPaths(t *testing.T) {
for _, tc := range []struct {
- name string
- cgroups string
- want map[string]string
- err string
+ name string
+ cgroups string
+ mountinfo string
+ want map[string]string
+ err string
}{
{
- name: "abs-path",
- cgroups: "0:ctr:/path",
- want: map[string]string{"ctr": "/path"},
+ name: "abs-path-unknown-controller",
+ cgroups: "0:ctr:/path",
+ mountinfo: debianMountinfo,
+ want: map[string]string{"ctr": "/path"},
},
{
- name: "rel-path",
- cgroups: "0:ctr:rel-path",
- want: map[string]string{"ctr": "rel-path"},
+ name: "rel-path",
+ cgroups: "0:ctr:rel-path",
+ mountinfo: debianMountinfo,
+ want: map[string]string{"ctr": "rel-path"},
},
{
- name: "non-controller",
- cgroups: "0:name=systemd:/path",
- want: map[string]string{"systemd": "/path"},
+ name: "non-controller",
+ cgroups: "0:name=systemd:/path",
+ mountinfo: debianMountinfo,
+ want: map[string]string{"systemd": "path"},
},
{
- name: "empty",
+ name: "empty",
+ mountinfo: debianMountinfo,
},
{
name: "multiple",
cgroups: "0:ctr0:/path0\n" +
"1:ctr1:/path1\n" +
"2::/empty\n",
+ mountinfo: debianMountinfo,
want: map[string]string{
"ctr0": "/path0",
"ctr1": "/path1",
},
},
{
- name: "missing-field",
- cgroups: "0:nopath\n",
- err: "invalid cgroups file",
+ name: "missing-field",
+ cgroups: "0:nopath\n",
+ mountinfo: debianMountinfo,
+ err: "invalid cgroups file",
},
{
- name: "too-many-fields",
- cgroups: "0:ctr:/path:extra\n",
- err: "invalid cgroups file",
+ name: "too-many-fields",
+ cgroups: "0:ctr:/path:extra\n",
+ mountinfo: debianMountinfo,
+ err: "invalid cgroups file",
},
{
name: "multiple-malformed",
cgroups: "0:ctr0:/path0\n" +
"1:ctr1:/path1\n" +
"2:\n",
- err: "invalid cgroups file",
+ mountinfo: debianMountinfo,
+ err: "invalid cgroups file",
+ },
+ {
+ name: "nested-cgroup",
+ cgroups: `9:memory:/docker/136
+2:cpu,cpuacct:/docker/136
+1:name=systemd:/docker/136
+0::/system.slice/containerd.service`,
+ mountinfo: dindMountinfo,
+ // We want the path relative to /sys/fs/cgroup inside the nested container.
+ // A subgroup created at /sys/fs/cgroup/cpu inside the container shows up
+ // as /sys/fs/cgroup/cpu/docker/136/CGROUP_NAME outside the container.
+ want: map[string]string{
+ "memory": ".",
+ "cpu": ".",
+ "cpuacct": ".",
+ "systemd": ".",
+ },
+ },
+ {
+ name: "nested-cgroup-submount",
+ cgroups: "9:memory:/docker/136/test",
+ mountinfo: dindMountinfo,
+ want: map[string]string{
+ "memory": "test",
+ },
+ },
+ {
+ name: "invalid-mount-info",
+ cgroups: "0:memory:/path",
+ mountinfo: "41 35 0:36 / /sys/fs/cgroup/memory rw shared:16 - invalid",
+ want: map[string]string{
+ "memory": "/path",
+ },
+ },
+ {
+ name: "invalid-rel-path-in-proc-cgroup",
+ cgroups: "9:memory:./invalid",
+ mountinfo: dindMountinfo,
+ err: "can't make ./invalid relative to /docker/136",
},
} {
t.Run(tc.name, func(t *testing.T) {
r := strings.NewReader(tc.cgroups)
- got, err := loadPathsHelper(r)
+ mountinfo := strings.NewReader(tc.mountinfo)
+ got, err := loadPathsHelperWithMountinfo(r, mountinfo)
if len(tc.err) == 0 {
if err != nil {
t.Fatalf("Unexpected error: %v", err)
diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go
index 75b0aac8d..06f00e8e7 100644
--- a/runsc/cmd/events.go
+++ b/runsc/cmd/events.go
@@ -93,9 +93,9 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa
// err must be preserved because it is used below when breaking
// out of the loop.
- b, err := json.Marshal(ev)
+ b, err := json.Marshal(ev.Event)
if err != nil {
- log.Warningf("Error while marshalling event %v: %v", ev, err)
+ log.Warningf("Error while marshalling event %v: %v", ev.Event, err)
} else {
os.Stdout.Write(b)
}
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index 1b0fdebd6..7a3d5a523 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -122,7 +122,7 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) {
// Test that an pty FD is sent over the console socket if one is provided.
func TestConsoleSocket(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("true")
spec.Process.Terminal = true
@@ -164,7 +164,7 @@ func TestConsoleSocket(t *testing.T) {
// Test that an pty FD is sent over the console socket if one is provided.
func TestMultiContainerConsoleSocket(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -495,7 +495,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
// Test that terminal works with root and sub-containers.
func TestMultiContainerTerminal(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 5a0f8d5dc..aae64ae1c 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -486,12 +486,20 @@ func (c *Container) Execute(args *control.ExecArgs) (int32, error) {
}
// Event returns events for the container.
-func (c *Container) Event() (*boot.Event, error) {
+func (c *Container) Event() (*boot.EventOut, error) {
log.Debugf("Getting events for container, cid: %s", c.ID)
if err := c.requireStatus("get events for", Created, Running, Paused); err != nil {
return nil, err
}
- return c.Sandbox.Event(c.ID)
+ event, err := c.Sandbox.Event(c.ID)
+ if err != nil {
+ return nil, err
+ }
+
+ // Some stats can utilize host cgroups for accuracy.
+ c.populateStats(event)
+
+ return event, nil
}
// SandboxPid returns the Pid of the sandbox the container is running in, or -1 if the
@@ -1110,3 +1118,54 @@ func setOOMScoreAdj(pid int, scoreAdj int) error {
}
return nil
}
+
+// populateStats populates event with stats estimates based on cgroups and the
+// sentry's accounting.
+// TODO(gvisor.dev/issue/172): This is an estimation; we should do more
+// detailed accounting.
+func (c *Container) populateStats(event *boot.EventOut) {
+ // The events command, when run for all running containers, should
+ // account for the full cgroup CPU usage. We split cgroup usage
+ // proportionally according to the sentry-internal usage measurements,
+ // only counting Running containers.
+ log.Warningf("event.ContainerUsage: %v", event.ContainerUsage)
+ var containerUsage uint64
+ var allContainersUsage uint64
+ for ID, usage := range event.ContainerUsage {
+ allContainersUsage += usage
+ if ID == c.ID {
+ containerUsage = usage
+ }
+ }
+
+ cgroup, err := c.Sandbox.FindCgroup()
+ if err != nil {
+ // No cgroup, so rely purely on the sentry's accounting.
+ log.Warningf("events: no cgroups")
+ event.Event.Data.CPU.Usage.Total = containerUsage
+ return
+ }
+
+ // Get the host cgroup CPU usage.
+ cgroupsUsage, err := cgroup.CPUUsage()
+ if err != nil {
+ // No cgroup usage, so rely purely on the sentry's accounting.
+ log.Warningf("events: failed when getting cgroup CPU usage for container: %v", err)
+ event.Event.Data.CPU.Usage.Total = containerUsage
+ return
+ }
+
+ // If the sentry reports no CPU usage, fall back on cgroups and
+ // split usage equally across containers.
+ if allContainersUsage == 0 {
+ log.Warningf("events: no sentry CPU usage reported")
+ allContainersUsage = cgroupsUsage
+ containerUsage = cgroupsUsage / uint64(len(event.ContainerUsage))
+ }
+
+ log.Warningf("%f, %f, %f", containerUsage, cgroupsUsage, allContainersUsage)
+ // Scaling can easily overflow a uint64 (e.g. a containerUsage and
+ // cgroupsUsage of 16 seconds each will overflow), so use floats.
+ event.Event.Data.CPU.Usage.Total = uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage)))
+ return
+}
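
To make the scaling in populateStats concrete, here is a tiny sketch with made-up numbers: if the sentry attributes 2s of CPU to this container out of 5s measured across all containers, and the host cgroup reports 10s in total, the container is charged 2/5 of the cgroup total, i.e. 4s.

package main

import "fmt"

func main() {
	// Hypothetical values, in nanoseconds.
	containerUsage := uint64(2e9)     // sentry-measured usage for this container
	allContainersUsage := uint64(5e9) // sentry-measured usage summed over all containers
	cgroupsUsage := uint64(10e9)      // host cgroup cpuacct.usage for the whole sandbox

	// Same scaling as populateStats: floats avoid overflowing uint64.
	total := uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage)))
	fmt.Println(total) // 4000000000 (4s)
}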
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 3bbf86534..d50bbcd9f 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -312,8 +312,7 @@ var (
all = append(noOverlay, overlay)
)
-// configs generates different configurations to run tests.
-func configs(t *testing.T, opts ...configOption) map[string]*config.Config {
+func configsHelper(t *testing.T, opts ...configOption) map[string]*config.Config {
// Always load the default config.
cs := make(map[string]*config.Config)
testutil.TestConfig(t)
@@ -339,10 +338,12 @@ func configs(t *testing.T, opts ...configOption) map[string]*config.Config {
return cs
}
-// TODO(gvisor.dev/issue/1624): Merge with configs when VFS2 is the default.
-func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*config.Config {
- all := configs(t, opts...)
- for key, value := range configs(t, opts...) {
+// configs generates different configurations to run tests.
+//
+// TODO(gvisor.dev/issue/1624): Remove VFS1 dimension.
+func configs(t *testing.T, opts ...configOption) map[string]*config.Config {
+ all := configsHelper(t, opts...)
+ for key, value := range configsHelper(t, opts...) {
value.VFS2 = true
all[key+"VFS2"] = value
}
@@ -358,7 +359,7 @@ func TestLifecycle(t *testing.T) {
childReaper.Start()
defer childReaper.Stop()
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
// The container will just sleep for a long time. We will kill it before
// it finishes sleeping.
@@ -529,7 +530,7 @@ func TestExePath(t *testing.T) {
t.Fatalf("error making directory: %v", err)
}
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
for _, test := range []struct {
path string
@@ -654,7 +655,7 @@ func doAppExitStatus(t *testing.T, vfs2 bool) {
// TestExec verifies that a container can exec a new program.
func TestExec(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
dir, err := ioutil.TempDir(testutil.TmpDir(), "exec-test")
if err != nil {
@@ -783,7 +784,7 @@ func TestExec(t *testing.T) {
// TestExecProcList verifies that a container can exec a new program and it
// shows correcly in the process list.
func TestExecProcList(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
const uid = 343
spec := testutil.NewSpecWithArgs("sleep", "100")
@@ -854,7 +855,7 @@ func TestExecProcList(t *testing.T) {
// TestKillPid verifies that we can signal individual exec'd processes.
func TestKillPid(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
@@ -930,7 +931,6 @@ func TestKillPid(t *testing.T) {
// number after the last number from the checkpointed container.
func TestCheckpointRestore(t *testing.T) {
// Skip overlay because test requires writing to host file.
- // TODO(gvisor.dev/issue/1663): Add VFS when S/R support is added.
for name, conf := range configs(t, noOverlay...) {
t.Run(name, func(t *testing.T) {
dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test")
@@ -1092,7 +1092,6 @@ func TestCheckpointRestore(t *testing.T) {
// with filesystem Unix Domain Socket use.
func TestUnixDomainSockets(t *testing.T) {
// Skip overlay because test requires writing to host file.
- // TODO(gvisor.dev/issue/1663): Add VFS when S/R support is added.
for name, conf := range configs(t, noOverlay...) {
t.Run(name, func(t *testing.T) {
// UDS path is limited to 108 chars for compatibility with older systems.
@@ -1230,7 +1229,7 @@ func TestUnixDomainSockets(t *testing.T) {
// recreated. Then it resumes the container, verify that the file gets created
// again.
func TestPauseResume(t *testing.T) {
- for name, conf := range configsWithVFS2(t, noOverlay...) {
+ for name, conf := range configs(t, noOverlay...) {
t.Run(name, func(t *testing.T) {
tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "lock")
if err != nil {
@@ -1373,7 +1372,7 @@ func TestCapabilities(t *testing.T) {
uid := auth.KUID(os.Getuid() + 1)
gid := auth.KGID(os.Getgid() + 1)
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("sleep", "100")
rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
@@ -1446,7 +1445,7 @@ func TestCapabilities(t *testing.T) {
// TestRunNonRoot checks that sandbox can be configured when running as
// non-privileged user.
func TestRunNonRoot(t *testing.T) {
- for name, conf := range configsWithVFS2(t, noOverlay...) {
+ for name, conf := range configs(t, noOverlay...) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("/bin/true")
@@ -1490,7 +1489,7 @@ func TestRunNonRoot(t *testing.T) {
// TestMountNewDir checks that runsc will create destination directory if it
// doesn't exit.
func TestMountNewDir(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
root, err := ioutil.TempDir(testutil.TmpDir(), "root")
if err != nil {
@@ -1521,7 +1520,7 @@ func TestMountNewDir(t *testing.T) {
}
func TestReadonlyRoot(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("sleep", "100")
spec.Root.Readonly = true
@@ -1569,7 +1568,7 @@ func TestReadonlyRoot(t *testing.T) {
}
func TestReadonlyMount(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
if err != nil {
@@ -1628,7 +1627,7 @@ func TestReadonlyMount(t *testing.T) {
}
func TestUIDMap(t *testing.T) {
- for name, conf := range configsWithVFS2(t, noOverlay...) {
+ for name, conf := range configs(t, noOverlay...) {
t.Run(name, func(t *testing.T) {
testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount")
if err != nil {
@@ -1916,7 +1915,7 @@ func TestUserLog(t *testing.T) {
}
func TestWaitOnExitedSandbox(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
// Run a shell that sleeps for 1 second and then exits with a
// non-zero code.
@@ -2058,7 +2057,7 @@ func doDestroyStartingTest(t *testing.T, vfs2 bool) {
}
func TestCreateWorkingDir(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create")
if err != nil {
@@ -2173,7 +2172,7 @@ func TestMountPropagation(t *testing.T) {
}
func TestMountSymlink(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
if err != nil {
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index bc802e075..173332cc2 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -15,7 +15,6 @@
package container
import (
- "encoding/json"
"fmt"
"io/ioutil"
"math"
@@ -132,7 +131,7 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
// TestMultiContainerSanity checks that it is possible to run 2 dead-simple
// containers in the same sandbox.
func TestMultiContainerSanity(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -170,7 +169,7 @@ func TestMultiContainerSanity(t *testing.T) {
// TestMultiPIDNS checks that it is possible to run 2 dead-simple
// containers in the same sandbox with different pidns.
func TestMultiPIDNS(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -215,7 +214,7 @@ func TestMultiPIDNS(t *testing.T) {
// TestMultiPIDNSPath checks the pidns path.
func TestMultiPIDNSPath(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -322,8 +321,8 @@ func TestMultiContainerWait(t *testing.T) {
}
}
-// TestExecWait ensures what we can wait containers and individual processes in the
-// sandbox that have already exited.
+// TestExecWait ensures that we can wait on containers and individual
+// processes in the sandbox that have already exited.
func TestExecWait(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -448,7 +447,7 @@ func TestMultiContainerMount(t *testing.T) {
// TestMultiContainerSignal checks that it is possible to signal individual
// containers without killing the entire sandbox.
func TestMultiContainerSignal(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -548,7 +547,7 @@ func TestMultiContainerDestroy(t *testing.T) {
t.Fatal("error finding test_app:", err)
}
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -1042,7 +1041,7 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) {
// Test that pod shared mounts are properly mounted in 2 containers and that
// changes from one container is reflected in the other.
func TestMultiContainerSharedMount(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -1155,7 +1154,7 @@ func TestMultiContainerSharedMount(t *testing.T) {
// Test that pod mounts are mounted as readonly when requested.
func TestMultiContainerSharedMountReadonly(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -1220,7 +1219,7 @@ func TestMultiContainerSharedMountReadonly(t *testing.T) {
// Test that shared pod mounts continue to work after container is restarted.
func TestMultiContainerSharedMountRestart(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -1329,7 +1328,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) {
// Test that unsupported pod mounts options are ignored when matching master and
// replica mounts.
func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
- for name, conf := range configsWithVFS2(t, all...) {
+ for name, conf := range configs(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -1663,7 +1662,7 @@ func TestMultiContainerRunNonRoot(t *testing.T) {
func TestMultiContainerHomeEnvDir(t *testing.T) {
// NOTE: Don't use overlay since we need changes to persist to the temp dir
// outside the sandbox.
- for testName, conf := range configsWithVFS2(t, noOverlay...) {
+ for testName, conf := range configs(t, noOverlay...) {
t.Run(testName, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
@@ -1743,8 +1742,9 @@ func TestMultiContainerEvent(t *testing.T) {
// Setup the containers.
sleep := []string{"/bin/sleep", "100"}
+ busy := []string{"/bin/bash", "-c", "i=0 ; while true ; do (( i += 1 )) ; done"}
quick := []string{"/bin/true"}
- podSpec, ids := createSpecs(sleep, sleep, quick)
+ podSpec, ids := createSpecs(sleep, busy, quick)
containers, cleanup, err := startContainers(conf, podSpec, ids)
if err != nil {
t.Fatalf("error starting containers: %v", err)
@@ -1755,37 +1755,58 @@ func TestMultiContainerEvent(t *testing.T) {
t.Logf("Running containerd %s", cont.ID)
}
- // Wait for last container to stabilize the process count that is checked
- // further below.
+ // Wait for last container to stabilize the process count that is
+ // checked further below.
if ws, err := containers[2].Wait(); err != nil || ws != 0 {
t.Fatalf("Container.Wait, status: %v, err: %v", ws, err)
}
+ expectedPL := []*control.Process{
+ newProcessBuilder().Cmd("sleep").Process(),
+ }
+ if err := waitForProcessList(containers[0], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ expectedPL = []*control.Process{
+ newProcessBuilder().Cmd("bash").Process(),
+ }
+ if err := waitForProcessList(containers[1], expectedPL); err != nil {
+ t.Errorf("failed to wait for bash to start: %v", err)
+ }
// Check events for running containers.
+ var prevUsage uint64
for _, cont := range containers[:2] {
- evt, err := cont.Event()
+ ret, err := cont.Event()
if err != nil {
t.Errorf("Container.Events(): %v", err)
}
+ evt := ret.Event
if want := "stats"; evt.Type != want {
- t.Errorf("Wrong event type, want: %s, got :%s", want, evt.Type)
+ t.Errorf("Wrong event type, want: %s, got: %s", want, evt.Type)
}
if cont.ID != evt.ID {
- t.Errorf("Wrong container ID, want: %s, got :%s", cont.ID, evt.ID)
+ t.Errorf("Wrong container ID, want: %s, got: %s", cont.ID, evt.ID)
}
- // Event.Data is an interface, so it comes from the wire was
- // map[string]string. Marshal and unmarshall again to the correc type.
- data, err := json.Marshal(evt.Data)
- if err != nil {
- t.Fatalf("invalid event data: %v", err)
+ // One process per remaining container.
+ if got, want := evt.Data.Pids.Current, uint64(2); got != want {
+ t.Errorf("Wrong number of PIDs, want: %d, got: %d", want, got)
}
- var stats boot.Stats
- if err := json.Unmarshal(data, &stats); err != nil {
- t.Fatalf("invalid event data: %v", err)
+
+ // Both remaining containers should have nonzero usage, and
+ // 'busy' should have higher usage than 'sleep'.
+ usage := evt.Data.CPU.Usage.Total
+ if usage == 0 {
+ t.Errorf("Running container should report nonzero CPU usage, but got %d", usage)
}
- // One process per remaining container.
- if want := uint64(2); stats.Pids.Current != want {
- t.Errorf("Wrong number of PIDs, want: %d, got :%d", want, stats.Pids.Current)
+ if usage <= prevUsage {
+ t.Errorf("Expected container %s to use more than %d ns of CPU, but used %d", cont.ID, prevUsage, usage)
+ }
+ t.Logf("Container %s usage: %d", cont.ID, usage)
+ prevUsage = usage
+
+ // The exited container should have a usage of zero.
+ if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 {
+ t.Errorf("Exited container should report 0 CPU usage, but got %d", exited)
}
}
diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go
index dfbf1f2d3..c46322ba4 100644
--- a/runsc/container/state_file.go
+++ b/runsc/container/state_file.go
@@ -49,7 +49,7 @@ type LoadOpts struct {
// Returns ErrNotExist if no container is found. Returns error in case more than
// one containers matching the ID prefix is found.
func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) {
- //log.Debugf("Load container, rootDir: %q, partial cid: %s", rootDir, partialID)
+ log.Debugf("Load container, rootDir: %q, id: %+v, opts: %+v", rootDir, id, opts)
if !opts.Exact {
var err error
id, err = findContainerID(rootDir, id.ContainerID)
diff --git a/runsc/mitigate/BUILD b/runsc/mitigate/BUILD
index 9a9546577..3b0342d18 100644
--- a/runsc/mitigate/BUILD
+++ b/runsc/mitigate/BUILD
@@ -8,6 +8,7 @@ go_library(
"cpu.go",
"mitigate.go",
],
+ deps = ["@in_gopkg_yaml_v2//:go_default_library"],
)
go_test(
@@ -15,4 +16,5 @@ go_test(
size = "small",
srcs = ["cpu_test.go"],
library = ":mitigate",
+ deps = ["@com_github_google_go_cmp//cmp:go_default_library"],
)
diff --git a/runsc/mitigate/cpu.go b/runsc/mitigate/cpu.go
index 113b98159..ae4ce9579 100644
--- a/runsc/mitigate/cpu.go
+++ b/runsc/mitigate/cpu.go
@@ -16,6 +16,7 @@ package mitigate
import (
"fmt"
+ "io/ioutil"
"regexp"
"strconv"
"strings"
@@ -31,16 +32,104 @@ const (
)
const (
- processorKey = "processor"
- vendorIDKey = "vendor_id"
- cpuFamilyKey = "cpu family"
- modelKey = "model"
- coreIDKey = "core id"
- bugsKey = "bugs"
+ processorKey = "processor"
+ vendorIDKey = "vendor_id"
+ cpuFamilyKey = "cpu family"
+ modelKey = "model"
+ physicalIDKey = "physical id"
+ coreIDKey = "core id"
+ bugsKey = "bugs"
)
-// getCPUSet returns cpu structs from reading /proc/cpuinfo.
-func getCPUSet(data string) ([]*cpu, error) {
+const (
+ cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online"
+)
+
+// cpuSet maps all CPUs on the system by physical and core ID. Threads
+// with the same physical and core IDs are Hyperthread pairs.
+type cpuSet map[cpuID]*threadGroup
+
+// newCPUSet creates a CPUSet from data read from /proc/cpuinfo.
+func newCPUSet(data []byte, vulnerable func(*thread) bool) (cpuSet, error) {
+ processors, err := getThreads(string(data))
+ if err != nil {
+ return nil, err
+ }
+
+ set := make(cpuSet)
+ for _, p := range processors {
+ // Each ID is of the form physicalID:coreID. Hyperthread pairs
+ // have identical physical and core IDs. We need to match
+ // Hyperthread pairs so that we can shut down all but one per
+ // pair.
+ core, ok := set[p.id]
+ if !ok {
+ core = &threadGroup{}
+ set[p.id] = core
+ }
+ core.isVulnerable = core.isVulnerable || vulnerable(p)
+ core.threads = append(core.threads, p)
+ }
+ return set, nil
+}
+
+// String implements the String method for CPUSet.
+func (c cpuSet) String() string {
+ ret := ""
+ for _, tg := range c {
+ ret += fmt.Sprintf("%s\n", tg)
+ }
+ return ret
+}
+
+// getRemainingList returns the list of threads that will remain active
+// after mitigation.
+func (c cpuSet) getRemainingList() []*thread {
+ threads := make([]*thread, 0, len(c))
+ for _, core := range c {
+ // If we're vulnerable, take only one thread from the pair.
+ if core.isVulnerable {
+ threads = append(threads, core.threads[0])
+ continue
+ }
+ // Otherwise, don't shut down anything.
+ threads = append(threads, core.threads...)
+ }
+ return threads
+}
+
+// getShutdownList returns the list of threads that will be shutdown on
+// mitigation.
+func (c cpuSet) getShutdownList() []*thread {
+ threads := make([]*thread, 0)
+ for _, core := range c {
+ // Only shut anything down if the group is vulnerable. In that case,
+ // shut down all but the first entry.
+ if core.isVulnerable && len(core.threads) > 1 {
+ threads = append(threads, core.threads[1:]...)
+ }
+ }
+ return threads
+}
+
+// threadGroup represents Hyperthread pairs on the same physical/core ID.
+type threadGroup struct {
+ threads []*thread
+ isVulnerable bool
+}
+
+// String implements the String method for threadGroup.
+func (c *threadGroup) String() string {
+ ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable)
+ for _, processor := range c.threads {
+ ret += fmt.Sprintf("%s\n", processor)
+ }
+ return ret
+}
+
+// getThreads returns threads structs from reading /proc/cpuinfo.
+func getThreads(data string) ([]*thread, error) {
// Each processor entry should start with the
// processor key. Find the beginings of each.
r := buildRegex(processorKey, `\d+`)
@@ -56,13 +145,13 @@ func getCPUSet(data string) ([]*cpu, error) {
// indexes (e.g. data[index[i], index[i+1]]).
// There should be len(indicies) - 1 CPUs
// since the last index is the end of the string.
- var cpus = make([]*cpu, 0, len(indices)-1)
+ var cpus = make([]*thread, 0, len(indices)-1)
// Find each string that represents a CPU. These begin "processor".
for i := 1; i < len(indices); i++ {
start := indices[i-1][0]
end := indices[i][0]
// Parse the CPU entry, which should be between start/end.
- c, err := getCPU(data[start:end])
+ c, err := newThread(data[start:end])
if err != nil {
return nil, err
}
@@ -71,18 +160,25 @@ func getCPUSet(data string) ([]*cpu, error) {
return cpus, nil
}
+// cpuID for each thread is defined by the physical and
+// core IDs. If equal, two threads are Hyperthread pairs.
+type cpuID struct {
+ physicalID int64
+ coreID int64
+}
+
// type cpu represents pertinent info about a cpu.
-type cpu struct {
+type thread struct {
processorNumber int64 // the processor number of this CPU.
vendorID string // the vendorID of CPU (e.g. AuthenticAMD).
cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake).
model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake).
- coreID int64 // This CPU's core id to match Hyperthread Pairs
+ id cpuID // id for this thread
bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field.
}
-// getCPU parses a CPU from a single cpu entry from /proc/cpuinfo.
-func getCPU(data string) (*cpu, error) {
+// newThread parses a CPU from a single cpu entry from /proc/cpuinfo.
+func newThread(data string) (*thread, error) {
processor, err := parseProcessor(data)
if err != nil {
return nil, err
@@ -103,6 +199,11 @@ func getCPU(data string) (*cpu, error) {
return nil, err
}
+ physicalID, err := parsePhysicalID(data)
+ if err != nil {
+ return nil, err
+ }
+
coreID, err := parseCoreID(data)
if err != nil {
return nil, err
@@ -113,16 +214,41 @@ func getCPU(data string) (*cpu, error) {
return nil, err
}
- return &cpu{
+ return &thread{
processorNumber: processor,
vendorID: vendorID,
cpuFamily: cpuFamily,
model: model,
- coreID: coreID,
- bugs: bugs,
+ id: cpuID{
+ physicalID: physicalID,
+ coreID: coreID,
+ },
+ bugs: bugs,
}, nil
}
+// String implements the String method for thread.
+func (t *thread) String() string {
+ template := `CPU: %d
+CPU ID: %+v
+Vendor: %s
+Family/Model: %d/%d
+Bugs: %s
+`
+ bugs := make([]string, 0)
+ for bug := range t.bugs {
+ bugs = append(bugs, bug)
+ }
+
+ return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ","))
+}
+
+// shutdown turns off the CPU by writing 0 to /sys/devices/system/cpu/cpu{N}/online.
+func (t *thread) shutdown() error {
+ cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber)
+ return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644)
+}
+
// List of pertinent side channel vulnerablilites.
// For mds, see: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html.
var vulnerabilities = []string{
@@ -134,35 +260,46 @@ var vulnerabilities = []string{
}
// isVulnerable checks if a CPU is vulnerable to pertinent bugs.
-func (c *cpu) isVulnerable() bool {
+func (t *thread) isVulnerable() bool {
for _, bug := range vulnerabilities {
- if _, ok := c.bugs[bug]; ok {
+ if _, ok := t.bugs[bug]; ok {
return true
}
}
return false
}
+// isActive checks whether a CPU is active by reading /sys/devices/system/cpu/cpu{N}/online.
+// If the file does not exist (ioutil returns an error), we assume the CPU is on.
+func (t *thread) isActive() bool {
+ cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber)
+ data, err := ioutil.ReadFile(cpuPath)
+ if err != nil {
+ return true
+ }
+ return len(data) > 0 && data[0] != '0'
+}
+
// similarTo checks family/model/bugs fields for equality of two
// processors.
-func (c *cpu) similarTo(other *cpu) bool {
- if c.vendorID != other.vendorID {
+func (t *thread) similarTo(other *thread) bool {
+ if t.vendorID != other.vendorID {
return false
}
- if other.cpuFamily != c.cpuFamily {
+ if other.cpuFamily != t.cpuFamily {
return false
}
- if other.model != c.model {
+ if other.model != t.model {
return false
}
- if len(other.bugs) != len(c.bugs) {
+ if len(other.bugs) != len(t.bugs) {
return false
}
- for bug := range c.bugs {
+ for bug := range t.bugs {
if _, ok := other.bugs[bug]; !ok {
return false
}
@@ -190,6 +327,11 @@ func parseModel(data string) (int64, error) {
return parseIntegerResult(data, modelKey)
}
+// parsePhysicalID parses the physical id field.
+func parsePhysicalID(data string) (int64, error) {
+ return parseIntegerResult(data, physicalIDKey)
+}
+
// parseCoreID parses the core id field.
func parseCoreID(data string) (int64, error) {
return parseIntegerResult(data, coreIDKey)
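
For orientation, here is a hedged sketch of how these helpers might be wired together by a caller inside package mitigate. The driver function itself is hypothetical and not part of this diff: it parses /proc/cpuinfo into a cpuSet and takes every vulnerable Hyperthread sibling offline.

package mitigate

import "io/ioutil"

// applyMitigationSketch is illustrative only: it reads /proc/cpuinfo, builds
// the cpuSet using isVulnerable as the predicate, and shuts down the extra
// threads of every vulnerable Hyperthread pair.
func applyMitigationSketch() error {
	data, err := ioutil.ReadFile("/proc/cpuinfo")
	if err != nil {
		return err
	}
	set, err := newCPUSet(data, func(t *thread) bool { return t.isVulnerable() })
	if err != nil {
		return err
	}
	for _, t := range set.getShutdownList() {
		// Writes "0" to /sys/devices/system/cpu/cpu{N}/online.
		if err := t.shutdown(); err != nil {
			return err
		}
	}
	return nil
}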
diff --git a/runsc/mitigate/cpu_test.go b/runsc/mitigate/cpu_test.go
index 77b714a02..21c12f586 100644
--- a/runsc/mitigate/cpu_test.go
+++ b/runsc/mitigate/cpu_test.go
@@ -15,26 +15,163 @@
package mitigate
import (
+ "fmt"
"io/ioutil"
"strings"
"testing"
)
-// CPU info for a Intel CascadeLake processor. Both Skylake and CascadeLake have
-// the same family/model numbers, but with different bugs (e.g. skylake has
-// cpu_meltdown).
-var cascadeLake = &cpu{
- vendorID: "GenuineIntel",
- cpuFamily: 6,
- model: 85,
- bugs: map[string]struct{}{
- "spectre_v1": struct{}{},
- "spectre_v2": struct{}{},
- "spec_store_bypass": struct{}{},
- mds: struct{}{},
- swapgs: struct{}{},
- taa: struct{}{},
- },
+// cpuTestCase represents data from CPUs that will be mitigated.
+type cpuTestCase struct {
+ name string
+ vendorID string
+ family int
+ model int
+ modelName string
+ bugs string
+ physicalCores int
+ cores int
+ threadsPerCore int
+}
+
+var cascadeLake4 = cpuTestCase{
+ name: "CascadeLake",
+ vendorID: "GenuineIntel",
+ family: 6,
+ model: 85,
+ modelName: "Intel(R) Xeon(R) CPU",
+ bugs: "spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa",
+ physicalCores: 1,
+ cores: 2,
+ threadsPerCore: 2,
+}
+
+var haswell2 = cpuTestCase{
+ name: "Haswell",
+ vendorID: "GenuineIntel",
+ family: 6,
+ model: 63,
+ modelName: "Intel(R) Xeon(R) CPU",
+ bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs",
+ physicalCores: 1,
+ cores: 1,
+ threadsPerCore: 2,
+}
+
+var haswell2core = cpuTestCase{
+ name: "Haswell2Physical",
+ vendorID: "GenuineIntel",
+ family: 6,
+ model: 63,
+ modelName: "Intel(R) Xeon(R) CPU",
+ bugs: "cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs",
+ physicalCores: 2,
+ cores: 1,
+ threadsPerCore: 1,
+}
+
+var amd8 = cpuTestCase{
+ name: "AMD",
+ vendorID: "AuthenticAMD",
+ family: 23,
+ model: 49,
+ modelName: "AMD EPYC 7B12",
+ bugs: "sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass",
+ physicalCores: 4,
+ cores: 1,
+ threadsPerCore: 2,
+}
+
+// makeCPUString makes a string formatted like /proc/cpuinfo from a cpuTestCase.
+func (tc cpuTestCase) makeCPUString() string {
+ template := `processor : %d
+vendor_id : %s
+cpu family : %d
+model : %d
+model name : %s
+physical id : %d
+core id : %d
+cpu cores : %d
+bugs : %s
+`
+ ret := ``
+ for i := 0; i < tc.physicalCores; i++ {
+ for j := 0; j < tc.cores; j++ {
+ for k := 0; k < tc.threadsPerCore; k++ {
+ processorNum := (i*tc.cores+j)*tc.threadsPerCore + k
+ ret += fmt.Sprintf(template,
+ processorNum, /*processor*/
+ tc.vendorID, /*vendor_id*/
+ tc.family, /*cpu family*/
+ tc.model, /*model*/
+ tc.modelName, /*model name*/
+ i, /*physical id*/
+ j, /*core id*/
+ tc.cores*tc.physicalCores, /*cpu cores*/
+ tc.bugs /*bugs*/)
+ }
+ }
+ }
+ return ret
+}
+
+// TestMockCPUSet tests mock cpu test cases against the cpuSet functions.
+func TestMockCPUSet(t *testing.T) {
+ for _, tc := range []struct {
+ testCase cpuTestCase
+ isVulnerable bool
+ }{
+ {
+ testCase: amd8,
+ isVulnerable: false,
+ },
+ {
+ testCase: haswell2,
+ isVulnerable: true,
+ },
+ {
+ testCase: haswell2core,
+ isVulnerable: true,
+ },
+
+ {
+ testCase: cascadeLake4,
+ isVulnerable: true,
+ },
+ } {
+ t.Run(tc.testCase.name, func(t *testing.T) {
+ data := tc.testCase.makeCPUString()
+ vulnerable := func(t *thread) bool {
+ return t.isVulnerable()
+ }
+ set, err := newCPUSet([]byte(data), vulnerable)
+ if err != nil {
+ t.Fatalf("Failed to ")
+ }
+ remaining := set.getRemainingList()
+ // In the non-vulnerable case, no cores should be shut down, so all should remain.
+ want := tc.testCase.physicalCores * tc.testCase.cores * tc.testCase.threadsPerCore
+ if tc.isVulnerable {
+ want = tc.testCase.physicalCores * tc.testCase.cores
+ }
+
+ if want != len(remaining) {
+ t.Fatalf("Failed to shutdown the correct number of cores: want: %d got: %d", want, len(remaining))
+ }
+
+ if !tc.isVulnerable {
+ return
+ }
+
+ // If the set is vulnerable, we expect only 1 thread per hyperthread pair.
+ for _, r := range remaining {
+ if _, ok := set[r.id]; !ok {
+ t.Fatalf("Entry %+v not in map, there must be two entries in the same thread group.", r)
+ }
+ delete(set, r.id)
+ }
+ })
+ }
}
// TestGetCPU tests basic parsing of single CPU strings from reading
@@ -44,15 +181,19 @@ func TestGetCPU(t *testing.T) {
vendor_id : GenuineIntel
cpu family : 6
model : 85
+physical id: 0
core id : 0
bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa itlb_multihit
`
- want := cpu{
+ want := thread{
processorNumber: 0,
vendorID: "GenuineIntel",
cpuFamily: 6,
model: 85,
- coreID: 0,
+ id: cpuID{
+ physicalID: 0,
+ coreID: 0,
+ },
bugs: map[string]struct{}{
"cpu_meltdown": struct{}{},
"spectre_v1": struct{}{},
@@ -66,7 +207,7 @@ bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa
},
}
- got, err := getCPU(data)
+ got, err := newThread(data)
if err != nil {
t.Fatalf("getCpu failed with error: %v", err)
}
@@ -81,7 +222,7 @@ bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa
}
func TestInvalid(t *testing.T) {
- result, err := getCPUSet(`something not a processor`)
+ result, err := getThreads(`something not a processor`)
if err == nil {
t.Fatalf("getCPU set didn't return an error: %+v", result)
}
@@ -148,7 +289,7 @@ cache_alignment : 64
address sizes : 46 bits physical, 48 bits virtual
power management:
`
- cpuSet, err := getCPUSet(data)
+ cpuSet, err := getThreads(data)
if err != nil {
t.Fatalf("getCPUSet failed: %v", err)
}
@@ -158,7 +299,7 @@ power management:
t.Fatalf("Num CPU mismatch: want: %d, got: %d", wantCPULen, len(cpuSet))
}
- wantCPU := cpu{
+ wantCPU := thread{
vendorID: "GenuineIntel",
cpuFamily: 6,
model: 63,
@@ -187,7 +328,11 @@ func TestReadFile(t *testing.T) {
t.Fatalf("Failed to read cpuinfo: %v", err)
}
- set, err := getCPUSet(string(data))
+ vulnerable := func(t *thread) bool {
+ return t.isVulnerable()
+ }
+
+ set, err := newCPUSet(data, vulnerable)
if err != nil {
t.Fatalf("Failed to parse CPU data %v\n%s", err, data)
}
@@ -196,9 +341,7 @@ func TestReadFile(t *testing.T) {
t.Fatalf("Failed to parse any CPUs: %d", len(set))
}
- for _, c := range set {
- t.Logf("CPU: %+v: %t", c, c.isVulnerable())
- }
+ t.Log(set)
}
// TestVulnerable tests if the isVulnerable method is correct
@@ -332,17 +475,13 @@ power management:`
cpuString: skylake,
vulnerable: true,
}, {
- name: "cascadeLake",
- cpuString: cascade,
- vulnerable: false,
- }, {
name: "amd",
cpuString: amd,
vulnerable: false,
},
} {
t.Run(tc.name, func(t *testing.T) {
- set, err := getCPUSet(tc.cpuString)
+ set, err := getThreads(tc.cpuString)
if err != nil {
t.Fatalf("Failed to getCPUSet:%v\n %s", err, tc.cpuString)
}
@@ -353,9 +492,6 @@ power management:`
for _, c := range set {
got := func() bool {
- if cascadeLake.similarTo(c) {
- return false
- }
return c.isVulnerable()
}()
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 266bc0bdc..7fe65c7ba 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -308,6 +308,22 @@ func (s *Sandbox) Processes(cid string) ([]*control.Process, error) {
return pl, nil
}
+// FindCgroup returns the sandbox's Cgroup, or an error if it does not have one.
+func (s *Sandbox) FindCgroup() (*cgroup.Cgroup, error) {
+ paths, err := cgroup.LoadPaths(strconv.Itoa(s.Pid))
+ if err != nil {
+ return nil, err
+ }
+ // runsc places sandboxes in the same cgroup for each controller, so we
+ // pick an arbitrary controller here to get the cgroup path.
+ const controller = "cpuacct"
+ controllerPath, ok := paths[controller]
+ if !ok {
+ return nil, fmt.Errorf("no %q controller found", controller)
+ }
+ return cgroup.NewFromPath(controllerPath)
+}
+
// Execute runs the specified command in the container. It returns the PID of
// the newly created process.
func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) {
@@ -327,7 +343,7 @@ func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) {
}
// Event retrieves stats about the sandbox such as memory and CPU utilization.
-func (s *Sandbox) Event(cid string) (*boot.Event, error) {
+func (s *Sandbox) Event(cid string) (*boot.EventOut, error) {
log.Debugf("Getting events for container %q in sandbox %q", cid, s.ID)
conn, err := s.sandboxConnect()
if err != nil {
@@ -335,13 +351,13 @@ func (s *Sandbox) Event(cid string) (*boot.Event, error) {
}
defer conn.Close()
- var e boot.Event
+ var e boot.EventOut
// TODO(b/129292330): Pass in the container id (cid) here. The sandbox
// should return events only for that container.
if err := conn.Call(boot.ContainerEvent, nil, &e); err != nil {
return nil, fmt.Errorf("retrieving event data from sandbox: %v", err)
}
- e.ID = cid
+ e.Event.ID = cid
return &e, nil
}
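
Purely as an illustration of the FindCgroup logic above (all values hypothetical), cgroup.LoadPaths for the sandbox PID yields a controller-to-path map, and because runsc places the sandbox in the same cgroup for every controller, any single entry identifies the sandbox cgroup:

package main

import "fmt"

func main() {
	// Hypothetical output of cgroup.LoadPaths(strconv.Itoa(s.Pid)) for a
	// sandbox whose cgroup lives at /sys/fs/cgroup/<ctrl>/runsc/sandbox-abc.
	paths := map[string]string{
		"cpu":     "/runsc/sandbox-abc",
		"cpuacct": "/runsc/sandbox-abc",
		"memory":  "/runsc/sandbox-abc",
	}
	// Any controller works; FindCgroup picks cpuacct arbitrarily.
	const controller = "cpuacct"
	if p, ok := paths[controller]; ok {
		fmt.Printf("sandbox cgroup: %s\n", p) // fed to cgroup.NewFromPath
	}
}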