From 9f33fe64f221de0eb2a290fd54357c954d9f38f8 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 5 May 2021 17:36:58 -0700 Subject: Fixes to runsc cgroups When loading cgroups for another process, `/proc/self` was used in a few places, causing the end state to be a mix of the process and self. This is now fixes to always use the proper `/proc/[pid]` path. Added net_prio and net_cls to the list of optional controllers. This is to allow runsc to execute then these cgroups are disabled as long as there are no net_prio and net_cls limits that need to be applied. Deflake TestMultiContainerEvent. Closes #5875 Closes #5887 PiperOrigin-RevId: 372242687 --- runsc/cgroup/cgroup.go | 254 ++++++++++++++++++++++---------- runsc/cgroup/cgroup_test.go | 112 ++++++++++---- runsc/container/container.go | 15 +- runsc/container/multi_container_test.go | 58 +++++--- runsc/sandbox/sandbox.go | 17 +-- 5 files changed, 302 insertions(+), 154 deletions(-) (limited to 'runsc') diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 438b7ef3e..335e46499 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -40,23 +40,24 @@ const ( cgroupRoot = "/sys/fs/cgroup" ) -var controllers = map[string]config{ - "blkio": {ctrlr: &blockIO{}}, - "cpu": {ctrlr: &cpu{}}, - "cpuset": {ctrlr: &cpuSet{}}, - "hugetlb": {ctrlr: &hugeTLB{}, optional: true}, - "memory": {ctrlr: &memory{}}, - "net_cls": {ctrlr: &networkClass{}}, - "net_prio": {ctrlr: &networkPrio{}}, - "pids": {ctrlr: &pids{}}, +var controllers = map[string]controller{ + "blkio": &blockIO{}, + "cpu": &cpu{}, + "cpuset": &cpuSet{}, + "hugetlb": &hugeTLB{}, + "memory": &memory{}, + "net_cls": &networkClass{}, + "net_prio": &networkPrio{}, + "pids": &pids{}, // These controllers either don't have anything in the OCI spec or is // irrelevant for a sandbox. - "devices": {ctrlr: &noop{}}, - "freezer": {ctrlr: &noop{}}, - "perf_event": {ctrlr: &noop{}}, - "rdma": {ctrlr: &noop{}, optional: true}, - "systemd": {ctrlr: &noop{}}, + "cpuacct": &noop{}, + "devices": &noop{}, + "freezer": &noop{}, + "perf_event": &noop{}, + "rdma": &noop{isOptional: true}, + "systemd": &noop{}, } // IsOnlyV2 checks whether cgroups V2 is enabled and V1 is not. @@ -201,31 +202,26 @@ func countCpuset(cpuset string) (int, error) { return count, nil } -// LoadPaths loads cgroup paths for given 'pid', may be set to 'self'. -func LoadPaths(pid string) (map[string]string, error) { - f, err := os.Open(filepath.Join("/proc", pid, "cgroup")) +// loadPaths loads cgroup paths for given 'pid', may be set to 'self'. +func loadPaths(pid string) (map[string]string, error) { + procCgroup, err := os.Open(filepath.Join("/proc", pid, "cgroup")) if err != nil { return nil, err } - defer f.Close() + defer procCgroup.Close() - return loadPathsHelper(f) -} - -func loadPathsHelper(cgroup io.Reader) (map[string]string, error) { - // For nested containers, in /proc/self/cgroup we see paths from host, - // which don't exist in container, so recover the container paths here by - // double-checking with /proc/pid/mountinfo - mountinfo, err := os.Open("/proc/self/mountinfo") + // Load mountinfo for the current process, because it's where cgroups is + // being accessed from. + mountinfo, err := os.Open(filepath.Join("/proc/self/mountinfo")) if err != nil { return nil, err } defer mountinfo.Close() - return loadPathsHelperWithMountinfo(cgroup, mountinfo) + return loadPathsHelper(procCgroup, mountinfo) } -func loadPathsHelperWithMountinfo(cgroup, mountinfo io.Reader) (map[string]string, error) { +func loadPathsHelper(cgroup, mountinfo io.Reader) (map[string]string, error) { paths := make(map[string]string) scanner := bufio.NewScanner(cgroup) @@ -242,34 +238,51 @@ func loadPathsHelperWithMountinfo(cgroup, mountinfo io.Reader) (map[string]strin for _, ctrlr := range strings.Split(tokens[1], ",") { // Remove prefix for cgroups with no controller, eg. systemd. ctrlr = strings.TrimPrefix(ctrlr, "name=") - paths[ctrlr] = tokens[2] + // Discard unknown controllers. + if _, ok := controllers[ctrlr]; ok { + paths[ctrlr] = tokens[2] + } } } if err := scanner.Err(); err != nil { return nil, err } - mfScanner := bufio.NewScanner(mountinfo) - for mfScanner.Scan() { - txt := mfScanner.Text() - fields := strings.Fields(txt) + // For nested containers, in /proc/[pid]/cgroup we see paths from host, + // which don't exist in container, so recover the container paths here by + // double-checking with /proc/[pid]/mountinfo + mountScanner := bufio.NewScanner(mountinfo) + for mountScanner.Scan() { + // Format: ID parent major:minor root mount-point options opt-fields - fs-type source super-options + // Example: 39 32 0:34 / /sys/fs/cgroup/devices rw,noexec shared:18 - cgroup cgroup rw,devices + fields := strings.Fields(mountScanner.Text()) if len(fields) < 9 || fields[len(fields)-3] != "cgroup" { + // Skip mounts that are not cgroup mounts. continue } - for _, opt := range strings.Split(fields[len(fields)-1], ",") { + // Cgroup controller type is in the super-options field. + superOptions := strings.Split(fields[len(fields)-1], ",") + for _, opt := range superOptions { // Remove prefix for cgroups with no controller, eg. systemd. opt = strings.TrimPrefix(opt, "name=") + + // Only considers cgroup controllers that are registered, and skip other + // irrelevant options, e.g. rw. if cgroupPath, ok := paths[opt]; ok { - root := fields[3] - relCgroupPath, err := filepath.Rel(root, cgroupPath) - if err != nil { - return nil, err + rootDir := fields[3] + if rootDir != "/" { + // When cgroup is in submount, remove repeated path components from + // cgroup path to avoid duplicating them. + relCgroupPath, err := filepath.Rel(rootDir, cgroupPath) + if err != nil { + return nil, err + } + paths[opt] = relCgroupPath } - paths[opt] = relCgroupPath } } } - if err := mfScanner.Err(); err != nil { + if err := mountScanner.Err(); err != nil { return nil, err } @@ -279,37 +292,50 @@ func loadPathsHelperWithMountinfo(cgroup, mountinfo io.Reader) (map[string]strin // Cgroup represents a group inside all controllers. For example: // Name='/foo/bar' maps to /sys/fs/cgroup//foo/bar on // all controllers. +// +// If Name is relative, it uses the parent cgroup path to determine the +// location. For example: +// Name='foo/bar' and Parent[ctrl]="/user.slice", then it will map to +// /sys/fs/cgroup//user.slice/foo/bar type Cgroup struct { Name string `json:"name"` Parents map[string]string `json:"parents"` Own map[string]bool `json:"own"` } -// New creates a new Cgroup instance if the spec includes a cgroup path. -// Returns nil otherwise. -func New(spec *specs.Spec) (*Cgroup, error) { +// NewFromSpec creates a new Cgroup instance if the spec includes a cgroup path. +// Returns nil otherwise. Cgroup paths are loaded based on the current process. +func NewFromSpec(spec *specs.Spec) (*Cgroup, error) { if spec.Linux == nil || spec.Linux.CgroupsPath == "" { return nil, nil } - return NewFromPath(spec.Linux.CgroupsPath) + return new("self", spec.Linux.CgroupsPath) } -// NewFromPath creates a new Cgroup instance. -func NewFromPath(cgroupsPath string) (*Cgroup, error) { +// NewFromPid loads cgroup for the given process. +func NewFromPid(pid int) (*Cgroup, error) { + return new(strconv.Itoa(pid), "") +} + +func new(pid, cgroupsPath string) (*Cgroup, error) { var parents map[string]string + + // If path is relative, load cgroup paths for the process to build the + // relative paths. if !filepath.IsAbs(cgroupsPath) { var err error - parents, err = LoadPaths("self") + parents, err = loadPaths(pid) if err != nil { return nil, fmt.Errorf("finding current cgroups: %w", err) } } - own := make(map[string]bool) - return &Cgroup{ + cg := &Cgroup{ Name: cgroupsPath, Parents: parents, - Own: own, - }, nil + Own: make(map[string]bool), + } + log.Debugf("New cgroup for pid: %s, %+v", pid, cg) + return cg, nil } // Install creates and configures cgroups according to 'res'. If cgroup path @@ -323,8 +349,8 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { clean := cleanup.Make(func() { _ = c.Uninstall() }) defer clean.Clean() - for key, cfg := range controllers { - path := c.makePath(key) + for key, ctrlr := range controllers { + path := c.MakePath(key) if _, err := os.Stat(path); err == nil { // If cgroup has already been created; it has been setup by caller. Don't // make any changes to configuration, just join when sandbox/gofer starts. @@ -336,13 +362,16 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { c.Own[key] = true if err := os.MkdirAll(path, 0755); err != nil { - if cfg.optional && errors.Is(err, unix.EROFS) { + if ctrlr.optional() && errors.Is(err, unix.EROFS) { + if err := ctrlr.skip(res); err != nil { + return err + } log.Infof("Skipping cgroup %q", key) continue } return err } - if err := cfg.ctrlr.set(res, path); err != nil { + if err := ctrlr.set(res, path); err != nil { return err } } @@ -359,7 +388,7 @@ func (c *Cgroup) Uninstall() error { // cgroup is managed by caller, don't touch it. continue } - path := c.makePath(key) + path := c.MakePath(key) log.Debugf("Removing cgroup controller for key=%q path=%q", key, path) // If we try to remove the cgroup too soon after killing the @@ -387,7 +416,7 @@ func (c *Cgroup) Uninstall() error { func (c *Cgroup) Join() (func(), error) { // First save the current state so it can be restored. undo := func() {} - paths, err := LoadPaths("self") + paths, err := loadPaths("self") if err != nil { return undo, err } @@ -414,14 +443,13 @@ func (c *Cgroup) Join() (func(), error) { } // Now join the cgroups. - for key, cfg := range controllers { - path := c.makePath(key) + for key, ctrlr := range controllers { + path := c.MakePath(key) log.Debugf("Joining cgroup %q", path) - // Writing the value 0 to a cgroup.procs file causes the - // writing process to be moved to the corresponding cgroup. - // - cgroups(7). + // Writing the value 0 to a cgroup.procs file causes the writing process to + // be moved to the corresponding cgroup - cgroups(7). if err := setValue(path, "cgroup.procs", "0"); err != nil { - if cfg.optional && os.IsNotExist(err) { + if ctrlr.optional() && os.IsNotExist(err) { continue } return undo, err @@ -432,7 +460,7 @@ func (c *Cgroup) Join() (func(), error) { // CPUQuota returns the CFS CPU quota. func (c *Cgroup) CPUQuota() (float64, error) { - path := c.makePath("cpu") + path := c.MakePath("cpu") quota, err := getInt(path, "cpu.cfs_quota_us") if err != nil { return -1, err @@ -449,7 +477,7 @@ func (c *Cgroup) CPUQuota() (float64, error) { // CPUUsage returns the total CPU usage of the cgroup. func (c *Cgroup) CPUUsage() (uint64, error) { - path := c.makePath("cpuacct") + path := c.MakePath("cpuacct") usage, err := getValue(path, "cpuacct.usage") if err != nil { return 0, err @@ -459,7 +487,7 @@ func (c *Cgroup) CPUUsage() (uint64, error) { // NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'. func (c *Cgroup) NumCPU() (int, error) { - path := c.makePath("cpuset") + path := c.MakePath("cpuset") cpuset, err := getValue(path, "cpuset.cpus") if err != nil { return 0, err @@ -469,7 +497,7 @@ func (c *Cgroup) NumCPU() (int, error) { // MemoryLimit returns the memory limit. func (c *Cgroup) MemoryLimit() (uint64, error) { - path := c.makePath("memory") + path := c.MakePath("memory") limStr, err := getValue(path, "memory.limit_in_bytes") if err != nil { return 0, err @@ -477,7 +505,8 @@ func (c *Cgroup) MemoryLimit() (uint64, error) { return strconv.ParseUint(strings.TrimSpace(limStr), 10, 64) } -func (c *Cgroup) makePath(controllerName string) string { +// MakePath builds a path to the given controller. +func (c *Cgroup) MakePath(controllerName string) string { path := c.Name if parent, ok := c.Parents[controllerName]; ok { path = filepath.Join(parent, c.Name) @@ -485,22 +514,48 @@ func (c *Cgroup) makePath(controllerName string) string { return filepath.Join(cgroupRoot, controllerName, path) } -type config struct { - ctrlr controller - optional bool -} - type controller interface { + // optional controllers don't fail if not found. + optional() bool + // set applies resource limits to controller. set(*specs.LinuxResources, string) error + // skip is called when controller is not found to check if it can be safely + // skipped or not based on the spec. + skip(*specs.LinuxResources) error +} + +type noop struct { + isOptional bool } -type noop struct{} +func (n *noop) optional() bool { + return n.isOptional +} func (*noop) set(*specs.LinuxResources, string) error { return nil } -type memory struct{} +func (n *noop) skip(*specs.LinuxResources) error { + if !n.isOptional { + panic("cgroup controller is not optional") + } + return nil +} + +type mandatory struct{} + +func (*mandatory) optional() bool { + return false +} + +func (*mandatory) skip(*specs.LinuxResources) error { + panic("cgroup controller is not optional") +} + +type memory struct { + mandatory +} func (*memory) set(spec *specs.LinuxResources, path string) error { if spec == nil || spec.Memory == nil { @@ -533,7 +588,9 @@ func (*memory) set(spec *specs.LinuxResources, path string) error { return nil } -type cpu struct{} +type cpu struct { + mandatory +} func (*cpu) set(spec *specs.LinuxResources, path string) error { if spec == nil || spec.CPU == nil { @@ -554,7 +611,9 @@ func (*cpu) set(spec *specs.LinuxResources, path string) error { return setOptionalValueInt(path, "cpu.rt_runtime_us", spec.CPU.RealtimeRuntime) } -type cpuSet struct{} +type cpuSet struct { + mandatory +} func (*cpuSet) set(spec *specs.LinuxResources, path string) error { // cpuset.cpus and mems are required fields, but are not set on a new cgroup. @@ -576,7 +635,9 @@ func (*cpuSet) set(spec *specs.LinuxResources, path string) error { return setValue(path, "cpuset.mems", spec.CPU.Mems) } -type blockIO struct{} +type blockIO struct { + mandatory +} func (*blockIO) set(spec *specs.LinuxResources, path string) error { if spec == nil || spec.BlockIO == nil { @@ -628,6 +689,10 @@ func setThrottle(path, name string, devs []specs.LinuxThrottleDevice) error { type networkClass struct{} +func (*networkClass) optional() bool { + return true +} + func (*networkClass) set(spec *specs.LinuxResources, path string) error { if spec == nil || spec.Network == nil { return nil @@ -635,8 +700,19 @@ func (*networkClass) set(spec *specs.LinuxResources, path string) error { return setOptionalValueUint32(path, "net_cls.classid", spec.Network.ClassID) } +func (*networkClass) skip(spec *specs.LinuxResources) error { + if spec != nil && spec.Network != nil && spec.Network.ClassID != nil { + return fmt.Errorf("Network.ClassID set but net_cls cgroup controller not found") + } + return nil +} + type networkPrio struct{} +func (*networkPrio) optional() bool { + return true +} + func (*networkPrio) set(spec *specs.LinuxResources, path string) error { if spec == nil || spec.Network == nil { return nil @@ -650,7 +726,16 @@ func (*networkPrio) set(spec *specs.LinuxResources, path string) error { return nil } -type pids struct{} +func (*networkPrio) skip(spec *specs.LinuxResources) error { + if spec != nil && spec.Network != nil && len(spec.Network.Priorities) > 0 { + return fmt.Errorf("Network.Priorities set but net_prio cgroup controller not found") + } + return nil +} + +type pids struct { + mandatory +} func (*pids) set(spec *specs.LinuxResources, path string) error { if spec == nil || spec.Pids == nil || spec.Pids.Limit <= 0 { @@ -662,6 +747,17 @@ func (*pids) set(spec *specs.LinuxResources, path string) error { type hugeTLB struct{} +func (*hugeTLB) optional() bool { + return true +} + +func (*hugeTLB) skip(spec *specs.LinuxResources) error { + if spec != nil && len(spec.HugepageLimits) > 0 { + return fmt.Errorf("HugepageLimits set but hugetlb cgroup controller not found") + } + return nil +} + func (*hugeTLB) set(spec *specs.LinuxResources, path string) error { if spec == nil { return nil diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go index 48d71cfa6..02cadeef4 100644 --- a/runsc/cgroup/cgroup_test.go +++ b/runsc/cgroup/cgroup_test.go @@ -43,19 +43,19 @@ var debianMountinfo = ` ` var dindMountinfo = ` -1305 1304 0:64 / /sys/fs/cgroup rw - tmpfs tmpfs rw,mode=755 -1306 1305 0:32 /docker/136 /sys/fs/cgroup/systemd ro master:11 - cgroup cgroup rw,xattr,name=systemd -1307 1305 0:36 /docker/136 /sys/fs/cgroup/cpu,cpuacct ro master:16 - cgroup cgroup rw,cpu,cpuacct -1308 1305 0:37 /docker/136 /sys/fs/cgroup/freezer ro master:17 - cgroup cgroup rw,freezer -1309 1305 0:38 /docker/136 /sys/fs/cgroup/hugetlb ro master:18 - cgroup cgroup rw,hugetlb -1310 1305 0:39 /docker/136 /sys/fs/cgroup/cpuset ro master:19 - cgroup cgroup rw,cpuset -1311 1305 0:40 /docker/136 /sys/fs/cgroup/net_cls,net_prio ro master:20 - cgroup cgroup rw,net_cls,net_prio -1312 1305 0:41 /docker/136 /sys/fs/cgroup/pids ro master:21 - cgroup cgroup rw,pids -1313 1305 0:42 /docker/136 /sys/fs/cgroup/perf_event ro master:22 - cgroup cgroup rw,perf_event -1314 1305 0:43 /docker/136 /sys/fs/cgroup/memory ro master:23 - cgroup cgroup rw,memory -1316 1305 0:44 /docker/136 /sys/fs/cgroup/blkio ro master:24 - cgroup cgroup rw,blkio -1317 1305 0:45 /docker/136 /sys/fs/cgroup/devices ro master:25 - cgroup cgroup rw,devices -1318 1305 0:46 / /sys/fs/cgroup/rdma ro master:26 - cgroup cgroup rw,rdma +05 04 0:64 / /sys/fs/cgroup rw - tmpfs tmpfs rw,mode=755 +06 05 0:32 /docker/136 /sys/fs/cgroup/systemd ro master:11 - cgroup cgroup rw,xattr,name=systemd +07 05 0:36 /docker/136 /sys/fs/cgroup/cpu,cpuacct ro master:16 - cgroup cgroup rw,cpu,cpuacct +08 05 0:37 /docker/136 /sys/fs/cgroup/freezer ro master:17 - cgroup cgroup rw,freezer +09 05 0:38 /docker/136 /sys/fs/cgroup/hugetlb ro master:18 - cgroup cgroup rw,hugetlb +10 05 0:39 /docker/136 /sys/fs/cgroup/cpuset ro master:19 - cgroup cgroup rw,cpuset +11 05 0:40 /docker/136 /sys/fs/cgroup/net_cls,net_prio ro master:20 - cgroup cgroup rw,net_cls,net_prio +12 05 0:41 /docker/136 /sys/fs/cgroup/pids ro master:21 - cgroup cgroup rw,pids +13 05 0:42 /docker/136 /sys/fs/cgroup/perf_event ro master:22 - cgroup cgroup rw,perf_event +14 05 0:43 /docker/136 /sys/fs/cgroup/memory ro master:23 - cgroup cgroup rw,memory +16 05 0:44 /docker/136 /sys/fs/cgroup/blkio ro master:24 - cgroup cgroup rw,blkio +17 05 0:45 /docker/136 /sys/fs/cgroup/devices ro master:25 - cgroup cgroup rw,devices +18 05 0:46 / /sys/fs/cgroup/rdma ro master:26 - cgroup cgroup rw,rdma ` func TestUninstallEnoent(t *testing.T) { @@ -693,36 +693,42 @@ func TestLoadPaths(t *testing.T) { err string }{ { - name: "abs-path-unknown-controller", - cgroups: "0:ctr:/path", + name: "empty", mountinfo: debianMountinfo, - want: map[string]string{"ctr": "/path"}, + }, + { + name: "abs-path", + cgroups: "0:cpu:/path", + mountinfo: debianMountinfo, + want: map[string]string{"cpu": "/path"}, }, { name: "rel-path", - cgroups: "0:ctr:rel-path", + cgroups: "0:cpu:rel-path", mountinfo: debianMountinfo, - want: map[string]string{"ctr": "rel-path"}, + want: map[string]string{"cpu": "rel-path"}, }, { name: "non-controller", cgroups: "0:name=systemd:/path", mountinfo: debianMountinfo, - want: map[string]string{"systemd": "path"}, + want: map[string]string{"systemd": "/path"}, }, { - name: "empty", + name: "unknown-controller", + cgroups: "0:ctr:/path", mountinfo: debianMountinfo, + want: map[string]string{}, }, { name: "multiple", - cgroups: "0:ctr0:/path0\n" + - "1:ctr1:/path1\n" + + cgroups: "0:cpu:/path0\n" + + "1:memory:/path1\n" + "2::/empty\n", mountinfo: debianMountinfo, want: map[string]string{ - "ctr0": "/path0", - "ctr1": "/path1", + "cpu": "/path0", + "memory": "/path1", }, }, { @@ -747,10 +753,10 @@ func TestLoadPaths(t *testing.T) { }, { name: "nested-cgroup", - cgroups: `9:memory:/docker/136 -2:cpu,cpuacct:/docker/136 -1:name=systemd:/docker/136 -0::/system.slice/containerd.service`, + cgroups: "9:memory:/docker/136\n" + + "2:cpu,cpuacct:/docker/136\n" + + "1:name=systemd:/docker/136\n" + + "0::/system.slice/containerd.service\n", mountinfo: dindMountinfo, // we want relative path to /sys/fs/cgroup inside the nested container. // Subcroup inside the container will be created at /sys/fs/cgroup/cpu @@ -781,15 +787,15 @@ func TestLoadPaths(t *testing.T) { }, { name: "invalid-rel-path-in-proc-cgroup", - cgroups: "9:memory:./invalid", + cgroups: "9:memory:invalid", mountinfo: dindMountinfo, - err: "can't make ./invalid relative to /docker/136", + err: "can't make invalid relative to /docker/136", }, } { t.Run(tc.name, func(t *testing.T) { r := strings.NewReader(tc.cgroups) mountinfo := strings.NewReader(tc.mountinfo) - got, err := loadPathsHelperWithMountinfo(r, mountinfo) + got, err := loadPathsHelper(r, mountinfo) if len(tc.err) == 0 { if err != nil { t.Fatalf("Unexpected error: %v", err) @@ -813,3 +819,47 @@ func TestLoadPaths(t *testing.T) { }) } } + +func TestOptional(t *testing.T) { + for _, tc := range []struct { + name string + ctrlr controller + spec *specs.LinuxResources + err string + }{ + { + name: "net-cls", + ctrlr: &networkClass{}, + spec: &specs.LinuxResources{Network: &specs.LinuxNetwork{ClassID: uint32Ptr(1)}}, + err: "Network.ClassID set but net_cls cgroup controller not found", + }, + { + name: "net-prio", + ctrlr: &networkPrio{}, + spec: &specs.LinuxResources{Network: &specs.LinuxNetwork{ + Priorities: []specs.LinuxInterfacePriority{ + {Name: "foo", Priority: 1}, + }, + }}, + err: "Network.Priorities set but net_prio cgroup controller not found", + }, + { + name: "hugetlb", + ctrlr: &hugeTLB{}, + spec: &specs.LinuxResources{HugepageLimits: []specs.LinuxHugepageLimit{ + {Pagesize: "1", Limit: 2}, + }}, + err: "HugepageLimits set but hugetlb cgroup controller not found", + }, + } { + t.Run(tc.name, func(t *testing.T) { + err := tc.ctrlr.skip(tc.spec) + if err == nil { + t.Fatalf("ctrlr.skip() didn't fail") + } + if !strings.Contains(err.Error(), tc.err) { + t.Errorf("ctrlr.skip() want: *%s*, got: %q", tc.err, err) + } + }) + } +} diff --git a/runsc/container/container.go b/runsc/container/container.go index e72ada311..0820edaec 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -233,7 +233,7 @@ func New(conf *config.Config, args Args) (*Container, error) { } // Create and join cgroup before processes are created to ensure they are // part of the cgroup from the start (and all their children processes). - cg, err := cgroup.New(args.Spec) + cg, err := cgroup.NewFromSpec(args.Spec) if err != nil { return nil, err } @@ -1132,7 +1132,7 @@ func (c *Container) populateStats(event *boot.EventOut) { // account for the full cgroup CPU usage. We split cgroup usage // proportionally according to the sentry-internal usage measurements, // only counting Running containers. - log.Warningf("event.ContainerUsage: %v", event.ContainerUsage) + log.Debugf("event.ContainerUsage: %v", event.ContainerUsage) var containerUsage uint64 var allContainersUsage uint64 for ID, usage := range event.ContainerUsage { @@ -1142,7 +1142,7 @@ func (c *Container) populateStats(event *boot.EventOut) { } } - cgroup, err := c.Sandbox.FindCgroup() + cgroup, err := c.Sandbox.NewCGroup() if err != nil { // No cgroup, so rely purely on the sentry's accounting. log.Warningf("events: no cgroups") @@ -1159,17 +1159,18 @@ func (c *Container) populateStats(event *boot.EventOut) { return } - // If the sentry reports no memory usage, fall back on cgroups and - // split usage equally across containers. + // If the sentry reports no CPU usage, fall back on cgroups and split usage + // equally across containers. if allContainersUsage == 0 { log.Warningf("events: no sentry CPU usage reported") allContainersUsage = cgroupsUsage containerUsage = cgroupsUsage / uint64(len(event.ContainerUsage)) } - log.Warningf("%f, %f, %f", containerUsage, cgroupsUsage, allContainersUsage) // Scaling can easily overflow a uint64 (e.g. a containerUsage and // cgroupsUsage of 16 seconds each will overflow), so use floats. - event.Event.Data.CPU.Usage.Total = uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage))) + total := float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage)) + log.Debugf("Usage, container: %d, cgroups: %d, all: %d, total: %.0f", containerUsage, cgroupsUsage, allContainersUsage, total) + event.Event.Data.CPU.Usage.Total = uint64(total) return } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 37ad7d2e1..0dbe1e323 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -25,6 +25,7 @@ import ( "testing" "time" + "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/cleanup" @@ -1917,9 +1918,9 @@ func TestMultiContainerEvent(t *testing.T) { } defer cleanup() - for _, cont := range containers { - t.Logf("Running containerd %s", cont.ID) - } + t.Logf("Running container sleep %s", containers[0].ID) + t.Logf("Running container busy %s", containers[1].ID) + t.Logf("Running container quick %s", containers[2].ID) // Wait for last container to stabilize the process count that is // checked further below. @@ -1940,50 +1941,61 @@ func TestMultiContainerEvent(t *testing.T) { } // Check events for running containers. - var prevUsage uint64 for _, cont := range containers[:2] { ret, err := cont.Event() if err != nil { - t.Errorf("Container.Events(): %v", err) + t.Errorf("Container.Event(%q): %v", cont.ID, err) } evt := ret.Event if want := "stats"; evt.Type != want { - t.Errorf("Wrong event type, want: %s, got: %s", want, evt.Type) + t.Errorf("Wrong event type, cid: %q, want: %s, got: %s", cont.ID, want, evt.Type) } if cont.ID != evt.ID { t.Errorf("Wrong container ID, want: %s, got: %s", cont.ID, evt.ID) } // One process per remaining container. if got, want := evt.Data.Pids.Current, uint64(2); got != want { - t.Errorf("Wrong number of PIDs, want: %d, got: %d", want, got) + t.Errorf("Wrong number of PIDs, cid: %q, want: %d, got: %d", cont.ID, want, got) } - // Both remaining containers should have nonzero usage, and - // 'busy' should have higher usage than 'sleep'. - usage := evt.Data.CPU.Usage.Total - if usage == 0 { - t.Errorf("Running container should report nonzero CPU usage, but got %d", usage) + // The exited container should always have a usage of zero. + if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 { + t.Errorf("Exited container should report 0 CPU usage, got: %d", exited) + } + } + + // Check that CPU reported by busy container is higher than sleep. + cb := func() error { + sleepEvt, err := containers[0].Event() + if err != nil { + return &backoff.PermanentError{Err: err} } - if usage <= prevUsage { - t.Errorf("Expected container %s to use more than %d ns of CPU, but used %d", cont.ID, prevUsage, usage) + sleepUsage := sleepEvt.Event.Data.CPU.Usage.Total + + busyEvt, err := containers[1].Event() + if err != nil { + return &backoff.PermanentError{Err: err} } - t.Logf("Container %s usage: %d", cont.ID, usage) - prevUsage = usage + busyUsage := busyEvt.Event.Data.CPU.Usage.Total - // The exited container should have a usage of zero. - if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 { - t.Errorf("Exited container should report 0 CPU usage, but got %d", exited) + if busyUsage <= sleepUsage { + t.Logf("Busy container usage lower than sleep (busy: %d, sleep: %d), retrying...", busyUsage, sleepUsage) + return fmt.Errorf("Busy container should have higher usage than sleep, busy: %d, sleep: %d", busyUsage, sleepUsage) } + return nil + } + // Give time for busy container to run and use more CPU than sleep. + if err := testutil.Poll(cb, 10*time.Second); err != nil { + t.Fatal(err) } - // Check that stop and destroyed containers return error. + // Check that stopped and destroyed containers return error. if err := containers[1].Destroy(); err != nil { t.Fatalf("container.Destroy: %v", err) } for _, cont := range containers[1:] { - _, err := cont.Event() - if err == nil { - t.Errorf("Container.Events() should have failed, cid:%s, state: %v", cont.ID, cont.Status) + if _, err := cont.Event(); err == nil { + t.Errorf("Container.Event() should have failed, cid: %q, state: %v", cont.ID, cont.Status) } } } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index f3f60f116..8d31e33b2 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -310,20 +310,9 @@ func (s *Sandbox) Processes(cid string) ([]*control.Process, error) { return pl, nil } -// FindCgroup returns the sandbox's Cgroup, or an error if it does not have one. -func (s *Sandbox) FindCgroup() (*cgroup.Cgroup, error) { - paths, err := cgroup.LoadPaths(strconv.Itoa(s.Pid)) - if err != nil { - return nil, err - } - // runsc places sandboxes in the same cgroup for each controller, so we - // pick an arbitrary controller here to get the cgroup path. - const controller = "cpuacct" - controllerPath, ok := paths[controller] - if !ok { - return nil, fmt.Errorf("no %q controller found", controller) - } - return cgroup.NewFromPath(controllerPath) +// NewCGroup returns the sandbox's Cgroup, or an error if it does not have one. +func (s *Sandbox) NewCGroup() (*cgroup.Cgroup, error) { + return cgroup.NewFromPid(s.Pid) } // Execute runs the specified command in the container. It returns the PID of -- cgit v1.2.3