diff options
author | Fabricio Voznika <fvoznika@google.com> | 2018-10-10 08:59:25 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-10-10 09:00:42 -0700 |
commit | 29cd05a7c66ee8061c0e5cf8e94c4e507dcf33e0 (patch) | |
tree | 91600ea6944d18c86f41b5f8003311a8c7bd154b /runsc/cgroup | |
parent | 20508bafb88d2037ea3b2c8483b191ce72e7ad7e (diff) |
Add sandbox to cgroup
Sandbox creation uses the limits and reservations configured in the
OCI spec and set cgroup options accordinly. Then it puts both the
sandbox and gofer processes inside the cgroup.
It also allows the cgroup to be pre-configured by the caller. If the
cgroup already exists, sandbox and gofer processes will join the
cgroup but it will not modify the cgroup with spec limits.
PiperOrigin-RevId: 216538209
Change-Id: If2c65ffedf55820baab743a0edcfb091b89c1019
Diffstat (limited to 'runsc/cgroup')
-rw-r--r-- | runsc/cgroup/BUILD | 24 | ||||
-rw-r--r-- | runsc/cgroup/cgroup.go | 405 | ||||
-rw-r--r-- | runsc/cgroup/cgroup_test.go | 56 |
3 files changed, 485 insertions, 0 deletions
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD new file mode 100644 index 000000000..4a535d230 --- /dev/null +++ b/runsc/cgroup/BUILD @@ -0,0 +1,24 @@ +package(licenses = ["notice"]) # Apache 2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "cgroup", + srcs = ["cgroup.go"], + importpath = "gvisor.googlesource.com/gvisor/runsc/cgroup", + visibility = [ + "//runsc:__subpackages__", + ], + deps = [ + "//pkg/log", + "//runsc/specutils", + "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + ], +) + +go_test( + name = "cgroup_test", + size = "small", + srcs = ["cgroup_test.go"], + embed = [":cgroup"], +) diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go new file mode 100644 index 000000000..6a0092be8 --- /dev/null +++ b/runsc/cgroup/cgroup.go @@ -0,0 +1,405 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package cgroup provides an interface to read and write configuration to +// cgroup. +package cgroup + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +const ( + cgroupRoot = "/sys/fs/cgroup" +) + +var controllers = map[string]controller{ + "blkio": &blockIO{}, + "cpu": &cpu{}, + "cpuset": &cpuSet{}, + "memory": &memory{}, + "net_cls": &networkClass{}, + "net_prio": &networkPrio{}, + + // These controllers either don't have anything in the OCI spec or is + // irrevalant for a sandbox, e.g. pids. + "devices": &noop{}, + "freezer": &noop{}, + "perf_event": &noop{}, + "pids": &noop{}, + "systemd": &noop{}, +} + +func setOptionalValueInt(path, name string, val *int64) error { + if val == nil || *val == 0 { + return nil + } + str := strconv.FormatInt(*val, 10) + return setValue(path, name, str) +} + +func setOptionalValueUint(path, name string, val *uint64) error { + if val == nil || *val == 0 { + return nil + } + str := strconv.FormatUint(*val, 10) + return setValue(path, name, str) +} + +func setOptionalValueUint32(path, name string, val *uint32) error { + if val == nil || *val == 0 { + return nil + } + str := strconv.FormatUint(uint64(*val), 10) + return setValue(path, name, str) +} + +func setOptionalValueUint16(path, name string, val *uint16) error { + if val == nil || *val == 0 { + return nil + } + str := strconv.FormatUint(uint64(*val), 10) + return setValue(path, name, str) +} + +func setValue(path, name, data string) error { + fullpath := filepath.Join(path, name) + return ioutil.WriteFile(fullpath, []byte(data), 0700) +} + +func getValue(path, name string) (string, error) { + fullpath := filepath.Join(path, name) + out, err := ioutil.ReadFile(fullpath) + if err != nil { + return "", err + } + return string(out), nil +} + +// fillFromAncestor sets the value of a cgroup file from the first ancestor +// that has content. It does nothing if the file in 'path' has already been set. +func fillFromAncestor(path string) (string, error) { + out, err := ioutil.ReadFile(path) + if err != nil { + return "", err + } + val := strings.TrimSpace(string(out)) + if val != "" { + // File is set, stop here. + return val, nil + } + + // File is not set, recurse to parent and then set here. + name := filepath.Base(path) + parent := filepath.Dir(filepath.Dir(path)) + val, err = fillFromAncestor(filepath.Join(parent, name)) + if err != nil { + return "", err + } + if err := ioutil.WriteFile(path, []byte(val), 0700); err != nil { + return "", err + } + return val, nil +} + +func countCpuset(cpuset string) (int, error) { + var count int + for _, p := range strings.Split(cpuset, ",") { + interval := strings.Split(p, "-") + switch len(interval) { + case 1: + if _, err := strconv.Atoi(interval[0]); err != nil { + return 0, err + } + count++ + + case 2: + start, err := strconv.Atoi(interval[0]) + if err != nil { + return 0, err + } + end, err := strconv.Atoi(interval[1]) + if err != nil { + return 0, err + } + if start < 0 || end < 0 || start > end { + return 0, fmt.Errorf("invalid cpuset: %q", p) + } + count += end - start + 1 + + default: + return 0, fmt.Errorf("invalid cpuset: %q", p) + } + } + return count, nil +} + +// Cgroup represents a group inside all controllers. For example: Name='/foo/bar' +// maps to /sys/fs/cgroup/<controller>/foo/bar on all controllers. +type Cgroup struct { + Name string `json:"name"` + Own bool `json:"own"` +} + +// New creates a new Cgroup instance if the spec includes a cgroup path. +// Otherwise it returns nil and false. +func New(spec *specs.Spec) (*Cgroup, bool) { + if spec.Linux == nil || spec.Linux.CgroupsPath == "" { + return nil, false + } + return &Cgroup{Name: spec.Linux.CgroupsPath}, true +} + +// Install creates and configures cgroups according to 'res'. If cgroup path +// already exists, it means that the caller has already provided a +// pre-configured cgroups, and 'res' is ignored. +func (c *Cgroup) Install(res *specs.LinuxResources) error { + if _, err := os.Stat(c.makePath("memory")); err == nil { + // If cgroup has already been created; it has been setup by caller. Don't + // make any changes to configuration, just join when sandbox/gofer starts. + log.Debugf("Using pre-created cgroup %q", c.Name) + return nil + } + + // Mark that cgroup resources are owned by me. + log.Debugf("Creating cgroup %q", c.Name) + c.Own = true + clean := specutils.MakeCleanup(func() { c.Uninstall() }) + defer clean.Clean() + + for key, ctrl := range controllers { + path := c.makePath(key) + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + if res != nil { + if err := ctrl.set(res, path); err != nil { + return err + } + } + } + clean.Release() + return nil +} + +// Uninstall removes the settings done in Install(). If cgroup path already +// existed when Install() was called, Uninstall is a noop. +func (c *Cgroup) Uninstall() error { + if !c.Own { + // cgroup is managed by caller, don't touch it. + return nil + } + log.Debugf("Deleting cgroup %q", c.Name) + for key := range controllers { + if err := syscall.Rmdir(c.makePath(key)); err != nil && !os.IsNotExist(err) { + return err + } + } + return nil +} + +// Add adds given process to all controllers. +func (c *Cgroup) Add(pid int) error { + for key := range controllers { + if err := setValue(c.makePath(key), "cgroup.procs", strconv.Itoa(pid)); err != nil { + return err + } + } + return nil +} + +// NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'. +func (c *Cgroup) NumCPU() (int, error) { + path := c.makePath("cpuset") + cpuset, err := getValue(path, "cpuset.cpus") + if err != nil { + return 0, err + } + return countCpuset(strings.TrimSpace(cpuset)) +} + +// MemoryLimit returns the memory limit. +func (c *Cgroup) MemoryLimit() (uint64, error) { + path := c.makePath("memory") + limStr, err := getValue(path, "memory.limit_in_bytes") + if err != nil { + return 0, err + } + return strconv.ParseUint(strings.TrimSpace(limStr), 10, 64) +} + +func (c *Cgroup) makePath(controllerName string) string { + return filepath.Join(cgroupRoot, controllerName, c.Name) +} + +type controller interface { + set(*specs.LinuxResources, string) error +} + +type noop struct{} + +func (*noop) set(*specs.LinuxResources, string) error { + return nil +} + +type memory struct{} + +func (*memory) set(spec *specs.LinuxResources, path string) error { + if spec.Memory == nil { + return nil + } + if err := setOptionalValueInt(path, "memory.limit_in_bytes", spec.Memory.Limit); err != nil { + return err + } + if err := setOptionalValueInt(path, "memory.soft_limit_in_bytes", spec.Memory.Reservation); err != nil { + return err + } + if err := setOptionalValueInt(path, "memory.memsw.limit_in_bytes", spec.Memory.Swap); err != nil { + return err + } + if err := setOptionalValueInt(path, "memory.kmem.limit_in_bytes", spec.Memory.Kernel); err != nil { + return err + } + if err := setOptionalValueInt(path, "memory.kmem.tcp.limit_in_bytes", spec.Memory.KernelTCP); err != nil { + return err + } + if err := setOptionalValueUint(path, "memory.swappiness", spec.Memory.Swappiness); err != nil { + return err + } + + if spec.Memory.DisableOOMKiller != nil && *spec.Memory.DisableOOMKiller { + if err := setValue(path, "memory.oom_control", "1"); err != nil { + return err + } + } + return nil +} + +type cpu struct{} + +func (*cpu) set(spec *specs.LinuxResources, path string) error { + if spec.CPU == nil { + return nil + } + if err := setOptionalValueUint(path, "cpu.shares", spec.CPU.Shares); err != nil { + return err + } + if err := setOptionalValueInt(path, "cpu.cfs_quota_us", spec.CPU.Quota); err != nil { + return err + } + return setOptionalValueUint(path, "cpu.cfs_period_us", spec.CPU.Period) +} + +type cpuSet struct{} + +func (*cpuSet) set(spec *specs.LinuxResources, path string) error { + // cpuset.cpus and mems are required fields, but are not set on a new cgroup. + // If not set in the spec, get it from one of the ancestors cgroup. + if spec.CPU == nil || spec.CPU.Cpus == "" { + if _, err := fillFromAncestor(filepath.Join(path, "cpuset.cpus")); err != nil { + return err + } + } else { + if err := setValue(path, "cpuset.cpus", spec.CPU.Cpus); err != nil { + return err + } + } + + if spec.CPU == nil || spec.CPU.Mems == "" { + _, err := fillFromAncestor(filepath.Join(path, "cpuset.mems")) + return err + } + mems := spec.CPU.Mems + return setValue(path, "cpuset.mems", mems) +} + +type blockIO struct{} + +func (*blockIO) set(spec *specs.LinuxResources, path string) error { + if spec.BlockIO == nil { + return nil + } + + if err := setOptionalValueUint16(path, "blkio.weight", spec.BlockIO.Weight); err != nil { + return err + } + if err := setOptionalValueUint16(path, "blkio.leaf_weight", spec.BlockIO.LeafWeight); err != nil { + return err + } + + for _, dev := range spec.BlockIO.WeightDevice { + val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, dev.Weight) + if err := setValue(path, "blkio.weight_device", val); err != nil { + return err + } + val = fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, dev.LeafWeight) + if err := setValue(path, "blkio.leaf_weight_device", val); err != nil { + return err + } + } + if err := setThrottle(path, "blkio.throttle.read_bps_device", spec.BlockIO.ThrottleReadBpsDevice); err != nil { + return err + } + if err := setThrottle(path, "blkio.throttle.write_bps_device", spec.BlockIO.ThrottleWriteBpsDevice); err != nil { + return err + } + if err := setThrottle(path, "blkio.throttle.read_iops_device", spec.BlockIO.ThrottleReadIOPSDevice); err != nil { + return err + } + return setThrottle(path, "blkio.throttle.write_iops_device", spec.BlockIO.ThrottleWriteIOPSDevice) +} + +func setThrottle(path, name string, devs []specs.LinuxThrottleDevice) error { + for _, dev := range devs { + val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, dev.Rate) + if err := setValue(path, name, val); err != nil { + return err + } + } + return nil +} + +type networkClass struct{} + +func (*networkClass) set(spec *specs.LinuxResources, path string) error { + if spec.Network == nil { + return nil + } + return setOptionalValueUint32(path, "net_cls.classid", spec.Network.ClassID) +} + +type networkPrio struct{} + +func (*networkPrio) set(spec *specs.LinuxResources, path string) error { + if spec.Network == nil { + return nil + } + for _, prio := range spec.Network.Priorities { + val := fmt.Sprintf("%s %d", prio.Name, prio.Priority) + if err := setValue(path, "net_prio.ifpriomap", val); err != nil { + return err + } + } + return nil +} diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go new file mode 100644 index 000000000..cde915329 --- /dev/null +++ b/runsc/cgroup/cgroup_test.go @@ -0,0 +1,56 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cgroup + +import ( + "testing" +) + +func TestCountCpuset(t *testing.T) { + for _, tc := range []struct { + str string + want int + error bool + }{ + {str: "0", want: 1}, + {str: "0,1,2,8,9,10", want: 6}, + {str: "0-1", want: 2}, + {str: "0-7", want: 8}, + {str: "0-7,16,32-39,64,65", want: 19}, + {str: "a", error: true}, + {str: "5-a", error: true}, + {str: "a-5", error: true}, + {str: "-10", error: true}, + {str: "15-", error: true}, + {str: "-", error: true}, + {str: "--", error: true}, + } { + t.Run(tc.str, func(t *testing.T) { + got, err := countCpuset(tc.str) + if tc.error { + if err == nil { + t.Errorf("countCpuset(%q) should have failed", tc.str) + } + } else { + if err != nil { + t.Errorf("countCpuset(%q) failed: %v", tc.str, err) + } + if tc.want != got { + t.Errorf("countCpuset(%q) want: %d, got: %d", tc.str, tc.want, got) + } + } + }) + } +} |