diff options
author | Daniel Dao <dqminh89@gmail.com> | 2020-10-12 14:57:47 +0100 |
---|---|---|
committer | Daniel Dao <dqminh89@gmail.com> | 2021-01-26 15:01:21 +0000 |
commit | bd5eb8a9db2bf3154d8bc4231ac0c655c78df3ae (patch) | |
tree | 5715edce61354b4bbe52f8c9dd37eaac44d76d54 /runsc/cgroup/cgroup.go | |
parent | f5736fa2bf91e1bb3fd9f9625dba8c800bf2adb5 (diff) |
runsc: check for nested cgroup when generating cgroup paths
In a nested container, we see paths from the host in /proc/self/cgroup, so we
need to re-process that path to get a relative path to be used inside
the container.
Without this, runsc generates ugly paths that may trip up other cgroup
watchers that expect clean paths. An example of an ugly path is:
```
/sys/fs/cgroup/memory/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93/cgroupPath
```
Notice the duplication of `docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93`.
`/proc/1/cgroup` looks like:
```
12:perf_event:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
11:blkio:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
10:freezer:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
9:hugetlb:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
8:devices:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
7:rdma:/
6:pids:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
5:cpuset:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
4:cpu,cpuacct:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
3:memory:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
2:net_cls,net_prio:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
1:name=systemd:/docker/e383892b29290ae8005d535f2dadc4a583bb354d5bb1ba8c10bf900d92c4db93
0::/system.slice/containerd.service
```
This is not necessary when the parent container was created with a cgroup
namespace, but that setup is not very common right now.
Signed-off-by: Daniel Dao <dqminh89@gmail.com>
Diffstat (limited to 'runsc/cgroup/cgroup.go')
-rw-r--r-- | runsc/cgroup/cgroup.go | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 13c6a16a0..e9ae59a92 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -203,6 +203,19 @@ func LoadPaths(pid string) (map[string]string, error) { } func loadPathsHelper(cgroup io.Reader) (map[string]string, error) { + // For nested containers, in /proc/self/cgroup we see paths from host, + // which don't exist in container, so recover the container paths here by + // double-checking with /proc/pid/mountinfo + mountinfo, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer mountinfo.Close() + + return loadPathsHelperWithMountinfo(cgroup, mountinfo) +} + +func loadPathsHelperWithMountinfo(cgroup, mountinfo io.Reader) (map[string]string, error) { paths := make(map[string]string) scanner := bufio.NewScanner(cgroup) @@ -225,6 +238,31 @@ func loadPathsHelper(cgroup io.Reader) (map[string]string, error) { if err := scanner.Err(); err != nil { return nil, err } + + mfScanner := bufio.NewScanner(mountinfo) + for mfScanner.Scan() { + txt := mfScanner.Text() + fields := strings.Fields(txt) + if len(fields) < 9 || fields[len(fields)-3] != "cgroup" { + continue + } + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + // Remove prefix for cgroups with no controller, eg. systemd. + opt = strings.TrimPrefix(opt, "name=") + if cgroupPath, ok := paths[opt]; ok { + root := fields[3] + relCgroupPath, err := filepath.Rel(root, cgroupPath) + if err != nil { + return nil, err + } + paths[opt] = relCgroupPath + } + } + } + if err := mfScanner.Err(); err != nil { + return nil, err + } + return paths, nil } |