summaryrefslogtreecommitdiffhomepage
path: root/pkg/shim/service.go
diff options
context:
space:
mode:
authorFabricio Voznika <fvoznika@google.com>2021-05-17 13:52:51 -0700
committergVisor bot <gvisor-bot@google.com>2021-05-17 13:54:46 -0700
commitd96499d17deb4fcf4cec949d90a48b1673198867 (patch)
tree0b9689e46689794bd02f7795392719330466240f /pkg/shim/service.go
parent7654181cc7c4f7633a1e96280bfd32391a3fbce3 (diff)
Make sandbox join the pod cgroup in K8s
cgroups in K8s are setup with the following hierarchy: `.../pod/container`. The sandbox is created with the first container and consequently uses the the pause container cgroup. This change removes the container cgroup from the path to make the sandbox use the pod cgroup instead. Otherwise limits set to the pause container will apply to the entire sandbox. PiperOrigin-RevId: 374273277
Diffstat (limited to 'pkg/shim/service.go')
-rw-r--r--pkg/shim/service.go50
1 files changed, 49 insertions, 1 deletions
diff --git a/pkg/shim/service.go b/pkg/shim/service.go
index 9d9fa8ef6..1f9adcb65 100644
--- a/pkg/shim/service.go
+++ b/pkg/shim/service.go
@@ -22,6 +22,7 @@ import (
"os"
"os/exec"
"path/filepath"
+ "strings"
"sync"
"time"
@@ -44,6 +45,7 @@ import (
"github.com/containerd/containerd/sys/reaper"
"github.com/containerd/typeurl"
"github.com/gogo/protobuf/types"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cleanup"
@@ -944,9 +946,19 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C
if err != nil {
return nil, fmt.Errorf("read oci spec: %w", err)
}
- if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil {
+
+ updated, err := utils.UpdateVolumeAnnotations(spec)
+ if err != nil {
return nil, fmt.Errorf("update volume annotations: %w", err)
}
+ updated = updateCgroup(spec) || updated
+
+ if updated {
+ if err := utils.WriteSpec(r.Bundle, spec); err != nil {
+ return nil, err
+ }
+ }
+
runsc.FormatRunscLogPath(r.ID, options.RunscConfig)
runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig)
p := proc.New(r.ID, runtime, stdio.Stdio{
@@ -966,3 +978,39 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C
p.Monitor = reaper.Default
return p, nil
}
+
+// updateCgroup updates cgroup path for the sandbox to make the sandbox join the
+// pod cgroup and not the pause container cgroup. Returns true if the spec was
+// modified. Ex.:
+// /kubepods/burstable/pod123/abc => kubepods/burstable/pod123
+//
+func updateCgroup(spec *specs.Spec) bool {
+ if !utils.IsSandbox(spec) {
+ return false
+ }
+ if spec.Linux == nil || len(spec.Linux.CgroupsPath) == 0 {
+ return false
+ }
+
+ // Search backwards for the pod cgroup path to make the sandbox use it,
+ // instead of the pause container's cgroup.
+ parts := strings.Split(spec.Linux.CgroupsPath, string(filepath.Separator))
+ for i := len(parts) - 1; i >= 0; i-- {
+ if strings.HasPrefix(parts[i], "pod") {
+ var path string
+ for j := 0; j <= i; j++ {
+ path = filepath.Join(path, parts[j])
+ }
+ // Add back the initial '/' that may have been lost above.
+ if filepath.IsAbs(spec.Linux.CgroupsPath) {
+ path = string(filepath.Separator) + path
+ }
+ if spec.Linux.CgroupsPath == path {
+ return false
+ }
+ spec.Linux.CgroupsPath = path
+ return true
+ }
+ }
+ return false
+}