From d96499d17deb4fcf4cec949d90a48b1673198867 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Mon, 17 May 2021 13:52:51 -0700 Subject: Make sandbox join the pod cgroup in K8s cgroups in K8s are setup with the following hierarchy: `.../pod/container`. The sandbox is created with the first container and consequently uses the the pause container cgroup. This change removes the container cgroup from the path to make the sandbox use the pod cgroup instead. Otherwise limits set to the pause container will apply to the entire sandbox. PiperOrigin-RevId: 374273277 --- pkg/shim/BUILD | 14 ++++- pkg/shim/service.go | 50 ++++++++++++++++- pkg/shim/service_test.go | 121 +++++++++++++++++++++++++++++++++++++++++ pkg/shim/utils/annotations.go | 6 +- pkg/shim/utils/utils.go | 20 ++++--- pkg/shim/utils/volumes.go | 20 ++----- pkg/shim/utils/volumes_test.go | 56 +++++++------------ 7 files changed, 225 insertions(+), 62 deletions(-) create mode 100644 pkg/shim/service_test.go diff --git a/pkg/shim/BUILD b/pkg/shim/BUILD index 4f7c02f5d..fd6127b97 100644 --- a/pkg/shim/BUILD +++ b/pkg/shim/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -41,7 +41,19 @@ go_library( "@com_github_containerd_fifo//:go_default_library", "@com_github_containerd_typeurl//:go_default_library", "@com_github_gogo_protobuf//types:go_default_library", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", "@com_github_sirupsen_logrus//:go_default_library", "@org_golang_x_sys//unix:go_default_library", ], ) + +go_test( + name = "shim_test", + size = "small", + srcs = ["service_test.go"], + library = ":shim", + deps = [ + "//pkg/shim/utils", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + ], +) diff --git a/pkg/shim/service.go b/pkg/shim/service.go index 9d9fa8ef6..1f9adcb65 100644 --- a/pkg/shim/service.go +++ b/pkg/shim/service.go @@ -22,6 +22,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "sync" "time" @@ -44,6 +45,7 @@ import ( "github.com/containerd/containerd/sys/reaper" "github.com/containerd/typeurl" "github.com/gogo/protobuf/types" + specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/cleanup" @@ -944,9 +946,19 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C if err != nil { return nil, fmt.Errorf("read oci spec: %w", err) } - if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { + + updated, err := utils.UpdateVolumeAnnotations(spec) + if err != nil { return nil, fmt.Errorf("update volume annotations: %w", err) } + updated = updateCgroup(spec) || updated + + if updated { + if err := utils.WriteSpec(r.Bundle, spec); err != nil { + return nil, err + } + } + runsc.FormatRunscLogPath(r.ID, options.RunscConfig) runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig) p := proc.New(r.ID, runtime, stdio.Stdio{ @@ -966,3 +978,39 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C p.Monitor = reaper.Default return p, nil } + +// updateCgroup updates cgroup path for the sandbox to make the sandbox join the +// pod cgroup and not the pause container cgroup. Returns true if the spec was +// modified. Ex.: +// /kubepods/burstable/pod123/abc => kubepods/burstable/pod123 +// +func updateCgroup(spec *specs.Spec) bool { + if !utils.IsSandbox(spec) { + return false + } + if spec.Linux == nil || len(spec.Linux.CgroupsPath) == 0 { + return false + } + + // Search backwards for the pod cgroup path to make the sandbox use it, + // instead of the pause container's cgroup. + parts := strings.Split(spec.Linux.CgroupsPath, string(filepath.Separator)) + for i := len(parts) - 1; i >= 0; i-- { + if strings.HasPrefix(parts[i], "pod") { + var path string + for j := 0; j <= i; j++ { + path = filepath.Join(path, parts[j]) + } + // Add back the initial '/' that may have been lost above. + if filepath.IsAbs(spec.Linux.CgroupsPath) { + path = string(filepath.Separator) + path + } + if spec.Linux.CgroupsPath == path { + return false + } + spec.Linux.CgroupsPath = path + return true + } + } + return false +} diff --git a/pkg/shim/service_test.go b/pkg/shim/service_test.go new file mode 100644 index 000000000..2d9f07e02 --- /dev/null +++ b/pkg/shim/service_test.go @@ -0,0 +1,121 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package shim + +import ( + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/shim/utils" +) + +func TestCgroupPath(t *testing.T) { + for _, tc := range []struct { + name string + path string + want string + }{ + { + name: "simple", + path: "foo/pod123/container", + want: "foo/pod123", + }, + { + name: "absolute", + path: "/foo/pod123/container", + want: "/foo/pod123", + }, + { + name: "no-container", + path: "foo/pod123", + want: "foo/pod123", + }, + { + name: "no-container-absolute", + path: "/foo/pod123", + want: "/foo/pod123", + }, + { + name: "double-pod", + path: "/foo/podium/pod123/container", + want: "/foo/podium/pod123", + }, + { + name: "start-pod", + path: "pod123/container", + want: "pod123", + }, + { + name: "start-pod-absolute", + path: "/pod123/container", + want: "/pod123", + }, + { + name: "slashes", + path: "///foo/////pod123//////container", + want: "/foo/pod123", + }, + { + name: "no-pod", + path: "/foo/nopod123/container", + want: "/foo/nopod123/container", + }, + } { + t.Run(tc.name, func(t *testing.T) { + spec := specs.Spec{ + Linux: &specs.Linux{ + CgroupsPath: tc.path, + }, + } + updated := updateCgroup(&spec) + if spec.Linux.CgroupsPath != tc.want { + t.Errorf("updateCgroup(%q), want: %q, got: %q", tc.path, tc.want, spec.Linux.CgroupsPath) + } + if shouldUpdate := tc.path != tc.want; shouldUpdate != updated { + t.Errorf("updateCgroup(%q)=%v, want: %v", tc.path, updated, shouldUpdate) + } + }) + } +} + +// Test cases that cgroup path should not be updated. +func TestCgroupNoUpdate(t *testing.T) { + for _, tc := range []struct { + name string + spec *specs.Spec + }{ + { + name: "empty", + spec: &specs.Spec{}, + }, + { + name: "subcontainer", + spec: &specs.Spec{ + Linux: &specs.Linux{ + CgroupsPath: "foo/pod123/container", + }, + Annotations: map[string]string{ + utils.ContainerTypeAnnotation: utils.ContainerTypeContainer, + }, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + if updated := updateCgroup(tc.spec); updated { + t.Errorf("updateCgroup(%+v), got: %v, want: false", tc.spec.Linux, updated) + } + }) + } +} diff --git a/pkg/shim/utils/annotations.go b/pkg/shim/utils/annotations.go index 1e9d3f365..c744800bb 100644 --- a/pkg/shim/utils/annotations.go +++ b/pkg/shim/utils/annotations.go @@ -19,7 +19,9 @@ package utils // These are vendor due to import conflicts. const ( sandboxLogDirAnnotation = "io.kubernetes.cri.sandbox-log-directory" - containerTypeAnnotation = "io.kubernetes.cri.container-type" + // ContainerTypeAnnotation is they key that defines sandbox or container. + ContainerTypeAnnotation = "io.kubernetes.cri.container-type" containerTypeSandbox = "sandbox" - containerTypeContainer = "container" + // ContainerTypeContainer is the value for container. + ContainerTypeContainer = "container" ) diff --git a/pkg/shim/utils/utils.go b/pkg/shim/utils/utils.go index 7b1cd983e..f183b1bbc 100644 --- a/pkg/shim/utils/utils.go +++ b/pkg/shim/utils/utils.go @@ -18,19 +18,16 @@ package utils import ( "encoding/json" "io/ioutil" - "os" "path/filepath" specs "github.com/opencontainers/runtime-spec/specs-go" ) +const configFilename = "config.json" + // ReadSpec reads OCI spec from the bundle directory. func ReadSpec(bundle string) (*specs.Spec, error) { - f, err := os.Open(filepath.Join(bundle, "config.json")) - if err != nil { - return nil, err - } - b, err := ioutil.ReadAll(f) + b, err := ioutil.ReadFile(filepath.Join(bundle, configFilename)) if err != nil { return nil, err } @@ -41,9 +38,18 @@ func ReadSpec(bundle string) (*specs.Spec, error) { return &spec, nil } +// WriteSpec writes OCI spec to the bundle directory. +func WriteSpec(bundle string, spec *specs.Spec) error { + b, err := json.Marshal(spec) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(bundle, configFilename), b, 0666) +} + // IsSandbox checks whether a container is a sandbox container. func IsSandbox(spec *specs.Spec) bool { - t, ok := spec.Annotations[containerTypeAnnotation] + t, ok := spec.Annotations[ContainerTypeAnnotation] return !ok || t == containerTypeSandbox } diff --git a/pkg/shim/utils/volumes.go b/pkg/shim/utils/volumes.go index cdcb88229..6bc75139d 100644 --- a/pkg/shim/utils/volumes.go +++ b/pkg/shim/utils/volumes.go @@ -15,9 +15,7 @@ package utils import ( - "encoding/json" "fmt" - "io/ioutil" "path/filepath" "strings" @@ -89,8 +87,8 @@ func isVolumePath(volume, path string) (bool, error) { } // UpdateVolumeAnnotations add necessary OCI annotations for gvisor -// volume optimization. -func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { +// volume optimization. Returns true if the spec was modified. +func UpdateVolumeAnnotations(s *specs.Spec) (bool, error) { var uid string if IsSandbox(s) { var err error @@ -98,7 +96,7 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { if err != nil { // Skip if we can't get pod UID, because this doesn't work // for containerd 1.1. - return nil + return false, nil } } var updated bool @@ -114,7 +112,7 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { // This is a sandbox. path, err := volumePath(volume, uid) if err != nil { - return fmt.Errorf("get volume path for %q: %w", volume, err) + return false, fmt.Errorf("get volume path for %q: %w", volume, err) } s.Annotations[volumeSourceKey(volume)] = path updated = true @@ -138,15 +136,7 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { } } } - if !updated { - return nil - } - // Update bundle. - b, err := json.Marshal(s) - if err != nil { - return err - } - return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666) + return updated, nil } func changeMountType(m *specs.Mount, newType string) { diff --git a/pkg/shim/utils/volumes_test.go b/pkg/shim/utils/volumes_test.go index b25c53c73..5db43cdf1 100644 --- a/pkg/shim/utils/volumes_test.go +++ b/pkg/shim/utils/volumes_test.go @@ -15,11 +15,9 @@ package utils import ( - "encoding/json" "fmt" "io/ioutil" "os" - "path/filepath" "reflect" "testing" @@ -58,7 +56,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { spec: &specs.Spec{ Annotations: map[string]string{ sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, + ContainerTypeAnnotation: containerTypeSandbox, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -67,7 +65,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { expected: &specs.Spec{ Annotations: map[string]string{ sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, + ContainerTypeAnnotation: containerTypeSandbox, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -81,7 +79,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { spec: &specs.Spec{ Annotations: map[string]string{ sandboxLogDirAnnotation: testLegacyLogDirPath, - containerTypeAnnotation: containerTypeSandbox, + ContainerTypeAnnotation: containerTypeSandbox, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -90,7 +88,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { expected: &specs.Spec{ Annotations: map[string]string{ sandboxLogDirAnnotation: testLegacyLogDirPath, - containerTypeAnnotation: containerTypeSandbox, + ContainerTypeAnnotation: containerTypeSandbox, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -117,7 +115,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, + ContainerTypeAnnotation: ContainerTypeContainer, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -139,7 +137,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, + ContainerTypeAnnotation: ContainerTypeContainer, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -159,7 +157,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, + ContainerTypeAnnotation: ContainerTypeContainer, volumeKeyPrefix + testVolumeName + ".share": "container", volumeKeyPrefix + testVolumeName + ".type": "bind", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -175,7 +173,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, + ContainerTypeAnnotation: ContainerTypeContainer, volumeKeyPrefix + testVolumeName + ".share": "container", volumeKeyPrefix + testVolumeName + ".type": "bind", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -187,7 +185,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { name: "should not return error without pod log directory", spec: &specs.Spec{ Annotations: map[string]string{ - containerTypeAnnotation: containerTypeSandbox, + ContainerTypeAnnotation: containerTypeSandbox, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -195,7 +193,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, expected: &specs.Spec{ Annotations: map[string]string{ - containerTypeAnnotation: containerTypeSandbox, + ContainerTypeAnnotation: containerTypeSandbox, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -207,7 +205,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { spec: &specs.Spec{ Annotations: map[string]string{ sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, + ContainerTypeAnnotation: containerTypeSandbox, volumeKeyPrefix + "notexist.share": "pod", volumeKeyPrefix + "notexist.type": "tmpfs", volumeKeyPrefix + "notexist.options": "ro", @@ -220,13 +218,13 @@ func TestUpdateVolumeAnnotations(t *testing.T) { spec: &specs.Spec{ Annotations: map[string]string{ sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, + ContainerTypeAnnotation: containerTypeSandbox, }, }, expected: &specs.Spec{ Annotations: map[string]string{ sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, + ContainerTypeAnnotation: containerTypeSandbox, }, }, }, @@ -248,7 +246,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, + ContainerTypeAnnotation: ContainerTypeContainer, }, }, expected: &specs.Spec{ @@ -267,7 +265,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, + ContainerTypeAnnotation: ContainerTypeContainer, }, }, }, @@ -275,7 +273,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { name: "bind options removed", spec: &specs.Spec{ Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, + ContainerTypeAnnotation: ContainerTypeContainer, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -292,7 +290,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, expected: &specs.Spec{ Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, + ContainerTypeAnnotation: ContainerTypeContainer, volumeKeyPrefix + testVolumeName + ".share": "pod", volumeKeyPrefix + testVolumeName + ".type": "tmpfs", volumeKeyPrefix + testVolumeName + ".options": "ro", @@ -311,11 +309,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, } { t.Run(test.name, func(t *testing.T) { - bundle, err := ioutil.TempDir(dir, "test-bundle") - if err != nil { - t.Fatalf("Create test bundle: %v", err) - } - err = UpdateVolumeAnnotations(bundle, test.spec) + updated, err := UpdateVolumeAnnotations(test.spec) if test.expectErr { if err == nil { t.Fatal("Expected error, but got nil") @@ -328,18 +322,8 @@ func TestUpdateVolumeAnnotations(t *testing.T) { if !reflect.DeepEqual(test.expected, test.spec) { t.Fatalf("Expected %+v, got %+v", test.expected, test.spec) } - if test.expectUpdate { - b, err := ioutil.ReadFile(filepath.Join(bundle, "config.json")) - if err != nil { - t.Fatalf("Read spec from bundle: %v", err) - } - var spec specs.Spec - if err := json.Unmarshal(b, &spec); err != nil { - t.Fatalf("Unmarshal spec: %v", err) - } - if !reflect.DeepEqual(test.expected, &spec) { - t.Fatalf("Expected %+v, got %+v", test.expected, &spec) - } + if test.expectUpdate != updated { + t.Errorf("Expected %v, got %v", test.expected, updated) } }) } -- cgit v1.2.3