summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorFabricio Voznika <fvoznika@google.com>2021-05-17 13:52:51 -0700
committergVisor bot <gvisor-bot@google.com>2021-05-17 13:54:46 -0700
commitd96499d17deb4fcf4cec949d90a48b1673198867 (patch)
tree0b9689e46689794bd02f7795392719330466240f
parent7654181cc7c4f7633a1e96280bfd32391a3fbce3 (diff)
Make sandbox join the pod cgroup in K8s
cgroups in K8s are setup with the following hierarchy: `.../pod/container`. The sandbox is created with the first container and consequently uses the the pause container cgroup. This change removes the container cgroup from the path to make the sandbox use the pod cgroup instead. Otherwise limits set to the pause container will apply to the entire sandbox. PiperOrigin-RevId: 374273277
-rw-r--r--pkg/shim/BUILD14
-rw-r--r--pkg/shim/service.go50
-rw-r--r--pkg/shim/service_test.go121
-rw-r--r--pkg/shim/utils/annotations.go6
-rw-r--r--pkg/shim/utils/utils.go20
-rw-r--r--pkg/shim/utils/volumes.go20
-rw-r--r--pkg/shim/utils/volumes_test.go56
7 files changed, 225 insertions, 62 deletions
diff --git a/pkg/shim/BUILD b/pkg/shim/BUILD
index 4f7c02f5d..fd6127b97 100644
--- a/pkg/shim/BUILD
+++ b/pkg/shim/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -41,7 +41,19 @@ go_library(
"@com_github_containerd_fifo//:go_default_library",
"@com_github_containerd_typeurl//:go_default_library",
"@com_github_gogo_protobuf//types:go_default_library",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
"@com_github_sirupsen_logrus//:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
],
)
+
+go_test(
+ name = "shim_test",
+ size = "small",
+ srcs = ["service_test.go"],
+ library = ":shim",
+ deps = [
+ "//pkg/shim/utils",
+ "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
+ ],
+)
diff --git a/pkg/shim/service.go b/pkg/shim/service.go
index 9d9fa8ef6..1f9adcb65 100644
--- a/pkg/shim/service.go
+++ b/pkg/shim/service.go
@@ -22,6 +22,7 @@ import (
"os"
"os/exec"
"path/filepath"
+ "strings"
"sync"
"time"
@@ -44,6 +45,7 @@ import (
"github.com/containerd/containerd/sys/reaper"
"github.com/containerd/typeurl"
"github.com/gogo/protobuf/types"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cleanup"
@@ -944,9 +946,19 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C
if err != nil {
return nil, fmt.Errorf("read oci spec: %w", err)
}
- if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil {
+
+ updated, err := utils.UpdateVolumeAnnotations(spec)
+ if err != nil {
return nil, fmt.Errorf("update volume annotations: %w", err)
}
+ updated = updateCgroup(spec) || updated
+
+ if updated {
+ if err := utils.WriteSpec(r.Bundle, spec); err != nil {
+ return nil, err
+ }
+ }
+
runsc.FormatRunscLogPath(r.ID, options.RunscConfig)
runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig)
p := proc.New(r.ID, runtime, stdio.Stdio{
@@ -966,3 +978,39 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C
p.Monitor = reaper.Default
return p, nil
}
+
+// updateCgroup updates cgroup path for the sandbox to make the sandbox join the
+// pod cgroup and not the pause container cgroup. Returns true if the spec was
+// modified. Ex.:
+// /kubepods/burstable/pod123/abc => kubepods/burstable/pod123
+//
+func updateCgroup(spec *specs.Spec) bool {
+ if !utils.IsSandbox(spec) {
+ return false
+ }
+ if spec.Linux == nil || len(spec.Linux.CgroupsPath) == 0 {
+ return false
+ }
+
+ // Search backwards for the pod cgroup path to make the sandbox use it,
+ // instead of the pause container's cgroup.
+ parts := strings.Split(spec.Linux.CgroupsPath, string(filepath.Separator))
+ for i := len(parts) - 1; i >= 0; i-- {
+ if strings.HasPrefix(parts[i], "pod") {
+ var path string
+ for j := 0; j <= i; j++ {
+ path = filepath.Join(path, parts[j])
+ }
+ // Add back the initial '/' that may have been lost above.
+ if filepath.IsAbs(spec.Linux.CgroupsPath) {
+ path = string(filepath.Separator) + path
+ }
+ if spec.Linux.CgroupsPath == path {
+ return false
+ }
+ spec.Linux.CgroupsPath = path
+ return true
+ }
+ }
+ return false
+}
diff --git a/pkg/shim/service_test.go b/pkg/shim/service_test.go
new file mode 100644
index 000000000..2d9f07e02
--- /dev/null
+++ b/pkg/shim/service_test.go
@@ -0,0 +1,121 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package shim
+
+import (
+ "testing"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/shim/utils"
+)
+
+func TestCgroupPath(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ path string
+ want string
+ }{
+ {
+ name: "simple",
+ path: "foo/pod123/container",
+ want: "foo/pod123",
+ },
+ {
+ name: "absolute",
+ path: "/foo/pod123/container",
+ want: "/foo/pod123",
+ },
+ {
+ name: "no-container",
+ path: "foo/pod123",
+ want: "foo/pod123",
+ },
+ {
+ name: "no-container-absolute",
+ path: "/foo/pod123",
+ want: "/foo/pod123",
+ },
+ {
+ name: "double-pod",
+ path: "/foo/podium/pod123/container",
+ want: "/foo/podium/pod123",
+ },
+ {
+ name: "start-pod",
+ path: "pod123/container",
+ want: "pod123",
+ },
+ {
+ name: "start-pod-absolute",
+ path: "/pod123/container",
+ want: "/pod123",
+ },
+ {
+ name: "slashes",
+ path: "///foo/////pod123//////container",
+ want: "/foo/pod123",
+ },
+ {
+ name: "no-pod",
+ path: "/foo/nopod123/container",
+ want: "/foo/nopod123/container",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ spec := specs.Spec{
+ Linux: &specs.Linux{
+ CgroupsPath: tc.path,
+ },
+ }
+ updated := updateCgroup(&spec)
+ if spec.Linux.CgroupsPath != tc.want {
+ t.Errorf("updateCgroup(%q), want: %q, got: %q", tc.path, tc.want, spec.Linux.CgroupsPath)
+ }
+ if shouldUpdate := tc.path != tc.want; shouldUpdate != updated {
+ t.Errorf("updateCgroup(%q)=%v, want: %v", tc.path, updated, shouldUpdate)
+ }
+ })
+ }
+}
+
+// Test cases that cgroup path should not be updated.
+func TestCgroupNoUpdate(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ spec *specs.Spec
+ }{
+ {
+ name: "empty",
+ spec: &specs.Spec{},
+ },
+ {
+ name: "subcontainer",
+ spec: &specs.Spec{
+ Linux: &specs.Linux{
+ CgroupsPath: "foo/pod123/container",
+ },
+ Annotations: map[string]string{
+ utils.ContainerTypeAnnotation: utils.ContainerTypeContainer,
+ },
+ },
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ if updated := updateCgroup(tc.spec); updated {
+ t.Errorf("updateCgroup(%+v), got: %v, want: false", tc.spec.Linux, updated)
+ }
+ })
+ }
+}
diff --git a/pkg/shim/utils/annotations.go b/pkg/shim/utils/annotations.go
index 1e9d3f365..c744800bb 100644
--- a/pkg/shim/utils/annotations.go
+++ b/pkg/shim/utils/annotations.go
@@ -19,7 +19,9 @@ package utils
// These are vendor due to import conflicts.
const (
sandboxLogDirAnnotation = "io.kubernetes.cri.sandbox-log-directory"
- containerTypeAnnotation = "io.kubernetes.cri.container-type"
+ // ContainerTypeAnnotation is they key that defines sandbox or container.
+ ContainerTypeAnnotation = "io.kubernetes.cri.container-type"
containerTypeSandbox = "sandbox"
- containerTypeContainer = "container"
+ // ContainerTypeContainer is the value for container.
+ ContainerTypeContainer = "container"
)
diff --git a/pkg/shim/utils/utils.go b/pkg/shim/utils/utils.go
index 7b1cd983e..f183b1bbc 100644
--- a/pkg/shim/utils/utils.go
+++ b/pkg/shim/utils/utils.go
@@ -18,19 +18,16 @@ package utils
import (
"encoding/json"
"io/ioutil"
- "os"
"path/filepath"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
+const configFilename = "config.json"
+
// ReadSpec reads OCI spec from the bundle directory.
func ReadSpec(bundle string) (*specs.Spec, error) {
- f, err := os.Open(filepath.Join(bundle, "config.json"))
- if err != nil {
- return nil, err
- }
- b, err := ioutil.ReadAll(f)
+ b, err := ioutil.ReadFile(filepath.Join(bundle, configFilename))
if err != nil {
return nil, err
}
@@ -41,9 +38,18 @@ func ReadSpec(bundle string) (*specs.Spec, error) {
return &spec, nil
}
+// WriteSpec writes OCI spec to the bundle directory.
+func WriteSpec(bundle string, spec *specs.Spec) error {
+ b, err := json.Marshal(spec)
+ if err != nil {
+ return err
+ }
+ return ioutil.WriteFile(filepath.Join(bundle, configFilename), b, 0666)
+}
+
// IsSandbox checks whether a container is a sandbox container.
func IsSandbox(spec *specs.Spec) bool {
- t, ok := spec.Annotations[containerTypeAnnotation]
+ t, ok := spec.Annotations[ContainerTypeAnnotation]
return !ok || t == containerTypeSandbox
}
diff --git a/pkg/shim/utils/volumes.go b/pkg/shim/utils/volumes.go
index cdcb88229..6bc75139d 100644
--- a/pkg/shim/utils/volumes.go
+++ b/pkg/shim/utils/volumes.go
@@ -15,9 +15,7 @@
package utils
import (
- "encoding/json"
"fmt"
- "io/ioutil"
"path/filepath"
"strings"
@@ -89,8 +87,8 @@ func isVolumePath(volume, path string) (bool, error) {
}
// UpdateVolumeAnnotations add necessary OCI annotations for gvisor
-// volume optimization.
-func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error {
+// volume optimization. Returns true if the spec was modified.
+func UpdateVolumeAnnotations(s *specs.Spec) (bool, error) {
var uid string
if IsSandbox(s) {
var err error
@@ -98,7 +96,7 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error {
if err != nil {
// Skip if we can't get pod UID, because this doesn't work
// for containerd 1.1.
- return nil
+ return false, nil
}
}
var updated bool
@@ -114,7 +112,7 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error {
// This is a sandbox.
path, err := volumePath(volume, uid)
if err != nil {
- return fmt.Errorf("get volume path for %q: %w", volume, err)
+ return false, fmt.Errorf("get volume path for %q: %w", volume, err)
}
s.Annotations[volumeSourceKey(volume)] = path
updated = true
@@ -138,15 +136,7 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error {
}
}
}
- if !updated {
- return nil
- }
- // Update bundle.
- b, err := json.Marshal(s)
- if err != nil {
- return err
- }
- return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666)
+ return updated, nil
}
func changeMountType(m *specs.Mount, newType string) {
diff --git a/pkg/shim/utils/volumes_test.go b/pkg/shim/utils/volumes_test.go
index b25c53c73..5db43cdf1 100644
--- a/pkg/shim/utils/volumes_test.go
+++ b/pkg/shim/utils/volumes_test.go
@@ -15,11 +15,9 @@
package utils
import (
- "encoding/json"
"fmt"
"io/ioutil"
"os"
- "path/filepath"
"reflect"
"testing"
@@ -58,7 +56,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
spec: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLogDirPath,
- containerTypeAnnotation: containerTypeSandbox,
+ ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -67,7 +65,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
expected: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLogDirPath,
- containerTypeAnnotation: containerTypeSandbox,
+ ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -81,7 +79,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
spec: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLegacyLogDirPath,
- containerTypeAnnotation: containerTypeSandbox,
+ ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -90,7 +88,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
expected: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLegacyLogDirPath,
- containerTypeAnnotation: containerTypeSandbox,
+ ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -117,7 +115,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
},
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeContainer,
+ ContainerTypeAnnotation: ContainerTypeContainer,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -139,7 +137,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
},
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeContainer,
+ ContainerTypeAnnotation: ContainerTypeContainer,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -159,7 +157,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
},
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeContainer,
+ ContainerTypeAnnotation: ContainerTypeContainer,
volumeKeyPrefix + testVolumeName + ".share": "container",
volumeKeyPrefix + testVolumeName + ".type": "bind",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -175,7 +173,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
},
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeContainer,
+ ContainerTypeAnnotation: ContainerTypeContainer,
volumeKeyPrefix + testVolumeName + ".share": "container",
volumeKeyPrefix + testVolumeName + ".type": "bind",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -187,7 +185,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
name: "should not return error without pod log directory",
spec: &specs.Spec{
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeSandbox,
+ ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -195,7 +193,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
expected: &specs.Spec{
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeSandbox,
+ ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -207,7 +205,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
spec: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLogDirPath,
- containerTypeAnnotation: containerTypeSandbox,
+ ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + "notexist.share": "pod",
volumeKeyPrefix + "notexist.type": "tmpfs",
volumeKeyPrefix + "notexist.options": "ro",
@@ -220,13 +218,13 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
spec: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLogDirPath,
- containerTypeAnnotation: containerTypeSandbox,
+ ContainerTypeAnnotation: containerTypeSandbox,
},
},
expected: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLogDirPath,
- containerTypeAnnotation: containerTypeSandbox,
+ ContainerTypeAnnotation: containerTypeSandbox,
},
},
},
@@ -248,7 +246,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
},
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeContainer,
+ ContainerTypeAnnotation: ContainerTypeContainer,
},
},
expected: &specs.Spec{
@@ -267,7 +265,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
},
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeContainer,
+ ContainerTypeAnnotation: ContainerTypeContainer,
},
},
},
@@ -275,7 +273,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
name: "bind options removed",
spec: &specs.Spec{
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeContainer,
+ ContainerTypeAnnotation: ContainerTypeContainer,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -292,7 +290,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
expected: &specs.Spec{
Annotations: map[string]string{
- containerTypeAnnotation: containerTypeContainer,
+ ContainerTypeAnnotation: ContainerTypeContainer,
volumeKeyPrefix + testVolumeName + ".share": "pod",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
@@ -311,11 +309,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
} {
t.Run(test.name, func(t *testing.T) {
- bundle, err := ioutil.TempDir(dir, "test-bundle")
- if err != nil {
- t.Fatalf("Create test bundle: %v", err)
- }
- err = UpdateVolumeAnnotations(bundle, test.spec)
+ updated, err := UpdateVolumeAnnotations(test.spec)
if test.expectErr {
if err == nil {
t.Fatal("Expected error, but got nil")
@@ -328,18 +322,8 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
if !reflect.DeepEqual(test.expected, test.spec) {
t.Fatalf("Expected %+v, got %+v", test.expected, test.spec)
}
- if test.expectUpdate {
- b, err := ioutil.ReadFile(filepath.Join(bundle, "config.json"))
- if err != nil {
- t.Fatalf("Read spec from bundle: %v", err)
- }
- var spec specs.Spec
- if err := json.Unmarshal(b, &spec); err != nil {
- t.Fatalf("Unmarshal spec: %v", err)
- }
- if !reflect.DeepEqual(test.expected, &spec) {
- t.Fatalf("Expected %+v, got %+v", test.expected, &spec)
- }
+ if test.expectUpdate != updated {
+ t.Errorf("Expected %v, got %v", test.expected, updated)
}
})
}