summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/shim/service.go20
-rw-r--r--runsc/cgroup/cgroup.go8
-rw-r--r--runsc/container/container.go134
3 files changed, 129 insertions, 33 deletions
diff --git a/pkg/shim/service.go b/pkg/shim/service.go
index 24e3b7a82..0980d964e 100644
--- a/pkg/shim/service.go
+++ b/pkg/shim/service.go
@@ -77,6 +77,8 @@ const (
// shimAddressPath is the relative path to a file that contains the address
// to the shim UDS. See service.shimAddress.
shimAddressPath = "address"
+
+ cgroupParentAnnotation = "dev.gvisor.spec.cgroup-parent"
)
// New returns a new shim service that can be used via GRPC.
@@ -952,7 +954,7 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C
if err != nil {
return nil, fmt.Errorf("update volume annotations: %w", err)
}
- updated = updateCgroup(spec) || updated
+ updated = setPodCgroup(spec) || updated
if updated {
if err := utils.WriteSpec(r.Bundle, spec); err != nil {
@@ -980,12 +982,13 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C
return p, nil
}
-// updateCgroup updates cgroup path for the sandbox to make the sandbox join the
-// pod cgroup and not the pause container cgroup. Returns true if the spec was
-// modified. Ex.:
-// /kubepods/burstable/pod123/abc => kubepods/burstable/pod123
+// setPodCgroup searches for the pod cgroup path inside the container's cgroup
+// path. If found, it's set as an annotation in the spec. This is done so that
+// the sandbox joins the pod cgroup. Otherwise, the sandbox would join the pause
+// container cgroup. Returns true if the spec was modified. Ex.:
+// /kubepods/burstable/pod123/container123 => kubepods/burstable/pod123
//
-func updateCgroup(spec *specs.Spec) bool {
+func setPodCgroup(spec *specs.Spec) bool {
if !utils.IsSandbox(spec) {
return false
}
@@ -1009,7 +1012,10 @@ func updateCgroup(spec *specs.Spec) bool {
if spec.Linux.CgroupsPath == path {
return false
}
- spec.Linux.CgroupsPath = path
+ if spec.Annotations == nil {
+ spec.Annotations = make(map[string]string)
+ }
+ spec.Annotations[cgroupParentAnnotation] = path
return true
}
}
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 5dbf14376..7280a52fc 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -309,7 +309,13 @@ func NewFromSpec(spec *specs.Spec) (*Cgroup, error) {
if spec.Linux == nil || spec.Linux.CgroupsPath == "" {
return nil, nil
}
- return new("self", spec.Linux.CgroupsPath)
+ return NewFromPath(spec.Linux.CgroupsPath)
+}
+
+// NewFromPath creates a new Cgroup instance from the specified relative path.
+// Cgroup paths are loaded based on the current process.
+func NewFromPath(cgroupsPath string) (*Cgroup, error) {
+ return new("self", cgroupsPath)
}
// NewFromPid loads cgroup for the given process.
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 7f991444e..6a59df411 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -44,6 +44,8 @@ import (
"gvisor.dev/gvisor/runsc/specutils"
)
+const cgroupParentAnnotation = "dev.gvisor.spec.cgroup-parent"
+
// validateID validates the container id.
func validateID(id string) error {
// See libcontainer/factory_linux.go.
@@ -113,6 +115,16 @@ type Container struct {
// container is created and reset when the sandbox is destroyed.
Sandbox *sandbox.Sandbox `json:"sandbox"`
+ // CompatCgroup has the cgroup configuration for the container. For the single
+ // container case, container cgroup is set in `c.Sandbox` only. CompactCgroup
+ // is only set for multi-container, where the `c.Sandbox` cgroup represents
+ // the entire pod.
+ //
+ // Note that CompatCgroup is created only for compatibility with tools
+ // that expect container cgroups to exist. Setting limits here makes no change
+ // to the container in question.
+ CompatCgroup *cgroup.Cgroup `json:"compatCgroup"`
+
// Saver handles load from/save to the state file safely from multiple
// processes.
Saver StateFile `json:"saver"`
@@ -233,27 +245,12 @@ func New(conf *config.Config, args Args) (*Container, error) {
}
// Create and join cgroup before processes are created to ensure they are
// part of the cgroup from the start (and all their children processes).
- cg, err := cgroup.NewFromSpec(args.Spec)
+ parentCgroup, subCgroup, err := c.setupCgroupForRoot(conf, args.Spec)
if err != nil {
return nil, err
}
- if cg != nil {
- // TODO(gvisor.dev/issue/3481): Remove when cgroups v2 is supported.
- if !conf.Rootless && cgroup.IsOnlyV2() {
- return nil, fmt.Errorf("cgroups V2 is not yet supported. Enable cgroups V1 and retry")
- }
- // If there is cgroup config, install it before creating sandbox process.
- if err := cg.Install(args.Spec.Linux.Resources); err != nil {
- switch {
- case errors.Is(err, unix.EACCES) && conf.Rootless:
- log.Warningf("Skipping cgroup configuration in rootless mode: %v", err)
- cg = nil
- default:
- return nil, fmt.Errorf("configuring cgroup: %v", err)
- }
- }
- }
- if err := runInCgroup(cg, func() error {
+ c.CompatCgroup = subCgroup
+ if err := runInCgroup(parentCgroup, func() error {
ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir, args.Attached)
if err != nil {
return err
@@ -269,7 +266,7 @@ func New(conf *config.Config, args Args) (*Container, error) {
UserLog: args.UserLog,
IOFiles: ioFiles,
MountsFile: specFile,
- Cgroup: cg,
+ Cgroup: parentCgroup,
Attached: args.Attached,
}
sand, err := sandbox.New(conf, sandArgs)
@@ -296,6 +293,12 @@ func New(conf *config.Config, args Args) (*Container, error) {
}
c.Sandbox = sb.Sandbox
+ subCgroup, err := c.setupCgroupForSubcontainer(conf, args.Spec)
+ if err != nil {
+ return nil, err
+ }
+ c.CompatCgroup = subCgroup
+
// If the console control socket file is provided, then create a new
// pty master/slave pair and send the TTY to the sandbox process.
var tty *os.File
@@ -781,16 +784,16 @@ func (c *Container) saveLocked() error {
// root containers), and waits for the container or sandbox and the gofer
// to stop. If any of them doesn't stop before timeout, an error is returned.
func (c *Container) stop() error {
- var cgroup *cgroup.Cgroup
+ var parentCgroup *cgroup.Cgroup
if c.Sandbox != nil {
log.Debugf("Destroying container, cid: %s", c.ID)
if err := c.Sandbox.DestroyContainer(c.ID); err != nil {
return fmt.Errorf("destroying container %q: %v", c.ID, err)
}
- // Only uninstall cgroup for sandbox stop.
+ // Only uninstall parentCgroup for sandbox stop.
if c.Sandbox.IsRootContainer(c.ID) {
- cgroup = c.Sandbox.Cgroup
+ parentCgroup = c.Sandbox.Cgroup
}
// Only set sandbox to nil after it has been told to destroy the container.
c.Sandbox = nil
@@ -809,9 +812,16 @@ func (c *Container) stop() error {
return err
}
- // Gofer is running in cgroups, so Cgroup.Uninstall has to be called after it.
- if cgroup != nil {
- if err := cgroup.Uninstall(); err != nil {
+ // Delete container cgroup if any.
+ if c.CompatCgroup != nil {
+ if err := c.CompatCgroup.Uninstall(); err != nil {
+ return err
+ }
+ }
+ // Gofer is running inside parentCgroup, so Cgroup.Uninstall has to be called
+ // after the gofer has stopped.
+ if parentCgroup != nil {
+ if err := parentCgroup.Uninstall(); err != nil {
return err
}
}
@@ -1208,3 +1218,77 @@ func (c *Container) populateStats(event *boot.EventOut) {
event.Event.Data.CPU.Usage.Total = uint64(total)
return
}
+
+// setupCgroupForRoot configures and returns cgroup for the sandbox and the
+// root container. If `cgroupParentAnnotation` is set, use that path as the
+// sandbox cgroup and use Spec.Linux.CgroupsPath as the root container cgroup.
+func (c *Container) setupCgroupForRoot(conf *config.Config, spec *specs.Spec) (*cgroup.Cgroup, *cgroup.Cgroup, error) {
+ var parentCgroup *cgroup.Cgroup
+ if parentPath, ok := spec.Annotations[cgroupParentAnnotation]; ok {
+ var err error
+ parentCgroup, err = cgroup.NewFromPath(parentPath)
+ if err != nil {
+ return nil, nil, err
+ }
+ } else {
+ var err error
+ parentCgroup, err = cgroup.NewFromSpec(spec)
+ if parentCgroup == nil || err != nil {
+ return nil, nil, err
+ }
+ }
+
+ var err error
+ parentCgroup, err = cgroupInstall(conf, parentCgroup, spec.Linux.Resources)
+ if parentCgroup == nil || err != nil {
+ return nil, nil, err
+ }
+
+ subCgroup, err := c.setupCgroupForSubcontainer(conf, spec)
+ if err != nil {
+ _ = parentCgroup.Uninstall()
+ return nil, nil, err
+ }
+ return parentCgroup, subCgroup, nil
+}
+
+// setupCgroupForSubcontainer sets up empty cgroups for subcontainers. Since
+// subcontainers run exclusively inside the sandbox, subcontainer cgroups on the
+// host have no effect on them. However, some tools (e.g. cAdvisor) uses cgroups
+// paths to discover new containers and report stats for them.
+func (c *Container) setupCgroupForSubcontainer(conf *config.Config, spec *specs.Spec) (*cgroup.Cgroup, error) {
+ if isRoot(spec) {
+ if _, ok := spec.Annotations[cgroupParentAnnotation]; !ok {
+ return nil, nil
+ }
+ }
+
+ cg, err := cgroup.NewFromSpec(spec)
+ if cg == nil || err != nil {
+ return nil, err
+ }
+ // Use empty resources, just want the directory structure created.
+ return cgroupInstall(conf, cg, &specs.LinuxResources{})
+}
+
+// cgroupInstall creates cgroups dir structure and sets their respective
+// resources. In case of success, returns the cgroups instance and nil error.
+// For rootless, it's possible that cgroups operations fail, in this case the
+// error is suppressed and a nil cgroups instance is returned to indicate that
+// no cgroups was configured.
+func cgroupInstall(conf *config.Config, cg *cgroup.Cgroup, res *specs.LinuxResources) (*cgroup.Cgroup, error) {
+ // TODO(gvisor.dev/issue/3481): Remove when cgroups v2 is supported.
+ if !conf.Rootless && cgroup.IsOnlyV2() {
+ return nil, fmt.Errorf("cgroups V2 is not yet supported. Enable cgroups V1 and retry")
+ }
+ if err := cg.Install(res); err != nil {
+ switch {
+ case errors.Is(err, unix.EACCES) && conf.Rootless:
+ log.Warningf("Skipping cgroup configuration in rootless mode: %v", err)
+ return nil, nil
+ default:
+ return nil, fmt.Errorf("configuring cgroup: %v", err)
+ }
+ }
+ return cg, nil
+}