diff options
-rwxr-xr-x | pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go | 5 | ||||
-rwxr-xr-x | pkg/sentry/platform/ring0/defs_impl.go | 4 | ||||
-rwxr-xr-x | pkg/sentry/time/seqatomic_parameters_unsafe.go | 5 | ||||
-rw-r--r-- | runsc/container/container.go | 95 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 2 |
5 files changed, 103 insertions, 8 deletions
diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go index be6b07629..c284a1b11 100755 --- a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go +++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go @@ -1,11 +1,12 @@ package kernel import ( - "fmt" - "gvisor.dev/gvisor/third_party/gvsync" "reflect" "strings" "unsafe" + + "fmt" + "gvisor.dev/gvisor/third_party/gvsync" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race diff --git a/pkg/sentry/platform/ring0/defs_impl.go b/pkg/sentry/platform/ring0/defs_impl.go index 8efc3825f..a30a9dd4a 100755 --- a/pkg/sentry/platform/ring0/defs_impl.go +++ b/pkg/sentry/platform/ring0/defs_impl.go @@ -1,13 +1,13 @@ package ring0 import ( - "fmt" "gvisor.dev/gvisor/pkg/cpuid" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "io" "reflect" "syscall" + "fmt" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/usermem" ) diff --git a/pkg/sentry/time/seqatomic_parameters_unsafe.go b/pkg/sentry/time/seqatomic_parameters_unsafe.go index b4fb0a7f0..1ec221edd 100755 --- a/pkg/sentry/time/seqatomic_parameters_unsafe.go +++ b/pkg/sentry/time/seqatomic_parameters_unsafe.go @@ -1,11 +1,12 @@ package time import ( - "fmt" - "gvisor.dev/gvisor/third_party/gvsync" "reflect" "strings" "unsafe" + + "fmt" + "gvisor.dev/gvisor/third_party/gvsync" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race diff --git a/runsc/container/container.go b/runsc/container/container.go index 8320bb2ca..27e9c2e0f 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -475,7 +475,13 @@ func (c *Container) Start(conf *boot.Config) error { } c.changeStatus(Running) - return c.save() + if err := c.save(); err != nil { + return err + } + + // Adjust the oom_score_adj for sandbox and gofers. This must be done after + // save(). + return c.adjustOOMScoreAdj(conf) } // Restore takes a container and replaces its kernel and file system @@ -1098,3 +1104,90 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error { } return fn() } + +// adjustOOMScoreAdj sets the oom_score_adj for the sandbox and all gofers. +// oom_score_adj is set to the lowest oom_score_adj among the containers +// running in the sandbox. +func (c *Container) adjustOOMScoreAdj(conf *boot.Config) error { + // If this container's OOMScoreAdj is nil then we can exit early as no + // change should be made to oom_score_adj for the sandbox. + if c.Spec.Process.OOMScoreAdj == nil { + return nil + } + + ids, err := List(conf.RootDir) + if err != nil { + return err + } + + // Load the container metadata. + var containers []*Container + for _, id := range ids { + container, err := Load(conf.RootDir, id) + if err != nil { + return fmt.Errorf("loading container %q: %v", id, err) + } + if container.Sandbox.ID == c.Sandbox.ID { + containers = append(containers, container) + } + } + + // Get the lowest score for all containers. + var lowScore int + scoreFound := false + for _, container := range containers { + if container.Spec.Process.OOMScoreAdj != nil && (!scoreFound || *container.Spec.Process.OOMScoreAdj < lowScore) { + scoreFound = true + lowScore = *container.Spec.Process.OOMScoreAdj + } + } + + // Only set oom_score_adj if one of the containers has oom_score_adj set + // in the OCI bundle. If not, we need to inherit the parent process's + // oom_score_adj. + // See: https://github.com/opencontainers/runtime-spec/blob/master/config.md#linux-process + if !scoreFound { + return nil + } + + // Set oom_score_adj for the sandbox. + if err := setOOMScoreAdj(c.Sandbox.Pid, lowScore); err != nil { + return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", c.Sandbox.ID, err) + } + + // Set the gofer's oom_score_adj to the minimum of -500 and the + // sandbox's oom_score_adj to better ensure that the sandbox is killed + // before the gofer. + // + // TODO(gvisor.dev/issue/601) Set oom_score_adj for the gofer to + // the same oom_score_adj as the sandbox. + goferScoreAdj := -500 + if lowScore < goferScoreAdj { + goferScoreAdj = lowScore + } + + // Set oom_score_adj for gofers for all containers in the sandbox. + for _, container := range containers { + err := setOOMScoreAdj(container.GoferPid, goferScoreAdj) + if err != nil { + return fmt.Errorf("setting oom_score_adj for container %q: %v", container.ID, err) + } + } + + return nil +} + +// setOOMScoreAdj sets oom_score_adj to the given value for the given PID. +// /proc must be available and mounted read-write. scoreAdj should be between +// -1000 and 1000. +func setOOMScoreAdj(pid int, scoreAdj int) error { + f, err := os.OpenFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid), os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil { + return err + } + return nil +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 4a11f617d..851b1304b 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -54,7 +54,7 @@ type Sandbox struct { // ID as the first container run in the sandbox. ID string `json:"id"` - // Pid is the pid of the running sandbox (immutable). May be 0 is the sandbox + // Pid is the pid of the running sandbox (immutable). May be 0 if the sandbox // is not running. Pid int `json:"pid"` |