summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Jamie Liu <jamieliu@google.com> 2020-03-13 13:17:59 -0700
committer: gVisor bot <gvisor-bot@google.com> 2020-03-13 13:19:13 -0700
commit: 1c0535297067179a822ba2dd9a6fe13a8be5a666 (patch)
tree: ec51450667f4df3c82dc4af2e40e3544f8ec1008
parent: 530a31f3c08b10fbd2f8135c5b76380cf5e7f4e8 (diff)
Fix oom_score_adj.
- Make oomScoreAdj a ThreadGroup field (Linux: signal_struct::oom_score_adj).
- Avoid deadlock caused by Task.OOMScoreAdj()/SetOOMScoreAdj() locking Task.mu
  and TaskSet.mu in the wrong order (via Task.ExitState()).

PiperOrigin-RevId: 300814698
-rw-r--r-- pkg/sentry/fs/proc/task.go 17
-rw-r--r-- pkg/sentry/fsimpl/proc/task_files.go 10
-rw-r--r-- pkg/sentry/kernel/task.go 29
-rw-r--r-- pkg/sentry/kernel/task_clone.go 9
-rw-r--r-- pkg/sentry/kernel/task_start.go 4
-rw-r--r-- pkg/sentry/kernel/thread_group.go 7
6 files changed, 32 insertions, 44 deletions
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
index 03cc788c8..d6c5dd2c1 100644
--- a/pkg/sentry/fs/proc/task.go
+++ b/pkg/sentry/fs/proc/task.go
@@ -853,15 +853,15 @@ func (o *oomScoreAdj) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.F
// Read implements fs.FileOperations.Read.
func (f *oomScoreAdjFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
- if offset != 0 {
- return 0, io.EOF
+ if f.t.ExitState() == kernel.TaskExitDead {
+ return 0, syserror.ESRCH
}
- adj, err := f.t.OOMScoreAdj()
- if err != nil {
- return 0, err
+ var buf bytes.Buffer
+ fmt.Fprintf(&buf, "%d\n", f.t.OOMScoreAdj())
+ if offset >= int64(buf.Len()) {
+ return 0, io.EOF
}
- adjBytes := []byte(strconv.FormatInt(int64(adj), 10) + "\n")
- n, err := dst.CopyOut(ctx, adjBytes)
+ n, err := dst.CopyOut(ctx, buf.Bytes()[offset:])
return int64(n), err
}
@@ -880,6 +880,9 @@ func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOS
return 0, err
}
+ if f.t.ExitState() == kernel.TaskExitDead {
+ return 0, syserror.ESRCH
+ }
if err := f.t.SetOOMScoreAdj(v); err != nil {
return 0, err
}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 5a231ac86..4d3332771 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -539,11 +539,10 @@ var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error {
- adj, err := o.task.OOMScoreAdj()
- if err != nil {
- return err
+ if o.task.ExitState() == kernel.TaskExitDead {
+ return syserror.ESRCH
}
- fmt.Fprintf(buf, "%d\n", adj)
+ fmt.Fprintf(buf, "%d\n", o.task.OOMScoreAdj())
return nil
}
@@ -562,6 +561,9 @@ func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset
return 0, err
}
+ if o.task.ExitState() == kernel.TaskExitDead {
+ return 0, syserror.ESRCH
+ }
if err := o.task.SetOOMScoreAdj(v); err != nil {
return 0, err
}
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index c0dbbe890..8452ddf5b 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -555,13 +555,6 @@ type Task struct {
//
// startTime is protected by mu.
startTime ktime.Time
-
- // oomScoreAdj is the task's OOM score adjustment. This is currently not
- // used but is maintained for consistency.
- // TODO(gvisor.dev/issue/1967)
- //
- // oomScoreAdj is protected by mu, and is owned by the task goroutine.
- oomScoreAdj int32
}
func (t *Task) savePtraceTracer() *Task {
@@ -856,27 +849,17 @@ func (t *Task) ContainerID() string {
return t.containerID
}
-// OOMScoreAdj gets the task's OOM score adjustment.
-func (t *Task) OOMScoreAdj() (int32, error) {
- t.mu.Lock()
- defer t.mu.Unlock()
- if t.ExitState() == TaskExitDead {
- return 0, syserror.ESRCH
- }
- return t.oomScoreAdj, nil
+// OOMScoreAdj gets the task's thread group's OOM score adjustment.
+func (t *Task) OOMScoreAdj() int32 {
+ return atomic.LoadInt32(&t.tg.oomScoreAdj)
}
-// SetOOMScoreAdj sets the task's OOM score adjustment. The value should be
-// between -1000 and 1000 inclusive.
+// SetOOMScoreAdj sets the task's thread group's OOM score adjustment. The
+// value should be between -1000 and 1000 inclusive.
func (t *Task) SetOOMScoreAdj(adj int32) error {
- t.mu.Lock()
- defer t.mu.Unlock()
- if t.ExitState() == TaskExitDead {
- return syserror.ESRCH
- }
if adj > 1000 || adj < -1000 {
return syserror.EINVAL
}
- t.oomScoreAdj = adj
+ atomic.StoreInt32(&t.tg.oomScoreAdj, adj)
return nil
}
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index dda502bb8..e1ecca99e 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -15,6 +15,8 @@
package kernel
import (
+ "sync/atomic"
+
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
"gvisor.dev/gvisor/pkg/sentry/inet"
@@ -260,15 +262,11 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
sh = sh.Fork()
}
tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, opts.TerminationSignal, tg.limits.GetCopy())
+ tg.oomScoreAdj = atomic.LoadInt32(&t.tg.oomScoreAdj)
rseqAddr = t.rseqAddr
rseqSignature = t.rseqSignature
}
- adj, err := t.OOMScoreAdj()
- if err != nil {
- return 0, nil, err
- }
-
cfg := &TaskConfig{
Kernel: t.k,
ThreadGroup: tg,
@@ -287,7 +285,6 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
RSeqAddr: rseqAddr,
RSeqSignature: rseqSignature,
ContainerID: t.ContainerID(),
- OOMScoreAdj: adj,
}
if opts.NewThreadGroup {
cfg.Parent = t
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 2bbf48bb8..a5035bb7f 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -93,9 +93,6 @@ type TaskConfig struct {
// ContainerID is the container the new task belongs to.
ContainerID string
-
- // oomScoreAdj is the task's OOM score adjustment.
- OOMScoreAdj int32
}
// NewTask creates a new task defined by cfg.
@@ -146,7 +143,6 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
rseqSignature: cfg.RSeqSignature,
futexWaiter: futex.NewWaiter(),
containerID: cfg.ContainerID,
- oomScoreAdj: cfg.OOMScoreAdj,
}
t.creds.Store(cfg.Credentials)
t.endStopCond.L = &t.tg.signalHandlers.mu
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 268f62e9d..52849f5b3 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -254,6 +254,13 @@ type ThreadGroup struct {
//
// tty is protected by the signal mutex.
tty *TTY
+
+ // oomScoreAdj is the thread group's OOM score adjustment. This is
+ // currently not used but is maintained for consistency.
+ // TODO(gvisor.dev/issue/1967)
+ //
+ // oomScoreAdj is accessed using atomic memory operations.
+ oomScoreAdj int32
}
// NewThreadGroup returns a new, empty thread group in PID namespace pidns. The