summary | refs | log | tree | commit | diff | homepage
path: root/pkg/sentry/kernel/threads.go
diff options
context:
space:
mode:
author: Michael Pratt <mpratt@google.com>, 2019-04-03 16:21:38 -0700
committer: Shentubot <shentubot@google.com>, 2019-04-03 16:22:43 -0700
commit: 4968dd1341a04e93557bdd9f4b4b83eb508e026d (patch)
tree: 50ef3c28ec24fad937f029f257cbe3338222445f /pkg/sentry/kernel/threads.go
parent: 82529becaee6f5050cb3ebb4aaa7a798357c1cf1 (diff)
Cache ThreadGroups in PIDNamespace
If there are thousands of threads, ThreadGroupsAppend becomes very expensive as it must iterate over all Tasks to find the ThreadGroup leaders.

Reduce the cost by maintaining a map of ThreadGroups which can be used to grab them all directly.

The one somewhat visible change is to convert PID namespace init children zapping to a group-directed SIGKILL, as Linux did in 82058d668465 "signal: Use group_send_sig_info to kill all processes in a pid namespace".

In a benchmark that creates N threads which sleep for two minutes, we see approximately this much CPU time in ThreadGroupsAppend:

Before:
    1 thread:      0ms
    1024 threads:  30ms - 9130ms
    4096 threads:  50ms - 2000ms
    8192 threads:  18160ms
    16384 threads: 17210ms

After:
    1 thread:      0ms
    1024 threads:  0ms
    4096 threads:  0ms
    8192 threads:  0ms
    16384 threads: 0ms

The profiling is actually extremely noisy (likely due to cache effects), as some runs show almost no samples at 1024, 4096 threads, but obviously this does not scale to lots of threads.

PiperOrigin-RevId: 241828039
Change-Id: I17827c90045df4b3c49b3174f3a05bca3026a72c
Diffstat (limited to 'pkg/sentry/kernel/threads.go')
-rw-r--r-- pkg/sentry/kernel/threads.go | 24
1 files changed, 14 insertions, 10 deletions
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index bdb907905..4af1b7dfa 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -100,10 +100,8 @@ func newTaskSet() *TaskSet {
//
// Preconditions: ts.mu must be locked (for reading or writing).
func (ts *TaskSet) forEachThreadGroupLocked(f func(tg *ThreadGroup)) {
- for t := range ts.Root.tids {
- if t == t.tg.leader {
- f(t.tg)
- }
+ for tg := range ts.Root.tgids {
+ f(tg)
}
}
@@ -145,6 +143,13 @@ type PIDNamespace struct {
// identifiers in this namespace.
tids map[*Task]ThreadID
+ // tgids is a mapping from thread groups visible in this namespace to
+ // their identifiers in this namespace.
+ //
+ // The content of tgids is equivalent to tids[tg.leader]. This exists
+ // primarily as an optimization to quickly find all thread groups.
+ tgids map[*ThreadGroup]ThreadID
+
// sessions is a mapping from SessionIDs in this namespace to sessions
// visible in the namespace.
sessions map[SessionID]*Session
@@ -173,6 +178,7 @@ func newPIDNamespace(ts *TaskSet, parent *PIDNamespace, userns *auth.UserNamespa
userns: userns,
tasks: make(map[ThreadID]*Task),
tids: make(map[*Task]ThreadID),
+ tgids: make(map[*ThreadGroup]ThreadID),
sessions: make(map[SessionID]*Session),
sids: make(map[*Session]SessionID),
processGroups: make(map[ProcessGroupID]*ProcessGroup),
@@ -227,7 +233,7 @@ func (ns *PIDNamespace) IDOfTask(t *Task) ThreadID {
func (ns *PIDNamespace) IDOfThreadGroup(tg *ThreadGroup) ThreadID {
ns.owner.mu.RLock()
defer ns.owner.mu.RUnlock()
- return ns.tids[tg.leader]
+ return ns.tgids[tg]
}
// Tasks returns a snapshot of the tasks in ns.
@@ -250,10 +256,8 @@ func (ns *PIDNamespace) ThreadGroups() []*ThreadGroup {
func (ns *PIDNamespace) ThreadGroupsAppend(tgs []*ThreadGroup) []*ThreadGroup {
ns.owner.mu.RLock()
defer ns.owner.mu.RUnlock()
- for t := range ns.tids {
- if t == t.tg.leader {
- tgs = append(tgs, t.tg)
- }
+ for tg := range ns.tgids {
+ tgs = append(tgs, tg)
}
return tgs
}
@@ -387,7 +391,7 @@ func (tg *ThreadGroup) MemberIDs(pidns *PIDNamespace) []ThreadID {
func (tg *ThreadGroup) ID() ThreadID {
tg.pidns.owner.mu.RLock()
defer tg.pidns.owner.mu.RUnlock()
- return tg.pidns.tids[tg.leader]
+ return tg.pidns.tgids[tg]
}
// A taskNode defines the relationship between a task and the rest of the