1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
|
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package kernel
import (
"fmt"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/cpuid"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/context"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
"gvisor.googlesource.com/gvisor/pkg/sentry/loader"
"gvisor.googlesource.com/gvisor/pkg/sentry/mm"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserr"
)
var errNoSyscalls = syserr.New("no syscall table found", linux.ENOEXEC)
// Auxmap contains miscellaneous data for the task.
type Auxmap map[string]interface{}
// TaskContext is the subset of a task's data that is provided by the loader.
//
// +stateify savable
type TaskContext struct {
// Name is the thread name set by the prctl(PR_SET_NAME) system call.
Name string
// Arch is the architecture-specific context (registers, etc.)
Arch arch.Context
// MemoryManager is the task's address space.
MemoryManager *mm.MemoryManager
// fu implements futexes in the address space.
fu *futex.Manager
// st is the task's syscall table.
st *SyscallTable
}
// release releases all resources held by the TaskContext. release is called by
// the task when it execs into a new TaskContext or exits.
func (tc *TaskContext) release() {
// Nil out pointers so that if the task is saved after release, it doesn't
// follow the pointers to possibly now-invalid objects.
if tc.MemoryManager != nil {
// TODO
tc.MemoryManager.DecUsers(context.Background())
tc.MemoryManager = nil
}
tc.fu = nil
}
// Fork returns a duplicate of tc. The copied TaskContext always has an
// independent arch.Context. If shareAddressSpace is true, the copied
// TaskContext shares an address space with the original; otherwise, the copied
// TaskContext has an independent address space that is initially a duplicate
// of the original's.
func (tc *TaskContext) Fork(ctx context.Context, k *Kernel, shareAddressSpace bool) (*TaskContext, error) {
newTC := &TaskContext{
Arch: tc.Arch.Fork(),
st: tc.st,
}
if shareAddressSpace {
newTC.MemoryManager = tc.MemoryManager
if newTC.MemoryManager != nil {
if !newTC.MemoryManager.IncUsers() {
// Shouldn't be possible since tc.MemoryManager should be a
// counted user.
panic(fmt.Sprintf("TaskContext.Fork called with userless TaskContext.MemoryManager"))
}
}
newTC.fu = tc.fu
} else {
newMM, err := tc.MemoryManager.Fork(ctx)
if err != nil {
return nil, err
}
newTC.MemoryManager = newMM
newTC.fu = k.futexes.Fork()
}
return newTC, nil
}
// Arch returns t's arch.Context.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) Arch() arch.Context {
return t.tc.Arch
}
// MemoryManager returns t's MemoryManager. MemoryManager does not take an
// additional reference on the returned MM.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) MemoryManager() *mm.MemoryManager {
return t.tc.MemoryManager
}
// SyscallTable returns t's syscall table.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) SyscallTable() *SyscallTable {
return t.tc.st
}
// Stack returns the userspace stack.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) Stack() *arch.Stack {
return &arch.Stack{t.Arch(), t.MemoryManager(), usermem.Addr(t.Arch().Stack())}
}
// LoadTaskImage loads filename into a new TaskContext.
//
// It takes several arguments:
// * mounts: MountNamespace to lookup filename in
// * root: Root to lookup filename under
// * wd: Working directory to lookup filename under
// * maxTraversals: maximum number of symlinks to follow
// * filename: path to binary to load
// * argv: Binary argv
// * envv: Binary envv
// * fs: Binary FeatureSet
func (k *Kernel) LoadTaskImage(ctx context.Context, mounts *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals *uint, filename string, argv, envv []string, fs *cpuid.FeatureSet) (*TaskContext, *syserr.Error) {
// Prepare a new user address space to load into.
m := mm.NewMemoryManager(k)
defer m.DecUsers(ctx)
os, ac, name, err := loader.Load(ctx, m, mounts, root, wd, maxTraversals, fs, filename, argv, envv, k.extraAuxv, k.vdso)
if err != nil {
return nil, err
}
// Lookup our new syscall table.
st, ok := LookupSyscallTable(os, ac.Arch())
if !ok {
// No syscall table found. This means that the ELF binary does not match
// the architecture.
return nil, errNoSyscalls
}
if !m.IncUsers() {
panic("Failed to increment users count on new MM")
}
return &TaskContext{
Name: name,
Arch: ac,
MemoryManager: m,
fu: k.futexes.Fork(),
st: st,
}, nil
}
|