path: root/pkg/sentry/kernel
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r--  pkg/sentry/kernel/cgroup.go                | 29
-rw-r--r--  pkg/sentry/kernel/kernel.go                | 59
-rw-r--r--  pkg/sentry/kernel/pipe/pipe_util.go        |  6
-rw-r--r--  pkg/sentry/kernel/semaphore/semaphore.go   | 10
-rw-r--r--  pkg/sentry/kernel/task_context.go          |  5
5 files changed, 73 insertions, 36 deletions
diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go
index 0fbf27f64..c93ef6ac1 100644
--- a/pkg/sentry/kernel/cgroup.go
+++ b/pkg/sentry/kernel/cgroup.go
@@ -181,7 +181,23 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files
for _, h := range r.hierarchies {
if h.match(ctypes) {
- h.fs.IncRef()
+ if !h.fs.TryIncRef() {
+ // Racing with filesystem destruction, namely h.fs.Release.
+ // Since we hold r.mu, we know the hierarchy hasn't been
+ // unregistered yet, but its associated filesystem is tearing
+ // down.
+ //
+ // If we simply indicate the hierarchy wasn't found without
+ // cleaning up the registry, the caller can race with the
+ // unregister and find itself temporarily unable to create a new
+ // hierarchy with a subset of the relevant controllers.
+ //
+ // To keep the result of FindHierarchy consistent with the
+ // uniqueness of controllers enforced by Register, drop the
+ // dying hierarchy now. The eventual unregister by the FS
+ // teardown will become a no-op.
+ r.unregisterLocked(h.id)
+ return nil
+ }
return h.fs
}
}
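
For readers unfamiliar with the TryIncRef pattern, the race handled here can be reduced to a standalone sketch. Everything below (registry, hierarchy, the refs field) is an illustrative stand-in rather than the real gVisor reference-counting machinery; the point is that a lookup which loses the race against teardown prunes the dying entry under the same lock, so a follow-up Register cannot collide with it.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// hierarchy stands in for a registered cgroup hierarchy whose filesystem is
// reference counted.
type hierarchy struct {
	refs int64 // > 0 while alive; reaches 0 once teardown (Release) begins
}

// tryIncRef succeeds only while at least one reference is still held,
// mirroring the fs.TryIncRef call in FindHierarchy above.
func (h *hierarchy) tryIncRef() bool {
	for {
		r := atomic.LoadInt64(&h.refs)
		if r == 0 {
			return false // teardown already started
		}
		if atomic.CompareAndSwapInt64(&h.refs, r, r+1) {
			return true
		}
	}
}

// registry stands in for CgroupRegistry: a lock plus a map of hierarchies.
type registry struct {
	mu          sync.Mutex
	hierarchies map[uint32]*hierarchy
}

// find returns a live hierarchy or nil. If the entry is dying it is pruned
// immediately, so a caller that goes on to register a replacement with the
// same controllers does not collide with the stale entry; the eventual
// unregister from the filesystem teardown is then a no-op.
func (r *registry) find(id uint32) *hierarchy {
	r.mu.Lock()
	defer r.mu.Unlock()
	h, ok := r.hierarchies[id]
	if !ok {
		return nil
	}
	if !h.tryIncRef() {
		delete(r.hierarchies, id)
		return nil
	}
	return h
}

func main() {
	r := &registry{hierarchies: map[uint32]*hierarchy{1: {refs: 0}}} // entry already dying
	fmt.Println(r.find(1)) // <nil>: the lookup lost the race...
	_, ok := r.hierarchies[1]
	fmt.Println(ok) // false: ...and pruned the dying entry
}
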
@@ -230,12 +246,17 @@ func (r *CgroupRegistry) Register(cs []CgroupController, fs cgroupFS) error {
return nil
}
-// Unregister removes a previously registered hierarchy from the registry. If
-// the controller was not previously registered, Unregister is a no-op.
+// Unregister removes a previously registered hierarchy from the registry. If no
+// such hierarchy is registered, Unregister is a no-op.
func (r *CgroupRegistry) Unregister(hid uint32) {
r.mu.Lock()
- defer r.mu.Unlock()
+ r.unregisterLocked(hid)
+ r.mu.Unlock()
+}
+// Precondition: Caller must hold r.mu.
+// +checklocks:r.mu
+func (r *CgroupRegistry) unregisterLocked(hid uint32) {
if h, ok := r.hierarchies[hid]; ok {
for name, _ := range h.controllers {
delete(r.controllers, name)
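
The Unregister split above follows the usual lock-wrapper/locked-helper shape, which is what lets FindHierarchy (already holding r.mu) drop a dying hierarchy directly. A minimal sketch of that shape, with illustrative names only:

package main

import (
	"fmt"
	"sync"
)

// counterRegistry and its methods are illustrative; only the shape matches
// the CgroupRegistry change above.
type counterRegistry struct {
	mu     sync.Mutex
	counts map[uint32]int
}

// Remove takes the lock and delegates to the locked helper, exactly like
// Unregister above.
func (r *counterRegistry) Remove(id uint32) {
	r.mu.Lock()
	r.removeLocked(id)
	r.mu.Unlock()
}

// removeLocked can also be called from any path that already holds r.mu.
// Precondition: caller must hold r.mu. In gVisor, the +checklocks:r.mu
// annotation lets static analysis verify this precondition at call sites.
func (r *counterRegistry) removeLocked(id uint32) {
	delete(r.counts, id)
}

func main() {
	r := &counterRegistry{counts: map[uint32]int{7: 1}}
	r.Remove(7)
	fmt.Println(len(r.counts)) // 0
}
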
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index e6e9da898..febe7fe50 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -306,9 +306,6 @@ type InitKernelArgs struct {
// FeatureSet is the emulated CPU feature set.
FeatureSet *cpuid.FeatureSet
- // Timekeeper manages time for all tasks in the system.
- Timekeeper *Timekeeper
-
// RootUserNamespace is the root user namespace.
RootUserNamespace *auth.UserNamespace
@@ -348,29 +345,34 @@ type InitKernelArgs struct {
PIDNamespace *PIDNamespace
}
+// SetTimekeeper sets Kernel.timekeeper. SetTimekeeper must be called before
+// Init.
+func (k *Kernel) SetTimekeeper(tk *Timekeeper) {
+ k.timekeeper = tk
+}
+
// Init initializes the Kernel with no tasks.
//
// Callers must manually set Kernel.Platform and call Kernel.SetMemoryFile
-// before calling Init.
+// and Kernel.SetTimekeeper before calling Init.
func (k *Kernel) Init(args InitKernelArgs) error {
if args.FeatureSet == nil {
- return fmt.Errorf("FeatureSet is nil")
+ return fmt.Errorf("args.FeatureSet is nil")
}
- if args.Timekeeper == nil {
- return fmt.Errorf("Timekeeper is nil")
+ if k.timekeeper == nil {
+ return fmt.Errorf("timekeeper is nil")
}
- if args.Timekeeper.clocks == nil {
+ if k.timekeeper.clocks == nil {
return fmt.Errorf("must call Timekeeper.SetClocks() before Kernel.Init()")
}
if args.RootUserNamespace == nil {
- return fmt.Errorf("RootUserNamespace is nil")
+ return fmt.Errorf("args.RootUserNamespace is nil")
}
if args.ApplicationCores == 0 {
- return fmt.Errorf("ApplicationCores is 0")
+ return fmt.Errorf("args.ApplicationCores is 0")
}
k.featureSet = args.FeatureSet
- k.timekeeper = args.Timekeeper
k.tasks = newTaskSet(args.PIDNamespace)
k.rootUserNamespace = args.RootUserNamespace
k.rootUTSNamespace = args.RootUTSNamespace
@@ -395,8 +397,8 @@ func (k *Kernel) Init(args InitKernelArgs) error {
}
k.extraAuxv = args.ExtraAuxv
k.vdso = args.Vdso
- k.realtimeClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Realtime}
- k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic}
+ k.realtimeClock = &timekeeperClock{tk: k.timekeeper, c: sentrytime.Realtime}
+ k.monotonicClock = &timekeeperClock{tk: k.timekeeper, c: sentrytime.Monotonic}
k.futexes = futex.NewManager()
k.netlinkPorts = port.New()
k.ptraceExceptions = make(map[*Task]*Task)
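
With Timekeeper removed from InitKernelArgs, the dependency is attached through a setter and validated inside Init. The sketch below mirrors that ordering with illustrative stand-in types (engine, timekeeper), not the real Kernel API, since a real Kernel also needs a platform and memory file wired up first.

package main

import (
	"errors"
	"fmt"
)

// timekeeper and engine are illustrative stand-ins for Timekeeper and Kernel;
// the real types carry far more state.
type timekeeper struct{ clocksSet bool }

func (tk *timekeeper) SetClocks() { tk.clocksSet = true }

type engine struct{ tk *timekeeper }

// SetTimekeeper attaches the dependency before Init, like Kernel.SetTimekeeper.
func (e *engine) SetTimekeeper(tk *timekeeper) { e.tk = tk }

type initArgs struct{ applicationCores uint }

// Init validates the setter-provided dependency, the way Kernel.Init now
// checks k.timekeeper instead of args.Timekeeper.
func (e *engine) Init(args initArgs) error {
	if e.tk == nil {
		return errors.New("timekeeper is nil")
	}
	if !e.tk.clocksSet {
		return errors.New("must call SetClocks() before Init()")
	}
	if args.applicationCores == 0 {
		return errors.New("args.applicationCores is 0")
	}
	return nil
}

func main() {
	e := &engine{}
	fmt.Println(e.Init(initArgs{applicationCores: 4})) // error: timekeeper is nil

	tk := &timekeeper{}
	tk.SetClocks()
	e.SetTimekeeper(tk)
	fmt.Println(e.Init(initArgs{applicationCores: 4})) // <nil>
}
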
@@ -654,12 +656,12 @@ func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error {
defer k.tasks.mu.RUnlock()
for t := range k.tasks.Root.tids {
// We can skip locking Task.mu here since the kernel is paused.
- if mm := t.image.MemoryManager; mm != nil {
- if _, ok := invalidated[mm]; !ok {
- if err := mm.InvalidateUnsavable(ctx); err != nil {
+ if memMgr := t.image.MemoryManager; memMgr != nil {
+ if _, ok := invalidated[memMgr]; !ok {
+ if err := memMgr.InvalidateUnsavable(ctx); err != nil {
return err
}
- invalidated[mm] = struct{}{}
+ invalidated[memMgr] = struct{}{}
}
}
// I really wish we just had a sync.Map of all MMs...
@@ -1553,22 +1555,23 @@ func (k *Kernel) SetSaveError(err error) {
var _ tcpip.Clock = (*Kernel)(nil)
-// NowNanoseconds implements tcpip.Clock.NowNanoseconds.
-func (k *Kernel) NowNanoseconds() int64 {
- now, err := k.timekeeper.GetTime(sentrytime.Realtime)
+// Now implements tcpip.Clock.Now.
+func (k *Kernel) Now() time.Time {
+ nsec, err := k.timekeeper.GetTime(sentrytime.Realtime)
if err != nil {
- panic("Kernel.NowNanoseconds: " + err.Error())
+ panic("timekeeper.GetTime(sentrytime.Realtime): " + err.Error())
}
- return now
+ return time.Unix(0, nsec)
}
// NowMonotonic implements tcpip.Clock.NowMonotonic.
-func (k *Kernel) NowMonotonic() int64 {
- now, err := k.timekeeper.GetTime(sentrytime.Monotonic)
+func (k *Kernel) NowMonotonic() tcpip.MonotonicTime {
+ nsec, err := k.timekeeper.GetTime(sentrytime.Monotonic)
if err != nil {
- panic("Kernel.NowMonotonic: " + err.Error())
+ panic("timekeeper.GetTime(sentrytime.Monotonic): " + err.Error())
}
- return now
+ var mt tcpip.MonotonicTime
+ return mt.Add(time.Duration(nsec) * time.Nanosecond)
}
// AfterFunc implements tcpip.Clock.AfterFunc.
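
The two conversions above are mechanical: a realtime nanosecond count becomes a time.Time via time.Unix(0, nsec), and a monotonic count becomes an offset from the zero MonotonicTime via Add. The sketch below checks both using only the standard library; monotonicTime is an illustrative stand-in for tcpip.MonotonicTime.

package main

import (
	"fmt"
	"time"
)

// monotonicTime is an illustrative stand-in for tcpip.MonotonicTime: an
// opaque offset from an arbitrary start point, advanced only via Add.
type monotonicTime struct{ ns int64 }

func (mt monotonicTime) Add(d time.Duration) monotonicTime {
	return monotonicTime{ns: mt.ns + int64(d)}
}

func main() {
	// Realtime path: a raw nanosecond count from the timekeeper becomes a
	// wall-clock time.Time, as Kernel.Now does above.
	realtimeNsec := int64(1_600_000_000_000_000_000)
	fmt.Println(time.Unix(0, realtimeNsec).UTC()) // 2020-09-13 12:26:40 +0000 UTC

	// Monotonic path: the count becomes an offset from the zero value,
	// mirroring Kernel.NowMonotonic above.
	monotonicNsec := int64(42)
	var mt monotonicTime
	mt = mt.Add(time.Duration(monotonicNsec) * time.Nanosecond)
	fmt.Println(mt.ns) // 42
}
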
@@ -1783,7 +1786,7 @@ func (k *Kernel) EmitUnimplementedEvent(ctx context.Context) {
})
t := TaskFromContext(ctx)
- k.unimplementedSyscallEmitter.Emit(&uspb.UnimplementedSyscall{
+ _, _ = k.unimplementedSyscallEmitter.Emit(&uspb.UnimplementedSyscall{
Tid: int32(t.ThreadID()),
Registers: t.Arch().StateData().Proto(),
})
@@ -1874,7 +1877,7 @@ func (k *Kernel) ReleaseCgroupHierarchy(hid uint32) {
return
}
t.mu.Lock()
- for cg, _ := range t.cgroups {
+ for cg := range t.cgroups {
if cg.HierarchyID() == hid {
t.leaveCgroupLocked(cg)
}
diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go
index 2d89b9ccd..24e467e93 100644
--- a/pkg/sentry/kernel/pipe/pipe_util.go
+++ b/pkg/sentry/kernel/pipe/pipe_util.go
@@ -86,6 +86,12 @@ func (p *Pipe) Write(ctx context.Context, src usermem.IOSequence) (int64, error)
if n > 0 {
p.Notify(waiter.ReadableEvents)
}
+ if err == unix.EPIPE {
+ // If we are returning EPIPE, send SIGPIPE to the task.
+ if sendSig := linux.SignalNoInfoFuncFromContext(ctx); sendSig != nil {
+ sendSig(linux.SIGPIPE)
+ }
+ }
return n, err
}
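
The EPIPE path reaches back to the task through a function stored in the context rather than through a direct kernel dependency (the provider side is the task_context.go change later in this diff). The sketch below reproduces that indirection with illustrative stand-ins for the signal constants and the context key.

package main

import (
	"context"
	"errors"
	"fmt"
)

// signal, sigPIPE, errEPIPE, and ctxKey are illustrative stand-ins for
// linux.Signal, linux.SIGPIPE, unix.EPIPE, and linux.CtxSignalNoInfoFunc.
type signal int

const sigPIPE signal = 13

var errEPIPE = errors.New("EPIPE")

type ctxKey struct{}

// signalNoInfoFuncFromContext mirrors the role of
// linux.SignalNoInfoFuncFromContext: it returns nil when the context was not
// created by a task, so callers simply skip signalling in that case.
func signalNoInfoFuncFromContext(ctx context.Context) func(signal) error {
	f, _ := ctx.Value(ctxKey{}).(func(signal) error)
	return f
}

// write mirrors the tail of Pipe.Write above: on EPIPE, deliver SIGPIPE to
// the calling task if the context can do so.
func write(ctx context.Context) error {
	err := errEPIPE // pretend the read end of the pipe is gone
	if errors.Is(err, errEPIPE) {
		if sendSig := signalNoInfoFuncFromContext(ctx); sendSig != nil {
			_ = sendSig(sigPIPE)
		}
	}
	return err
}

func main() {
	// A task-backed context installs the callback, as task_context.go does
	// later in this diff.
	taskCtx := context.WithValue(context.Background(), ctxKey{}, func(s signal) error {
		fmt.Println("delivering signal", s)
		return nil
	})
	fmt.Println(write(taskCtx))              // delivers signal 13, then returns EPIPE
	fmt.Println(write(context.Background())) // no callback installed: just EPIPE
}
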
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index fe2ab1662..1d9edf118 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -35,10 +35,10 @@ const (
// Maximum number of semaphore sets.
setsMax = linux.SEMMNI
- // Maximum number of semaphroes in a semaphore set.
+ // Maximum number of semaphores in a semaphore set.
semsMax = linux.SEMMSL
- // Maximum number of semaphores in all semaphroe sets.
+ // Maximum number of semaphores in all semaphore sets.
semsTotalMax = linux.SEMMNS
)
@@ -220,7 +220,7 @@ func (r *Registry) HighestIndex() int32 {
defer r.mu.Unlock()
// By default, highest used index is 0 even though
- // there is no semaphroe set.
+ // there is no semaphore set.
var highestIndex int32
for index := range r.indexes {
if index > highestIndex {
@@ -702,7 +702,9 @@ func (s *Set) checkPerms(creds *auth.Credentials, reqPerms fs.PermMask) bool {
return s.checkCapability(creds)
}
-// destroy destroys the set. Caller must hold 's.mu'.
+// destroy destroys the set.
+//
+// Preconditions: Caller must hold 's.mu'.
func (s *Set) destroy() {
// Notify all waiters. They will fail on the next attempt to execute
// operations and return error.
diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go
index 70b0699dc..c82d9e82b 100644
--- a/pkg/sentry/kernel/task_context.go
+++ b/pkg/sentry/kernel/task_context.go
@@ -17,6 +17,7 @@ package kernel
import (
"time"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -113,6 +114,10 @@ func (t *Task) contextValue(key interface{}, isTaskGoroutine bool) interface{} {
return t.k.RealtimeClock()
case limits.CtxLimits:
return t.tg.limits
+ case linux.CtxSignalNoInfoFunc:
+ return func(sig linux.Signal) error {
+ return t.SendSignal(SignalInfoNoInfo(sig, t, t))
+ }
case pgalloc.CtxMemoryFile:
return t.k.mf
case pgalloc.CtxMemoryFileProvider: