author | gVisor bot <gvisor-bot@google.com> | 2021-09-04 02:18:12 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-09-04 02:18:12 +0000 |
commit | 0d58674c658a7f7c119d8d4e2c4e9bf2999a7b9b (patch) | |
tree | 2637a35dc454980ce74e647acd9f8d819e7ab97f /pkg | |
parent | f1555bdddeef28855b188e129a9210046fb2870f (diff) | |
parent | 775a321120f09420ef37ba9455371f193380a695 (diff) | |
Merge release-20210830.0-23-g775a32112 (automated)
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/kernel/task.go | 21 |
-rw-r--r-- | pkg/sentry/kernel/task_clone.go | 36 |
-rw-r--r-- | pkg/sentry/seccheck/clone.go | 53 |
-rw-r--r-- | pkg/sentry/seccheck/seccheck.go | 136 |
-rw-r--r-- | pkg/sentry/seccheck/seccheck_fieldenum.go | 134 |
-rw-r--r-- | pkg/sentry/seccheck/seccheck_state_autogen.go | 3 |
-rw-r--r-- | pkg/sentry/seccheck/seccheck_unsafe_state_autogen.go | 3 |
-rw-r--r-- | pkg/sentry/seccheck/seqatomic_checkerslice_unsafe.go | 38 |
-rw-r--r-- | pkg/sentry/seccheck/task.go | 39 |
9 files changed, 462 insertions, 1 deletion
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 59eeb253d..9a95bf44c 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -30,6 +30,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel/sched"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
+	"gvisor.dev/gvisor/pkg/sentry/seccheck"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -874,3 +875,23 @@ func (t *Task) ResetKcov() {
 		t.kcov = nil
 	}
 }
+
+// Preconditions: The TaskSet mutex must be locked.
+func (t *Task) loadSeccheckInfoLocked(req seccheck.TaskFieldSet, mask *seccheck.TaskFieldSet, info *seccheck.TaskInfo) {
+	if req.Contains(seccheck.TaskFieldThreadID) {
+		info.ThreadID = int32(t.k.tasks.Root.tids[t])
+		mask.Add(seccheck.TaskFieldThreadID)
+	}
+	if req.Contains(seccheck.TaskFieldThreadStartTime) {
+		info.ThreadStartTime = t.startTime
+		mask.Add(seccheck.TaskFieldThreadStartTime)
+	}
+	if req.Contains(seccheck.TaskFieldThreadGroupID) {
+		info.ThreadGroupID = int32(t.k.tasks.Root.tgids[t.tg])
+		mask.Add(seccheck.TaskFieldThreadGroupID)
+	}
+	if req.Contains(seccheck.TaskFieldThreadGroupStartTime) {
+		info.ThreadGroupStartTime = t.tg.leader.startTime
+		mask.Add(seccheck.TaskFieldThreadGroupStartTime)
+	}
+}
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index da4b77ca2..26a981f36 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -23,6 +23,7 @@ import (
 	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sentry/inet"
+	"gvisor.dev/gvisor/pkg/sentry/seccheck"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
@@ -235,7 +236,23 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
 	// nt that it must receive before its task goroutine starts running.
 	tid := nt.k.tasks.Root.IDOfTask(nt)
 	defer nt.Start(tid)
-	t.traceCloneEvent(tid)
+
+	if seccheck.Global.Enabled(seccheck.PointClone) {
+		mask, info := getCloneSeccheckInfo(t, nt, args)
+		if err := seccheck.Global.Clone(t, mask, &info); err != nil {
+			// nt has been visible to the rest of the system since NewTask, so
+			// it may be blocking execve or a group stop, have been notified
+			// for group signal delivery, had children reparented to it, etc.
+			// Thus we can't just drop it on the floor. Instead, instruct the
+			// task goroutine to exit immediately, as quietly as possible.
+			nt.exitTracerNotified = true
+			nt.exitTracerAcked = true
+			nt.exitParentNotified = true
+			nt.exitParentAcked = true
+			nt.runState = (*runExitMain)(nil)
+			return 0, nil, err
+		}
+	}
 
 	// "If fork/clone and execve are allowed by @prog, any child processes will
 	// be constrained to the same filters and system call ABI as the parent." -
@@ -260,6 +277,7 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
 		ntid.CopyOut(t, hostarch.Addr(args.ParentTID))
 	}
+	t.traceCloneEvent(tid)
 	kind := ptraceCloneKindClone
 	if args.Flags&linux.CLONE_VFORK != 0 {
 		kind = ptraceCloneKindVfork
@@ -279,6 +297,22 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
 	return ntid, nil, nil
 }
 
+func getCloneSeccheckInfo(t, nt *Task, args *linux.CloneArgs) (seccheck.CloneFieldSet, seccheck.CloneInfo) {
+	req := seccheck.Global.CloneReq()
+	info := seccheck.CloneInfo{
+		Credentials: t.Credentials(),
+		Args:        *args,
+	}
+	var mask seccheck.CloneFieldSet
+	mask.Add(seccheck.CloneFieldCredentials)
+	mask.Add(seccheck.CloneFieldArgs)
+	t.k.tasks.mu.RLock()
+	defer t.k.tasks.mu.RUnlock()
+	t.loadSeccheckInfoLocked(req.Invoker, &mask.Invoker, &info.Invoker)
+	nt.loadSeccheckInfoLocked(req.Created, &mask.Created, &info.Created)
+	return mask, info
+}
+
 // maybeBeginVforkStop checks if a previously-started vfork child is still
 // running and has not yet released its MM, such that its parent t should enter
 // a vforkStop.
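Note: the hunks above make Task.Clone fail if any registered checker objects; because the child task is already visible system-wide at that point, it cannot simply be discarded, so it is marked as fully exit-notified and started directly in the exit path. As an illustrative sketch only (not part of this change; the package and type names are hypothetical), a checker that vetoes clones by returning a non-nil error would look like:

```go
package checkers // hypothetical package, for illustration only

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/sentry/seccheck"
)

// denyClones vetoes every clone it sees. A non-nil error from any checker's
// Clone method causes Task.Clone to unwind the new task (as in the hunk
// above) and fail the clone(2) syscall with that error.
type denyClones struct {
	seccheck.CheckerDefaults // no-op implementations of other checkpoints
}

func (denyClones) Clone(ctx context.Context, mask seccheck.CloneFieldSet, info seccheck.CloneInfo) error {
	return fmt.Errorf("clone denied for invoker TID %d", info.Invoker.ThreadID)
}
```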
diff --git a/pkg/sentry/seccheck/clone.go b/pkg/sentry/seccheck/clone.go
new file mode 100644
index 000000000..7546fa021
--- /dev/null
+++ b/pkg/sentry/seccheck/clone.go
@@ -0,0 +1,53 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seccheck
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// CloneInfo contains information used by the Clone checkpoint.
+//
+// +fieldenum Clone
+type CloneInfo struct {
+	// Invoker identifies the invoking thread.
+	Invoker TaskInfo
+
+	// Credentials are the invoking thread's credentials.
+	Credentials *auth.Credentials
+
+	// Args contains the arguments to kernel.Task.Clone().
+	Args linux.CloneArgs
+
+	// Created identifies the created thread.
+	Created TaskInfo
+}
+
+// CloneReq returns fields required by the Clone checkpoint.
+func (s *state) CloneReq() CloneFieldSet {
+	return s.cloneReq.Load()
+}
+
+// Clone is called at the Clone checkpoint.
+func (s *state) Clone(ctx context.Context, mask CloneFieldSet, info *CloneInfo) error {
+	for _, c := range s.getCheckers() {
+		if err := c.Clone(ctx, mask, *info); err != nil {
+			return err
+		}
+	}
+	return nil
+}
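A CloneInfo is only partially populated: a checker may rely only on the fields its mask argument marks valid. A hedged sketch of a mask-respecting consumer (hypothetical names; only the seccheck API in this change is assumed):

```go
package checkers // hypothetical package, for illustration only

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/sentry/seccheck"
)

// logClones prints only the fields its mask marks valid; fields absent from
// the mask may be zero-valued and must not be interpreted.
type logClones struct {
	seccheck.CheckerDefaults
}

func (logClones) Clone(ctx context.Context, mask seccheck.CloneFieldSet, info seccheck.CloneInfo) error {
	if mask.Invoker.Contains(seccheck.TaskFieldThreadID) {
		fmt.Printf("clone: invoker TID %d\n", info.Invoker.ThreadID)
	}
	if mask.Contains(seccheck.CloneFieldArgs) {
		fmt.Printf("clone: flags %#x\n", info.Args.Flags)
	}
	return nil // a nil error allows the clone to proceed
}
```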
diff --git a/pkg/sentry/seccheck/seccheck.go b/pkg/sentry/seccheck/seccheck.go
new file mode 100644
index 000000000..b6c9d44ce
--- /dev/null
+++ b/pkg/sentry/seccheck/seccheck.go
@@ -0,0 +1,136 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package seccheck defines a structure for dynamically-configured security
+// checks in the sentry.
+package seccheck
+
+import (
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/sync"
+)
+
+// A Point represents a checkpoint, a point at which a security check occurs.
+type Point uint
+
+// PointX represents the checkpoint X.
+const (
+	PointClone Point = iota
+	// Add new Points above this line.
+	pointLength
+
+	numPointBitmaskUint32s = (int(pointLength)-1)/32 + 1
+)
+
+// A Checker performs security checks at checkpoints.
+//
+// Each Checker method X is called at checkpoint X; if the method may return a
+// non-nil error and does so, it causes the checked operation to fail
+// immediately (without calling subsequent Checkers) and return the error. The
+// info argument contains information relevant to the check. The mask argument
+// indicates what fields in info are valid; the mask should usually be a
+// superset of fields requested by the Checker's corresponding CheckerReq, but
+// may be missing requested fields in some cases (e.g. if the Checker is
+// registered concurrently with invocations of checkpoints).
+type Checker interface {
+	Clone(ctx context.Context, mask CloneFieldSet, info CloneInfo) error
+}
+
+// CheckerDefaults may be embedded by implementations of Checker to obtain
+// no-op implementations of Checker methods that may be explicitly overridden.
+type CheckerDefaults struct{}
+
+// Clone implements Checker.Clone.
+func (CheckerDefaults) Clone(ctx context.Context, mask CloneFieldSet, info CloneInfo) error {
+	return nil
+}
+
+// CheckerReq indicates what checkpoints a corresponding Checker runs at, and
+// what information it requires at those checkpoints.
+type CheckerReq struct {
+	// Points are the set of checkpoints for which the corresponding Checker
+	// must be called. Note that methods not specified in Points may still be
+	// called; implementations of Checker may embed CheckerDefaults to obtain
+	// no-op implementations of Checker methods.
+	Points []Point
+
+	// All of the following fields indicate what fields in the corresponding
+	// XInfo struct will be requested at the corresponding checkpoint.
+	Clone CloneFields
+}
+
+// Global is the method receiver of all seccheck functions.
+var Global state
+
+// state is the type of Global, and is separated out for testing.
+type state struct {
+	// registrationMu serializes all changes to the set of registered Checkers
+	// for all checkpoints.
+	registrationMu sync.Mutex
+
+	// enabledPoints is a bitmask of checkpoints for which at least one
+	// Checker is registered.
+	//
+	// enabledPoints is accessed using atomic memory operations. Mutation of
+	// enabledPoints is serialized by registrationMu.
+	enabledPoints [numPointBitmaskUint32s]uint32
+
+	// registrationSeq supports store-free atomic reads of checkers.
+	registrationSeq sync.SeqCount
+
+	// checkers is the set of all registered Checkers in order of execution.
+	//
+	// checkers is accessed using instantiations of SeqAtomic functions.
+	// Mutation of checkers is serialized by registrationMu.
+	checkers []Checker
+
+	// All of the following xReq variables indicate what fields in the
+	// corresponding XInfo struct have been requested by any registered
+	// checker, are accessed using atomic memory operations, and are mutated
+	// with registrationMu locked.
+	cloneReq CloneFieldSet
+}
+
+// AppendChecker registers the given Checker to execute at checkpoints. The
+// Checker will execute after all previously-registered Checkers, and only if
+// those Checkers return a nil error.
+func (s *state) AppendChecker(c Checker, req *CheckerReq) {
+	s.registrationMu.Lock()
+	defer s.registrationMu.Unlock()
+	s.cloneReq.AddFieldsLoadable(req.Clone)
+	s.appendCheckerLocked(c)
+	for _, p := range req.Points {
+		word, bit := p/32, p%32
+		atomic.StoreUint32(&s.enabledPoints[word], s.enabledPoints[word]|(uint32(1)<<bit))
+	}
+}
+
+// Enabled returns true if any Checker is registered for the given checkpoint.
+func (s *state) Enabled(p Point) bool {
+	word, bit := p/32, p%32
+	return atomic.LoadUint32(&s.enabledPoints[word])&(uint32(1)<<bit) != 0
+}
+
+func (s *state) getCheckers() []Checker {
+	return SeqAtomicLoadCheckerSlice(&s.registrationSeq, &s.checkers)
+}
+
+// Preconditions: s.registrationMu must be locked.
+func (s *state) appendCheckerLocked(c Checker) {
+	s.registrationSeq.BeginWrite()
+	s.checkers = append(s.checkers, c)
+	s.registrationSeq.EndWrite()
+}
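AppendChecker above publishes the checker under the SeqCount, ORs the requested fields into cloneReq, and sets the point's enabled bit last, so a call site that observes a point as enabled sees the checker already published. A minimal usage sketch (not part of this change; noopChecker is hypothetical, and only the API in this file is assumed):

```go
package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/sentry/seccheck"
)

// noopChecker relies entirely on CheckerDefaults' no-op methods.
type noopChecker struct {
	seccheck.CheckerDefaults
}

func main() {
	// Before registration the Clone checkpoint is disabled, so call sites
	// skip info-gathering entirely (one atomic load in Enabled).
	fmt.Println(seccheck.Global.Enabled(seccheck.PointClone)) // false

	seccheck.Global.AppendChecker(&noopChecker{}, &seccheck.CheckerReq{
		Points: []seccheck.Point{seccheck.PointClone},
		Clone: seccheck.CloneFields{
			Credentials: true,
			Created:     seccheck.TaskFields{ThreadID: true, ThreadStartTime: true},
		},
	})

	fmt.Println(seccheck.Global.Enabled(seccheck.PointClone)) // true

	// CloneReq reports the union of all registered checkers' field needs.
	req := seccheck.Global.CloneReq()
	fmt.Println(req.Contains(seccheck.CloneFieldCredentials)) // true
}
```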
diff --git a/pkg/sentry/seccheck/seccheck_fieldenum.go b/pkg/sentry/seccheck/seccheck_fieldenum.go
new file mode 100644
index 000000000..b193b2973
--- /dev/null
+++ b/pkg/sentry/seccheck/seccheck_fieldenum.go
@@ -0,0 +1,134 @@
+// Generated by go_fieldenum.
+
+package seccheck
+
+import "sync/atomic"
+
+// A CloneField represents a field in CloneInfo.
+type CloneField uint
+
+// CloneFieldX represents CloneInfo field X.
+const (
+	CloneFieldCredentials CloneField = iota
+	CloneFieldArgs
+)
+
+// CloneFields represents a set of fields in CloneInfo in a literal-friendly form.
+// The zero value of CloneFields represents an empty set.
+type CloneFields struct {
+	Invoker     TaskFields
+	Credentials bool
+	Args        bool
+	Created     TaskFields
+}
+
+// CloneFieldSet represents a set of fields in CloneInfo in a compact form.
+// The zero value of CloneFieldSet represents an empty set.
+type CloneFieldSet struct {
+	Invoker TaskFieldSet
+	Created TaskFieldSet
+	fields  [1]uint32
+}
+
+// Contains returns true if f is present in the CloneFieldSet.
+func (fs CloneFieldSet) Contains(f CloneField) bool {
+	return fs.fields[0] & (uint32(1) << uint(f)) != 0
+}
+
+// Add adds f to the CloneFieldSet.
+func (fs *CloneFieldSet) Add(f CloneField) {
+	fs.fields[0] |= uint32(1) << uint(f)
+}
+
+// Remove removes f from the CloneFieldSet.
+func (fs *CloneFieldSet) Remove(f CloneField) {
+	fs.fields[0] &^= uint32(1) << uint(f)
+}
+
+// Load returns a copy of the CloneFieldSet.
+// Load is safe to call concurrently with AddFieldsLoadable, but not Add or Remove.
+func (fs *CloneFieldSet) Load() (copied CloneFieldSet) {
+	copied.Invoker = fs.Invoker.Load()
+	copied.Created = fs.Created.Load()
+	copied.fields[0] = atomic.LoadUint32(&fs.fields[0])
+	return
+}
+
+// AddFieldsLoadable adds the given fields to the CloneFieldSet.
+// AddFieldsLoadable is safe to call concurrently with Load, but not other methods (including other calls to AddFieldsLoadable).
+func (fs *CloneFieldSet) AddFieldsLoadable(fields CloneFields) {
+	fs.Invoker.AddFieldsLoadable(fields.Invoker)
+	fs.Created.AddFieldsLoadable(fields.Created)
+	if fields.Credentials {
+		atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(CloneFieldCredentials)))
+	}
+	if fields.Args {
+		atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(CloneFieldArgs)))
+	}
+}
+
+// A TaskField represents a field in TaskInfo.
+type TaskField uint
+
+// TaskFieldX represents TaskInfo field X.
+const (
+	TaskFieldThreadID TaskField = iota
+	TaskFieldThreadStartTime
+	TaskFieldThreadGroupID
+	TaskFieldThreadGroupStartTime
+)
+
+// TaskFields represents a set of fields in TaskInfo in a literal-friendly form.
+// The zero value of TaskFields represents an empty set.
+type TaskFields struct {
+	ThreadID             bool
+	ThreadStartTime      bool
+	ThreadGroupID        bool
+	ThreadGroupStartTime bool
+}
+
+// TaskFieldSet represents a set of fields in TaskInfo in a compact form.
+// The zero value of TaskFieldSet represents an empty set.
+type TaskFieldSet struct {
+	fields [1]uint32
+}
+
+// Contains returns true if f is present in the TaskFieldSet.
+func (fs TaskFieldSet) Contains(f TaskField) bool {
+	return fs.fields[0] & (uint32(1) << uint(f)) != 0
+}
+
+// Add adds f to the TaskFieldSet.
+func (fs *TaskFieldSet) Add(f TaskField) {
+	fs.fields[0] |= uint32(1) << uint(f)
+}
+
+// Remove removes f from the TaskFieldSet.
+func (fs *TaskFieldSet) Remove(f TaskField) {
+	fs.fields[0] &^= uint32(1) << uint(f)
+}
+
+// Load returns a copy of the TaskFieldSet.
+// Load is safe to call concurrently with AddFieldsLoadable, but not Add or Remove.
+func (fs *TaskFieldSet) Load() (copied TaskFieldSet) {
+	copied.fields[0] = atomic.LoadUint32(&fs.fields[0])
+	return
+}
+
+// AddFieldsLoadable adds the given fields to the TaskFieldSet.
+// AddFieldsLoadable is safe to call concurrently with Load, but not other methods (including other calls to AddFieldsLoadable).
+func (fs *TaskFieldSet) AddFieldsLoadable(fields TaskFields) {
+	if fields.ThreadID {
+		atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(TaskFieldThreadID)))
+	}
+	if fields.ThreadStartTime {
+		atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(TaskFieldThreadStartTime)))
+	}
+	if fields.ThreadGroupID {
+		atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(TaskFieldThreadGroupID)))
+	}
+	if fields.ThreadGroupStartTime {
+		atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(TaskFieldThreadGroupStartTime)))
+	}
+}
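The generated field sets are plain bitmasks, with one uint32 word covering up to 32 fields per struct. A quick sketch of the set operations (illustrative only; assumes just the generated API above):

```go
package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/sentry/seccheck"
)

func main() {
	var fs seccheck.TaskFieldSet
	fs.Add(seccheck.TaskFieldThreadID)
	fs.Add(seccheck.TaskFieldThreadGroupID)

	fmt.Println(fs.Contains(seccheck.TaskFieldThreadID))        // true
	fmt.Println(fs.Contains(seccheck.TaskFieldThreadStartTime)) // false

	fs.Remove(seccheck.TaskFieldThreadGroupID)

	// Load takes an atomic snapshot; it is the only read that is safe
	// against a concurrent AddFieldsLoadable on the same set.
	snapshot := fs.Load()
	fmt.Println(snapshot.Contains(seccheck.TaskFieldThreadGroupID)) // false
}
```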
diff --git a/pkg/sentry/seccheck/seccheck_state_autogen.go b/pkg/sentry/seccheck/seccheck_state_autogen.go
new file mode 100644
index 000000000..2fa2e9787
--- /dev/null
+++ b/pkg/sentry/seccheck/seccheck_state_autogen.go
@@ -0,0 +1,3 @@
+// automatically generated by stateify.
+
+package seccheck
diff --git a/pkg/sentry/seccheck/seccheck_unsafe_state_autogen.go b/pkg/sentry/seccheck/seccheck_unsafe_state_autogen.go
new file mode 100644
index 000000000..2fa2e9787
--- /dev/null
+++ b/pkg/sentry/seccheck/seccheck_unsafe_state_autogen.go
@@ -0,0 +1,3 @@
+// automatically generated by stateify.
+
+package seccheck
diff --git a/pkg/sentry/seccheck/seqatomic_checkerslice_unsafe.go b/pkg/sentry/seccheck/seqatomic_checkerslice_unsafe.go
new file mode 100644
index 000000000..05a6c6eee
--- /dev/null
+++ b/pkg/sentry/seccheck/seqatomic_checkerslice_unsafe.go
@@ -0,0 +1,38 @@
+package seccheck
+
+import (
+	"unsafe"
+
+	"gvisor.dev/gvisor/pkg/gohacks"
+	"gvisor.dev/gvisor/pkg/sync"
+)
+
+// SeqAtomicLoadCheckerSlice returns a copy of *ptr, ensuring that the read
+// does not race with any writer critical sections in seq.
+//
+//go:nosplit
+func SeqAtomicLoadCheckerSlice(seq *sync.SeqCount, ptr *[]Checker) []Checker {
+	for {
+		if val, ok := SeqAtomicTryLoadCheckerSlice(seq, seq.BeginRead(), ptr); ok {
+			return val
+		}
+	}
+}
+
+// SeqAtomicTryLoadCheckerSlice returns a copy of *ptr while in a reader
+// critical section in seq initiated by a call to seq.BeginRead() that
+// returned epoch. If the read would race with a writer critical section,
+// SeqAtomicTryLoadCheckerSlice returns (unspecified, false).
+//
+//go:nosplit
+func SeqAtomicTryLoadCheckerSlice(seq *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *[]Checker) (val []Checker, ok bool) {
+	if sync.RaceEnabled {
+		// Copy with gohacks.Memmove, which is not instrumented by the race
+		// detector, since this speculative read intentionally races with
+		// writer critical sections.
+		gohacks.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val))
+	} else {
+		// Direct assignment is faster for short reads; ReadOk below rejects
+		// any torn copy.
+		val = *ptr
+	}
+	ok = seq.ReadOk(epoch)
+	return
+}
diff --git a/pkg/sentry/seccheck/task.go b/pkg/sentry/seccheck/task.go
new file mode 100644
index 000000000..1dee33203
--- /dev/null
+++ b/pkg/sentry/seccheck/task.go
@@ -0,0 +1,39 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seccheck
+
+import (
+	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+)
+
+// TaskInfo contains information unambiguously identifying a single thread
+// and/or its containing process.
+//
+// +fieldenum Task
+type TaskInfo struct {
+	// ThreadID is the thread's ID in the root PID namespace.
+	ThreadID int32
+
+	// ThreadStartTime is the thread's CLOCK_REALTIME start time.
+	ThreadStartTime ktime.Time
+
+	// ThreadGroupID is the thread's group leader's ID in the root PID
+	// namespace.
+	ThreadGroupID int32
+
+	// ThreadGroupStartTime is the thread's group leader's CLOCK_REALTIME start
+	// time.
+	ThreadGroupStartTime ktime.Time
+}
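seqatomic_checkerslice_unsafe.go above is an instantiation of gVisor's seqatomic template for []Checker. For reference, a self-contained sketch of the SeqCount reader/writer protocol it relies on (illustrative only, not from this commit; it uses gvisor.dev/gvisor/pkg/sync the same way the file above does):

```go
package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/sync"
)

var (
	seq  sync.SeqCount
	data []int
)

// write mutates data inside a writer critical section.
func write(v []int) {
	seq.BeginWrite()
	data = v
	seq.EndWrite()
}

// read retries until it obtains a copy of data that did not overlap any
// writer critical section. (The generated code additionally copies via
// gohacks.Memmove so the race detector does not flag this intentionally
// racy speculative read.)
func read() []int {
	for {
		epoch := seq.BeginRead()
		val := data
		if seq.ReadOk(epoch) {
			return val
		}
	}
}

func main() {
	write([]int{1, 2, 3})
	fmt.Println(read()) // [1 2 3]
}
```

This is why registration can be store-free for readers: getCheckers takes a snapshot of the checkers slice without locking, while AppendChecker brackets its append with BeginWrite/EndWrite on registrationSeq.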