author    gVisor bot <gvisor-bot@google.com>  2021-09-04 02:18:12 +0000
committer gVisor bot <gvisor-bot@google.com>  2021-09-04 02:18:12 +0000
commit    0d58674c658a7f7c119d8d4e2c4e9bf2999a7b9b (patch)
tree      2637a35dc454980ce74e647acd9f8d819e7ab97f /pkg
parent    f1555bdddeef28855b188e129a9210046fb2870f (diff)
parent    775a321120f09420ef37ba9455371f193380a695 (diff)

Merge release-20210830.0-23-g775a32112 (automated)
Diffstat (limited to 'pkg')
-rw-r--r--  pkg/sentry/kernel/task.go                              21
-rw-r--r--  pkg/sentry/kernel/task_clone.go                        36
-rw-r--r--  pkg/sentry/seccheck/clone.go                           53
-rw-r--r--  pkg/sentry/seccheck/seccheck.go                       136
-rw-r--r--  pkg/sentry/seccheck/seccheck_fieldenum.go             134
-rw-r--r--  pkg/sentry/seccheck/seccheck_state_autogen.go           3
-rw-r--r--  pkg/sentry/seccheck/seccheck_unsafe_state_autogen.go    3
-rw-r--r--  pkg/sentry/seccheck/seqatomic_checkerslice_unsafe.go   38
-rw-r--r--  pkg/sentry/seccheck/task.go                            39
9 files changed, 462 insertions(+), 1 deletion(-)
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 59eeb253d..9a95bf44c 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -30,6 +30,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/sched"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/seccheck"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -874,3 +875,23 @@ func (t *Task) ResetKcov() {
t.kcov = nil
}
}
+
+// Preconditions: The TaskSet mutex must be locked.
+func (t *Task) loadSeccheckInfoLocked(req seccheck.TaskFieldSet, mask *seccheck.TaskFieldSet, info *seccheck.TaskInfo) {
+ if req.Contains(seccheck.TaskFieldThreadID) {
+ info.ThreadID = int32(t.k.tasks.Root.tids[t])
+ mask.Add(seccheck.TaskFieldThreadID)
+ }
+ if req.Contains(seccheck.TaskFieldThreadStartTime) {
+ info.ThreadStartTime = t.startTime
+ mask.Add(seccheck.TaskFieldThreadStartTime)
+ }
+ if req.Contains(seccheck.TaskFieldThreadGroupID) {
+ info.ThreadGroupID = int32(t.k.tasks.Root.tgids[t.tg])
+ mask.Add(seccheck.TaskFieldThreadGroupID)
+ }
+ if req.Contains(seccheck.TaskFieldThreadGroupStartTime) {
+ info.ThreadGroupStartTime = t.tg.leader.startTime
+ mask.Add(seccheck.TaskFieldThreadGroupStartTime)
+ }
+}
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index da4b77ca2..26a981f36 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/seccheck"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -235,7 +236,23 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
// nt that it must receive before its task goroutine starts running.
tid := nt.k.tasks.Root.IDOfTask(nt)
defer nt.Start(tid)
- t.traceCloneEvent(tid)
+
+ if seccheck.Global.Enabled(seccheck.PointClone) {
+ mask, info := getCloneSeccheckInfo(t, nt, args)
+ if err := seccheck.Global.Clone(t, mask, &info); err != nil {
+ // nt has been visible to the rest of the system since NewTask, so
+ // it may be blocking execve or a group stop, have been notified
+ // for group signal delivery, had children reparented to it, etc.
+ // Thus we can't just drop it on the floor. Instead, instruct the
+ // task goroutine to exit immediately, as quietly as possible.
+ nt.exitTracerNotified = true
+ nt.exitTracerAcked = true
+ nt.exitParentNotified = true
+ nt.exitParentAcked = true
+ nt.runState = (*runExitMain)(nil)
+ return 0, nil, err
+ }
+ }
// "If fork/clone and execve are allowed by @prog, any child processes will
// be constrained to the same filters and system call ABI as the parent." -
@@ -260,6 +277,7 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
ntid.CopyOut(t, hostarch.Addr(args.ParentTID))
}
+ t.traceCloneEvent(tid)
kind := ptraceCloneKindClone
if args.Flags&linux.CLONE_VFORK != 0 {
kind = ptraceCloneKindVfork
@@ -279,6 +297,22 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
return ntid, nil, nil
}
+func getCloneSeccheckInfo(t, nt *Task, args *linux.CloneArgs) (seccheck.CloneFieldSet, seccheck.CloneInfo) {
+ req := seccheck.Global.CloneReq()
+ info := seccheck.CloneInfo{
+ Credentials: t.Credentials(),
+ Args: *args,
+ }
+ var mask seccheck.CloneFieldSet
+ mask.Add(seccheck.CloneFieldCredentials)
+ mask.Add(seccheck.CloneFieldArgs)
+ t.k.tasks.mu.RLock()
+ defer t.k.tasks.mu.RUnlock()
+ t.loadSeccheckInfoLocked(req.Invoker, &mask.Invoker, &info.Invoker)
+ nt.loadSeccheckInfoLocked(req.Created, &mask.Created, &info.Created)
+ return mask, info
+}
+
// maybeBeginVforkStop checks if a previously-started vfork child is still
// running and has not yet released its MM, such that its parent t should enter
// a vforkStop.
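
Taken together, the changes above establish a three-step checkpoint protocol: a cheap Enabled() test, field gathering limited to what registered Checkers requested, then the Checker calls themselves. A minimal distillation of that protocol, reusing the names t, nt, and args from Clone above (a sketch, not code from this change):

    if seccheck.Global.Enabled(seccheck.PointClone) {
        req := seccheck.Global.CloneReq() // union of all registered Checkers' requests
        var mask seccheck.CloneFieldSet
        var info seccheck.CloneInfo
        if req.Contains(seccheck.CloneFieldArgs) {
            info.Args = *args
            mask.Add(seccheck.CloneFieldArgs)
        }
        // mask records what was actually populated; Checkers must consult it.
        if err := seccheck.Global.Clone(t, mask, &info); err != nil {
            return 0, nil, err // new task is torn down quietly; clone(2) fails
        }
    }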
diff --git a/pkg/sentry/seccheck/clone.go b/pkg/sentry/seccheck/clone.go
new file mode 100644
index 000000000..7546fa021
--- /dev/null
+++ b/pkg/sentry/seccheck/clone.go
@@ -0,0 +1,53 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seccheck
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// CloneInfo contains information used by the Clone checkpoint.
+//
+// +fieldenum Clone
+type CloneInfo struct {
+ // Invoker identifies the invoking thread.
+ Invoker TaskInfo
+
+ // Credentials are the invoking thread's credentials.
+ Credentials *auth.Credentials
+
+ // Args contains the arguments to kernel.Task.Clone().
+ Args linux.CloneArgs
+
+ // Created identifies the created thread.
+ Created TaskInfo
+}
+
+// CloneReq returns fields required by the Clone checkpoint.
+func (s *state) CloneReq() CloneFieldSet {
+ return s.cloneReq.Load()
+}
+
+// Clone is called at the Clone checkpoint.
+func (s *state) Clone(ctx context.Context, mask CloneFieldSet, info *CloneInfo) error {
+ for _, c := range s.getCheckers() {
+ if err := c.Clone(ctx, mask, *info); err != nil {
+ return err
+ }
+ }
+ return nil
+}
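
On the consumer side, a Checker's Clone method receives the mask and must treat unmasked fields as unpopulated. A hedged sketch of a consumer (the cloneLogger type, its package, and its use of the sentry's log package are illustrative assumptions, not part of this change):

    package mychecks

    import (
        "gvisor.dev/gvisor/pkg/context"
        "gvisor.dev/gvisor/pkg/log"
        "gvisor.dev/gvisor/pkg/sentry/seccheck"
    )

    type cloneLogger struct {
        seccheck.CheckerDefaults // no-op implementations of other checkpoints
    }

    func (cloneLogger) Clone(ctx context.Context, mask seccheck.CloneFieldSet, info seccheck.CloneInfo) error {
        // Only fields named in mask are guaranteed to be valid.
        if mask.Invoker.Contains(seccheck.TaskFieldThreadID) {
            log.Infof("clone() invoked by thread %d", info.Invoker.ThreadID)
        }
        return nil // a non-nil error would make the clone fail
    }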
diff --git a/pkg/sentry/seccheck/seccheck.go b/pkg/sentry/seccheck/seccheck.go
new file mode 100644
index 000000000..b6c9d44ce
--- /dev/null
+++ b/pkg/sentry/seccheck/seccheck.go
@@ -0,0 +1,136 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package seccheck defines a structure for dynamically-configured security
+// checks in the sentry.
+package seccheck
+
+import (
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sync"
+)
+
+// A Point represents a checkpoint, a point at which a security check occurs.
+type Point uint
+
+// PointX represents the checkpoint X.
+const (
+ PointClone Point = iota
+ // Add new Points above this line.
+ pointLength
+
+ numPointBitmaskUint32s = (int(pointLength)-1)/32 + 1
+)
+
+// A Checker performs security checks at checkpoints.
+//
+// Each Checker method X is called at checkpoint X; if the method may return a
+// non-nil error and does so, it causes the checked operation to fail
+// immediately (without calling subsequent Checkers) and return the error. The
+// info argument contains information relevant to the check. The mask argument
+// indicates what fields in info are valid; the mask should usually be a
+// superset of fields requested by the Checker's corresponding CheckerReq, but
+// may be missing requested fields in some cases (e.g. if the Checker is
+// registered concurrently with invocations of checkpoints).
+type Checker interface {
+ Clone(ctx context.Context, mask CloneFieldSet, info CloneInfo) error
+}
+
+// CheckerDefaults may be embedded by implementations of Checker to obtain
+// no-op implementations of Checker methods that may be explicitly overridden.
+type CheckerDefaults struct{}
+
+// Clone implements Checker.Clone.
+func (CheckerDefaults) Clone(ctx context.Context, mask CloneFieldSet, info CloneInfo) error {
+ return nil
+}
+
+// CheckerReq indicates what checkpoints a corresponding Checker runs at, and
+// what information it requires at those checkpoints.
+type CheckerReq struct {
+ // Points are the set of checkpoints for which the corresponding Checker
+ // must be called. Note that methods not specified in Points may still be
+ // called; implementations of Checker may embed CheckerDefaults to obtain
+ // no-op implementations of Checker methods.
+ Points []Point
+
+ // All of the following fields indicate what fields in the corresponding
+ // XInfo struct will be requested at the corresponding checkpoint.
+ Clone CloneFields
+}
+
+// Global is the method receiver of all seccheck functions.
+var Global state
+
+// state is the type of Global, and is separated out for testing.
+type state struct {
+ // registrationMu serializes all changes to the set of registered Checkers
+ // for all checkpoints.
+ registrationMu sync.Mutex
+
+ // enabledPoints is a bitmask of checkpoints for which at least one Checker
+ // is registered.
+ //
+ // enabledPoints is accessed using atomic memory operations. Mutation of
+ // enabledPoints is serialized by registrationMu.
+ enabledPoints [numPointBitmaskUint32s]uint32
+
+ // registrationSeq supports store-free atomic reads of registeredCheckers.
+ registrationSeq sync.SeqCount
+
+ // checkers is the set of all registered Checkers in order of execution.
+ //
+ // checkers is accessed using instantiations of SeqAtomic functions.
+ // Mutation of checkers is serialized by registrationMu.
+ checkers []Checker
+
+ // All of the following xReq variables indicate what fields in the
+ // corresponding XInfo struct have been requested by any registered
+ // checker, are accessed using atomic memory operations, and are mutated
+ // with registrationMu locked.
+ cloneReq CloneFieldSet
+}
+
+// AppendChecker registers the given Checker to execute at checkpoints. The
+// Checker will execute after all previously-registered Checkers, and only if
+// those Checkers return a nil error.
+func (s *state) AppendChecker(c Checker, req *CheckerReq) {
+ s.registrationMu.Lock()
+ defer s.registrationMu.Unlock()
+ s.cloneReq.AddFieldsLoadable(req.Clone)
+ s.appendCheckerLocked(c)
+ for _, p := range req.Points {
+ word, bit := p/32, p%32
+ atomic.StoreUint32(&s.enabledPoints[word], s.enabledPoints[word]|(uint32(1)<<bit))
+ }
+}
+
+// Enabled returns true if any Checker is registered for the given checkpoint.
+func (s *state) Enabled(p Point) bool {
+ word, bit := p/32, p%32
+ return atomic.LoadUint32(&s.enabledPoints[word])&(uint32(1)<<bit) != 0
+}
+
+func (s *state) getCheckers() []Checker {
+ return SeqAtomicLoadCheckerSlice(&s.registrationSeq, &s.checkers)
+}
+
+// Preconditions: s.registrationMu must be locked.
+func (s *state) appendCheckerLocked(c Checker) {
+ s.registrationSeq.BeginWrite()
+ s.checkers = append(s.checkers, c)
+ s.registrationSeq.EndWrite()
+}
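
Registration ties the pieces together: a CheckerReq names the Points to enable and the fields to request, and AppendChecker publishes both for the lock-free readers above. Continuing the hypothetical cloneLogger sketch:

    func init() {
        seccheck.Global.AppendChecker(cloneLogger{}, &seccheck.CheckerReq{
            Points: []seccheck.Point{seccheck.PointClone},
            Clone: seccheck.CloneFields{
                Invoker: seccheck.TaskFields{ThreadID: true},
            },
        })
    }

Note there is no removal API; because registration is append-only, Enabled and CloneReq can be plain atomic loads on the hot path.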
diff --git a/pkg/sentry/seccheck/seccheck_fieldenum.go b/pkg/sentry/seccheck/seccheck_fieldenum.go
new file mode 100644
index 000000000..b193b2973
--- /dev/null
+++ b/pkg/sentry/seccheck/seccheck_fieldenum.go
@@ -0,0 +1,134 @@
+// Generated by go_fieldenum.
+
+package seccheck
+
+import "sync/atomic"
+
+// A CloneField represents a field in CloneInfo.
+type CloneField uint
+
+// CloneFieldX represents CloneInfo field X.
+const (
+ CloneFieldCredentials CloneField = iota
+ CloneFieldArgs
+)
+
+// CloneFields represents a set of fields in CloneInfo in a literal-friendly form.
+// The zero value of CloneFields represents an empty set.
+type CloneFields struct {
+ Invoker TaskFields
+ Credentials bool
+ Args bool
+ Created TaskFields
+}
+
+// CloneFieldSet represents a set of fields in CloneInfo in a compact form.
+// The zero value of CloneFieldSet represents an empty set.
+type CloneFieldSet struct {
+ Invoker TaskFieldSet
+ Created TaskFieldSet
+ fields [1]uint32
+}
+
+// Contains returns true if f is present in the CloneFieldSet.
+func (fs CloneFieldSet) Contains(f CloneField) bool {
+ return fs.fields[0] & (uint32(1) << uint(f)) != 0
+}
+
+// Add adds f to the CloneFieldSet.
+func (fs *CloneFieldSet) Add(f CloneField) {
+ fs.fields[0] |= uint32(1) << uint(f)
+}
+
+// Remove removes f from the CloneFieldSet.
+func (fs *CloneFieldSet) Remove(f CloneField) {
+ fs.fields[0] &^= uint32(1) << uint(f)
+}
+
+// Load returns a copy of the CloneFieldSet.
+// Load is safe to call concurrently with AddFieldsLoadable, but not Add or Remove.
+func (fs *CloneFieldSet) Load() (copied CloneFieldSet) {
+ copied.Invoker = fs.Invoker.Load()
+ copied.Created = fs.Created.Load()
+ copied.fields[0] = atomic.LoadUint32(&fs.fields[0])
+ return
+}
+
+// AddFieldsLoadable adds the given fields to the CloneFieldSet.
+// AddFieldsLoadable is safe to call concurrently with Load, but not other methods (including other calls to AddFieldsLoadable).
+func (fs *CloneFieldSet) AddFieldsLoadable(fields CloneFields) {
+ fs.Invoker.AddFieldsLoadable(fields.Invoker)
+ fs.Created.AddFieldsLoadable(fields.Created)
+ if fields.Credentials {
+ atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(CloneFieldCredentials)))
+ }
+ if fields.Args {
+ atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(CloneFieldArgs)))
+ }
+}
+
+// A TaskField represents a field in TaskInfo.
+type TaskField uint
+
+// TaskFieldX represents TaskInfo field X.
+const (
+ TaskFieldThreadID TaskField = iota
+ TaskFieldThreadStartTime
+ TaskFieldThreadGroupID
+ TaskFieldThreadGroupStartTime
+)
+
+// TaskFields represents a set of fields in TaskInfo in a literal-friendly form.
+// The zero value of TaskFields represents an empty set.
+type TaskFields struct {
+ ThreadID bool
+ ThreadStartTime bool
+ ThreadGroupID bool
+ ThreadGroupStartTime bool
+}
+
+// TaskFieldSet represents a set of fields in TaskInfo in a compact form.
+// The zero value of TaskFieldSet represents an empty set.
+type TaskFieldSet struct {
+ fields [1]uint32
+}
+
+// Contains returns true if f is present in the TaskFieldSet.
+func (fs TaskFieldSet) Contains(f TaskField) bool {
+ return fs.fields[0] & (uint32(1) << uint(f)) != 0
+}
+
+// Add adds f to the TaskFieldSet.
+func (fs *TaskFieldSet) Add(f TaskField) {
+ fs.fields[0] |= uint32(1) << uint(f)
+}
+
+// Remove removes f from the TaskFieldSet.
+func (fs *TaskFieldSet) Remove(f TaskField) {
+ fs.fields[0] &^= uint32(1) << uint(f)
+}
+
+// Load returns a copy of the TaskFieldSet.
+// Load is safe to call concurrently with AddFieldsLoadable, but not Add or Remove.
+func (fs *TaskFieldSet) Load() (copied TaskFieldSet) {
+ copied.fields[0] = atomic.LoadUint32(&fs.fields[0])
+ return
+}
+
+// AddFieldsLoadable adds the given fields to the TaskFieldSet.
+// AddFieldsLoadable is safe to call concurrently with Load, but not other methods (including other calls to AddFieldsLoadable).
+func (fs *TaskFieldSet) AddFieldsLoadable(fields TaskFields) {
+ if fields.ThreadID {
+ atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(TaskFieldThreadID)))
+ }
+ if fields.ThreadStartTime {
+ atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(TaskFieldThreadStartTime)))
+ }
+ if fields.ThreadGroupID {
+ atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(TaskFieldThreadGroupID)))
+ }
+ if fields.ThreadGroupStartTime {
+ atomic.StoreUint32(&fs.fields[0], fs.fields[0] | (uint32(1) << uint(TaskFieldThreadGroupStartTime)))
+ }
+}
+
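
The generated sets are plain bitmasks, so Contains compiles down to a mask-and-test. A small sketch of the intended concurrency contract (writers serialize on a mutex and use AddFieldsLoadable; hot-path readers take Load snapshots):

    var fs seccheck.CloneFieldSet

    // Registration path (serialized by the caller, e.g. registrationMu):
    fs.AddFieldsLoadable(seccheck.CloneFields{Args: true})

    // Hot path (safe to run concurrently with AddFieldsLoadable):
    req := fs.Load()
    if req.Contains(seccheck.CloneFieldArgs) {
        // ...populate CloneInfo.Args and set the bit in the local mask...
    }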
diff --git a/pkg/sentry/seccheck/seccheck_state_autogen.go b/pkg/sentry/seccheck/seccheck_state_autogen.go
new file mode 100644
index 000000000..2fa2e9787
--- /dev/null
+++ b/pkg/sentry/seccheck/seccheck_state_autogen.go
@@ -0,0 +1,3 @@
+// automatically generated by stateify.
+
+package seccheck
diff --git a/pkg/sentry/seccheck/seccheck_unsafe_state_autogen.go b/pkg/sentry/seccheck/seccheck_unsafe_state_autogen.go
new file mode 100644
index 000000000..2fa2e9787
--- /dev/null
+++ b/pkg/sentry/seccheck/seccheck_unsafe_state_autogen.go
@@ -0,0 +1,3 @@
+// automatically generated by stateify.
+
+package seccheck
diff --git a/pkg/sentry/seccheck/seqatomic_checkerslice_unsafe.go b/pkg/sentry/seccheck/seqatomic_checkerslice_unsafe.go
new file mode 100644
index 000000000..05a6c6eee
--- /dev/null
+++ b/pkg/sentry/seccheck/seqatomic_checkerslice_unsafe.go
@@ -0,0 +1,38 @@
+package seccheck
+
+import (
+ "unsafe"
+
+ "gvisor.dev/gvisor/pkg/gohacks"
+ "gvisor.dev/gvisor/pkg/sync"
+)
+
+// SeqAtomicLoadCheckerSlice returns a copy of *ptr, ensuring that the read does not race
+// with any writer critical sections in seq.
+//
+//go:nosplit
+func SeqAtomicLoadCheckerSlice(seq *sync.SeqCount, ptr *[]Checker) []Checker {
+ for {
+ if val, ok := SeqAtomicTryLoadCheckerSlice(seq, seq.BeginRead(), ptr); ok {
+ return val
+ }
+ }
+}
+
+// SeqAtomicTryLoadCheckerSlice returns a copy of *ptr while in a reader
+// critical section in seq initiated by a call to seq.BeginRead() that
+// returned epoch. If the read would race with a writer critical section,
+// SeqAtomicTryLoadCheckerSlice returns (unspecified, false).
+//
+//go:nosplit
+func SeqAtomicTryLoadCheckerSlice(seq *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *[]Checker) (val []Checker, ok bool) {
+ if sync.RaceEnabled {
+ // runtime.RaceDisable() doesn't actually stop the race detector, so it
+ // can't help us here. Instead, call runtime.memmove directly, which is
+ // not instrumented by the race detector.
+ gohacks.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val))
+ } else {
+ // This is ~40% faster for short reads than going through memmove.
+ val = *ptr
+ }
+ ok = seq.ReadOk(epoch)
+ return
+}
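
These loads pair with writer critical sections on the same SeqCount; in this package the only writer is appendCheckerLocked in seccheck.go. A sketch of the pairing, assuming writers are serialized externally (here by registrationMu):

    // Writer (holding registrationMu):
    s.registrationSeq.BeginWrite()
    s.checkers = append(s.checkers, c)
    s.registrationSeq.EndWrite()

    // Reader (lock-free; SeqAtomicLoadCheckerSlice retries until it observes
    // a snapshot consistent with no concurrent writer):
    checkers := SeqAtomicLoadCheckerSlice(&s.registrationSeq, &s.checkers)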
diff --git a/pkg/sentry/seccheck/task.go b/pkg/sentry/seccheck/task.go
new file mode 100644
index 000000000..1dee33203
--- /dev/null
+++ b/pkg/sentry/seccheck/task.go
@@ -0,0 +1,39 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seccheck
+
+import (
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+)
+
+// TaskInfo contains information unambiguously identifying a single thread
+// and/or its containing process.
+//
+// +fieldenum Task
+type TaskInfo struct {
+ // ThreadID is the thread's ID in the root PID namespace.
+ ThreadID int32
+
+ // ThreadStartTime is the thread's CLOCK_REALTIME start time.
+ ThreadStartTime ktime.Time
+
+ // ThreadGroupID is the thread's group leader's ID in the root PID
+ // namespace.
+ ThreadGroupID int32
+
+ // ThreadGroupStartTime is the thread's group leader's CLOCK_REALTIME start
+ // time.
+ ThreadGroupStartTime ktime.Time
+}
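
The ID/start-time pairing exists because TIDs are recycled: two snapshots can carry the same ThreadID yet name different threads. A hedged helper sketch (sameThread is hypothetical, not part of the package):

    // sameThread reports whether two TaskInfo snapshots identify the same
    // thread; a recycled TID will not share the original thread's start time.
    func sameThread(a, b seccheck.TaskInfo) bool {
        return a.ThreadID == b.ThreadID && a.ThreadStartTime == b.ThreadStartTime
    }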