summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2021-07-22 20:46:58 +0000
committergVisor bot <gvisor-bot@google.com>2021-07-22 20:46:58 +0000
commit065c2cc4ed281fdd929af9dc97a2b99a1b46bbfc (patch)
tree015e41562cfc6203a4972afca09830949e4c5b7b
parent8db76828ffab7e9aa890344788a3fd08605854ce (diff)
parentd5fb4623ea75571f9a04e5694c18f397ba204ad6 (diff)
Merge release-20210712.0-40-gd5fb4623e (automated)
-rw-r--r--pkg/abi/linux/clone.go25
-rw-r--r--pkg/sentry/kernel/ptrace.go6
-rw-r--r--pkg/sentry/kernel/task_clone.go233
-rw-r--r--pkg/sentry/syscalls/linux/sys_thread.go65
4 files changed, 111 insertions, 218 deletions
diff --git a/pkg/abi/linux/clone.go b/pkg/abi/linux/clone.go
index c2cbfca5e..322a4ef5a 100644
--- a/pkg/abi/linux/clone.go
+++ b/pkg/abi/linux/clone.go
@@ -16,13 +16,16 @@ package linux
// Clone constants per clone(2).
const (
+ CSIGNAL = 0xff
+
CLONE_VM = 0x100
CLONE_FS = 0x200
CLONE_FILES = 0x400
CLONE_SIGHAND = 0x800
- CLONE_PARENT = 0x8000
+ CLONE_PIDFD = 0x1000
CLONE_PTRACE = 0x2000
CLONE_VFORK = 0x4000
+ CLONE_PARENT = 0x8000
CLONE_THREAD = 0x10000
CLONE_NEWNS = 0x20000
CLONE_SYSVSEM = 0x40000
@@ -32,10 +35,30 @@ const (
CLONE_DETACHED = 0x400000
CLONE_UNTRACED = 0x800000
CLONE_CHILD_SETTID = 0x1000000
+ CLONE_NEWCGROUP = 0x2000000
CLONE_NEWUTS = 0x4000000
CLONE_NEWIPC = 0x8000000
CLONE_NEWUSER = 0x10000000
CLONE_NEWPID = 0x20000000
CLONE_NEWNET = 0x40000000
CLONE_IO = 0x80000000
+
+ // Only passable via clone3(2).
+ CLONE_CLEAR_SIGHAND = 0x100000000
+ CLONE_INTO_CGROUP = 0x200000000
)
+
+// CloneArgs is struct clone_args, from include/uapi/linux/sched.h.
+type CloneArgs struct {
+ Flags uint64
+ Pidfd uint64
+ ChildTID uint64
+ ParentTID uint64
+ ExitSignal uint64
+ Stack uint64
+ StackSize uint64
+ TLS uint64
+ SetTID uint64
+ SetTIDSize uint64
+ Cgroup uint64
+}
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index 21358ec92..079294f81 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -768,14 +768,14 @@ const (
// ptraceClone is called at the end of a clone or fork syscall to check if t
// should enter PTRACE_EVENT_CLONE, PTRACE_EVENT_FORK, or PTRACE_EVENT_VFORK
// stop. child is the new task.
-func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions) bool {
+func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, args *linux.CloneArgs) bool {
if !t.hasTracer() {
return false
}
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
event := false
- if !opts.Untraced {
+ if args.Flags&linux.CLONE_UNTRACED == 0 {
switch kind {
case ptraceCloneKindClone:
if t.ptraceOpts.TraceClone {
@@ -810,7 +810,7 @@ func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions
// clone(2)'s documentation of CLONE_UNTRACED and CLONE_PTRACE is
// confusingly wrong; see kernel/fork.c:_do_fork() => copy_process() =>
// include/linux/ptrace.h:ptrace_init_task().
- if event || opts.InheritTracer {
+ if event || args.Flags&linux.CLONE_PTRACE != 0 {
tracer := t.Tracer()
if tracer != nil {
child.ptraceTracer.Store(tracer)
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index 7e1347aa6..da4b77ca2 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -26,140 +26,39 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-// SharingOptions controls what resources are shared by a new task created by
-// Task.Clone, or an existing task affected by Task.Unshare.
-type SharingOptions struct {
- // If NewAddressSpace is true, the task should have an independent virtual
- // address space.
- NewAddressSpace bool
-
- // If NewSignalHandlers is true, the task should use an independent set of
- // signal handlers.
- NewSignalHandlers bool
-
- // If NewThreadGroup is true, the task should be the leader of its own
- // thread group. TerminationSignal is the signal that the thread group
- // will send to its parent when it exits. If NewThreadGroup is false,
- // TerminationSignal is ignored.
- NewThreadGroup bool
- TerminationSignal linux.Signal
-
- // If NewPIDNamespace is true:
- //
- // - In the context of Task.Clone, the new task should be the init task
- // (TID 1) in a new PID namespace.
- //
- // - In the context of Task.Unshare, the task should create a new PID
- // namespace, and all subsequent clones of the task should be members of
- // the new PID namespace.
- NewPIDNamespace bool
-
- // If NewUserNamespace is true, the task should have an independent user
- // namespace.
- NewUserNamespace bool
-
- // If NewNetworkNamespace is true, the task should have an independent
- // network namespace.
- NewNetworkNamespace bool
-
- // If NewFiles is true, the task should use an independent file descriptor
- // table.
- NewFiles bool
-
- // If NewFSContext is true, the task should have an independent FSContext.
- NewFSContext bool
-
- // If NewUTSNamespace is true, the task should have an independent UTS
- // namespace.
- NewUTSNamespace bool
-
- // If NewIPCNamespace is true, the task should have an independent IPC
- // namespace.
- NewIPCNamespace bool
-}
-
-// CloneOptions controls the behavior of Task.Clone.
-type CloneOptions struct {
- // SharingOptions defines the set of resources that the new task will share
- // with its parent.
- SharingOptions
-
- // Stack is the initial stack pointer of the new task. If Stack is 0, the
- // new task will start with the same stack pointer as its parent.
- Stack hostarch.Addr
-
- // If SetTLS is true, set the new task's TLS (thread-local storage)
- // descriptor to TLS. If SetTLS is false, TLS is ignored.
- SetTLS bool
- TLS hostarch.Addr
-
- // If ChildClearTID is true, when the child exits, 0 is written to the
- // address ChildTID in the child's memory, and if the write is successful a
- // futex wake on the same address is performed.
- //
- // If ChildSetTID is true, the child's thread ID (in the child's PID
- // namespace) is written to address ChildTID in the child's memory. (As in
- // Linux, failed writes are silently ignored.)
- ChildClearTID bool
- ChildSetTID bool
- ChildTID hostarch.Addr
-
- // If ParentSetTID is true, the child's thread ID (in the parent's PID
- // namespace) is written to address ParentTID in the parent's memory. (As
- // in Linux, failed writes are silently ignored.)
- //
- // Older versions of the clone(2) man page state that CLONE_PARENT_SETTID
- // causes the child's thread ID to be written to ptid in both the parent
- // and child's memory, but this is a documentation error fixed by
- // 87ab04792ced ("clone.2: Fix description of CLONE_PARENT_SETTID").
- ParentSetTID bool
- ParentTID hostarch.Addr
-
- // If Vfork is true, place the parent in vforkStop until the cloned task
- // releases its TaskImage.
- Vfork bool
-
- // If Untraced is true, do not report PTRACE_EVENT_CLONE/FORK/VFORK for
- // this clone(), and do not ptrace-attach the caller's tracer to the new
- // task. (PTRACE_EVENT_VFORK_DONE will still be reported if appropriate).
- Untraced bool
-
- // If InheritTracer is true, ptrace-attach the caller's tracer to the new
- // task, even if no PTRACE_EVENT_CLONE/FORK/VFORK event would be reported
- // for it. If both Untraced and InheritTracer are true, no event will be
- // reported, but tracer inheritance will still occur.
- InheritTracer bool
-}
-
// Clone implements the clone(2) syscall and returns the thread ID of the new
// task in t's PID namespace. Clone may return both a non-zero thread ID and a
// non-nil error.
//
// Preconditions: The caller must be running Task.doSyscallInvoke on the task
// goroutine.
-func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
+func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
// Since signal actions may refer to application signal handlers by virtual
// address, any set of signal handlers must refer to the same address
// space.
- if !opts.NewSignalHandlers && opts.NewAddressSpace {
+ if args.Flags&(linux.CLONE_SIGHAND|linux.CLONE_VM) == linux.CLONE_SIGHAND {
return 0, nil, linuxerr.EINVAL
}
// In order for the behavior of thread-group-directed signals to be sane,
// all tasks in a thread group must share signal handlers.
- if !opts.NewThreadGroup && opts.NewSignalHandlers {
+ if args.Flags&(linux.CLONE_THREAD|linux.CLONE_SIGHAND) == linux.CLONE_THREAD {
return 0, nil, linuxerr.EINVAL
}
// All tasks in a thread group must be in the same PID namespace.
- if !opts.NewThreadGroup && (opts.NewPIDNamespace || t.childPIDNamespace != nil) {
+ if (args.Flags&linux.CLONE_THREAD != 0) && (args.Flags&linux.CLONE_NEWPID != 0 || t.childPIDNamespace != nil) {
return 0, nil, linuxerr.EINVAL
}
// The two different ways of specifying a new PID namespace are
// incompatible.
- if opts.NewPIDNamespace && t.childPIDNamespace != nil {
+ if args.Flags&linux.CLONE_NEWPID != 0 && t.childPIDNamespace != nil {
return 0, nil, linuxerr.EINVAL
}
// Thread groups and FS contexts cannot span user namespaces.
- if opts.NewUserNamespace && (!opts.NewThreadGroup || !opts.NewFSContext) {
+ if args.Flags&linux.CLONE_NEWUSER != 0 && args.Flags&(linux.CLONE_THREAD|linux.CLONE_FS) != 0 {
+ return 0, nil, linuxerr.EINVAL
+ }
+ // args.ExitSignal must be a valid signal.
+ if args.ExitSignal != 0 && !linux.Signal(args.ExitSignal).IsValid() {
return 0, nil, linuxerr.EINVAL
}
@@ -174,7 +73,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
// user_namespaces(7)
creds := t.Credentials()
userns := creds.UserNamespace
- if opts.NewUserNamespace {
+ if args.Flags&linux.CLONE_NEWUSER != 0 {
var err error
// "EPERM (since Linux 3.9): CLONE_NEWUSER was specified in flags and
// the caller is in a chroot environment (i.e., the caller's root
@@ -189,21 +88,19 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
return 0, nil, err
}
}
- if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) {
+ if args.Flags&(linux.CLONE_NEWPID|linux.CLONE_NEWNET|linux.CLONE_NEWUTS|linux.CLONE_NEWIPC) != 0 && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) {
return 0, nil, linuxerr.EPERM
}
utsns := t.UTSNamespace()
- if opts.NewUTSNamespace {
+ if args.Flags&linux.CLONE_NEWUTS != 0 {
// Note that this must happen after NewUserNamespace so we get
// the new userns if there is one.
utsns = t.UTSNamespace().Clone(userns)
}
ipcns := t.IPCNamespace()
- if opts.NewIPCNamespace {
- // Note that "If CLONE_NEWIPC is set, then create the process in a new IPC
- // namespace"
+ if args.Flags&linux.CLONE_NEWIPC != 0 {
ipcns = NewIPCNamespace(userns)
} else {
ipcns.IncRef()
@@ -214,7 +111,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
defer cu.Clean()
netns := t.NetworkNamespace()
- if opts.NewNetworkNamespace {
+ if args.Flags&linux.CLONE_NEWNET != 0 {
netns = inet.NewNamespace(netns)
}
@@ -227,7 +124,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
})
}
- image, err := t.image.Fork(t, t.k, !opts.NewAddressSpace)
+ image, err := t.image.Fork(t, t.k, args.Flags&linux.CLONE_VM != 0)
if err != nil {
return 0, nil, err
}
@@ -236,17 +133,17 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
})
// clone() returns 0 in the child.
image.Arch.SetReturn(0)
- if opts.Stack != 0 {
- image.Arch.SetStack(uintptr(opts.Stack))
+ if args.Stack != 0 {
+ image.Arch.SetStack(uintptr(args.Stack))
}
- if opts.SetTLS {
- if !image.Arch.SetTLS(uintptr(opts.TLS)) {
+ if args.Flags&linux.CLONE_SETTLS != 0 {
+ if !image.Arch.SetTLS(uintptr(args.TLS)) {
return 0, nil, linuxerr.EPERM
}
}
var fsContext *FSContext
- if opts.NewFSContext {
+ if args.Flags&linux.CLONE_FS == 0 {
fsContext = t.fsContext.Fork()
} else {
fsContext = t.fsContext
@@ -254,7 +151,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
}
var fdTable *FDTable
- if opts.NewFiles {
+ if args.Flags&linux.CLONE_FILES == 0 {
fdTable = t.fdTable.Fork(t)
} else {
fdTable = t.fdTable
@@ -264,22 +161,22 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
pidns := t.tg.pidns
if t.childPIDNamespace != nil {
pidns = t.childPIDNamespace
- } else if opts.NewPIDNamespace {
+ } else if args.Flags&linux.CLONE_NEWPID != 0 {
pidns = pidns.NewChild(userns)
}
tg := t.tg
rseqAddr := hostarch.Addr(0)
rseqSignature := uint32(0)
- if opts.NewThreadGroup {
+ if args.Flags&linux.CLONE_THREAD == 0 {
if tg.mounts != nil {
tg.mounts.IncRef()
}
sh := t.tg.signalHandlers
- if opts.NewSignalHandlers {
+ if args.Flags&linux.CLONE_SIGHAND == 0 {
sh = sh.Fork()
}
- tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, opts.TerminationSignal, tg.limits.GetCopy())
+ tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, linux.Signal(args.ExitSignal), tg.limits.GetCopy())
tg.oomScoreAdj = atomic.LoadInt32(&t.tg.oomScoreAdj)
rseqAddr = t.rseqAddr
rseqSignature = t.rseqSignature
@@ -304,7 +201,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
RSeqSignature: rseqSignature,
ContainerID: t.ContainerID(),
}
- if opts.NewThreadGroup {
+ if args.Flags&linux.CLONE_THREAD == 0 {
cfg.Parent = t
} else {
cfg.InheritParent = t
@@ -322,7 +219,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
//
// However kernel/fork.c:copy_process() adds a limitation to this:
// "sigaltstack should be cleared when sharing the same VM".
- if opts.NewAddressSpace || opts.Vfork {
+ if args.Flags&linux.CLONE_VM == 0 || args.Flags&linux.CLONE_VFORK != 0 {
nt.SetSignalStack(t.SignalStack())
}
@@ -347,35 +244,35 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
copiedFilters := append([]bpf.Program(nil), f.([]bpf.Program)...)
nt.syscallFilters.Store(copiedFilters)
}
- if opts.Vfork {
+ if args.Flags&linux.CLONE_VFORK != 0 {
nt.vforkParent = t
}
- if opts.ChildClearTID {
- nt.SetClearTID(opts.ChildTID)
+ if args.Flags&linux.CLONE_CHILD_CLEARTID != 0 {
+ nt.SetClearTID(hostarch.Addr(args.ChildTID))
}
- if opts.ChildSetTID {
+ if args.Flags&linux.CLONE_CHILD_SETTID != 0 {
ctid := nt.ThreadID()
- ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{AddressSpaceActive: false}), opts.ChildTID)
+ ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{AddressSpaceActive: false}), hostarch.Addr(args.ChildTID))
}
ntid := t.tg.pidns.IDOfTask(nt)
- if opts.ParentSetTID {
- ntid.CopyOut(t, opts.ParentTID)
+ if args.Flags&linux.CLONE_PARENT_SETTID != 0 {
+ ntid.CopyOut(t, hostarch.Addr(args.ParentTID))
}
kind := ptraceCloneKindClone
- if opts.Vfork {
+ if args.Flags&linux.CLONE_VFORK != 0 {
kind = ptraceCloneKindVfork
- } else if opts.TerminationSignal == linux.SIGCHLD {
+ } else if linux.Signal(args.ExitSignal) == linux.SIGCHLD {
kind = ptraceCloneKindFork
}
- if t.ptraceClone(kind, nt, opts) {
- if opts.Vfork {
+ if t.ptraceClone(kind, nt, args) {
+ if args.Flags&linux.CLONE_VFORK != 0 {
return ntid, &SyscallControl{next: &runSyscallAfterPtraceEventClone{vforkChild: nt, vforkChildTID: ntid}}, nil
}
return ntid, &SyscallControl{next: &runSyscallAfterPtraceEventClone{}}, nil
}
- if opts.Vfork {
+ if args.Flags&linux.CLONE_VFORK != 0 {
t.maybeBeginVforkStop(nt)
return ntid, &SyscallControl{next: &runSyscallAfterVforkStop{childTID: ntid}}, nil
}
@@ -446,27 +343,35 @@ func (r *runSyscallAfterVforkStop) execute(t *Task) taskRunState {
}
// Unshare changes the set of resources t shares with other tasks, as specified
-// by opts.
+// by flags.
//
// Preconditions: The caller must be running on the task goroutine.
-func (t *Task) Unshare(opts *SharingOptions) error {
- // In Linux unshare(2), NewThreadGroup implies NewSignalHandlers and
- // NewSignalHandlers implies NewAddressSpace. All three flags are no-ops if
- // t is the only task using its MM, which due to clone(2)'s rules imply
- // that it is also the only task using its signal handlers / in its thread
- // group, and cause EINVAL to be returned otherwise.
+func (t *Task) Unshare(flags int32) error {
+ // "CLONE_THREAD, CLONE_SIGHAND, and CLONE_VM can be specified in flags if
+ // the caller is single threaded (i.e., it is not sharing its address space
+ // with another process or thread). In this case, these flags have no
+ // effect. (Note also that specifying CLONE_THREAD automatically implies
+ // CLONE_VM, and specifying CLONE_VM automatically implies CLONE_SIGHAND.)
+ // If the process is multithreaded, then the use of these flags results in
+ // an error." - unshare(2). This is incorrect (cf.
+ // kernel/fork.c:ksys_unshare()):
+ //
+ // - CLONE_THREAD does not imply CLONE_VM.
+ //
+ // - CLONE_SIGHAND implies CLONE_THREAD.
+ //
+ // - Only CLONE_VM requires that the caller is not sharing its address
+ // space with another thread. CLONE_SIGHAND requires that the caller is not
+ // sharing its signal handlers, and CLONE_THREAD requires that the caller
+ // is the only thread in its thread group.
//
// Since we don't count the number of tasks using each address space or set
- // of signal handlers, we reject NewSignalHandlers and NewAddressSpace
- // altogether, and interpret NewThreadGroup as requiring that t be the only
- // member of its thread group. This seems to be logically coherent, in the
- // sense that clone(2) allows a task to share signal handlers and address
- // spaces with tasks in other thread groups.
- if opts.NewAddressSpace || opts.NewSignalHandlers {
+ // of signal handlers, we reject CLONE_VM and CLONE_SIGHAND altogether.
+ if flags&(linux.CLONE_VM|linux.CLONE_SIGHAND) != 0 {
return linuxerr.EINVAL
}
creds := t.Credentials()
- if opts.NewThreadGroup {
+ if flags&linux.CLONE_THREAD != 0 {
t.tg.signalHandlers.mu.Lock()
if t.tg.tasksCount != 1 {
t.tg.signalHandlers.mu.Unlock()
@@ -476,7 +381,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
// This isn't racy because we're the only living task, and therefore
// the only task capable of creating new ones, in our thread group.
}
- if opts.NewUserNamespace {
+ if flags&linux.CLONE_NEWUSER != 0 {
if t.IsChrooted() {
return linuxerr.EPERM
}
@@ -492,7 +397,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
creds = t.Credentials()
}
haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN)
- if opts.NewPIDNamespace {
+ if flags&linux.CLONE_NEWPID != 0 {
if !haveCapSysAdmin {
return linuxerr.EPERM
}
@@ -500,14 +405,14 @@ func (t *Task) Unshare(opts *SharingOptions) error {
}
t.mu.Lock()
// Can't defer unlock: DecRefs must occur without holding t.mu.
- if opts.NewNetworkNamespace {
+ if flags&linux.CLONE_NEWNET != 0 {
if !haveCapSysAdmin {
t.mu.Unlock()
return linuxerr.EPERM
}
t.netns = inet.NewNamespace(t.netns)
}
- if opts.NewUTSNamespace {
+ if flags&linux.CLONE_NEWUTS != 0 {
if !haveCapSysAdmin {
t.mu.Unlock()
return linuxerr.EPERM
@@ -516,7 +421,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
// new user namespace is used if there is one.
t.utsns = t.utsns.Clone(creds.UserNamespace)
}
- if opts.NewIPCNamespace {
+ if flags&linux.CLONE_NEWIPC != 0 {
if !haveCapSysAdmin {
t.mu.Unlock()
return linuxerr.EPERM
@@ -527,12 +432,12 @@ func (t *Task) Unshare(opts *SharingOptions) error {
t.ipcns = NewIPCNamespace(creds.UserNamespace)
}
var oldFDTable *FDTable
- if opts.NewFiles {
+ if flags&linux.CLONE_FILES != 0 {
oldFDTable = t.fdTable
t.fdTable = oldFDTable.Fork(t)
}
var oldFSContext *FSContext
- if opts.NewFSContext {
+ if flags&linux.CLONE_FS != 0 {
oldFSContext = t.fsContext
t.fsContext = oldFSContext.Fork()
}
diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go
index 46145955e..981cdd985 100644
--- a/pkg/sentry/syscalls/linux/sys_thread.go
+++ b/pkg/sentry/syscalls/linux/sys_thread.go
@@ -31,11 +31,6 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-const (
- // exitSignalMask is the signal mask to be sent at exit. Same as CSIGNAL in linux.
- exitSignalMask = 0xff
-)
-
var (
// ExecMaxTotalSize is the maximum length of all argv and envv entries.
//
@@ -201,33 +196,16 @@ func ExitGroup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
// clone is used by Clone, Fork, and VFork.
func clone(t *kernel.Task, flags int, stack hostarch.Addr, parentTID hostarch.Addr, childTID hostarch.Addr, tls hostarch.Addr) (uintptr, *kernel.SyscallControl, error) {
- opts := kernel.CloneOptions{
- SharingOptions: kernel.SharingOptions{
- NewAddressSpace: flags&linux.CLONE_VM == 0,
- NewSignalHandlers: flags&linux.CLONE_SIGHAND == 0,
- NewThreadGroup: flags&linux.CLONE_THREAD == 0,
- TerminationSignal: linux.Signal(flags & exitSignalMask),
- NewPIDNamespace: flags&linux.CLONE_NEWPID == linux.CLONE_NEWPID,
- NewUserNamespace: flags&linux.CLONE_NEWUSER == linux.CLONE_NEWUSER,
- NewNetworkNamespace: flags&linux.CLONE_NEWNET == linux.CLONE_NEWNET,
- NewFiles: flags&linux.CLONE_FILES == 0,
- NewFSContext: flags&linux.CLONE_FS == 0,
- NewUTSNamespace: flags&linux.CLONE_NEWUTS == linux.CLONE_NEWUTS,
- NewIPCNamespace: flags&linux.CLONE_NEWIPC == linux.CLONE_NEWIPC,
- },
- Stack: stack,
- SetTLS: flags&linux.CLONE_SETTLS == linux.CLONE_SETTLS,
- TLS: tls,
- ChildClearTID: flags&linux.CLONE_CHILD_CLEARTID == linux.CLONE_CHILD_CLEARTID,
- ChildSetTID: flags&linux.CLONE_CHILD_SETTID == linux.CLONE_CHILD_SETTID,
- ChildTID: childTID,
- ParentSetTID: flags&linux.CLONE_PARENT_SETTID == linux.CLONE_PARENT_SETTID,
- ParentTID: parentTID,
- Vfork: flags&linux.CLONE_VFORK == linux.CLONE_VFORK,
- Untraced: flags&linux.CLONE_UNTRACED == linux.CLONE_UNTRACED,
- InheritTracer: flags&linux.CLONE_PTRACE == linux.CLONE_PTRACE,
- }
- ntid, ctrl, err := t.Clone(&opts)
+ args := linux.CloneArgs{
+ Flags: uint64(uint32(flags) &^ linux.CSIGNAL),
+ Pidfd: uint64(parentTID),
+ ChildTID: uint64(childTID),
+ ParentTID: uint64(parentTID),
+ ExitSignal: uint64(flags & linux.CSIGNAL),
+ Stack: uint64(stack),
+ TLS: uint64(tls),
+ }
+ ntid, ctrl, err := t.Clone(&args)
return uintptr(ntid), ctrl, err
}
@@ -460,29 +438,16 @@ func SetTidAddress(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
// Unshare implements linux syscall unshare(2).
func Unshare(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
flags := args[0].Int()
- opts := kernel.SharingOptions{
- NewAddressSpace: flags&linux.CLONE_VM == linux.CLONE_VM,
- NewSignalHandlers: flags&linux.CLONE_SIGHAND == linux.CLONE_SIGHAND,
- NewThreadGroup: flags&linux.CLONE_THREAD == linux.CLONE_THREAD,
- NewPIDNamespace: flags&linux.CLONE_NEWPID == linux.CLONE_NEWPID,
- NewUserNamespace: flags&linux.CLONE_NEWUSER == linux.CLONE_NEWUSER,
- NewNetworkNamespace: flags&linux.CLONE_NEWNET == linux.CLONE_NEWNET,
- NewFiles: flags&linux.CLONE_FILES == linux.CLONE_FILES,
- NewFSContext: flags&linux.CLONE_FS == linux.CLONE_FS,
- NewUTSNamespace: flags&linux.CLONE_NEWUTS == linux.CLONE_NEWUTS,
- NewIPCNamespace: flags&linux.CLONE_NEWIPC == linux.CLONE_NEWIPC,
- }
// "CLONE_NEWPID automatically implies CLONE_THREAD as well." - unshare(2)
- if opts.NewPIDNamespace {
- opts.NewThreadGroup = true
+ if flags&linux.CLONE_NEWPID != 0 {
+ flags |= linux.CLONE_THREAD
}
// "... specifying CLONE_NEWUSER automatically implies CLONE_THREAD. Since
// Linux 3.9, CLONE_NEWUSER also automatically implies CLONE_FS."
- if opts.NewUserNamespace {
- opts.NewThreadGroup = true
- opts.NewFSContext = true
+ if flags&linux.CLONE_NEWUSER != 0 {
+ flags |= linux.CLONE_THREAD | linux.CLONE_FS
}
- return 0, nil, t.Unshare(&opts)
+ return 0, nil, t.Unshare(flags)
}
// SchedYield implements linux syscall sched_yield(2).